/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org.  If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir #ifndef INCLUDED_PDFI_PDFPARSE_HXX
29*cdf0e10cSrcweir #define INCLUDED_PDFI_PDFPARSE_HXX
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <sal/types.h>
32*cdf0e10cSrcweir #include <rtl/ustring.hxx>
33*cdf0e10cSrcweir #include <rtl/string.hxx>
34*cdf0e10cSrcweir 
35*cdf0e10cSrcweir #include <vector>
36*cdf0e10cSrcweir #include <hash_map>
37*cdf0e10cSrcweir 
38*cdf0e10cSrcweir namespace pdfparse
39*cdf0e10cSrcweir {
40*cdf0e10cSrcweir 
// opaque helper type; only used by pointer in this header
struct EmitImplData;
struct PDFContainer;

// Abstract context passed to PDFEntry::emit(). The four I/O primitives
// are pure virtual and have to be supplied by a subclass.
class EmitContext
{
public:
    // NOTE(review): presumably writes nLen bytes from pBuf to the output
    // and returns true on success - confirm against the implementations
    virtual bool write( const void* pBuf, unsigned int nLen ) = 0;

    // NOTE(review): presumably the current position in the output
    virtual unsigned int getCurPos() = 0;

    // NOTE(review): presumably copies nLen bytes found at nOrigOffset in
    // the original data to the output
    virtual bool copyOrigBytes( unsigned int nOrigOffset,
                                unsigned int nLen ) = 0;

    // NOTE(review): presumably reads nLen bytes found at nOrigOffset in
    // the original data into pBuf; meaning of the return value should be
    // confirmed against the implementations
    virtual unsigned int readOrigBytes( unsigned int nOrigOffset,
                                        unsigned int nLen,
                                        void* pBuf ) = 0;

    // pTop is optional and defaults to NULL; the constructor body is not
    // part of this header
    EmitContext( const PDFContainer* pTop = NULL );
    virtual ~EmitContext();

    // set this to deflate contained streams
    bool m_bDeflate;
    // set this to decrypt the PDF file
    bool m_bDecrypt;

private:
    // PDFEntry is granted access to m_pImplData
    friend struct PDFEntry;

    // NOTE(review): raw pointer and no copy operations declared - check
    // who owns this before copying an EmitContext subclass
    EmitImplData* m_pImplData;
};
63*cdf0e10cSrcweir 
64*cdf0e10cSrcweir struct PDFEntry
65*cdf0e10cSrcweir {
66*cdf0e10cSrcweir     PDFEntry() {}
67*cdf0e10cSrcweir     virtual ~PDFEntry();
68*cdf0e10cSrcweir 
69*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const = 0;
70*cdf0e10cSrcweir     virtual PDFEntry* clone() const = 0;
71*cdf0e10cSrcweir 
72*cdf0e10cSrcweir     protected:
73*cdf0e10cSrcweir     EmitImplData* getEmitData( EmitContext& rContext ) const;
74*cdf0e10cSrcweir     void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData ) const;
75*cdf0e10cSrcweir };
76*cdf0e10cSrcweir 
77*cdf0e10cSrcweir struct PDFComment : public PDFEntry
78*cdf0e10cSrcweir {
79*cdf0e10cSrcweir     rtl::OString  m_aComment;
80*cdf0e10cSrcweir 
81*cdf0e10cSrcweir     PDFComment( const rtl::OString& rComment )
82*cdf0e10cSrcweir     : PDFEntry(), m_aComment( rComment ) {}
83*cdf0e10cSrcweir     virtual ~PDFComment();
84*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
85*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
86*cdf0e10cSrcweir };
87*cdf0e10cSrcweir 
88*cdf0e10cSrcweir struct PDFValue : public PDFEntry
89*cdf0e10cSrcweir {
90*cdf0e10cSrcweir     // abstract base class for simple values
91*cdf0e10cSrcweir     PDFValue() : PDFEntry() {}
92*cdf0e10cSrcweir     virtual ~PDFValue();
93*cdf0e10cSrcweir };
94*cdf0e10cSrcweir 
95*cdf0e10cSrcweir struct PDFName : public PDFValue
96*cdf0e10cSrcweir {
97*cdf0e10cSrcweir     rtl::OString  m_aName;
98*cdf0e10cSrcweir 
99*cdf0e10cSrcweir     PDFName( const rtl::OString& rName )
100*cdf0e10cSrcweir     : PDFValue(), m_aName( rName ) {}
101*cdf0e10cSrcweir     virtual ~PDFName();
102*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
103*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
104*cdf0e10cSrcweir 
105*cdf0e10cSrcweir     rtl::OUString getFilteredName() const;
106*cdf0e10cSrcweir };
107*cdf0e10cSrcweir 
108*cdf0e10cSrcweir struct PDFString : public PDFValue
109*cdf0e10cSrcweir {
110*cdf0e10cSrcweir     rtl::OString  m_aString;
111*cdf0e10cSrcweir 
112*cdf0e10cSrcweir     PDFString( const rtl::OString& rString )
113*cdf0e10cSrcweir     : PDFValue(), m_aString( rString ) {}
114*cdf0e10cSrcweir     virtual ~PDFString();
115*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
116*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
117*cdf0e10cSrcweir 
118*cdf0e10cSrcweir     rtl::OString getFilteredString() const;
119*cdf0e10cSrcweir };
120*cdf0e10cSrcweir 
121*cdf0e10cSrcweir struct PDFNumber : public PDFValue
122*cdf0e10cSrcweir {
123*cdf0e10cSrcweir     double m_fValue;
124*cdf0e10cSrcweir 
125*cdf0e10cSrcweir     PDFNumber( double fVal )
126*cdf0e10cSrcweir     : PDFValue(), m_fValue( fVal ) {}
127*cdf0e10cSrcweir     virtual ~PDFNumber();
128*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
129*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
130*cdf0e10cSrcweir };
131*cdf0e10cSrcweir 
132*cdf0e10cSrcweir struct PDFBool : public PDFValue
133*cdf0e10cSrcweir {
134*cdf0e10cSrcweir     bool m_bValue;
135*cdf0e10cSrcweir 
136*cdf0e10cSrcweir     PDFBool( bool bVal )
137*cdf0e10cSrcweir     : PDFValue(), m_bValue( bVal ) {}
138*cdf0e10cSrcweir     virtual ~PDFBool();
139*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
140*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
141*cdf0e10cSrcweir };
142*cdf0e10cSrcweir 
143*cdf0e10cSrcweir struct PDFObjectRef : public PDFValue
144*cdf0e10cSrcweir {
145*cdf0e10cSrcweir     unsigned int    m_nNumber;
146*cdf0e10cSrcweir     unsigned int    m_nGeneration;
147*cdf0e10cSrcweir 
148*cdf0e10cSrcweir     PDFObjectRef( unsigned int nNr, unsigned int nGen )
149*cdf0e10cSrcweir     : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {}
150*cdf0e10cSrcweir     virtual ~PDFObjectRef();
151*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
152*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
153*cdf0e10cSrcweir };
154*cdf0e10cSrcweir 
155*cdf0e10cSrcweir struct PDFNull : public PDFValue
156*cdf0e10cSrcweir {
157*cdf0e10cSrcweir     PDFNull() {}
158*cdf0e10cSrcweir     virtual ~PDFNull();
159*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
160*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
161*cdf0e10cSrcweir };
162*cdf0e10cSrcweir 
163*cdf0e10cSrcweir struct PDFObject;
164*cdf0e10cSrcweir struct PDFContainer : public PDFEntry
165*cdf0e10cSrcweir {
166*cdf0e10cSrcweir     sal_Int32              m_nOffset;
167*cdf0e10cSrcweir     std::vector<PDFEntry*> m_aSubElements;
168*cdf0e10cSrcweir 
169*cdf0e10cSrcweir     // this is an abstract base class for identifying
170*cdf0e10cSrcweir     // entries that can contain sub elements besides comments
171*cdf0e10cSrcweir     PDFContainer() : PDFEntry(), m_nOffset( 0 ) {}
172*cdf0e10cSrcweir     virtual ~PDFContainer();
173*cdf0e10cSrcweir     virtual bool emitSubElements( EmitContext& rWriteContext ) const;
174*cdf0e10cSrcweir     virtual void cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const;
175*cdf0e10cSrcweir 
176*cdf0e10cSrcweir     PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const;
177*cdf0e10cSrcweir     PDFObject* findObject( PDFObjectRef* pRef ) const
178*cdf0e10cSrcweir     { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); }
179*cdf0e10cSrcweir };
180*cdf0e10cSrcweir 
181*cdf0e10cSrcweir struct PDFArray : public PDFContainer
182*cdf0e10cSrcweir {
183*cdf0e10cSrcweir     PDFArray() {}
184*cdf0e10cSrcweir     virtual ~PDFArray();
185*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
186*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
187*cdf0e10cSrcweir };
188*cdf0e10cSrcweir 
189*cdf0e10cSrcweir struct PDFDict : public PDFContainer
190*cdf0e10cSrcweir {
191*cdf0e10cSrcweir     typedef std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash> Map;
192*cdf0e10cSrcweir     Map m_aMap;
193*cdf0e10cSrcweir 
194*cdf0e10cSrcweir     PDFDict() {}
195*cdf0e10cSrcweir     virtual ~PDFDict();
196*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
197*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
198*cdf0e10cSrcweir 
199*cdf0e10cSrcweir     // inserting a value of NULL will remove rName and the previous value
200*cdf0e10cSrcweir     // from the dictionary
201*cdf0e10cSrcweir     void insertValue( const rtl::OString& rName, PDFEntry* pValue );
202*cdf0e10cSrcweir     // removes a name/value pair from the dict
203*cdf0e10cSrcweir     void eraseValue( const rtl::OString& rName );
204*cdf0e10cSrcweir     // builds new map as of sub elements
205*cdf0e10cSrcweir     // returns NULL if successfull, else the first offending element
206*cdf0e10cSrcweir     PDFEntry* buildMap();
207*cdf0e10cSrcweir };
208*cdf0e10cSrcweir 
209*cdf0e10cSrcweir struct PDFStream : public PDFEntry
210*cdf0e10cSrcweir {
211*cdf0e10cSrcweir     unsigned int    m_nBeginOffset;
212*cdf0e10cSrcweir     unsigned int    m_nEndOffset; // offset of the byte after the stream
213*cdf0e10cSrcweir     PDFDict*        m_pDict;
214*cdf0e10cSrcweir 
215*cdf0e10cSrcweir     PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict )
216*cdf0e10cSrcweir     : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {}
217*cdf0e10cSrcweir     virtual ~PDFStream();
218*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
219*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
220*cdf0e10cSrcweir 
221*cdf0e10cSrcweir     unsigned int getDictLength( const PDFContainer* pObjectContainer = NULL ) const; // get contents of the "Length" entry of the dict
222*cdf0e10cSrcweir };
223*cdf0e10cSrcweir 
224*cdf0e10cSrcweir struct PDFTrailer : public PDFContainer
225*cdf0e10cSrcweir {
226*cdf0e10cSrcweir     PDFDict*        m_pDict;
227*cdf0e10cSrcweir 
228*cdf0e10cSrcweir     PDFTrailer() : PDFContainer(), m_pDict( NULL ) {}
229*cdf0e10cSrcweir     virtual ~PDFTrailer();
230*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
231*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
232*cdf0e10cSrcweir };
233*cdf0e10cSrcweir 
234*cdf0e10cSrcweir struct PDFFileImplData;
235*cdf0e10cSrcweir struct PDFFile : public PDFContainer
236*cdf0e10cSrcweir {
237*cdf0e10cSrcweir     private:
238*cdf0e10cSrcweir     mutable PDFFileImplData*    m_pData;
239*cdf0e10cSrcweir     PDFFileImplData*            impl_getData() const;
240*cdf0e10cSrcweir     public:
241*cdf0e10cSrcweir     unsigned int        m_nMajor;           // PDF major
242*cdf0e10cSrcweir     unsigned int        m_nMinor;           // PDF minor
243*cdf0e10cSrcweir 
244*cdf0e10cSrcweir     PDFFile()
245*cdf0e10cSrcweir     : PDFContainer(),
246*cdf0e10cSrcweir       m_pData( NULL ),
247*cdf0e10cSrcweir       m_nMajor( 0 ), m_nMinor( 0 )
248*cdf0e10cSrcweir     {}
249*cdf0e10cSrcweir     virtual ~PDFFile();
250*cdf0e10cSrcweir 
251*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
252*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
253*cdf0e10cSrcweir 
254*cdf0e10cSrcweir     bool isEncrypted() const;
255*cdf0e10cSrcweir     // this method checks whether rPwd is compatible with
256*cdf0e10cSrcweir     // either user or owner password and sets up decrypt data in that case
257*cdf0e10cSrcweir     // returns true if decryption can be done
258*cdf0e10cSrcweir     bool setupDecryptionData( const rtl::OString& rPwd ) const;
259*cdf0e10cSrcweir 
260*cdf0e10cSrcweir     bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen,
261*cdf0e10cSrcweir                   sal_uInt8* pOutBuffer,
262*cdf0e10cSrcweir                   unsigned int nObject, unsigned int nGeneration ) const;
263*cdf0e10cSrcweir 
264*cdf0e10cSrcweir     rtl::OUString getDecryptionKey() const;
265*cdf0e10cSrcweir };
266*cdf0e10cSrcweir 
267*cdf0e10cSrcweir struct PDFObject : public PDFContainer
268*cdf0e10cSrcweir {
269*cdf0e10cSrcweir     PDFEntry*       m_pObject;
270*cdf0e10cSrcweir     PDFStream*      m_pStream;
271*cdf0e10cSrcweir     unsigned int    m_nNumber;
272*cdf0e10cSrcweir     unsigned int    m_nGeneration;
273*cdf0e10cSrcweir 
274*cdf0e10cSrcweir     PDFObject( unsigned int nNr, unsigned int nGen )
275*cdf0e10cSrcweir     : m_pObject( NULL ), m_pStream( NULL ), m_nNumber( nNr ), m_nGeneration( nGen ) {}
276*cdf0e10cSrcweir     virtual ~PDFObject();
277*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
278*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
279*cdf0e10cSrcweir 
280*cdf0e10cSrcweir     // writes only the contained stream, deflated if necessary
281*cdf0e10cSrcweir     bool writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const;
282*cdf0e10cSrcweir 
283*cdf0e10cSrcweir     private:
284*cdf0e10cSrcweir     // returns true if stream is deflated
285*cdf0e10cSrcweir     // fills *ppStream and *pBytes with start of stream and count of bytes
286*cdf0e10cSrcweir     // memory returned in *ppStream must be freed with rtl_freeMemory afterwards
287*cdf0e10cSrcweir     // fills in NULL and 0 in case of error
288*cdf0e10cSrcweir     bool getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const;
289*cdf0e10cSrcweir };
290*cdf0e10cSrcweir 
291*cdf0e10cSrcweir struct PDFPart : public PDFContainer
292*cdf0e10cSrcweir {
293*cdf0e10cSrcweir     PDFPart() : PDFContainer() {}
294*cdf0e10cSrcweir     virtual ~PDFPart();
295*cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
296*cdf0e10cSrcweir     virtual PDFEntry* clone() const;
297*cdf0e10cSrcweir };
298*cdf0e10cSrcweir 
299*cdf0e10cSrcweir class PDFReader
300*cdf0e10cSrcweir {
301*cdf0e10cSrcweir     public:
302*cdf0e10cSrcweir     PDFReader() {}
303*cdf0e10cSrcweir     ~PDFReader() {}
304*cdf0e10cSrcweir 
305*cdf0e10cSrcweir     PDFEntry* read( const char* pFileName );
306*cdf0e10cSrcweir     PDFEntry* read( const char* pBuffer, unsigned int nLen );
307*cdf0e10cSrcweir };
308*cdf0e10cSrcweir 
309*cdf0e10cSrcweir } // namespace
310*cdf0e10cSrcweir 
311*cdf0e10cSrcweir #endif
312