/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
21*06bcd5d2SAndrew Rist 
22*06bcd5d2SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir #ifndef INCLUDED_PDFI_PDFPARSE_HXX
25cdf0e10cSrcweir #define INCLUDED_PDFI_PDFPARSE_HXX
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include <sal/types.h>
28cdf0e10cSrcweir #include <rtl/ustring.hxx>
29cdf0e10cSrcweir #include <rtl/string.hxx>
30cdf0e10cSrcweir 
31cdf0e10cSrcweir #include <vector>
32cdf0e10cSrcweir #include <hash_map>
33cdf0e10cSrcweir 
34cdf0e10cSrcweir namespace pdfparse
35cdf0e10cSrcweir {
36cdf0e10cSrcweir 
struct EmitImplData;
struct PDFContainer;

/** Abstract output interface that is handed to PDFEntry::emit().

    Concrete contexts have to implement the four pure virtual I/O hooks
    below. Their precise semantics are defined by the implementing classes,
    which are not part of this header.
*/
class EmitContext
{
    public:
    // output side; judging by the names write() takes nLen bytes from pBuf
    // and getCurPos() reports the current output position
    // NOTE(review): confirm against the implementing classes
    virtual bool write( const void* pBuf, unsigned int nLen ) = 0;
    virtual unsigned int getCurPos() = 0;
    // access to the original data, addressed by offset and length
    virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0;
    virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0;

    EmitContext( const PDFContainer* pTop = NULL );
    virtual ~EmitContext();

    // set this to deflate contained streams
    bool m_bDeflate;
    // set this to decrypt the PDF file
    bool m_bDecrypt;

    private:
    // PDFEntry is a friend so that it can access m_pImplData
    friend struct PDFEntry;
    // NOTE(review): raw pointer; who owns and frees it is decided in the
    // implementation file - confirm before copying a derived context
    EmitImplData* m_pImplData;
};
59cdf0e10cSrcweir 
60cdf0e10cSrcweir struct PDFEntry
61cdf0e10cSrcweir {
PDFEntrypdfparse::PDFEntry62cdf0e10cSrcweir     PDFEntry() {}
63cdf0e10cSrcweir     virtual ~PDFEntry();
64cdf0e10cSrcweir 
65cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const = 0;
66cdf0e10cSrcweir     virtual PDFEntry* clone() const = 0;
67cdf0e10cSrcweir 
68cdf0e10cSrcweir     protected:
69cdf0e10cSrcweir     EmitImplData* getEmitData( EmitContext& rContext ) const;
70cdf0e10cSrcweir     void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData ) const;
71cdf0e10cSrcweir };
72cdf0e10cSrcweir 
73cdf0e10cSrcweir struct PDFComment : public PDFEntry
74cdf0e10cSrcweir {
75cdf0e10cSrcweir     rtl::OString  m_aComment;
76cdf0e10cSrcweir 
PDFCommentpdfparse::PDFComment77cdf0e10cSrcweir     PDFComment( const rtl::OString& rComment )
78cdf0e10cSrcweir     : PDFEntry(), m_aComment( rComment ) {}
79cdf0e10cSrcweir     virtual ~PDFComment();
80cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
81cdf0e10cSrcweir     virtual PDFEntry* clone() const;
82cdf0e10cSrcweir };
83cdf0e10cSrcweir 
84cdf0e10cSrcweir struct PDFValue : public PDFEntry
85cdf0e10cSrcweir {
86cdf0e10cSrcweir     // abstract base class for simple values
PDFValuepdfparse::PDFValue87cdf0e10cSrcweir     PDFValue() : PDFEntry() {}
88cdf0e10cSrcweir     virtual ~PDFValue();
89cdf0e10cSrcweir };
90cdf0e10cSrcweir 
91cdf0e10cSrcweir struct PDFName : public PDFValue
92cdf0e10cSrcweir {
93cdf0e10cSrcweir     rtl::OString  m_aName;
94cdf0e10cSrcweir 
PDFNamepdfparse::PDFName95cdf0e10cSrcweir     PDFName( const rtl::OString& rName )
96cdf0e10cSrcweir     : PDFValue(), m_aName( rName ) {}
97cdf0e10cSrcweir     virtual ~PDFName();
98cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
99cdf0e10cSrcweir     virtual PDFEntry* clone() const;
100cdf0e10cSrcweir 
101cdf0e10cSrcweir     rtl::OUString getFilteredName() const;
102cdf0e10cSrcweir };
103cdf0e10cSrcweir 
104cdf0e10cSrcweir struct PDFString : public PDFValue
105cdf0e10cSrcweir {
106cdf0e10cSrcweir     rtl::OString  m_aString;
107cdf0e10cSrcweir 
PDFStringpdfparse::PDFString108cdf0e10cSrcweir     PDFString( const rtl::OString& rString )
109cdf0e10cSrcweir     : PDFValue(), m_aString( rString ) {}
110cdf0e10cSrcweir     virtual ~PDFString();
111cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
112cdf0e10cSrcweir     virtual PDFEntry* clone() const;
113cdf0e10cSrcweir 
114cdf0e10cSrcweir     rtl::OString getFilteredString() const;
115cdf0e10cSrcweir };
116cdf0e10cSrcweir 
117cdf0e10cSrcweir struct PDFNumber : public PDFValue
118cdf0e10cSrcweir {
119cdf0e10cSrcweir     double m_fValue;
120cdf0e10cSrcweir 
PDFNumberpdfparse::PDFNumber121cdf0e10cSrcweir     PDFNumber( double fVal )
122cdf0e10cSrcweir     : PDFValue(), m_fValue( fVal ) {}
123cdf0e10cSrcweir     virtual ~PDFNumber();
124cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
125cdf0e10cSrcweir     virtual PDFEntry* clone() const;
126cdf0e10cSrcweir };
127cdf0e10cSrcweir 
128cdf0e10cSrcweir struct PDFBool : public PDFValue
129cdf0e10cSrcweir {
130cdf0e10cSrcweir     bool m_bValue;
131cdf0e10cSrcweir 
PDFBoolpdfparse::PDFBool132cdf0e10cSrcweir     PDFBool( bool bVal )
133cdf0e10cSrcweir     : PDFValue(), m_bValue( bVal ) {}
134cdf0e10cSrcweir     virtual ~PDFBool();
135cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
136cdf0e10cSrcweir     virtual PDFEntry* clone() const;
137cdf0e10cSrcweir };
138cdf0e10cSrcweir 
139cdf0e10cSrcweir struct PDFObjectRef : public PDFValue
140cdf0e10cSrcweir {
141cdf0e10cSrcweir     unsigned int    m_nNumber;
142cdf0e10cSrcweir     unsigned int    m_nGeneration;
143cdf0e10cSrcweir 
PDFObjectRefpdfparse::PDFObjectRef144cdf0e10cSrcweir     PDFObjectRef( unsigned int nNr, unsigned int nGen )
145cdf0e10cSrcweir     : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {}
146cdf0e10cSrcweir     virtual ~PDFObjectRef();
147cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
148cdf0e10cSrcweir     virtual PDFEntry* clone() const;
149cdf0e10cSrcweir };
150cdf0e10cSrcweir 
151cdf0e10cSrcweir struct PDFNull : public PDFValue
152cdf0e10cSrcweir {
PDFNullpdfparse::PDFNull153cdf0e10cSrcweir     PDFNull() {}
154cdf0e10cSrcweir     virtual ~PDFNull();
155cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
156cdf0e10cSrcweir     virtual PDFEntry* clone() const;
157cdf0e10cSrcweir };
158cdf0e10cSrcweir 
159cdf0e10cSrcweir struct PDFObject;
160cdf0e10cSrcweir struct PDFContainer : public PDFEntry
161cdf0e10cSrcweir {
162cdf0e10cSrcweir     sal_Int32              m_nOffset;
163cdf0e10cSrcweir     std::vector<PDFEntry*> m_aSubElements;
164cdf0e10cSrcweir 
165cdf0e10cSrcweir     // this is an abstract base class for identifying
166cdf0e10cSrcweir     // entries that can contain sub elements besides comments
PDFContainerpdfparse::PDFContainer167cdf0e10cSrcweir     PDFContainer() : PDFEntry(), m_nOffset( 0 ) {}
168cdf0e10cSrcweir     virtual ~PDFContainer();
169cdf0e10cSrcweir     virtual bool emitSubElements( EmitContext& rWriteContext ) const;
170cdf0e10cSrcweir     virtual void cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const;
171cdf0e10cSrcweir 
172cdf0e10cSrcweir     PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const;
findObjectpdfparse::PDFContainer173cdf0e10cSrcweir     PDFObject* findObject( PDFObjectRef* pRef ) const
174cdf0e10cSrcweir     { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); }
175cdf0e10cSrcweir };
176cdf0e10cSrcweir 
177cdf0e10cSrcweir struct PDFArray : public PDFContainer
178cdf0e10cSrcweir {
PDFArraypdfparse::PDFArray179cdf0e10cSrcweir     PDFArray() {}
180cdf0e10cSrcweir     virtual ~PDFArray();
181cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
182cdf0e10cSrcweir     virtual PDFEntry* clone() const;
183cdf0e10cSrcweir };
184cdf0e10cSrcweir 
185cdf0e10cSrcweir struct PDFDict : public PDFContainer
186cdf0e10cSrcweir {
187cdf0e10cSrcweir     typedef std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash> Map;
188cdf0e10cSrcweir     Map m_aMap;
189cdf0e10cSrcweir 
PDFDictpdfparse::PDFDict190cdf0e10cSrcweir     PDFDict() {}
191cdf0e10cSrcweir     virtual ~PDFDict();
192cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
193cdf0e10cSrcweir     virtual PDFEntry* clone() const;
194cdf0e10cSrcweir 
195cdf0e10cSrcweir     // inserting a value of NULL will remove rName and the previous value
196cdf0e10cSrcweir     // from the dictionary
197cdf0e10cSrcweir     void insertValue( const rtl::OString& rName, PDFEntry* pValue );
198cdf0e10cSrcweir     // removes a name/value pair from the dict
199cdf0e10cSrcweir     void eraseValue( const rtl::OString& rName );
200cdf0e10cSrcweir     // builds new map as of sub elements
201cdf0e10cSrcweir     // returns NULL if successfull, else the first offending element
202cdf0e10cSrcweir     PDFEntry* buildMap();
203cdf0e10cSrcweir };
204cdf0e10cSrcweir 
205cdf0e10cSrcweir struct PDFStream : public PDFEntry
206cdf0e10cSrcweir {
207cdf0e10cSrcweir     unsigned int    m_nBeginOffset;
208cdf0e10cSrcweir     unsigned int    m_nEndOffset; // offset of the byte after the stream
209cdf0e10cSrcweir     PDFDict*        m_pDict;
210cdf0e10cSrcweir 
PDFStreampdfparse::PDFStream211cdf0e10cSrcweir     PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict )
212cdf0e10cSrcweir     : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {}
213cdf0e10cSrcweir     virtual ~PDFStream();
214cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
215cdf0e10cSrcweir     virtual PDFEntry* clone() const;
216cdf0e10cSrcweir 
217cdf0e10cSrcweir     unsigned int getDictLength( const PDFContainer* pObjectContainer = NULL ) const; // get contents of the "Length" entry of the dict
218cdf0e10cSrcweir };
219cdf0e10cSrcweir 
220cdf0e10cSrcweir struct PDFTrailer : public PDFContainer
221cdf0e10cSrcweir {
222cdf0e10cSrcweir     PDFDict*        m_pDict;
223cdf0e10cSrcweir 
PDFTrailerpdfparse::PDFTrailer224cdf0e10cSrcweir     PDFTrailer() : PDFContainer(), m_pDict( NULL ) {}
225cdf0e10cSrcweir     virtual ~PDFTrailer();
226cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
227cdf0e10cSrcweir     virtual PDFEntry* clone() const;
228cdf0e10cSrcweir };
229cdf0e10cSrcweir 
230cdf0e10cSrcweir struct PDFFileImplData;
231cdf0e10cSrcweir struct PDFFile : public PDFContainer
232cdf0e10cSrcweir {
233cdf0e10cSrcweir     private:
234cdf0e10cSrcweir     mutable PDFFileImplData*    m_pData;
235cdf0e10cSrcweir     PDFFileImplData*            impl_getData() const;
236cdf0e10cSrcweir     public:
237cdf0e10cSrcweir     unsigned int        m_nMajor;           // PDF major
238cdf0e10cSrcweir     unsigned int        m_nMinor;           // PDF minor
239cdf0e10cSrcweir 
PDFFilepdfparse::PDFFile240cdf0e10cSrcweir     PDFFile()
241cdf0e10cSrcweir     : PDFContainer(),
242cdf0e10cSrcweir       m_pData( NULL ),
243cdf0e10cSrcweir       m_nMajor( 0 ), m_nMinor( 0 )
244cdf0e10cSrcweir     {}
245cdf0e10cSrcweir     virtual ~PDFFile();
246cdf0e10cSrcweir 
247cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
248cdf0e10cSrcweir     virtual PDFEntry* clone() const;
249cdf0e10cSrcweir 
250cdf0e10cSrcweir     bool isEncrypted() const;
251cdf0e10cSrcweir     // this method checks whether rPwd is compatible with
252cdf0e10cSrcweir     // either user or owner password and sets up decrypt data in that case
253cdf0e10cSrcweir     // returns true if decryption can be done
254cdf0e10cSrcweir     bool setupDecryptionData( const rtl::OString& rPwd ) const;
255cdf0e10cSrcweir 
256cdf0e10cSrcweir     bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen,
257cdf0e10cSrcweir                   sal_uInt8* pOutBuffer,
258cdf0e10cSrcweir                   unsigned int nObject, unsigned int nGeneration ) const;
259cdf0e10cSrcweir 
260cdf0e10cSrcweir     rtl::OUString getDecryptionKey() const;
261cdf0e10cSrcweir };
262cdf0e10cSrcweir 
263cdf0e10cSrcweir struct PDFObject : public PDFContainer
264cdf0e10cSrcweir {
265cdf0e10cSrcweir     PDFEntry*       m_pObject;
266cdf0e10cSrcweir     PDFStream*      m_pStream;
267cdf0e10cSrcweir     unsigned int    m_nNumber;
268cdf0e10cSrcweir     unsigned int    m_nGeneration;
269cdf0e10cSrcweir 
PDFObjectpdfparse::PDFObject270cdf0e10cSrcweir     PDFObject( unsigned int nNr, unsigned int nGen )
271cdf0e10cSrcweir     : m_pObject( NULL ), m_pStream( NULL ), m_nNumber( nNr ), m_nGeneration( nGen ) {}
272cdf0e10cSrcweir     virtual ~PDFObject();
273cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
274cdf0e10cSrcweir     virtual PDFEntry* clone() const;
275cdf0e10cSrcweir 
276cdf0e10cSrcweir     // writes only the contained stream, deflated if necessary
277cdf0e10cSrcweir     bool writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const;
278cdf0e10cSrcweir 
279cdf0e10cSrcweir     private:
280cdf0e10cSrcweir     // returns true if stream is deflated
281cdf0e10cSrcweir     // fills *ppStream and *pBytes with start of stream and count of bytes
282cdf0e10cSrcweir     // memory returned in *ppStream must be freed with rtl_freeMemory afterwards
283cdf0e10cSrcweir     // fills in NULL and 0 in case of error
284cdf0e10cSrcweir     bool getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const;
285cdf0e10cSrcweir };
286cdf0e10cSrcweir 
287cdf0e10cSrcweir struct PDFPart : public PDFContainer
288cdf0e10cSrcweir {
PDFPartpdfparse::PDFPart289cdf0e10cSrcweir     PDFPart() : PDFContainer() {}
290cdf0e10cSrcweir     virtual ~PDFPart();
291cdf0e10cSrcweir     virtual bool emit( EmitContext& rWriteContext ) const;
292cdf0e10cSrcweir     virtual PDFEntry* clone() const;
293cdf0e10cSrcweir };
294cdf0e10cSrcweir 
295cdf0e10cSrcweir class PDFReader
296cdf0e10cSrcweir {
297cdf0e10cSrcweir     public:
PDFReader()298cdf0e10cSrcweir     PDFReader() {}
~PDFReader()299cdf0e10cSrcweir     ~PDFReader() {}
300cdf0e10cSrcweir 
301cdf0e10cSrcweir     PDFEntry* read( const char* pFileName );
302cdf0e10cSrcweir     PDFEntry* read( const char* pBuffer, unsigned int nLen );
303cdf0e10cSrcweir };
304cdf0e10cSrcweir 
305cdf0e10cSrcweir } // namespace
306cdf0e10cSrcweir 
307cdf0e10cSrcweir #endif
308