1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #ifndef INCLUDED_PDFI_PDFPARSE_HXX
25 #define INCLUDED_PDFI_PDFPARSE_HXX
26 
27 #include <sal/types.h>
28 #include <rtl/ustring.hxx>
29 #include <rtl/string.hxx>
30 
31 #include <vector>
32 #include <hash_map>
33 
34 namespace pdfparse
35 {
36 
struct EmitImplData;
struct PDFContainer;

/** Abstract sink that PDF entries are emitted into.
 *
 *  Subclasses have to supply the four pure virtual I/O primitives.
 *  Constructor and destructor are defined out of line, so how the
 *  flags and the implementation data are initialized is not visible
 *  in this header.
 */
class EmitContext
{
public:
    // NOTE(review): the contracts of the four primitives below are read off
    // their names and signatures only - confirm against the implementations.

    // presumably writes nLen bytes starting at pBuf
    virtual bool write(const void* pBuf, unsigned int nLen) = 0;
    // presumably reports the current output position
    virtual unsigned int getCurPos() = 0;
    // presumably copies nLen bytes found at nOrigOffset of the original data to the output
    virtual bool copyOrigBytes(unsigned int nOrigOffset, unsigned int nLen) = 0;
    // presumably reads nLen bytes found at nOrigOffset of the original data into pBuf
    virtual unsigned int readOrigBytes(unsigned int nOrigOffset, unsigned int nLen, void* pBuf) = 0;

    // pTop is optional and defaults to NULL
    EmitContext(const PDFContainer* pTop = NULL);
    virtual ~EmitContext();

    // set this to deflate contained streams
    bool m_bDeflate;

    // set this to decrypt the PDF file
    bool m_bDecrypt;

private:
    // PDFEntry declares getEmitData()/setEmitData() taking an EmitContext;
    // the friendship gives it access to the private pointer below
    friend struct PDFEntry;

    EmitImplData* m_pImplData;
};
59 
60 struct PDFEntry
61 {
PDFEntrypdfparse::PDFEntry62     PDFEntry() {}
63     virtual ~PDFEntry();
64 
65     virtual bool emit( EmitContext& rWriteContext ) const = 0;
66     virtual PDFEntry* clone() const = 0;
67 
68     protected:
69     EmitImplData* getEmitData( EmitContext& rContext ) const;
70     void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData ) const;
71 };
72 
73 struct PDFComment : public PDFEntry
74 {
75     rtl::OString  m_aComment;
76 
PDFCommentpdfparse::PDFComment77     PDFComment( const rtl::OString& rComment )
78     : PDFEntry(), m_aComment( rComment ) {}
79     virtual ~PDFComment();
80     virtual bool emit( EmitContext& rWriteContext ) const;
81     virtual PDFEntry* clone() const;
82 };
83 
84 struct PDFValue : public PDFEntry
85 {
86     // abstract base class for simple values
PDFValuepdfparse::PDFValue87     PDFValue() : PDFEntry() {}
88     virtual ~PDFValue();
89 };
90 
91 struct PDFName : public PDFValue
92 {
93     rtl::OString  m_aName;
94 
PDFNamepdfparse::PDFName95     PDFName( const rtl::OString& rName )
96     : PDFValue(), m_aName( rName ) {}
97     virtual ~PDFName();
98     virtual bool emit( EmitContext& rWriteContext ) const;
99     virtual PDFEntry* clone() const;
100 
101     rtl::OUString getFilteredName() const;
102 };
103 
104 struct PDFString : public PDFValue
105 {
106     rtl::OString  m_aString;
107 
PDFStringpdfparse::PDFString108     PDFString( const rtl::OString& rString )
109     : PDFValue(), m_aString( rString ) {}
110     virtual ~PDFString();
111     virtual bool emit( EmitContext& rWriteContext ) const;
112     virtual PDFEntry* clone() const;
113 
114     rtl::OString getFilteredString() const;
115 };
116 
117 struct PDFNumber : public PDFValue
118 {
119     double m_fValue;
120 
PDFNumberpdfparse::PDFNumber121     PDFNumber( double fVal )
122     : PDFValue(), m_fValue( fVal ) {}
123     virtual ~PDFNumber();
124     virtual bool emit( EmitContext& rWriteContext ) const;
125     virtual PDFEntry* clone() const;
126 };
127 
128 struct PDFBool : public PDFValue
129 {
130     bool m_bValue;
131 
PDFBoolpdfparse::PDFBool132     PDFBool( bool bVal )
133     : PDFValue(), m_bValue( bVal ) {}
134     virtual ~PDFBool();
135     virtual bool emit( EmitContext& rWriteContext ) const;
136     virtual PDFEntry* clone() const;
137 };
138 
139 struct PDFObjectRef : public PDFValue
140 {
141     unsigned int    m_nNumber;
142     unsigned int    m_nGeneration;
143 
PDFObjectRefpdfparse::PDFObjectRef144     PDFObjectRef( unsigned int nNr, unsigned int nGen )
145     : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {}
146     virtual ~PDFObjectRef();
147     virtual bool emit( EmitContext& rWriteContext ) const;
148     virtual PDFEntry* clone() const;
149 };
150 
151 struct PDFNull : public PDFValue
152 {
PDFNullpdfparse::PDFNull153     PDFNull() {}
154     virtual ~PDFNull();
155     virtual bool emit( EmitContext& rWriteContext ) const;
156     virtual PDFEntry* clone() const;
157 };
158 
159 struct PDFObject;
160 struct PDFContainer : public PDFEntry
161 {
162     sal_Int32              m_nOffset;
163     std::vector<PDFEntry*> m_aSubElements;
164 
165     // this is an abstract base class for identifying
166     // entries that can contain sub elements besides comments
PDFContainerpdfparse::PDFContainer167     PDFContainer() : PDFEntry(), m_nOffset( 0 ) {}
168     virtual ~PDFContainer();
169     virtual bool emitSubElements( EmitContext& rWriteContext ) const;
170     virtual void cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const;
171 
172     PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const;
findObjectpdfparse::PDFContainer173     PDFObject* findObject( PDFObjectRef* pRef ) const
174     { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); }
175 };
176 
177 struct PDFArray : public PDFContainer
178 {
PDFArraypdfparse::PDFArray179     PDFArray() {}
180     virtual ~PDFArray();
181     virtual bool emit( EmitContext& rWriteContext ) const;
182     virtual PDFEntry* clone() const;
183 };
184 
185 struct PDFDict : public PDFContainer
186 {
187     typedef std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash> Map;
188     Map m_aMap;
189 
PDFDictpdfparse::PDFDict190     PDFDict() {}
191     virtual ~PDFDict();
192     virtual bool emit( EmitContext& rWriteContext ) const;
193     virtual PDFEntry* clone() const;
194 
195     // inserting a value of NULL will remove rName and the previous value
196     // from the dictionary
197     void insertValue( const rtl::OString& rName, PDFEntry* pValue );
198     // removes a name/value pair from the dict
199     void eraseValue( const rtl::OString& rName );
200     // builds new map as of sub elements
201     // returns NULL if successfull, else the first offending element
202     PDFEntry* buildMap();
203 };
204 
205 struct PDFStream : public PDFEntry
206 {
207     unsigned int    m_nBeginOffset;
208     unsigned int    m_nEndOffset; // offset of the byte after the stream
209     PDFDict*        m_pDict;
210 
PDFStreampdfparse::PDFStream211     PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict )
212     : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {}
213     virtual ~PDFStream();
214     virtual bool emit( EmitContext& rWriteContext ) const;
215     virtual PDFEntry* clone() const;
216 
217     unsigned int getDictLength( const PDFContainer* pObjectContainer = NULL ) const; // get contents of the "Length" entry of the dict
218 };
219 
220 struct PDFTrailer : public PDFContainer
221 {
222     PDFDict*        m_pDict;
223 
PDFTrailerpdfparse::PDFTrailer224     PDFTrailer() : PDFContainer(), m_pDict( NULL ) {}
225     virtual ~PDFTrailer();
226     virtual bool emit( EmitContext& rWriteContext ) const;
227     virtual PDFEntry* clone() const;
228 };
229 
230 struct PDFFileImplData;
231 struct PDFFile : public PDFContainer
232 {
233     private:
234     mutable PDFFileImplData*    m_pData;
235     PDFFileImplData*            impl_getData() const;
236     public:
237     unsigned int        m_nMajor;           // PDF major
238     unsigned int        m_nMinor;           // PDF minor
239 
PDFFilepdfparse::PDFFile240     PDFFile()
241     : PDFContainer(),
242       m_pData( NULL ),
243       m_nMajor( 0 ), m_nMinor( 0 )
244     {}
245     virtual ~PDFFile();
246 
247     virtual bool emit( EmitContext& rWriteContext ) const;
248     virtual PDFEntry* clone() const;
249 
250     bool isEncrypted() const;
251     // this method checks whether rPwd is compatible with
252     // either user or owner password and sets up decrypt data in that case
253     // returns true if decryption can be done
254     bool setupDecryptionData( const rtl::OString& rPwd ) const;
255 
256     bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen,
257                   sal_uInt8* pOutBuffer,
258                   unsigned int nObject, unsigned int nGeneration ) const;
259 
260     rtl::OUString getDecryptionKey() const;
261 };
262 
263 struct PDFObject : public PDFContainer
264 {
265     PDFEntry*       m_pObject;
266     PDFStream*      m_pStream;
267     unsigned int    m_nNumber;
268     unsigned int    m_nGeneration;
269 
PDFObjectpdfparse::PDFObject270     PDFObject( unsigned int nNr, unsigned int nGen )
271     : m_pObject( NULL ), m_pStream( NULL ), m_nNumber( nNr ), m_nGeneration( nGen ) {}
272     virtual ~PDFObject();
273     virtual bool emit( EmitContext& rWriteContext ) const;
274     virtual PDFEntry* clone() const;
275 
276     // writes only the contained stream, deflated if necessary
277     bool writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const;
278 
279     private:
280     // returns true if stream is deflated
281     // fills *ppStream and *pBytes with start of stream and count of bytes
282     // memory returned in *ppStream must be freed with rtl_freeMemory afterwards
283     // fills in NULL and 0 in case of error
284     bool getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const;
285 };
286 
287 struct PDFPart : public PDFContainer
288 {
PDFPartpdfparse::PDFPart289     PDFPart() : PDFContainer() {}
290     virtual ~PDFPart();
291     virtual bool emit( EmitContext& rWriteContext ) const;
292     virtual PDFEntry* clone() const;
293 };
294 
295 class PDFReader
296 {
297     public:
PDFReader()298     PDFReader() {}
~PDFReader()299     ~PDFReader() {}
300 
301     PDFEntry* read( const char* pFileName );
302     PDFEntry* read( const char* pBuffer, unsigned int nLen );
303 };
304 
305 } // namespace
306 
307 #endif
308