1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 #ifndef INCLUDED_PDFI_PDFPARSE_HXX 25 #define INCLUDED_PDFI_PDFPARSE_HXX 26 27 #include <sal/types.h> 28 #include <rtl/ustring.hxx> 29 #include <rtl/string.hxx> 30 31 #include <vector> 32 #include <hash_map> 33 34 namespace pdfparse 35 { 36 37 struct EmitImplData; 38 struct PDFContainer; 39 class EmitContext 40 { 41 public: 42 virtual bool write( const void* pBuf, unsigned int nLen ) = 0; 43 virtual unsigned int getCurPos() = 0; 44 virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0; 45 virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0; 46 47 EmitContext( const PDFContainer* pTop = NULL ); 48 virtual ~EmitContext(); 49 50 // set this to deflate contained streams 51 bool m_bDeflate; 52 // set this to decrypt the PDF file 53 bool m_bDecrypt; 54 55 private: 56 friend struct PDFEntry; 57 EmitImplData* m_pImplData; 58 }; 59 60 struct PDFEntry 61 { PDFEntrypdfparse::PDFEntry62 PDFEntry() {} 63 virtual ~PDFEntry(); 64 65 virtual bool emit( EmitContext& rWriteContext ) const = 0; 66 virtual PDFEntry* clone() const = 0; 67 68 protected: 69 EmitImplData* getEmitData( EmitContext& rContext ) const; 70 void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData ) const; 71 }; 72 73 struct PDFComment : public PDFEntry 74 { 75 rtl::OString m_aComment; 76 PDFCommentpdfparse::PDFComment77 PDFComment( const rtl::OString& rComment ) 78 : PDFEntry(), m_aComment( rComment ) {} 79 virtual ~PDFComment(); 80 virtual bool emit( EmitContext& rWriteContext ) const; 81 virtual PDFEntry* clone() const; 82 }; 83 84 struct PDFValue : public PDFEntry 85 { 86 // abstract base class for simple values PDFValuepdfparse::PDFValue87 PDFValue() : PDFEntry() {} 88 virtual ~PDFValue(); 89 }; 90 91 struct PDFName : public PDFValue 92 { 93 rtl::OString m_aName; 94 PDFNamepdfparse::PDFName95 PDFName( const rtl::OString& rName ) 96 : PDFValue(), m_aName( rName ) {} 97 virtual ~PDFName(); 98 virtual bool emit( EmitContext& rWriteContext ) const; 99 virtual PDFEntry* clone() const; 100 101 rtl::OUString getFilteredName() const; 102 }; 103 104 struct PDFString : public PDFValue 105 { 106 rtl::OString m_aString; 107 PDFStringpdfparse::PDFString108 PDFString( const rtl::OString& rString ) 109 : PDFValue(), m_aString( rString ) {} 110 virtual ~PDFString(); 111 virtual bool emit( EmitContext& rWriteContext ) const; 112 virtual PDFEntry* clone() const; 113 114 rtl::OString getFilteredString() const; 115 }; 116 117 struct PDFNumber : public PDFValue 118 { 119 double m_fValue; 120 PDFNumberpdfparse::PDFNumber121 PDFNumber( double fVal ) 122 : PDFValue(), m_fValue( fVal ) {} 123 virtual ~PDFNumber(); 124 virtual bool emit( EmitContext& rWriteContext ) const; 125 virtual PDFEntry* clone() const; 126 }; 127 128 struct PDFBool : public PDFValue 129 { 130 bool m_bValue; 131 PDFBoolpdfparse::PDFBool132 PDFBool( bool bVal ) 133 : PDFValue(), m_bValue( bVal ) {} 134 virtual ~PDFBool(); 135 virtual bool emit( EmitContext& rWriteContext ) const; 136 virtual PDFEntry* clone() const; 137 }; 138 139 struct PDFObjectRef : public PDFValue 140 { 141 unsigned int m_nNumber; 142 unsigned int m_nGeneration; 143 PDFObjectRefpdfparse::PDFObjectRef144 PDFObjectRef( unsigned int nNr, unsigned int nGen ) 145 : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {} 146 virtual ~PDFObjectRef(); 147 virtual bool emit( EmitContext& rWriteContext ) const; 148 virtual PDFEntry* clone() const; 149 }; 150 151 struct PDFNull : public PDFValue 152 { PDFNullpdfparse::PDFNull153 PDFNull() {} 154 virtual ~PDFNull(); 155 virtual bool emit( EmitContext& rWriteContext ) const; 156 virtual PDFEntry* clone() const; 157 }; 158 159 struct PDFObject; 160 struct PDFContainer : public PDFEntry 161 { 162 sal_Int32 m_nOffset; 163 std::vector<PDFEntry*> m_aSubElements; 164 165 // this is an abstract base class for identifying 166 // entries that can contain sub elements besides comments PDFContainerpdfparse::PDFContainer167 PDFContainer() : PDFEntry(), m_nOffset( 0 ) {} 168 virtual ~PDFContainer(); 169 virtual bool emitSubElements( EmitContext& rWriteContext ) const; 170 virtual void cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const; 171 172 PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const; findObjectpdfparse::PDFContainer173 PDFObject* findObject( PDFObjectRef* pRef ) const 174 { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); } 175 }; 176 177 struct PDFArray : public PDFContainer 178 { PDFArraypdfparse::PDFArray179 PDFArray() {} 180 virtual ~PDFArray(); 181 virtual bool emit( EmitContext& rWriteContext ) const; 182 virtual PDFEntry* clone() const; 183 }; 184 185 struct PDFDict : public PDFContainer 186 { 187 typedef std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash> Map; 188 Map m_aMap; 189 PDFDictpdfparse::PDFDict190 PDFDict() {} 191 virtual ~PDFDict(); 192 virtual bool emit( EmitContext& rWriteContext ) const; 193 virtual PDFEntry* clone() const; 194 195 // inserting a value of NULL will remove rName and the previous value 196 // from the dictionary 197 void insertValue( const rtl::OString& rName, PDFEntry* pValue ); 198 // removes a name/value pair from the dict 199 void eraseValue( const rtl::OString& rName ); 200 // builds new map as of sub elements 201 // returns NULL if successful, else the first offending element 202 PDFEntry* buildMap(); 203 }; 204 205 struct PDFStream : public PDFEntry 206 { 207 unsigned int m_nBeginOffset; 208 unsigned int m_nEndOffset; // offset of the byte after the stream 209 PDFDict* m_pDict; 210 PDFStreampdfparse::PDFStream211 PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict ) 212 : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {} 213 virtual ~PDFStream(); 214 virtual bool emit( EmitContext& rWriteContext ) const; 215 virtual PDFEntry* clone() const; 216 217 unsigned int getDictLength( const PDFContainer* pObjectContainer = NULL ) const; // get contents of the "Length" entry of the dict 218 }; 219 220 struct PDFTrailer : public PDFContainer 221 { 222 PDFDict* m_pDict; 223 PDFTrailerpdfparse::PDFTrailer224 PDFTrailer() : PDFContainer(), m_pDict( NULL ) {} 225 virtual ~PDFTrailer(); 226 virtual bool emit( EmitContext& rWriteContext ) const; 227 virtual PDFEntry* clone() const; 228 }; 229 230 struct PDFFileImplData; 231 struct PDFFile : public PDFContainer 232 { 233 private: 234 mutable PDFFileImplData* m_pData; 235 PDFFileImplData* impl_getData() const; 236 public: 237 unsigned int m_nMajor; // PDF major 238 unsigned int m_nMinor; // PDF minor 239 PDFFilepdfparse::PDFFile240 PDFFile() 241 : PDFContainer(), 242 m_pData( NULL ), 243 m_nMajor( 0 ), m_nMinor( 0 ) 244 {} 245 virtual ~PDFFile(); 246 247 virtual bool emit( EmitContext& rWriteContext ) const; 248 virtual PDFEntry* clone() const; 249 250 bool isEncrypted() const; 251 // this method checks whether rPwd is compatible with 252 // either user or owner password and sets up decrypt data in that case 253 // returns true if decryption can be done 254 bool setupDecryptionData( const rtl::OString& rPwd ) const; 255 256 bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, 257 sal_uInt8* pOutBuffer, 258 unsigned int nObject, unsigned int nGeneration ) const; 259 260 rtl::OUString getDecryptionKey() const; 261 }; 262 263 struct PDFObject : public PDFContainer 264 { 265 PDFEntry* m_pObject; 266 PDFStream* m_pStream; 267 unsigned int m_nNumber; 268 unsigned int m_nGeneration; 269 PDFObjectpdfparse::PDFObject270 PDFObject( unsigned int nNr, unsigned int nGen ) 271 : m_pObject( NULL ), m_pStream( NULL ), m_nNumber( nNr ), m_nGeneration( nGen ) {} 272 virtual ~PDFObject(); 273 virtual bool emit( EmitContext& rWriteContext ) const; 274 virtual PDFEntry* clone() const; 275 276 // writes only the contained stream, deflated if necessary 277 bool writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const; 278 279 private: 280 // returns true if stream is deflated 281 // fills *ppStream and *pBytes with start of stream and count of bytes 282 // memory returned in *ppStream must be freed with rtl_freeMemory afterwards 283 // fills in NULL and 0 in case of error 284 bool getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const; 285 }; 286 287 struct PDFPart : public PDFContainer 288 { PDFPartpdfparse::PDFPart289 PDFPart() : PDFContainer() {} 290 virtual ~PDFPart(); 291 virtual bool emit( EmitContext& rWriteContext ) const; 292 virtual PDFEntry* clone() const; 293 }; 294 295 class PDFReader 296 { 297 public: PDFReader()298 PDFReader() {} ~PDFReader()299 ~PDFReader() {} 300 301 PDFEntry* read( const char* pFileName ); 302 PDFEntry* read( const char* pBuffer, unsigned int nLen ); 303 }; 304 305 } // namespace 306 307 #endif 308