1*06bcd5d2SAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*06bcd5d2SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*06bcd5d2SAndrew Rist * or more contributor license agreements. See the NOTICE file 5*06bcd5d2SAndrew Rist * distributed with this work for additional information 6*06bcd5d2SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*06bcd5d2SAndrew Rist * to you under the Apache License, Version 2.0 (the 8*06bcd5d2SAndrew Rist * "License"); you may not use this file except in compliance 9*06bcd5d2SAndrew Rist * with the License. You may obtain a copy of the License at 10*06bcd5d2SAndrew Rist * 11*06bcd5d2SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*06bcd5d2SAndrew Rist * 13*06bcd5d2SAndrew Rist * Unless required by applicable law or agreed to in writing, 14*06bcd5d2SAndrew Rist * software distributed under the License is distributed on an 15*06bcd5d2SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*06bcd5d2SAndrew Rist * KIND, either express or implied. See the License for the 17*06bcd5d2SAndrew Rist * specific language governing permissions and limitations 18*06bcd5d2SAndrew Rist * under the License. 19*06bcd5d2SAndrew Rist * 20*06bcd5d2SAndrew Rist *************************************************************/ 21*06bcd5d2SAndrew Rist 22*06bcd5d2SAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir #ifndef INCLUDED_PDFI_PDFPARSE_HXX 25cdf0e10cSrcweir #define INCLUDED_PDFI_PDFPARSE_HXX 26cdf0e10cSrcweir 27cdf0e10cSrcweir #include <sal/types.h> 28cdf0e10cSrcweir #include <rtl/ustring.hxx> 29cdf0e10cSrcweir #include <rtl/string.hxx> 30cdf0e10cSrcweir 31cdf0e10cSrcweir #include <vector> 32cdf0e10cSrcweir #include <hash_map> 33cdf0e10cSrcweir 34cdf0e10cSrcweir namespace pdfparse 35cdf0e10cSrcweir { 36cdf0e10cSrcweir 37cdf0e10cSrcweir struct EmitImplData; 38cdf0e10cSrcweir struct PDFContainer; 39cdf0e10cSrcweir class EmitContext 40cdf0e10cSrcweir { 41cdf0e10cSrcweir public: 42cdf0e10cSrcweir virtual bool write( const void* pBuf, unsigned int nLen ) = 0; 43cdf0e10cSrcweir virtual unsigned int getCurPos() = 0; 44cdf0e10cSrcweir virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0; 45cdf0e10cSrcweir virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0; 46cdf0e10cSrcweir 47cdf0e10cSrcweir EmitContext( const PDFContainer* pTop = NULL ); 48cdf0e10cSrcweir virtual ~EmitContext(); 49cdf0e10cSrcweir 50cdf0e10cSrcweir // set this to deflate contained streams 51cdf0e10cSrcweir bool m_bDeflate; 52cdf0e10cSrcweir // set this to decrypt the PDF file 53cdf0e10cSrcweir bool m_bDecrypt; 54cdf0e10cSrcweir 55cdf0e10cSrcweir private: 56cdf0e10cSrcweir friend struct PDFEntry; 57cdf0e10cSrcweir EmitImplData* m_pImplData; 58cdf0e10cSrcweir }; 59cdf0e10cSrcweir 60cdf0e10cSrcweir struct PDFEntry 61cdf0e10cSrcweir { PDFEntrypdfparse::PDFEntry62cdf0e10cSrcweir PDFEntry() {} 63cdf0e10cSrcweir virtual ~PDFEntry(); 64cdf0e10cSrcweir 65cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const = 0; 66cdf0e10cSrcweir virtual PDFEntry* clone() const = 0; 67cdf0e10cSrcweir 68cdf0e10cSrcweir protected: 69cdf0e10cSrcweir EmitImplData* getEmitData( EmitContext& rContext ) const; 70cdf0e10cSrcweir void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData ) const; 71cdf0e10cSrcweir }; 72cdf0e10cSrcweir 73cdf0e10cSrcweir struct PDFComment : public PDFEntry 74cdf0e10cSrcweir { 75cdf0e10cSrcweir rtl::OString m_aComment; 76cdf0e10cSrcweir PDFCommentpdfparse::PDFComment77cdf0e10cSrcweir PDFComment( const rtl::OString& rComment ) 78cdf0e10cSrcweir : PDFEntry(), m_aComment( rComment ) {} 79cdf0e10cSrcweir virtual ~PDFComment(); 80cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 81cdf0e10cSrcweir virtual PDFEntry* clone() const; 82cdf0e10cSrcweir }; 83cdf0e10cSrcweir 84cdf0e10cSrcweir struct PDFValue : public PDFEntry 85cdf0e10cSrcweir { 86cdf0e10cSrcweir // abstract base class for simple values PDFValuepdfparse::PDFValue87cdf0e10cSrcweir PDFValue() : PDFEntry() {} 88cdf0e10cSrcweir virtual ~PDFValue(); 89cdf0e10cSrcweir }; 90cdf0e10cSrcweir 91cdf0e10cSrcweir struct PDFName : public PDFValue 92cdf0e10cSrcweir { 93cdf0e10cSrcweir rtl::OString m_aName; 94cdf0e10cSrcweir PDFNamepdfparse::PDFName95cdf0e10cSrcweir PDFName( const rtl::OString& rName ) 96cdf0e10cSrcweir : PDFValue(), m_aName( rName ) {} 97cdf0e10cSrcweir virtual ~PDFName(); 98cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 99cdf0e10cSrcweir virtual PDFEntry* clone() const; 100cdf0e10cSrcweir 101cdf0e10cSrcweir rtl::OUString getFilteredName() const; 102cdf0e10cSrcweir }; 103cdf0e10cSrcweir 104cdf0e10cSrcweir struct PDFString : public PDFValue 105cdf0e10cSrcweir { 106cdf0e10cSrcweir rtl::OString m_aString; 107cdf0e10cSrcweir PDFStringpdfparse::PDFString108cdf0e10cSrcweir PDFString( const rtl::OString& rString ) 109cdf0e10cSrcweir : PDFValue(), m_aString( rString ) {} 110cdf0e10cSrcweir virtual ~PDFString(); 111cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 112cdf0e10cSrcweir virtual PDFEntry* clone() const; 113cdf0e10cSrcweir 114cdf0e10cSrcweir rtl::OString getFilteredString() const; 115cdf0e10cSrcweir }; 116cdf0e10cSrcweir 117cdf0e10cSrcweir struct PDFNumber : public PDFValue 118cdf0e10cSrcweir { 119cdf0e10cSrcweir double m_fValue; 120cdf0e10cSrcweir PDFNumberpdfparse::PDFNumber121cdf0e10cSrcweir PDFNumber( double fVal ) 122cdf0e10cSrcweir : PDFValue(), m_fValue( fVal ) {} 123cdf0e10cSrcweir virtual ~PDFNumber(); 124cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 125cdf0e10cSrcweir virtual PDFEntry* clone() const; 126cdf0e10cSrcweir }; 127cdf0e10cSrcweir 128cdf0e10cSrcweir struct PDFBool : public PDFValue 129cdf0e10cSrcweir { 130cdf0e10cSrcweir bool m_bValue; 131cdf0e10cSrcweir PDFBoolpdfparse::PDFBool132cdf0e10cSrcweir PDFBool( bool bVal ) 133cdf0e10cSrcweir : PDFValue(), m_bValue( bVal ) {} 134cdf0e10cSrcweir virtual ~PDFBool(); 135cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 136cdf0e10cSrcweir virtual PDFEntry* clone() const; 137cdf0e10cSrcweir }; 138cdf0e10cSrcweir 139cdf0e10cSrcweir struct PDFObjectRef : public PDFValue 140cdf0e10cSrcweir { 141cdf0e10cSrcweir unsigned int m_nNumber; 142cdf0e10cSrcweir unsigned int m_nGeneration; 143cdf0e10cSrcweir PDFObjectRefpdfparse::PDFObjectRef144cdf0e10cSrcweir PDFObjectRef( unsigned int nNr, unsigned int nGen ) 145cdf0e10cSrcweir : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {} 146cdf0e10cSrcweir virtual ~PDFObjectRef(); 147cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 148cdf0e10cSrcweir virtual PDFEntry* clone() const; 149cdf0e10cSrcweir }; 150cdf0e10cSrcweir 151cdf0e10cSrcweir struct PDFNull : public PDFValue 152cdf0e10cSrcweir { PDFNullpdfparse::PDFNull153cdf0e10cSrcweir PDFNull() {} 154cdf0e10cSrcweir virtual ~PDFNull(); 155cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 156cdf0e10cSrcweir virtual PDFEntry* clone() const; 157cdf0e10cSrcweir }; 158cdf0e10cSrcweir 159cdf0e10cSrcweir struct PDFObject; 160cdf0e10cSrcweir struct PDFContainer : public PDFEntry 161cdf0e10cSrcweir { 162cdf0e10cSrcweir sal_Int32 m_nOffset; 163cdf0e10cSrcweir std::vector<PDFEntry*> m_aSubElements; 164cdf0e10cSrcweir 165cdf0e10cSrcweir // this is an abstract base class for identifying 166cdf0e10cSrcweir // entries that can contain sub elements besides comments PDFContainerpdfparse::PDFContainer167cdf0e10cSrcweir PDFContainer() : PDFEntry(), m_nOffset( 0 ) {} 168cdf0e10cSrcweir virtual ~PDFContainer(); 169cdf0e10cSrcweir virtual bool emitSubElements( EmitContext& rWriteContext ) const; 170cdf0e10cSrcweir virtual void cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const; 171cdf0e10cSrcweir 172cdf0e10cSrcweir PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const; findObjectpdfparse::PDFContainer173cdf0e10cSrcweir PDFObject* findObject( PDFObjectRef* pRef ) const 174cdf0e10cSrcweir { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); } 175cdf0e10cSrcweir }; 176cdf0e10cSrcweir 177cdf0e10cSrcweir struct PDFArray : public PDFContainer 178cdf0e10cSrcweir { PDFArraypdfparse::PDFArray179cdf0e10cSrcweir PDFArray() {} 180cdf0e10cSrcweir virtual ~PDFArray(); 181cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 182cdf0e10cSrcweir virtual PDFEntry* clone() const; 183cdf0e10cSrcweir }; 184cdf0e10cSrcweir 185cdf0e10cSrcweir struct PDFDict : public PDFContainer 186cdf0e10cSrcweir { 187cdf0e10cSrcweir typedef std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash> Map; 188cdf0e10cSrcweir Map m_aMap; 189cdf0e10cSrcweir PDFDictpdfparse::PDFDict190cdf0e10cSrcweir PDFDict() {} 191cdf0e10cSrcweir virtual ~PDFDict(); 192cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 193cdf0e10cSrcweir virtual PDFEntry* clone() const; 194cdf0e10cSrcweir 195cdf0e10cSrcweir // inserting a value of NULL will remove rName and the previous value 196cdf0e10cSrcweir // from the dictionary 197cdf0e10cSrcweir void insertValue( const rtl::OString& rName, PDFEntry* pValue ); 198cdf0e10cSrcweir // removes a name/value pair from the dict 199cdf0e10cSrcweir void eraseValue( const rtl::OString& rName ); 200cdf0e10cSrcweir // builds new map as of sub elements 201cdf0e10cSrcweir // returns NULL if successfull, else the first offending element 202cdf0e10cSrcweir PDFEntry* buildMap(); 203cdf0e10cSrcweir }; 204cdf0e10cSrcweir 205cdf0e10cSrcweir struct PDFStream : public PDFEntry 206cdf0e10cSrcweir { 207cdf0e10cSrcweir unsigned int m_nBeginOffset; 208cdf0e10cSrcweir unsigned int m_nEndOffset; // offset of the byte after the stream 209cdf0e10cSrcweir PDFDict* m_pDict; 210cdf0e10cSrcweir PDFStreampdfparse::PDFStream211cdf0e10cSrcweir PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict ) 212cdf0e10cSrcweir : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {} 213cdf0e10cSrcweir virtual ~PDFStream(); 214cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 215cdf0e10cSrcweir virtual PDFEntry* clone() const; 216cdf0e10cSrcweir 217cdf0e10cSrcweir unsigned int getDictLength( const PDFContainer* pObjectContainer = NULL ) const; // get contents of the "Length" entry of the dict 218cdf0e10cSrcweir }; 219cdf0e10cSrcweir 220cdf0e10cSrcweir struct PDFTrailer : public PDFContainer 221cdf0e10cSrcweir { 222cdf0e10cSrcweir PDFDict* m_pDict; 223cdf0e10cSrcweir PDFTrailerpdfparse::PDFTrailer224cdf0e10cSrcweir PDFTrailer() : PDFContainer(), m_pDict( NULL ) {} 225cdf0e10cSrcweir virtual ~PDFTrailer(); 226cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 227cdf0e10cSrcweir virtual PDFEntry* clone() const; 228cdf0e10cSrcweir }; 229cdf0e10cSrcweir 230cdf0e10cSrcweir struct PDFFileImplData; 231cdf0e10cSrcweir struct PDFFile : public PDFContainer 232cdf0e10cSrcweir { 233cdf0e10cSrcweir private: 234cdf0e10cSrcweir mutable PDFFileImplData* m_pData; 235cdf0e10cSrcweir PDFFileImplData* impl_getData() const; 236cdf0e10cSrcweir public: 237cdf0e10cSrcweir unsigned int m_nMajor; // PDF major 238cdf0e10cSrcweir unsigned int m_nMinor; // PDF minor 239cdf0e10cSrcweir PDFFilepdfparse::PDFFile240cdf0e10cSrcweir PDFFile() 241cdf0e10cSrcweir : PDFContainer(), 242cdf0e10cSrcweir m_pData( NULL ), 243cdf0e10cSrcweir m_nMajor( 0 ), m_nMinor( 0 ) 244cdf0e10cSrcweir {} 245cdf0e10cSrcweir virtual ~PDFFile(); 246cdf0e10cSrcweir 247cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 248cdf0e10cSrcweir virtual PDFEntry* clone() const; 249cdf0e10cSrcweir 250cdf0e10cSrcweir bool isEncrypted() const; 251cdf0e10cSrcweir // this method checks whether rPwd is compatible with 252cdf0e10cSrcweir // either user or owner password and sets up decrypt data in that case 253cdf0e10cSrcweir // returns true if decryption can be done 254cdf0e10cSrcweir bool setupDecryptionData( const rtl::OString& rPwd ) const; 255cdf0e10cSrcweir 256cdf0e10cSrcweir bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, 257cdf0e10cSrcweir sal_uInt8* pOutBuffer, 258cdf0e10cSrcweir unsigned int nObject, unsigned int nGeneration ) const; 259cdf0e10cSrcweir 260cdf0e10cSrcweir rtl::OUString getDecryptionKey() const; 261cdf0e10cSrcweir }; 262cdf0e10cSrcweir 263cdf0e10cSrcweir struct PDFObject : public PDFContainer 264cdf0e10cSrcweir { 265cdf0e10cSrcweir PDFEntry* m_pObject; 266cdf0e10cSrcweir PDFStream* m_pStream; 267cdf0e10cSrcweir unsigned int m_nNumber; 268cdf0e10cSrcweir unsigned int m_nGeneration; 269cdf0e10cSrcweir PDFObjectpdfparse::PDFObject270cdf0e10cSrcweir PDFObject( unsigned int nNr, unsigned int nGen ) 271cdf0e10cSrcweir : m_pObject( NULL ), m_pStream( NULL ), m_nNumber( nNr ), m_nGeneration( nGen ) {} 272cdf0e10cSrcweir virtual ~PDFObject(); 273cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 274cdf0e10cSrcweir virtual PDFEntry* clone() const; 275cdf0e10cSrcweir 276cdf0e10cSrcweir // writes only the contained stream, deflated if necessary 277cdf0e10cSrcweir bool writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const; 278cdf0e10cSrcweir 279cdf0e10cSrcweir private: 280cdf0e10cSrcweir // returns true if stream is deflated 281cdf0e10cSrcweir // fills *ppStream and *pBytes with start of stream and count of bytes 282cdf0e10cSrcweir // memory returned in *ppStream must be freed with rtl_freeMemory afterwards 283cdf0e10cSrcweir // fills in NULL and 0 in case of error 284cdf0e10cSrcweir bool getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const; 285cdf0e10cSrcweir }; 286cdf0e10cSrcweir 287cdf0e10cSrcweir struct PDFPart : public PDFContainer 288cdf0e10cSrcweir { PDFPartpdfparse::PDFPart289cdf0e10cSrcweir PDFPart() : PDFContainer() {} 290cdf0e10cSrcweir virtual ~PDFPart(); 291cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 292cdf0e10cSrcweir virtual PDFEntry* clone() const; 293cdf0e10cSrcweir }; 294cdf0e10cSrcweir 295cdf0e10cSrcweir class PDFReader 296cdf0e10cSrcweir { 297cdf0e10cSrcweir public: PDFReader()298cdf0e10cSrcweir PDFReader() {} ~PDFReader()299cdf0e10cSrcweir ~PDFReader() {} 300cdf0e10cSrcweir 301cdf0e10cSrcweir PDFEntry* read( const char* pFileName ); 302cdf0e10cSrcweir PDFEntry* read( const char* pBuffer, unsigned int nLen ); 303cdf0e10cSrcweir }; 304cdf0e10cSrcweir 305cdf0e10cSrcweir } // namespace 306cdf0e10cSrcweir 307cdf0e10cSrcweir #endif 308