/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org. If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir #ifndef INCLUDED_PDFI_PDFPARSE_HXX 29*cdf0e10cSrcweir #define INCLUDED_PDFI_PDFPARSE_HXX 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include <sal/types.h> 32*cdf0e10cSrcweir #include <rtl/ustring.hxx> 33*cdf0e10cSrcweir #include <rtl/string.hxx> 34*cdf0e10cSrcweir 35*cdf0e10cSrcweir #include <vector> 36*cdf0e10cSrcweir #include <hash_map> 37*cdf0e10cSrcweir 38*cdf0e10cSrcweir namespace pdfparse 39*cdf0e10cSrcweir { 40*cdf0e10cSrcweir 41*cdf0e10cSrcweir struct EmitImplData; 42*cdf0e10cSrcweir struct PDFContainer; 43*cdf0e10cSrcweir class EmitContext 44*cdf0e10cSrcweir { 45*cdf0e10cSrcweir public: 46*cdf0e10cSrcweir virtual bool write( const void* pBuf, unsigned int nLen ) = 0; 47*cdf0e10cSrcweir virtual unsigned int getCurPos() = 0; 48*cdf0e10cSrcweir virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) = 0; 49*cdf0e10cSrcweir virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) = 0; 50*cdf0e10cSrcweir 51*cdf0e10cSrcweir EmitContext( const PDFContainer* pTop = NULL ); 52*cdf0e10cSrcweir virtual ~EmitContext(); 53*cdf0e10cSrcweir 54*cdf0e10cSrcweir // set this to deflate contained streams 55*cdf0e10cSrcweir bool m_bDeflate; 56*cdf0e10cSrcweir // set this to decrypt the PDF file 57*cdf0e10cSrcweir bool m_bDecrypt; 58*cdf0e10cSrcweir 59*cdf0e10cSrcweir private: 60*cdf0e10cSrcweir friend struct PDFEntry; 61*cdf0e10cSrcweir EmitImplData* m_pImplData; 62*cdf0e10cSrcweir }; 63*cdf0e10cSrcweir 64*cdf0e10cSrcweir struct PDFEntry 65*cdf0e10cSrcweir { 66*cdf0e10cSrcweir PDFEntry() {} 67*cdf0e10cSrcweir virtual ~PDFEntry(); 68*cdf0e10cSrcweir 69*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const = 0; 70*cdf0e10cSrcweir virtual PDFEntry* clone() const = 0; 71*cdf0e10cSrcweir 72*cdf0e10cSrcweir protected: 73*cdf0e10cSrcweir EmitImplData* getEmitData( 
EmitContext& rContext ) const; 74*cdf0e10cSrcweir void setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData ) const; 75*cdf0e10cSrcweir }; 76*cdf0e10cSrcweir 77*cdf0e10cSrcweir struct PDFComment : public PDFEntry 78*cdf0e10cSrcweir { 79*cdf0e10cSrcweir rtl::OString m_aComment; 80*cdf0e10cSrcweir 81*cdf0e10cSrcweir PDFComment( const rtl::OString& rComment ) 82*cdf0e10cSrcweir : PDFEntry(), m_aComment( rComment ) {} 83*cdf0e10cSrcweir virtual ~PDFComment(); 84*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 85*cdf0e10cSrcweir virtual PDFEntry* clone() const; 86*cdf0e10cSrcweir }; 87*cdf0e10cSrcweir 88*cdf0e10cSrcweir struct PDFValue : public PDFEntry 89*cdf0e10cSrcweir { 90*cdf0e10cSrcweir // abstract base class for simple values 91*cdf0e10cSrcweir PDFValue() : PDFEntry() {} 92*cdf0e10cSrcweir virtual ~PDFValue(); 93*cdf0e10cSrcweir }; 94*cdf0e10cSrcweir 95*cdf0e10cSrcweir struct PDFName : public PDFValue 96*cdf0e10cSrcweir { 97*cdf0e10cSrcweir rtl::OString m_aName; 98*cdf0e10cSrcweir 99*cdf0e10cSrcweir PDFName( const rtl::OString& rName ) 100*cdf0e10cSrcweir : PDFValue(), m_aName( rName ) {} 101*cdf0e10cSrcweir virtual ~PDFName(); 102*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 103*cdf0e10cSrcweir virtual PDFEntry* clone() const; 104*cdf0e10cSrcweir 105*cdf0e10cSrcweir rtl::OUString getFilteredName() const; 106*cdf0e10cSrcweir }; 107*cdf0e10cSrcweir 108*cdf0e10cSrcweir struct PDFString : public PDFValue 109*cdf0e10cSrcweir { 110*cdf0e10cSrcweir rtl::OString m_aString; 111*cdf0e10cSrcweir 112*cdf0e10cSrcweir PDFString( const rtl::OString& rString ) 113*cdf0e10cSrcweir : PDFValue(), m_aString( rString ) {} 114*cdf0e10cSrcweir virtual ~PDFString(); 115*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 116*cdf0e10cSrcweir virtual PDFEntry* clone() const; 117*cdf0e10cSrcweir 118*cdf0e10cSrcweir rtl::OString getFilteredString() const; 119*cdf0e10cSrcweir }; 120*cdf0e10cSrcweir 
121*cdf0e10cSrcweir struct PDFNumber : public PDFValue 122*cdf0e10cSrcweir { 123*cdf0e10cSrcweir double m_fValue; 124*cdf0e10cSrcweir 125*cdf0e10cSrcweir PDFNumber( double fVal ) 126*cdf0e10cSrcweir : PDFValue(), m_fValue( fVal ) {} 127*cdf0e10cSrcweir virtual ~PDFNumber(); 128*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 129*cdf0e10cSrcweir virtual PDFEntry* clone() const; 130*cdf0e10cSrcweir }; 131*cdf0e10cSrcweir 132*cdf0e10cSrcweir struct PDFBool : public PDFValue 133*cdf0e10cSrcweir { 134*cdf0e10cSrcweir bool m_bValue; 135*cdf0e10cSrcweir 136*cdf0e10cSrcweir PDFBool( bool bVal ) 137*cdf0e10cSrcweir : PDFValue(), m_bValue( bVal ) {} 138*cdf0e10cSrcweir virtual ~PDFBool(); 139*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 140*cdf0e10cSrcweir virtual PDFEntry* clone() const; 141*cdf0e10cSrcweir }; 142*cdf0e10cSrcweir 143*cdf0e10cSrcweir struct PDFObjectRef : public PDFValue 144*cdf0e10cSrcweir { 145*cdf0e10cSrcweir unsigned int m_nNumber; 146*cdf0e10cSrcweir unsigned int m_nGeneration; 147*cdf0e10cSrcweir 148*cdf0e10cSrcweir PDFObjectRef( unsigned int nNr, unsigned int nGen ) 149*cdf0e10cSrcweir : PDFValue(), m_nNumber( nNr ), m_nGeneration( nGen ) {} 150*cdf0e10cSrcweir virtual ~PDFObjectRef(); 151*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 152*cdf0e10cSrcweir virtual PDFEntry* clone() const; 153*cdf0e10cSrcweir }; 154*cdf0e10cSrcweir 155*cdf0e10cSrcweir struct PDFNull : public PDFValue 156*cdf0e10cSrcweir { 157*cdf0e10cSrcweir PDFNull() {} 158*cdf0e10cSrcweir virtual ~PDFNull(); 159*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 160*cdf0e10cSrcweir virtual PDFEntry* clone() const; 161*cdf0e10cSrcweir }; 162*cdf0e10cSrcweir 163*cdf0e10cSrcweir struct PDFObject; 164*cdf0e10cSrcweir struct PDFContainer : public PDFEntry 165*cdf0e10cSrcweir { 166*cdf0e10cSrcweir sal_Int32 m_nOffset; 167*cdf0e10cSrcweir std::vector<PDFEntry*> m_aSubElements; 168*cdf0e10cSrcweir 
169*cdf0e10cSrcweir // this is an abstract base class for identifying 170*cdf0e10cSrcweir // entries that can contain sub elements besides comments 171*cdf0e10cSrcweir PDFContainer() : PDFEntry(), m_nOffset( 0 ) {} 172*cdf0e10cSrcweir virtual ~PDFContainer(); 173*cdf0e10cSrcweir virtual bool emitSubElements( EmitContext& rWriteContext ) const; 174*cdf0e10cSrcweir virtual void cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const; 175*cdf0e10cSrcweir 176*cdf0e10cSrcweir PDFObject* findObject( unsigned int nNumber, unsigned int nGeneration ) const; 177*cdf0e10cSrcweir PDFObject* findObject( PDFObjectRef* pRef ) const 178*cdf0e10cSrcweir { return findObject( pRef->m_nNumber, pRef->m_nGeneration ); } 179*cdf0e10cSrcweir }; 180*cdf0e10cSrcweir 181*cdf0e10cSrcweir struct PDFArray : public PDFContainer 182*cdf0e10cSrcweir { 183*cdf0e10cSrcweir PDFArray() {} 184*cdf0e10cSrcweir virtual ~PDFArray(); 185*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 186*cdf0e10cSrcweir virtual PDFEntry* clone() const; 187*cdf0e10cSrcweir }; 188*cdf0e10cSrcweir 189*cdf0e10cSrcweir struct PDFDict : public PDFContainer 190*cdf0e10cSrcweir { 191*cdf0e10cSrcweir typedef std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash> Map; 192*cdf0e10cSrcweir Map m_aMap; 193*cdf0e10cSrcweir 194*cdf0e10cSrcweir PDFDict() {} 195*cdf0e10cSrcweir virtual ~PDFDict(); 196*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 197*cdf0e10cSrcweir virtual PDFEntry* clone() const; 198*cdf0e10cSrcweir 199*cdf0e10cSrcweir // inserting a value of NULL will remove rName and the previous value 200*cdf0e10cSrcweir // from the dictionary 201*cdf0e10cSrcweir void insertValue( const rtl::OString& rName, PDFEntry* pValue ); 202*cdf0e10cSrcweir // removes a name/value pair from the dict 203*cdf0e10cSrcweir void eraseValue( const rtl::OString& rName ); 204*cdf0e10cSrcweir // builds new map as of sub elements 205*cdf0e10cSrcweir // returns NULL if successfull, else the first 
offending element 206*cdf0e10cSrcweir PDFEntry* buildMap(); 207*cdf0e10cSrcweir }; 208*cdf0e10cSrcweir 209*cdf0e10cSrcweir struct PDFStream : public PDFEntry 210*cdf0e10cSrcweir { 211*cdf0e10cSrcweir unsigned int m_nBeginOffset; 212*cdf0e10cSrcweir unsigned int m_nEndOffset; // offset of the byte after the stream 213*cdf0e10cSrcweir PDFDict* m_pDict; 214*cdf0e10cSrcweir 215*cdf0e10cSrcweir PDFStream( unsigned int nBegin, unsigned int nEnd, PDFDict* pStreamDict ) 216*cdf0e10cSrcweir : PDFEntry(), m_nBeginOffset( nBegin ), m_nEndOffset( nEnd ), m_pDict( pStreamDict ) {} 217*cdf0e10cSrcweir virtual ~PDFStream(); 218*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 219*cdf0e10cSrcweir virtual PDFEntry* clone() const; 220*cdf0e10cSrcweir 221*cdf0e10cSrcweir unsigned int getDictLength( const PDFContainer* pObjectContainer = NULL ) const; // get contents of the "Length" entry of the dict 222*cdf0e10cSrcweir }; 223*cdf0e10cSrcweir 224*cdf0e10cSrcweir struct PDFTrailer : public PDFContainer 225*cdf0e10cSrcweir { 226*cdf0e10cSrcweir PDFDict* m_pDict; 227*cdf0e10cSrcweir 228*cdf0e10cSrcweir PDFTrailer() : PDFContainer(), m_pDict( NULL ) {} 229*cdf0e10cSrcweir virtual ~PDFTrailer(); 230*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 231*cdf0e10cSrcweir virtual PDFEntry* clone() const; 232*cdf0e10cSrcweir }; 233*cdf0e10cSrcweir 234*cdf0e10cSrcweir struct PDFFileImplData; 235*cdf0e10cSrcweir struct PDFFile : public PDFContainer 236*cdf0e10cSrcweir { 237*cdf0e10cSrcweir private: 238*cdf0e10cSrcweir mutable PDFFileImplData* m_pData; 239*cdf0e10cSrcweir PDFFileImplData* impl_getData() const; 240*cdf0e10cSrcweir public: 241*cdf0e10cSrcweir unsigned int m_nMajor; // PDF major 242*cdf0e10cSrcweir unsigned int m_nMinor; // PDF minor 243*cdf0e10cSrcweir 244*cdf0e10cSrcweir PDFFile() 245*cdf0e10cSrcweir : PDFContainer(), 246*cdf0e10cSrcweir m_pData( NULL ), 247*cdf0e10cSrcweir m_nMajor( 0 ), m_nMinor( 0 ) 248*cdf0e10cSrcweir {} 
249*cdf0e10cSrcweir virtual ~PDFFile(); 250*cdf0e10cSrcweir 251*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 252*cdf0e10cSrcweir virtual PDFEntry* clone() const; 253*cdf0e10cSrcweir 254*cdf0e10cSrcweir bool isEncrypted() const; 255*cdf0e10cSrcweir // this method checks whether rPwd is compatible with 256*cdf0e10cSrcweir // either user or owner password and sets up decrypt data in that case 257*cdf0e10cSrcweir // returns true if decryption can be done 258*cdf0e10cSrcweir bool setupDecryptionData( const rtl::OString& rPwd ) const; 259*cdf0e10cSrcweir 260*cdf0e10cSrcweir bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, 261*cdf0e10cSrcweir sal_uInt8* pOutBuffer, 262*cdf0e10cSrcweir unsigned int nObject, unsigned int nGeneration ) const; 263*cdf0e10cSrcweir 264*cdf0e10cSrcweir rtl::OUString getDecryptionKey() const; 265*cdf0e10cSrcweir }; 266*cdf0e10cSrcweir 267*cdf0e10cSrcweir struct PDFObject : public PDFContainer 268*cdf0e10cSrcweir { 269*cdf0e10cSrcweir PDFEntry* m_pObject; 270*cdf0e10cSrcweir PDFStream* m_pStream; 271*cdf0e10cSrcweir unsigned int m_nNumber; 272*cdf0e10cSrcweir unsigned int m_nGeneration; 273*cdf0e10cSrcweir 274*cdf0e10cSrcweir PDFObject( unsigned int nNr, unsigned int nGen ) 275*cdf0e10cSrcweir : m_pObject( NULL ), m_pStream( NULL ), m_nNumber( nNr ), m_nGeneration( nGen ) {} 276*cdf0e10cSrcweir virtual ~PDFObject(); 277*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 278*cdf0e10cSrcweir virtual PDFEntry* clone() const; 279*cdf0e10cSrcweir 280*cdf0e10cSrcweir // writes only the contained stream, deflated if necessary 281*cdf0e10cSrcweir bool writeStream( EmitContext& rContext, const PDFFile* pPDFFile ) const; 282*cdf0e10cSrcweir 283*cdf0e10cSrcweir private: 284*cdf0e10cSrcweir // returns true if stream is deflated 285*cdf0e10cSrcweir // fills *ppStream and *pBytes with start of stream and count of bytes 286*cdf0e10cSrcweir // memory returned in *ppStream must be freed with rtl_freeMemory 
afterwards 287*cdf0e10cSrcweir // fills in NULL and 0 in case of error 288*cdf0e10cSrcweir bool getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const; 289*cdf0e10cSrcweir }; 290*cdf0e10cSrcweir 291*cdf0e10cSrcweir struct PDFPart : public PDFContainer 292*cdf0e10cSrcweir { 293*cdf0e10cSrcweir PDFPart() : PDFContainer() {} 294*cdf0e10cSrcweir virtual ~PDFPart(); 295*cdf0e10cSrcweir virtual bool emit( EmitContext& rWriteContext ) const; 296*cdf0e10cSrcweir virtual PDFEntry* clone() const; 297*cdf0e10cSrcweir }; 298*cdf0e10cSrcweir 299*cdf0e10cSrcweir class PDFReader 300*cdf0e10cSrcweir { 301*cdf0e10cSrcweir public: 302*cdf0e10cSrcweir PDFReader() {} 303*cdf0e10cSrcweir ~PDFReader() {} 304*cdf0e10cSrcweir 305*cdf0e10cSrcweir PDFEntry* read( const char* pFileName ); 306*cdf0e10cSrcweir PDFEntry* read( const char* pBuffer, unsigned int nLen ); 307*cdf0e10cSrcweir }; 308*cdf0e10cSrcweir 309*cdf0e10cSrcweir } // namespace 310*cdf0e10cSrcweir 311*cdf0e10cSrcweir #endif 312