1*b1cdbd2cSJim Jagielski /**************************************************************
2*b1cdbd2cSJim Jagielski *
3*b1cdbd2cSJim Jagielski * Licensed to the Apache Software Foundation (ASF) under one
4*b1cdbd2cSJim Jagielski * or more contributor license agreements. See the NOTICE file
5*b1cdbd2cSJim Jagielski * distributed with this work for additional information
6*b1cdbd2cSJim Jagielski * regarding copyright ownership. The ASF licenses this file
7*b1cdbd2cSJim Jagielski * to you under the Apache License, Version 2.0 (the
8*b1cdbd2cSJim Jagielski * "License"); you may not use this file except in compliance
9*b1cdbd2cSJim Jagielski * with the License. You may obtain a copy of the License at
10*b1cdbd2cSJim Jagielski *
11*b1cdbd2cSJim Jagielski * http://www.apache.org/licenses/LICENSE-2.0
12*b1cdbd2cSJim Jagielski *
13*b1cdbd2cSJim Jagielski * Unless required by applicable law or agreed to in writing,
14*b1cdbd2cSJim Jagielski * software distributed under the License is distributed on an
15*b1cdbd2cSJim Jagielski * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*b1cdbd2cSJim Jagielski * KIND, either express or implied. See the License for the
17*b1cdbd2cSJim Jagielski * specific language governing permissions and limitations
18*b1cdbd2cSJim Jagielski * under the License.
19*b1cdbd2cSJim Jagielski *
20*b1cdbd2cSJim Jagielski *************************************************************/
21*b1cdbd2cSJim Jagielski
22*b1cdbd2cSJim Jagielski
23*b1cdbd2cSJim Jagielski
24*b1cdbd2cSJim Jagielski // MARKER(update_precomp.py): autogen include statement, do not remove
25*b1cdbd2cSJim Jagielski #include "precompiled_sdext.hxx"
26*b1cdbd2cSJim Jagielski
27*b1cdbd2cSJim Jagielski #include <stdio.h>
28*b1cdbd2cSJim Jagielski #include <sal/main.h>
29*b1cdbd2cSJim Jagielski #include <osl/file.h>
30*b1cdbd2cSJim Jagielski #include <osl/thread.h>
31*b1cdbd2cSJim Jagielski #include <rtl/alloc.h>
32*b1cdbd2cSJim Jagielski #include <rtl/ustring.hxx>
33*b1cdbd2cSJim Jagielski #include <rtl/strbuf.hxx>
34*b1cdbd2cSJim Jagielski
35*b1cdbd2cSJim Jagielski #include "pdfparse.hxx"
36*b1cdbd2cSJim Jagielski
37*b1cdbd2cSJim Jagielski using namespace rtl;
38*b1cdbd2cSJim Jagielski using namespace pdfparse;
39*b1cdbd2cSJim Jagielski
// Prints the command line usage summary to stdout.
//
// pExe is the program name; it is substituted into each of the five
// synopsis lines of the usage text.
void printHelp( const char* pExe )
{
    fprintf( stdout,
    "USAGE: %s [-h,--help]\n"
    " %s [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    " %s <-a, --extract-add-streams> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    " %s <-f, --extract-fonts> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    " %s <-o, --extract-objects> <o0>[:<g0>][,<o1>[:g1][,...]] [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    " -h, --help: show help\n"
    " -a, --extract-add-streams: extracts additional streams to outputfile_object\n"
    " and prints the mimetype found to stdout\n"
    // the closing parenthesis was missing in this line of the help text
    " -f, --extract-fonts: extracts fonts (currently only type1 and truetype are supported)\n"
    " -o, --extract-objects: extracts object streams, the syntax of the argument is comma separated\n"
    " object numbers, where object number and generation number are separated by \':\'\n"
    " an omitted generation number defaults to 0\n"
    " -pw, --password: use password for decryption\n"
    "\n"
    "note: -f, -a, -o and normal unzip operation are mutually exclusive\n"
    , pExe, pExe, pExe, pExe, pExe );
}
60*b1cdbd2cSJim Jagielski
61*b1cdbd2cSJim Jagielski class FileEmitContext : public EmitContext
62*b1cdbd2cSJim Jagielski {
63*b1cdbd2cSJim Jagielski oslFileHandle m_aHandle;
64*b1cdbd2cSJim Jagielski oslFileHandle m_aReadHandle;
65*b1cdbd2cSJim Jagielski unsigned int m_nReadLen;
66*b1cdbd2cSJim Jagielski
67*b1cdbd2cSJim Jagielski void openReadFile( const char* pOrigName );
68*b1cdbd2cSJim Jagielski
69*b1cdbd2cSJim Jagielski public:
70*b1cdbd2cSJim Jagielski FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop );
71*b1cdbd2cSJim Jagielski virtual ~FileEmitContext();
72*b1cdbd2cSJim Jagielski
73*b1cdbd2cSJim Jagielski virtual bool write( const void* pBuf, unsigned int nLen ) throw();
74*b1cdbd2cSJim Jagielski virtual unsigned int getCurPos() throw();
75*b1cdbd2cSJim Jagielski virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw();
76*b1cdbd2cSJim Jagielski virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw();
77*b1cdbd2cSJim Jagielski };
78*b1cdbd2cSJim Jagielski
FileEmitContext(const char * pFileName,const char * pOrigName,const PDFContainer * pTop)79*b1cdbd2cSJim Jagielski FileEmitContext::FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop )
80*b1cdbd2cSJim Jagielski : EmitContext( pTop ),
81*b1cdbd2cSJim Jagielski m_aHandle( NULL ),
82*b1cdbd2cSJim Jagielski m_aReadHandle( NULL ),
83*b1cdbd2cSJim Jagielski m_nReadLen( 0 )
84*b1cdbd2cSJim Jagielski {
85*b1cdbd2cSJim Jagielski OUString aSysFile( OStringToOUString( OString( pFileName ), osl_getThreadTextEncoding() ) );
86*b1cdbd2cSJim Jagielski OUString aURL;
87*b1cdbd2cSJim Jagielski if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
88*b1cdbd2cSJim Jagielski {
89*b1cdbd2cSJim Jagielski fprintf( stderr, "filename conversion \"%s\" failed\n", pFileName );
90*b1cdbd2cSJim Jagielski return;
91*b1cdbd2cSJim Jagielski }
92*b1cdbd2cSJim Jagielski
93*b1cdbd2cSJim Jagielski if( osl_openFile( aURL.pData, &m_aHandle, osl_File_OpenFlag_Write ) == osl_File_E_None )
94*b1cdbd2cSJim Jagielski {
95*b1cdbd2cSJim Jagielski if( osl_setFileSize( m_aHandle, 0 ) != osl_File_E_None )
96*b1cdbd2cSJim Jagielski {
97*b1cdbd2cSJim Jagielski fprintf( stderr, "could not truncate %s\n", pFileName );
98*b1cdbd2cSJim Jagielski osl_closeFile( m_aHandle );
99*b1cdbd2cSJim Jagielski m_aHandle = NULL;
100*b1cdbd2cSJim Jagielski }
101*b1cdbd2cSJim Jagielski }
102*b1cdbd2cSJim Jagielski else if( osl_openFile( aURL.pData, &m_aHandle,
103*b1cdbd2cSJim Jagielski osl_File_OpenFlag_Write |osl_File_OpenFlag_Create ) != osl_File_E_None )
104*b1cdbd2cSJim Jagielski {
105*b1cdbd2cSJim Jagielski fprintf( stderr, "could not open %s\n", pFileName );
106*b1cdbd2cSJim Jagielski return;
107*b1cdbd2cSJim Jagielski }
108*b1cdbd2cSJim Jagielski m_bDeflate = true;
109*b1cdbd2cSJim Jagielski
110*b1cdbd2cSJim Jagielski openReadFile( pOrigName );
111*b1cdbd2cSJim Jagielski }
112*b1cdbd2cSJim Jagielski
~FileEmitContext()113*b1cdbd2cSJim Jagielski FileEmitContext::~FileEmitContext()
114*b1cdbd2cSJim Jagielski {
115*b1cdbd2cSJim Jagielski if( m_aHandle )
116*b1cdbd2cSJim Jagielski osl_closeFile( m_aHandle );
117*b1cdbd2cSJim Jagielski if( m_aReadHandle )
118*b1cdbd2cSJim Jagielski osl_closeFile( m_aReadHandle );
119*b1cdbd2cSJim Jagielski }
120*b1cdbd2cSJim Jagielski
openReadFile(const char * pInFile)121*b1cdbd2cSJim Jagielski void FileEmitContext::openReadFile( const char* pInFile )
122*b1cdbd2cSJim Jagielski {
123*b1cdbd2cSJim Jagielski OUString aSysFile( OStringToOUString( OString( pInFile ), osl_getThreadTextEncoding() ) );
124*b1cdbd2cSJim Jagielski OUString aURL;
125*b1cdbd2cSJim Jagielski if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
126*b1cdbd2cSJim Jagielski {
127*b1cdbd2cSJim Jagielski fprintf( stderr, "filename conversion \"%s\" failed\n", pInFile );
128*b1cdbd2cSJim Jagielski return;
129*b1cdbd2cSJim Jagielski }
130*b1cdbd2cSJim Jagielski
131*b1cdbd2cSJim Jagielski if( osl_openFile( aURL.pData, &m_aReadHandle, osl_File_OpenFlag_Read ) != osl_File_E_None )
132*b1cdbd2cSJim Jagielski {
133*b1cdbd2cSJim Jagielski fprintf( stderr, "could not open %s\n", pInFile );
134*b1cdbd2cSJim Jagielski return;
135*b1cdbd2cSJim Jagielski }
136*b1cdbd2cSJim Jagielski
137*b1cdbd2cSJim Jagielski if( osl_setFilePos( m_aReadHandle, osl_Pos_End, 0 ) != osl_File_E_None )
138*b1cdbd2cSJim Jagielski {
139*b1cdbd2cSJim Jagielski fprintf( stderr, "could not seek to end of %s\n", pInFile );
140*b1cdbd2cSJim Jagielski osl_closeFile( m_aReadHandle );
141*b1cdbd2cSJim Jagielski return;
142*b1cdbd2cSJim Jagielski }
143*b1cdbd2cSJim Jagielski
144*b1cdbd2cSJim Jagielski sal_uInt64 nFileSize = 0;
145*b1cdbd2cSJim Jagielski if( osl_getFilePos( m_aReadHandle, &nFileSize ) != osl_File_E_None )
146*b1cdbd2cSJim Jagielski {
147*b1cdbd2cSJim Jagielski fprintf( stderr, "could not get end pos of %s\n", pInFile );
148*b1cdbd2cSJim Jagielski osl_closeFile( m_aReadHandle );
149*b1cdbd2cSJim Jagielski return;
150*b1cdbd2cSJim Jagielski }
151*b1cdbd2cSJim Jagielski
152*b1cdbd2cSJim Jagielski m_nReadLen = static_cast<unsigned int>(nFileSize);
153*b1cdbd2cSJim Jagielski }
154*b1cdbd2cSJim Jagielski
write(const void * pBuf,unsigned int nLen)155*b1cdbd2cSJim Jagielski bool FileEmitContext::write( const void* pBuf, unsigned int nLen ) throw()
156*b1cdbd2cSJim Jagielski {
157*b1cdbd2cSJim Jagielski if( ! m_aHandle )
158*b1cdbd2cSJim Jagielski return false;
159*b1cdbd2cSJim Jagielski
160*b1cdbd2cSJim Jagielski sal_uInt64 nWrite = static_cast<sal_uInt64>(nLen);
161*b1cdbd2cSJim Jagielski sal_uInt64 nWritten = 0;
162*b1cdbd2cSJim Jagielski return (osl_writeFile( m_aHandle, pBuf, nWrite, &nWritten ) == osl_File_E_None)
163*b1cdbd2cSJim Jagielski && nWrite == nWritten;
164*b1cdbd2cSJim Jagielski }
165*b1cdbd2cSJim Jagielski
getCurPos()166*b1cdbd2cSJim Jagielski unsigned int FileEmitContext::getCurPos() throw()
167*b1cdbd2cSJim Jagielski {
168*b1cdbd2cSJim Jagielski sal_uInt64 nFileSize = 0;
169*b1cdbd2cSJim Jagielski if( m_aHandle )
170*b1cdbd2cSJim Jagielski {
171*b1cdbd2cSJim Jagielski if( osl_getFilePos( m_aHandle, &nFileSize ) != osl_File_E_None )
172*b1cdbd2cSJim Jagielski nFileSize = 0;
173*b1cdbd2cSJim Jagielski }
174*b1cdbd2cSJim Jagielski return static_cast<unsigned int>(nFileSize);
175*b1cdbd2cSJim Jagielski }
176*b1cdbd2cSJim Jagielski
copyOrigBytes(unsigned int nOrigOffset,unsigned int nLen)177*b1cdbd2cSJim Jagielski bool FileEmitContext::copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw()
178*b1cdbd2cSJim Jagielski {
179*b1cdbd2cSJim Jagielski if( nOrigOffset + nLen > m_nReadLen )
180*b1cdbd2cSJim Jagielski return false;
181*b1cdbd2cSJim Jagielski
182*b1cdbd2cSJim Jagielski if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
183*b1cdbd2cSJim Jagielski {
184*b1cdbd2cSJim Jagielski fprintf( stderr, "could not seek to offset %u\n", nOrigOffset );
185*b1cdbd2cSJim Jagielski return false;
186*b1cdbd2cSJim Jagielski }
187*b1cdbd2cSJim Jagielski void* pBuf = rtl_allocateMemory( nLen );
188*b1cdbd2cSJim Jagielski if( ! pBuf )
189*b1cdbd2cSJim Jagielski return false;
190*b1cdbd2cSJim Jagielski sal_uInt64 nBytesRead = 0;
191*b1cdbd2cSJim Jagielski if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None
192*b1cdbd2cSJim Jagielski || nBytesRead != static_cast<sal_uInt64>(nLen) )
193*b1cdbd2cSJim Jagielski {
194*b1cdbd2cSJim Jagielski fprintf( stderr, "could not read %u bytes\n", nLen );
195*b1cdbd2cSJim Jagielski rtl_freeMemory( pBuf );
196*b1cdbd2cSJim Jagielski return false;
197*b1cdbd2cSJim Jagielski }
198*b1cdbd2cSJim Jagielski bool bRet = write( pBuf, nLen );
199*b1cdbd2cSJim Jagielski rtl_freeMemory( pBuf );
200*b1cdbd2cSJim Jagielski return bRet;
201*b1cdbd2cSJim Jagielski }
202*b1cdbd2cSJim Jagielski
readOrigBytes(unsigned int nOrigOffset,unsigned int nLen,void * pBuf)203*b1cdbd2cSJim Jagielski unsigned int FileEmitContext::readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw()
204*b1cdbd2cSJim Jagielski {
205*b1cdbd2cSJim Jagielski if( nOrigOffset + nLen > m_nReadLen )
206*b1cdbd2cSJim Jagielski return 0;
207*b1cdbd2cSJim Jagielski
208*b1cdbd2cSJim Jagielski if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
209*b1cdbd2cSJim Jagielski {
210*b1cdbd2cSJim Jagielski fprintf( stderr, "could not seek to offset %u\n", nOrigOffset );
211*b1cdbd2cSJim Jagielski return 0;
212*b1cdbd2cSJim Jagielski }
213*b1cdbd2cSJim Jagielski sal_uInt64 nBytesRead = 0;
214*b1cdbd2cSJim Jagielski if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None )
215*b1cdbd2cSJim Jagielski return 0;
216*b1cdbd2cSJim Jagielski return static_cast<unsigned int>(nBytesRead);
217*b1cdbd2cSJim Jagielski }
218*b1cdbd2cSJim Jagielski
219*b1cdbd2cSJim Jagielski typedef int(*PDFFileHdl)(const char*, const char*, PDFFile*);
220*b1cdbd2cSJim Jagielski
handleFile(const char * pInFile,const char * pOutFile,const char * pPassword,PDFFileHdl pHdl)221*b1cdbd2cSJim Jagielski int handleFile( const char* pInFile, const char* pOutFile, const char* pPassword, PDFFileHdl pHdl )
222*b1cdbd2cSJim Jagielski {
223*b1cdbd2cSJim Jagielski
224*b1cdbd2cSJim Jagielski PDFReader aParser;
225*b1cdbd2cSJim Jagielski int nRet = 0;
226*b1cdbd2cSJim Jagielski PDFEntry* pEntry = aParser.read( pInFile );
227*b1cdbd2cSJim Jagielski if( pEntry )
228*b1cdbd2cSJim Jagielski {
229*b1cdbd2cSJim Jagielski PDFFile* pPDFFile = dynamic_cast<PDFFile*>(pEntry);
230*b1cdbd2cSJim Jagielski if( pPDFFile )
231*b1cdbd2cSJim Jagielski {
232*b1cdbd2cSJim Jagielski fprintf( stdout, "have a %s PDF file\n", pPDFFile->isEncrypted() ? "encrypted" : "unencrypted" );
233*b1cdbd2cSJim Jagielski if( pPassword )
234*b1cdbd2cSJim Jagielski fprintf( stdout, "password %s\n",
235*b1cdbd2cSJim Jagielski pPDFFile->setupDecryptionData( pPassword ) ? "matches" : "does not match" );
236*b1cdbd2cSJim Jagielski nRet = pHdl( pInFile, pOutFile, pPDFFile );
237*b1cdbd2cSJim Jagielski }
238*b1cdbd2cSJim Jagielski else
239*b1cdbd2cSJim Jagielski nRet = 20;
240*b1cdbd2cSJim Jagielski delete pEntry;
241*b1cdbd2cSJim Jagielski }
242*b1cdbd2cSJim Jagielski return nRet;
243*b1cdbd2cSJim Jagielski }
244*b1cdbd2cSJim Jagielski
write_unzipFile(const char * pInFile,const char * pOutFile,PDFFile * pPDFFile)245*b1cdbd2cSJim Jagielski int write_unzipFile( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile )
246*b1cdbd2cSJim Jagielski {
247*b1cdbd2cSJim Jagielski FileEmitContext aContext( pOutFile, pInFile, pPDFFile );
248*b1cdbd2cSJim Jagielski aContext.m_bDecrypt = pPDFFile->isEncrypted();
249*b1cdbd2cSJim Jagielski pPDFFile->emit(aContext);
250*b1cdbd2cSJim Jagielski return 0;
251*b1cdbd2cSJim Jagielski }
252*b1cdbd2cSJim Jagielski
write_addStreamArray(const char * pOutFile,PDFArray * pStreams,PDFFile * pPDFFile,const char * pInFile)253*b1cdbd2cSJim Jagielski int write_addStreamArray( const char* pOutFile, PDFArray* pStreams, PDFFile* pPDFFile, const char* pInFile )
254*b1cdbd2cSJim Jagielski {
255*b1cdbd2cSJim Jagielski int nRet = 0;
256*b1cdbd2cSJim Jagielski unsigned int nArrayElements = pStreams->m_aSubElements.size();
257*b1cdbd2cSJim Jagielski for( unsigned int i = 0; i < nArrayElements-1 && nRet == 0; i++ )
258*b1cdbd2cSJim Jagielski {
259*b1cdbd2cSJim Jagielski PDFName* pMimeType = dynamic_cast<PDFName*>(pStreams->m_aSubElements[i]);
260*b1cdbd2cSJim Jagielski PDFObjectRef* pStreamRef = dynamic_cast<PDFObjectRef*>(pStreams->m_aSubElements[i+1]);
261*b1cdbd2cSJim Jagielski if( ! pMimeType )
262*b1cdbd2cSJim Jagielski fprintf( stderr, "error: no mimetype element\n" );
263*b1cdbd2cSJim Jagielski if( ! pStreamRef )
264*b1cdbd2cSJim Jagielski fprintf( stderr, "error: no stream ref element\n" );
265*b1cdbd2cSJim Jagielski if( pMimeType && pStreamRef )
266*b1cdbd2cSJim Jagielski {
267*b1cdbd2cSJim Jagielski fprintf( stdout, "found stream %d %d with mimetype %s\n",
268*b1cdbd2cSJim Jagielski pStreamRef->m_nNumber, pStreamRef->m_nGeneration,
269*b1cdbd2cSJim Jagielski pMimeType->m_aName.getStr() );
270*b1cdbd2cSJim Jagielski PDFObject* pObject = pPDFFile->findObject( pStreamRef->m_nNumber, pStreamRef->m_nGeneration );
271*b1cdbd2cSJim Jagielski if( pObject )
272*b1cdbd2cSJim Jagielski {
273*b1cdbd2cSJim Jagielski rtl::OStringBuffer aOutStream( pOutFile );
274*b1cdbd2cSJim Jagielski aOutStream.append( "_stream_" );
275*b1cdbd2cSJim Jagielski aOutStream.append( sal_Int32(pStreamRef->m_nNumber) );
276*b1cdbd2cSJim Jagielski aOutStream.append( "_" );
277*b1cdbd2cSJim Jagielski aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) );
278*b1cdbd2cSJim Jagielski FileEmitContext aContext( aOutStream.getStr(), pInFile, pPDFFile );
279*b1cdbd2cSJim Jagielski aContext.m_bDecrypt = pPDFFile->isEncrypted();
280*b1cdbd2cSJim Jagielski pObject->writeStream( aContext, pPDFFile );
281*b1cdbd2cSJim Jagielski }
282*b1cdbd2cSJim Jagielski else
283*b1cdbd2cSJim Jagielski {
284*b1cdbd2cSJim Jagielski fprintf( stderr, "object not found\n" );
285*b1cdbd2cSJim Jagielski nRet = 121;
286*b1cdbd2cSJim Jagielski }
287*b1cdbd2cSJim Jagielski }
288*b1cdbd2cSJim Jagielski else
289*b1cdbd2cSJim Jagielski nRet = 120;
290*b1cdbd2cSJim Jagielski }
291*b1cdbd2cSJim Jagielski return nRet;
292*b1cdbd2cSJim Jagielski }
293*b1cdbd2cSJim Jagielski
write_addStreams(const char * pInFile,const char * pOutFile,PDFFile * pPDFFile)294*b1cdbd2cSJim Jagielski int write_addStreams( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile )
295*b1cdbd2cSJim Jagielski {
296*b1cdbd2cSJim Jagielski // find all trailers
297*b1cdbd2cSJim Jagielski int nRet = 0;
298*b1cdbd2cSJim Jagielski unsigned int nElements = pPDFFile->m_aSubElements.size();
299*b1cdbd2cSJim Jagielski for( unsigned i = 0; i < nElements && nRet == 0; i++ )
300*b1cdbd2cSJim Jagielski {
301*b1cdbd2cSJim Jagielski PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pPDFFile->m_aSubElements[i]);
302*b1cdbd2cSJim Jagielski if( pTrailer && pTrailer->m_pDict )
303*b1cdbd2cSJim Jagielski {
304*b1cdbd2cSJim Jagielski // search for AdditionalStreams entry
305*b1cdbd2cSJim Jagielski std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash>::iterator add_stream;
306*b1cdbd2cSJim Jagielski add_stream = pTrailer->m_pDict->m_aMap.find( "AdditionalStreams" );
307*b1cdbd2cSJim Jagielski if( add_stream != pTrailer->m_pDict->m_aMap.end() )
308*b1cdbd2cSJim Jagielski {
309*b1cdbd2cSJim Jagielski PDFArray* pStreams = dynamic_cast<PDFArray*>(add_stream->second);
310*b1cdbd2cSJim Jagielski if( pStreams )
311*b1cdbd2cSJim Jagielski nRet = write_addStreamArray( pOutFile, pStreams, pPDFFile, pInFile );
312*b1cdbd2cSJim Jagielski }
313*b1cdbd2cSJim Jagielski }
314*b1cdbd2cSJim Jagielski }
315*b1cdbd2cSJim Jagielski return nRet;
316*b1cdbd2cSJim Jagielski }
317*b1cdbd2cSJim Jagielski
write_fonts(const char * i_pInFile,const char * i_pOutFile,PDFFile * i_pPDFFile)318*b1cdbd2cSJim Jagielski int write_fonts( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
319*b1cdbd2cSJim Jagielski {
320*b1cdbd2cSJim Jagielski int nRet = 0;
321*b1cdbd2cSJim Jagielski unsigned int nElements = i_pPDFFile->m_aSubElements.size();
322*b1cdbd2cSJim Jagielski for( unsigned i = 0; i < nElements && nRet == 0; i++ )
323*b1cdbd2cSJim Jagielski {
324*b1cdbd2cSJim Jagielski // search FontDescriptors
325*b1cdbd2cSJim Jagielski PDFObject* pObj = dynamic_cast<PDFObject*>(i_pPDFFile->m_aSubElements[i]);
326*b1cdbd2cSJim Jagielski if( ! pObj )
327*b1cdbd2cSJim Jagielski continue;
328*b1cdbd2cSJim Jagielski PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
329*b1cdbd2cSJim Jagielski if( ! pDict )
330*b1cdbd2cSJim Jagielski continue;
331*b1cdbd2cSJim Jagielski
332*b1cdbd2cSJim Jagielski std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash>::iterator map_it =
333*b1cdbd2cSJim Jagielski pDict->m_aMap.find( "Type" );
334*b1cdbd2cSJim Jagielski if( map_it == pDict->m_aMap.end() )
335*b1cdbd2cSJim Jagielski continue;
336*b1cdbd2cSJim Jagielski
337*b1cdbd2cSJim Jagielski PDFName* pName = dynamic_cast<PDFName*>(map_it->second);
338*b1cdbd2cSJim Jagielski if( ! pName )
339*b1cdbd2cSJim Jagielski continue;
340*b1cdbd2cSJim Jagielski if( ! pName->m_aName.equals( "FontDescriptor" ) )
341*b1cdbd2cSJim Jagielski continue;
342*b1cdbd2cSJim Jagielski
343*b1cdbd2cSJim Jagielski // the font name will be helpful, also there must be one in
344*b1cdbd2cSJim Jagielski // a font descriptor
345*b1cdbd2cSJim Jagielski map_it = pDict->m_aMap.find( "FontName" );
346*b1cdbd2cSJim Jagielski if( map_it == pDict->m_aMap.end() )
347*b1cdbd2cSJim Jagielski continue;
348*b1cdbd2cSJim Jagielski pName = dynamic_cast<PDFName*>(map_it->second);
349*b1cdbd2cSJim Jagielski if( ! pName )
350*b1cdbd2cSJim Jagielski continue;
351*b1cdbd2cSJim Jagielski rtl::OString aFontName( pName->m_aName );
352*b1cdbd2cSJim Jagielski
353*b1cdbd2cSJim Jagielski PDFObjectRef* pStreamRef = 0;
354*b1cdbd2cSJim Jagielski const char* pFileType = NULL;
355*b1cdbd2cSJim Jagielski // we have a font descriptor, try for a type 1 font
356*b1cdbd2cSJim Jagielski map_it = pDict->m_aMap.find( "FontFile" );
357*b1cdbd2cSJim Jagielski if( map_it != pDict->m_aMap.end() )
358*b1cdbd2cSJim Jagielski {
359*b1cdbd2cSJim Jagielski pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
360*b1cdbd2cSJim Jagielski if( pStreamRef )
361*b1cdbd2cSJim Jagielski pFileType = "pfa";
362*b1cdbd2cSJim Jagielski }
363*b1cdbd2cSJim Jagielski
364*b1cdbd2cSJim Jagielski // perhaps it's a truetype file ?
365*b1cdbd2cSJim Jagielski if( ! pStreamRef )
366*b1cdbd2cSJim Jagielski {
367*b1cdbd2cSJim Jagielski map_it = pDict->m_aMap.find( "FontFile2" );
368*b1cdbd2cSJim Jagielski if( map_it != pDict->m_aMap.end() )
369*b1cdbd2cSJim Jagielski {
370*b1cdbd2cSJim Jagielski pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
371*b1cdbd2cSJim Jagielski if( pStreamRef )
372*b1cdbd2cSJim Jagielski pFileType = "ttf";
373*b1cdbd2cSJim Jagielski }
374*b1cdbd2cSJim Jagielski }
375*b1cdbd2cSJim Jagielski
376*b1cdbd2cSJim Jagielski if( ! pStreamRef )
377*b1cdbd2cSJim Jagielski continue;
378*b1cdbd2cSJim Jagielski
379*b1cdbd2cSJim Jagielski PDFObject* pStream = i_pPDFFile->findObject( pStreamRef );
380*b1cdbd2cSJim Jagielski if( ! pStream )
381*b1cdbd2cSJim Jagielski continue;
382*b1cdbd2cSJim Jagielski
383*b1cdbd2cSJim Jagielski rtl::OStringBuffer aOutStream( i_pOutFile );
384*b1cdbd2cSJim Jagielski aOutStream.append( "_font_" );
385*b1cdbd2cSJim Jagielski aOutStream.append( sal_Int32(pStreamRef->m_nNumber) );
386*b1cdbd2cSJim Jagielski aOutStream.append( "_" );
387*b1cdbd2cSJim Jagielski aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) );
388*b1cdbd2cSJim Jagielski aOutStream.append( "_" );
389*b1cdbd2cSJim Jagielski aOutStream.append( aFontName );
390*b1cdbd2cSJim Jagielski if( pFileType )
391*b1cdbd2cSJim Jagielski {
392*b1cdbd2cSJim Jagielski aOutStream.append( "." );
393*b1cdbd2cSJim Jagielski aOutStream.append( pFileType );
394*b1cdbd2cSJim Jagielski }
395*b1cdbd2cSJim Jagielski FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
396*b1cdbd2cSJim Jagielski aContext.m_bDecrypt = i_pPDFFile->isEncrypted();
397*b1cdbd2cSJim Jagielski pStream->writeStream( aContext, i_pPDFFile );
398*b1cdbd2cSJim Jagielski }
399*b1cdbd2cSJim Jagielski return nRet;
400*b1cdbd2cSJim Jagielski }
401*b1cdbd2cSJim Jagielski
402*b1cdbd2cSJim Jagielski std::vector< std::pair< sal_Int32, sal_Int32 > > s_aEmitObjects;
403*b1cdbd2cSJim Jagielski
write_objects(const char * i_pInFile,const char * i_pOutFile,PDFFile * i_pPDFFile)404*b1cdbd2cSJim Jagielski int write_objects( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
405*b1cdbd2cSJim Jagielski {
406*b1cdbd2cSJim Jagielski int nRet = 0;
407*b1cdbd2cSJim Jagielski unsigned int nElements = s_aEmitObjects.size();
408*b1cdbd2cSJim Jagielski for( unsigned i = 0; i < nElements && nRet == 0; i++ )
409*b1cdbd2cSJim Jagielski {
410*b1cdbd2cSJim Jagielski sal_Int32 nObject = s_aEmitObjects[i].first;
411*b1cdbd2cSJim Jagielski sal_Int32 nGeneration = s_aEmitObjects[i].second;
412*b1cdbd2cSJim Jagielski PDFObject* pStream = i_pPDFFile->findObject( nObject, nGeneration );
413*b1cdbd2cSJim Jagielski if( ! pStream )
414*b1cdbd2cSJim Jagielski {
415*b1cdbd2cSJim Jagielski fprintf( stderr, "object %d %d not found !\n", (int)nObject, (int)nGeneration );
416*b1cdbd2cSJim Jagielski continue;
417*b1cdbd2cSJim Jagielski }
418*b1cdbd2cSJim Jagielski
419*b1cdbd2cSJim Jagielski rtl::OStringBuffer aOutStream( i_pOutFile );
420*b1cdbd2cSJim Jagielski aOutStream.append( "_stream_" );
421*b1cdbd2cSJim Jagielski aOutStream.append( nObject );
422*b1cdbd2cSJim Jagielski aOutStream.append( "_" );
423*b1cdbd2cSJim Jagielski aOutStream.append( nGeneration );
424*b1cdbd2cSJim Jagielski FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
425*b1cdbd2cSJim Jagielski aContext.m_bDecrypt = i_pPDFFile->isEncrypted();
426*b1cdbd2cSJim Jagielski pStream->writeStream( aContext, i_pPDFFile );
427*b1cdbd2cSJim Jagielski }
428*b1cdbd2cSJim Jagielski return nRet;
429*b1cdbd2cSJim Jagielski }
430*b1cdbd2cSJim Jagielski
// Program entry: parses the command line, derives a default output name
// if none was given and runs the selected handler through handleFile.
//
// Arguments starting with '-' are options; the first other argument is
// the input file, the second the output file, further ones are ignored.
// If several of -a, -f, -o are given the last one selects the handler.
//
// Return values produced here: 0 after printing help, 1 for option errors,
// 10 for a missing input file, 11 if no output name can be derived;
// otherwise the result of handleFile.
SAL_IMPLEMENT_MAIN_WITH_ARGS( argc, argv )
{
    const char* pInFile = NULL;
    const char* pOutFile = NULL;
    const char* pPassword = NULL;
    OStringBuffer aOutFile( 256 );
    PDFFileHdl aHdl = write_unzipFile;

    for( int nArg = 1; nArg < argc; nArg++ )
    {
        if( argv[nArg][0] == '-' )
        {
            if( ! rtl_str_compare( "-pw", argv[nArg] ) ||
                ! rtl_str_compare( "--password" , argv[nArg] ) )
            {
                if( nArg == argc-1 )
                {
                    fprintf( stderr, "no password given\n" );
                    return 1;
                }
                nArg++;
                pPassword = argv[nArg];
            }
            else if( ! rtl_str_compare( "-h", argv[nArg] ) ||
                     ! rtl_str_compare( "--help", argv[nArg] ) )
            {
                printHelp( argv[0] );
                return 0;
            }
            else if( ! rtl_str_compare( "-a", argv[nArg] ) ||
                     ! rtl_str_compare( "--extract-add-streams", argv[nArg] ) )
            {
                aHdl = write_addStreams;
            }
            else if( ! rtl_str_compare( "-f", argv[nArg] ) ||
                     ! rtl_str_compare( "--extract-fonts", argv[nArg] ) )
            {
                aHdl = write_fonts;
            }
            else if( ! rtl_str_compare( "-o", argv[nArg] ) ||
                     ! rtl_str_compare( "--extract-objects", argv[nArg] ) )
            {
                aHdl = write_objects;
                // a missing object list is an error, same as a missing password
                if( nArg == argc-1 )
                {
                    fprintf( stderr, "no object list given\n" );
                    return 1;
                }
                nArg++;

                // syntax: <o0>[:<g0>][,<o1>[:<g1>][,...]]
                rtl::OString aObjs( argv[nArg] );
                sal_Int32 nIndex = 0;
                while( nIndex != -1 )
                {
                    rtl::OString aToken( aObjs.getToken( 0, ',', nIndex ) );
                    sal_Int32 nObject = 0;
                    sal_Int32 nGeneration = 0;  // default if no ":<generation>" follows
                    sal_Int32 nGenIndex = 0;
                    nObject = aToken.getToken( 0, ':', nGenIndex ).toInt32();
                    if( nGenIndex != -1 )
                        nGeneration = aToken.getToken( 0, ':', nGenIndex ).toInt32();
                    s_aEmitObjects.push_back( std::pair<sal_Int32,sal_Int32>(nObject,nGeneration) );
                }
            }
            else
            {
                fprintf( stderr, "unrecognized option \"%s\"\n",
                         argv[nArg] );
                printHelp( argv[0] );
                return 1;
            }
        }
        else if( pInFile == NULL )
            pInFile = argv[nArg];
        else if( pOutFile == NULL )
            pOutFile = argv[nArg];
    }
    if( ! pInFile )
    {
        fprintf( stderr, "no input file given\n" );
        return 10;
    }
    if( ! pOutFile )
    {
        // default output name: input name without a trailing ".pdf"
        // (any case), followed by "_unzip.pdf"
        OString aFile( pInFile );
        if( aFile.getLength() == 0 )
        {
            fprintf( stderr, "no output file given\n" );
            return 11;
        }

        // Names of up to four characters cannot have a non-empty stem in
        // front of ".pdf"; they are used unchanged. Before, such names
        // were dropped and the output was always "_unzip.pdf".
        const sal_Int32 nStemLen = aFile.getLength() - 4;
        if( nStemLen > 0 && aFile.matchIgnoreAsciiCase( OString( ".pdf" ), nStemLen ) )
            aOutFile.append( pInFile, nStemLen );
        else
            aOutFile.append( aFile );
        aOutFile.append( "_unzip.pdf" );
        pOutFile = aOutFile.getStr();
    }

    return handleFile( pInFile, pOutFile, pPassword, aHdl );
}
534*b1cdbd2cSJim Jagielski
535