1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_sdext.hxx"
26 
27 #if defined __SUNPRO_CC
28 #pragma disable_warn
29 #elif defined _MSC_VER
30 #pragma warning(push, 1)
31 #endif
32 
33 #include "pdfparse.hxx"
34 
35 // workaround windows compiler: do not include multi_pass.hpp
36 //#include <boost/spirit.hpp>
37 #include <boost/spirit/include/classic_core.hpp>
38 #include <boost/spirit/include/classic_utility.hpp>
39 #include <boost/spirit/include/classic_error_handling.hpp>
40 #include <boost/spirit/include/classic_file_iterator.hpp>
41 #include <boost/bind.hpp>
42 #include <string>
43 
44 #include <rtl/strbuf.hxx>
45 #include <rtl/memory.h>
46 #include <rtl/alloc.h>
47 
48 // disable warnings again because someone along the line has enabled them
49 #if defined __SUNPRO_CC
50 #pragma disable_warn
51 #elif defined _MSC_VER
52 #pragma warning(push, 1)
53 #endif
54 
55 using namespace boost::spirit;
56 using namespace rtl;
57 using namespace pdfparse;
58 
59 class StringEmitContext : public EmitContext
60 {
61     OStringBuffer m_aBuf;
62     public:
StringEmitContext()63     StringEmitContext() : EmitContext(), m_aBuf(256) {}
~StringEmitContext()64     virtual ~StringEmitContext() {}
write(const void * pBuf,unsigned int nLen)65     virtual bool write( const void* pBuf, unsigned int nLen ) throw()
66     {
67         m_aBuf.append( (const sal_Char*)pBuf, nLen );
68         return true;
69     }
getCurPos()70     virtual unsigned int getCurPos() throw() { return m_aBuf.getLength(); }
copyOrigBytes(unsigned int nOrigOffset,unsigned int nLen)71     virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw()
72     { return (nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) ) ?
73              write( m_aBuf.getStr() + nOrigOffset, nLen ) : false; }
readOrigBytes(unsigned int nOrigOffset,unsigned int nLen,void * pBuf)74     virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw()
75     {
76         if( nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) )
77         {
78             rtl_copyMemory( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
79             return nLen;
80         }
81         return 0;
82     }
83 
getString()84     OString getString() { return m_aBuf.makeStringAndClear(); }
85 };
86 
87 template< class iteratorT >
88 class PDFGrammar :  public grammar< PDFGrammar<iteratorT> >
89 {
90 public:
91 
PDFGrammar(const iteratorT & first)92     PDFGrammar( const iteratorT& first )
93     : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
~PDFGrammar()94     ~PDFGrammar()
95     {
96         if( !m_aObjectStack.empty() )
97             delete m_aObjectStack.front();
98     }
99 
100     double m_fDouble;
101     std::vector< unsigned int > m_aUIntStack;
102     std::vector< PDFEntry* >    m_aObjectStack;
103     rtl::OString                m_aErrorString;
104     iteratorT                   m_aGlobalBegin;
105 
106 public:
107     struct pdf_string_parser
108     {
109         typedef nil_t result_t;
110         template <typename ScannerT>
111         std::ptrdiff_t
operator ()PDFGrammar::pdf_string_parser112         operator()(ScannerT const& scan, result_t& result) const
113         {
114             std::ptrdiff_t len = 0;
115 
116             int nBraceLevel = 0;
117             while( ! scan.at_end() )
118             {
119                 char c = *scan;
120                 if( c == ')' )
121                 {
122                     nBraceLevel--;
123                     if( nBraceLevel < 0 )
124                         break;
125                 }
126                 else if( c == '(' )
127                     nBraceLevel++;
128                 else if( c == '\\' ) // ignore escaped braces
129                 {
130                     ++len;
131                     ++scan;
132                     if( scan.at_end() )
133                         break;
134                 }
135                 ++len;
136                 ++scan;
137             }
138             return scan.at_end() ? -1 : len;
139         }
140     };
141 
    /** Rule set of the grammar (the nested "definition" template that
        spirit's grammar class instantiates per scanner type).

        All semantic actions are bound to member functions of the owning
        PDFGrammar, which build the entry tree on m_aObjectStack.
    */
    template< typename ScannerT >
    struct definition
    {
        /** Build all rules.

            @param rSelf
            the owning grammar; constness is cast away because the bound
            actions modify the grammar's stacks
        */
        definition( const PDFGrammar<iteratorT>& rSelf )
        {
            PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );

            // comment: '%', then everything that is neither CR nor LF, then end of line
            // workaround workshop compiler: comment_p doesn't work
            // comment     = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
            comment     = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];

            // boolean: the literal "true" or "false"
            boolean     = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];

            // stream: everything from "stream" up to and including the first "endstream"
            // workaround workshop compiler: confix_p doesn't work
            //stream      = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
            stream      = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];

            // name: '/' followed by any characters except whitespace,
            // the listed delimiters and NUL; the action only sees the
            // part after the '/'
            name        = lexeme_d[
                            ch_p('/')
                            >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
                               [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];

            // workaround workshop compiler: confix_p doesn't work
            //stringtype  = ( confix_p("(",*anychar_p, ")") |
            //                confix_p("<",*xdigit_p,  ">") )
            //              [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];

            // string: either a literal string in parentheses (body matched
            // by pdf_string_parser) or hex digits in angle brackets; the
            // action receives the string including its delimiters
            stringtype  = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
                            ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
                          [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];

            null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];

            // object reference: object number, generation number, 'R';
            // both numbers are pushed on m_aUIntStack for pushObjectRef.
            // NOTE(review): the uint actions run before 'R' has been
            // matched, so a failed attempt presumably leaves values on
            // m_aUIntStack; the consumers only read the last two entries.
            #ifdef USE_ASSIGN_ACTOR
            objectref   = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
                            >> uint_p[push_back_a(pSelf->m_aUIntStack)]
                            >> ch_p('R')
                            >> eps_p
                          )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
            #else
            objectref   = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
                            >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
                            >> ch_p('R')
                            >> eps_p
                          )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
            #endif

            // simple value: object reference is listed before the real
            // number alternative; the parsed number is stored in m_fDouble
            // and picked up by pushDouble
            #ifdef USE_ASSIGN_ACTOR
            simple_type = objectref | name |
                          ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
                          [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
                          | stringtype | boolean | null_object;
            #else
            simple_type = objectref | name |
                          ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
                          [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
                          | stringtype | boolean | null_object;
            #endif

            // dictionary and array delimiters; nesting is tracked by the
            // actions on m_aObjectStack, not by the grammar
            dict_begin  = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
            dict_end    = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];

            array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
            array_end   = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];

            // object header: object number, generation number, "obj"
            #ifdef USE_ASSIGN_ACTOR
            object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
                          >> uint_p[push_back_a(pSelf->m_aUIntStack)]
                          >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
            #else
            object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
                          >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
                          >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
            #endif
            object_end  = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];

            // xref section: "xref", two numbers, then one or more entries
            // of 10 digits, blank, 5 digits, blank, 'n' or 'f' and two
            // whitespace characters; no action is attached
            xref        = str_p( "xref" ) >> uint_p >> uint_p
                          >> lexeme_d[
                                +( repeat_p(10)[digit_p]
                                   >> blank_p
                                   >> repeat_p(5)[digit_p]
                                   >> blank_p
                                   >> ( ch_p('n') | ch_p('f') )
                                   >> repeat_p(2)[space_p]
                                 ) ];

            // anything that may appear inside an object or trailer body
            dict_element= dict_begin | comment | simple_type
                          | array_begin | array_end | dict_end;

            // object: header, any number of elements, optional stream, "endobj"
            object      = object_begin
                          >> *dict_element
                          >> !stream
                          >> object_end;

            // trailer: "trailer", elements, "startxref", a number, "%%EOF"
            trailer     = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
                          >> *dict_element
                          >> str_p("startxref")
                          >> uint_p
                          >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];

            // start rule: optional "%PDF-<major>.<minor>" header line,
            // followed by any number of comments, objects and
            // xref/trailer pairs
            #ifdef USE_ASSIGN_ACTOR
            pdfrule     = ! (lexeme_d[
                                str_p( "%PDF-" )
                                >> uint_p[push_back_a(pSelf->m_aUIntStack)]
                                >> ch_p('.')
                                >> uint_p[push_back_a(pSelf->m_aUIntStack)]
                                >> *((~ch_p('\r') & ~ch_p('\n')))
                                >> eol_p
                             ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
                          >> *( comment | object | ( xref >> trailer ) );
            #else
            pdfrule     = ! (lexeme_d[
                                str_p( "%PDF-" )
                                >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
                                >> ch_p('.')
                                >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
                                >> *((~ch_p('\r') & ~ch_p('\n')))
                                >> eol_p
                             ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
                          >> *( comment | object | ( xref >> trailer ) );
            #endif
        }
        // note: "array" and "value" are declared but never assigned in the constructor
        rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
                         objectref, array, value, dict_element, dict_begin, dict_end,
                         array_begin, array_end, object, object_begin, object_end,
                         xref, trailer, pdfrule;

        /** @return the start rule of the grammar */
        const rule< ScannerT >& start() const { return pdfrule; }
    };
271 
272     #ifndef USE_ASSIGN_ACTOR
push_back_action_uint(unsigned int i)273     void push_back_action_uint( unsigned int i )
274     {
275         m_aUIntStack.push_back( i );
276     }
assign_action_double(double d)277     void assign_action_double( double d )
278     {
279         m_fDouble = d;
280     }
281     #endif
282 
parseError(const char * pMessage,iteratorT pLocation)283     void parseError( const char* pMessage, iteratorT pLocation )
284     {
285         throw_( pLocation, pMessage );
286     }
287 
iteratorToString(iteratorT first,iteratorT last) const288     rtl::OString iteratorToString( iteratorT first, iteratorT last ) const
289     {
290         rtl::OStringBuffer aStr( 32 );
291         while( first != last )
292         {
293             aStr.append( *first );
294             ++first;
295         }
296         return aStr.makeStringAndClear();
297     }
298 
haveFile(iteratorT pBegin,iteratorT)299     void haveFile( iteratorT pBegin, iteratorT /*pEnd*/ )
300     {
301         if( m_aObjectStack.empty() )
302         {
303             PDFFile* pFile = new PDFFile();
304             pFile->m_nMinor = m_aUIntStack.back();
305             m_aUIntStack.pop_back();
306             pFile->m_nMajor = m_aUIntStack.back();
307             m_aUIntStack.pop_back();
308             m_aObjectStack.push_back( pFile );
309         }
310         else
311             parseError( "found file header in unusual place", pBegin );
312     }
313 
pushComment(iteratorT first,iteratorT last)314     void pushComment( iteratorT first, iteratorT last )
315     {
316         // add a comment to the current stack element
317         PDFComment* pComment =
318             new PDFComment(iteratorToString(first,last));
319         if( m_aObjectStack.empty() )
320             m_aObjectStack.push_back( new PDFPart() );
321         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
322         if( pContainer == NULL )
323             parseError( "comment without container", first );
324         pContainer->m_aSubElements.push_back( pComment );
325     }
326 
insertNewValue(PDFEntry * pNewValue,iteratorT pPos)327     void insertNewValue( PDFEntry* pNewValue, iteratorT pPos )
328     {
329         PDFContainer* pContainer = NULL;
330         const char* pMsg = NULL;
331         if( ! m_aObjectStack.empty() &&
332             (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != NULL )
333         {
334             if( dynamic_cast<PDFDict*>(pContainer) == NULL      &&
335                 dynamic_cast<PDFArray*>(pContainer) == NULL )
336             {
337                 PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
338                 if( pObj )
339                 {
340                     if( pObj->m_pObject == NULL )
341                         pObj->m_pObject = pNewValue;
342                     else
343                     {
344                         pMsg = "second value for object";
345                         pContainer = NULL;
346                     }
347                 }
348                 else if( dynamic_cast<PDFDict*>(pNewValue) )
349                 {
350                     PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
351                     if( pTrailer )
352                     {
353                         if( pTrailer->m_pDict == NULL )
354                             pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue);
355                         else
356                             pContainer = NULL;
357                     }
358                     else
359                         pContainer = NULL;
360                 }
361                 else
362                     pContainer = NULL;
363             }
364         }
365         if( pContainer )
366             pContainer->m_aSubElements.push_back( pNewValue );
367         else
368         {
369             if( ! pMsg )
370             {
371                 if( dynamic_cast<PDFContainer*>(pNewValue) )
372                     pMsg = "array without container";
373                 else
374                     pMsg = "value without container";
375             }
376             delete pNewValue;
377             parseError( pMsg, pPos );
378         }
379     }
380 
pushName(iteratorT first,iteratorT last)381     void pushName( iteratorT first, iteratorT last )
382     {
383         insertNewValue( new PDFName(iteratorToString(first,last)), first );
384     }
385 
pushDouble(iteratorT first,iteratorT)386     void pushDouble( iteratorT first, iteratorT /*last*/ )
387     {
388         insertNewValue( new PDFNumber(m_fDouble), first );
389     }
390 
pushString(iteratorT first,iteratorT last)391     void pushString( iteratorT first, iteratorT last )
392     {
393         insertNewValue( new PDFString(iteratorToString(first,last)), first );
394     }
395 
pushBool(iteratorT first,iteratorT last)396     void pushBool( iteratorT first, iteratorT last )
397     {
398         insertNewValue( new PDFBool( (last-first == 4) ), first );
399     }
400 
pushNull(iteratorT first,iteratorT)401     void pushNull( iteratorT first, iteratorT )
402     {
403         insertNewValue( new PDFNull(), first );
404     }
405 
406 
beginObject(iteratorT first,iteratorT)407     void beginObject( iteratorT first, iteratorT /*last*/ )
408     {
409         if( m_aObjectStack.empty() )
410             m_aObjectStack.push_back( new PDFPart() );
411 
412         unsigned int nGeneration = m_aUIntStack.back();
413         m_aUIntStack.pop_back();
414         unsigned int nObject = m_aUIntStack.back();
415         m_aUIntStack.pop_back();
416 
417         PDFObject* pObj = new PDFObject( nObject, nGeneration );
418         pObj->m_nOffset = first - m_aGlobalBegin;
419 
420         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
421         if( pContainer &&
422             ( dynamic_cast<PDFFile*>(pContainer) ||
423               dynamic_cast<PDFPart*>(pContainer) ) )
424         {
425             pContainer->m_aSubElements.push_back( pObj );
426             m_aObjectStack.push_back( pObj );
427         }
428         else
429             parseError( "object in wrong place", first );
430     }
431 
endObject(iteratorT first,iteratorT)432     void endObject( iteratorT first, iteratorT )
433     {
434         if( m_aObjectStack.empty() )
435             parseError( "endobj without obj", first );
436         else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL )
437             parseError( "spurious endobj", first );
438         else
439             m_aObjectStack.pop_back();
440     }
441 
pushObjectRef(iteratorT first,iteratorT)442     void pushObjectRef( iteratorT first, iteratorT )
443     {
444         unsigned int nGeneration = m_aUIntStack.back();
445         m_aUIntStack.pop_back();
446         unsigned int nObject = m_aUIntStack.back();
447         m_aUIntStack.pop_back();
448         insertNewValue( new PDFObjectRef(nObject,nGeneration), first );
449     }
450 
beginDict(iteratorT first,iteratorT)451     void beginDict( iteratorT first, iteratorT )
452     {
453         PDFDict* pDict = new PDFDict();
454         pDict->m_nOffset = first - m_aGlobalBegin;
455 
456         insertNewValue( pDict, first );
457         // will not come here if insertion fails (exception)
458         m_aObjectStack.push_back( pDict );
459     }
endDict(iteratorT first,iteratorT)460     void endDict( iteratorT first, iteratorT )
461     {
462         PDFDict* pDict = NULL;
463         if( m_aObjectStack.empty() )
464             parseError( "dictionary end without begin", first );
465         else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == NULL )
466             parseError( "spurious dictionary end", first );
467         else
468             m_aObjectStack.pop_back();
469 
470         PDFEntry* pOffender = pDict->buildMap();
471         if( pOffender )
472         {
473             StringEmitContext aCtx;
474             aCtx.write( "offending dictionary element: ", 30 );
475             pOffender->emit( aCtx );
476             m_aErrorString = aCtx.getString();
477             parseError( m_aErrorString.getStr(), first );
478         }
479     }
480 
beginArray(iteratorT first,iteratorT)481     void beginArray( iteratorT first, iteratorT )
482     {
483         PDFArray* pArray = new PDFArray();
484         pArray->m_nOffset = first - m_aGlobalBegin;
485 
486         insertNewValue( pArray, first );
487         // will not come here if insertion fails (exception)
488         m_aObjectStack.push_back( pArray );
489     }
490 
endArray(iteratorT first,iteratorT)491     void endArray( iteratorT first, iteratorT )
492     {
493         if( m_aObjectStack.empty() )
494             parseError( "array end without begin", first );
495         else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL )
496             parseError( "spurious array end", first );
497         else
498             m_aObjectStack.pop_back();
499     }
500 
emitStream(iteratorT first,iteratorT last)501     void emitStream( iteratorT first, iteratorT last )
502     {
503         if( m_aObjectStack.empty() )
504             parseError( "stream without object", first );
505         PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
506         if( pObj && pObj->m_pObject )
507         {
508             if( pObj->m_pStream )
509                 parseError( "multiple streams in object", first );
510 
511             PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
512             if( pDict )
513             {
514                 PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
515 
516                 pObj->m_pStream = pStream;
517                 pObj->m_aSubElements.push_back( pStream );
518             }
519         }
520         else
521             parseError( "stream without object", first );
522     }
523 
beginTrailer(iteratorT first,iteratorT)524     void beginTrailer( iteratorT first, iteratorT )
525     {
526         if( m_aObjectStack.empty() )
527             m_aObjectStack.push_back( new PDFPart() );
528 
529         PDFTrailer* pTrailer = new PDFTrailer();
530         pTrailer->m_nOffset = first - m_aGlobalBegin;
531 
532         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
533         if( pContainer &&
534             ( dynamic_cast<PDFFile*>(pContainer) ||
535               dynamic_cast<PDFPart*>(pContainer) ) )
536         {
537             pContainer->m_aSubElements.push_back( pTrailer );
538             m_aObjectStack.push_back( pTrailer );
539         }
540         else
541             parseError( "trailer in wrong place", first );
542     }
543 
endTrailer(iteratorT first,iteratorT)544     void endTrailer( iteratorT first, iteratorT )
545     {
546         if( m_aObjectStack.empty() )
547             parseError( "%%EOF without trailer", first );
548         else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL )
549             parseError( "spurious %%EOF", first );
550         else
551             m_aObjectStack.pop_back();
552     }
553 };
554 
read(const char * pBuffer,unsigned int nLen)555 PDFEntry* PDFReader::read( const char* pBuffer, unsigned int nLen )
556 {
557     PDFGrammar<const char*> aGrammar( pBuffer );
558 
559     try
560     {
561         boost::spirit::parse_info<const char*> aInfo =
562             boost::spirit::parse( pBuffer,
563                                   pBuffer+nLen,
564                                   aGrammar,
565                                   boost::spirit::space_p );
566         #if OSL_DEBUG_LEVEL > 1
567         fprintf( stderr, "parseinfo: stop = %p (buff=%p, offset = %d), hit = %s, full = %s, length = %d\n",
568                  aInfo.stop, pBuffer, aInfo.stop - pBuffer,
569                  aInfo.hit ? "true" : "false",
570                  aInfo.full ? "true" : "false",
571                  (int)aInfo.length );
572         #endif
573     }
574     catch( parser_error<const char*, const char*>& rError )
575     {
576         #if OSL_DEBUG_LEVEL > 1
577         fprintf( stderr, "parse error: %s at buffer pos %u\nobject stack:\n",
578                  rError.descriptor, rError.where - pBuffer );
579         unsigned int nElem = aGrammar.m_aObjectStack.size();
580         for( unsigned int i = 0; i < nElem; i++ )
581         {
582             fprintf( stderr, "   %s\n", typeid( *(aGrammar.m_aObjectStack[i]) ).name() );
583         }
584         #endif
585     }
586 
587     PDFEntry* pRet = NULL;
588     unsigned int nEntries = aGrammar.m_aObjectStack.size();
589     if( nEntries == 1 )
590     {
591         pRet = aGrammar.m_aObjectStack.back();
592         aGrammar.m_aObjectStack.pop_back();
593     }
594     #if OSL_DEBUG_LEVEL > 1
595     else if( nEntries > 1 )
596         fprintf( stderr, "error got %u stack objects in parse\n", nEntries );
597     #endif
598 
599     return pRet;
600 }
601 
read(const char * pFileName)602 PDFEntry* PDFReader::read( const char* pFileName )
603 {
604     #ifdef WIN32
605     /* #i106583#
606        since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
607        C++ stdlib istream_iterator does not allow "-" apparently
608        using spirit 2.0 doesn't work in our environment with the MSC
609 
610        So for the time being bite the bullet and read the whole file.
611        FIXME: give Spirit 2.x another try when we upgrade boost again.
612     */
613     PDFEntry* pRet = NULL;
614     FILE* fp = fopen( pFileName, "rb" );
615     if( fp )
616     {
617         fseek( fp, 0, SEEK_END );
618         unsigned int nLen = (unsigned int)ftell( fp );
619         fseek( fp, 0, SEEK_SET );
620         char* pBuf = (char*)rtl_allocateMemory( nLen );
621         if( pBuf )
622         {
623             fread( pBuf, 1, nLen, fp );
624             pRet = read( pBuf, nLen );
625             rtl_freeMemory( pBuf );
626         }
627         fclose( fp );
628     }
629     return pRet;
630     #else
631     file_iterator<> file_start( pFileName );
632     if( ! file_start )
633         return NULL;
634     file_iterator<> file_end = file_start.make_end();
635     PDFGrammar< file_iterator<> > aGrammar( file_start );
636 
637     try
638     {
639         boost::spirit::parse_info< file_iterator<> > aInfo =
640             boost::spirit::parse( file_start,
641                                   file_end,
642                                   aGrammar,
643                                   boost::spirit::space_p );
644         #if OSL_DEBUG_LEVEL > 1
645         fprintf( stderr, "parseinfo: stop at offset = %d, hit = %s, full = %s, length = %d\n",
646                  aInfo.stop - file_start,
647                  aInfo.hit ? "true" : "false",
648                  aInfo.full ? "true" : "false",
649                  (int)aInfo.length );
650         #endif
651     }
652     catch( parser_error< const char*, file_iterator<> >& rError )
653     {
654         #if OSL_DEBUG_LEVEL > 1
655         fprintf( stderr, "parse error: %s at buffer pos %u\nobject stack:\n",
656                  rError.descriptor, rError.where - file_start );
657         unsigned int nElem = aGrammar.m_aObjectStack.size();
658         for( unsigned int i = 0; i < nElem; i++ )
659         {
660             fprintf( stderr, "   %s\n", typeid( *(aGrammar.m_aObjectStack[i]) ).name() );
661         }
662         #endif
663     }
664 
665     PDFEntry* pRet = NULL;
666     unsigned int nEntries = aGrammar.m_aObjectStack.size();
667     if( nEntries == 1 )
668     {
669         pRet = aGrammar.m_aObjectStack.back();
670         aGrammar.m_aObjectStack.pop_back();
671     }
672     #if OSL_DEBUG_LEVEL > 1
673     else if( nEntries > 1 )
674     {
675         fprintf( stderr, "error got %u stack objects in parse\n", nEntries );
676         for( unsigned int i = 0; i < nEntries; i++ )
677         {
678             fprintf( stderr, "%s\n", typeid(*aGrammar.m_aObjectStack[i]).name() );
679             PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
680             if( pObj )
681                 fprintf( stderr, "   -> object %d generation %d\n", pObj->m_nNumber, pObj->m_nGeneration );
682             else
683                 fprintf( stderr, "(type %s)\n", typeid(*aGrammar.m_aObjectStack[i]).name() );
684         }
685     }
686     #endif
687     return pRet;
688     #endif // WIN32
689 }
690 
691 #if defined __SUNPRO_CC
692 #pragma enable_warn
693 #elif defined _MSC_VER
694 #pragma warning(pop)
695 #endif
696 
697 
698