1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_sdext.hxx"
26
27 #if defined __SUNPRO_CC
28 #pragma disable_warn
29 #elif defined _MSC_VER
30 #pragma warning(push, 1)
31 #endif
32
33 #include "pdfparse.hxx"
34
35 // workaround windows compiler: do not include multi_pass.hpp
36 //#include <boost/spirit.hpp>
37 #include <boost/spirit/include/classic_core.hpp>
38 #include <boost/spirit/include/classic_utility.hpp>
39 #include <boost/spirit/include/classic_error_handling.hpp>
40 #include <boost/spirit/include/classic_file_iterator.hpp>
41 #include <boost/bind.hpp>
42 #include <string>
43
44 #include <rtl/strbuf.hxx>
45 #include <rtl/memory.h>
46 #include <rtl/alloc.h>
47
48 // disable warnings again because someone along the line has enabled them
49 #if defined __SUNPRO_CC
50 #pragma disable_warn
51 #elif defined _MSC_VER
52 #pragma warning(push, 1)
53 #endif
54
55 using namespace boost::spirit;
56 using namespace rtl;
57 using namespace pdfparse;
58
59 class StringEmitContext : public EmitContext
60 {
61 OStringBuffer m_aBuf;
62 public:
StringEmitContext()63 StringEmitContext() : EmitContext(), m_aBuf(256) {}
~StringEmitContext()64 virtual ~StringEmitContext() {}
write(const void * pBuf,unsigned int nLen)65 virtual bool write( const void* pBuf, unsigned int nLen ) throw()
66 {
67 m_aBuf.append( (const sal_Char*)pBuf, nLen );
68 return true;
69 }
getCurPos()70 virtual unsigned int getCurPos() throw() { return m_aBuf.getLength(); }
copyOrigBytes(unsigned int nOrigOffset,unsigned int nLen)71 virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw()
72 { return (nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) ) ?
73 write( m_aBuf.getStr() + nOrigOffset, nLen ) : false; }
readOrigBytes(unsigned int nOrigOffset,unsigned int nLen,void * pBuf)74 virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw()
75 {
76 if( nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) )
77 {
78 rtl_copyMemory( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
79 return nLen;
80 }
81 return 0;
82 }
83
getString()84 OString getString() { return m_aBuf.makeStringAndClear(); }
85 };
86
87 template< class iteratorT >
88 class PDFGrammar : public grammar< PDFGrammar<iteratorT> >
89 {
90 public:
91
PDFGrammar(const iteratorT & first)92 PDFGrammar( const iteratorT& first )
93 : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
/** Delete the bottom element of the object stack, if there is one.

    Only that element is deleted here.
    NOTE(review): presumably it owns everything above it on the stack via
    m_aSubElements - confirm against the PDFContainer destructor.
*/
~PDFGrammar()
{
    PDFEntry* pBottom = m_aObjectStack.empty() ? NULL : m_aObjectStack.front();
    delete pBottom; // deleting NULL does nothing
}
99
100 double m_fDouble;
101 std::vector< unsigned int > m_aUIntStack;
102 std::vector< PDFEntry* > m_aObjectStack;
103 rtl::OString m_aErrorString;
104 iteratorT m_aGlobalBegin;
105
106 public:
107 struct pdf_string_parser
108 {
109 typedef nil_t result_t;
110 template <typename ScannerT>
111 std::ptrdiff_t
operator ()PDFGrammar::pdf_string_parser112 operator()(ScannerT const& scan, result_t& result) const
113 {
114 std::ptrdiff_t len = 0;
115
116 int nBraceLevel = 0;
117 while( ! scan.at_end() )
118 {
119 char c = *scan;
120 if( c == ')' )
121 {
122 nBraceLevel--;
123 if( nBraceLevel < 0 )
124 break;
125 }
126 else if( c == '(' )
127 nBraceLevel++;
128 else if( c == '\\' ) // ignore escaped braces
129 {
130 ++len;
131 ++scan;
132 if( scan.at_end() )
133 break;
134 }
135 ++len;
136 ++scan;
137 }
138 return scan.at_end() ? -1 : len;
139 }
140 };
141
142 template< typename ScannerT >
143 struct definition
144 {
definitionPDFGrammar::definition145 definition( const PDFGrammar<iteratorT>& rSelf )
146 {
147 PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
148
149 // workaround workshop compiler: comment_p doesn't work
150 // comment = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
151 comment = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
152
153 boolean = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
154
155 // workaround workshop compiler: confix_p doesn't work
156 //stream = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
157 stream = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
158
159 name = lexeme_d[
160 ch_p('/')
161 >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
162 [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
163
164 // workaround workshop compiler: confix_p doesn't work
165 //stringtype = ( confix_p("(",*anychar_p, ")") |
166 // confix_p("<",*xdigit_p, ">") )
167 // [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
168
169 stringtype = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
170 ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
171 [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
172
173 null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
174
175 #ifdef USE_ASSIGN_ACTOR
176 objectref = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
177 >> uint_p[push_back_a(pSelf->m_aUIntStack)]
178 >> ch_p('R')
179 >> eps_p
180 )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
181 #else
182 objectref = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
183 >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
184 >> ch_p('R')
185 >> eps_p
186 )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
187 #endif
188
189 #ifdef USE_ASSIGN_ACTOR
190 simple_type = objectref | name |
191 ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
192 [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
193 | stringtype | boolean | null_object;
194 #else
195 simple_type = objectref | name |
196 ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
197 [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
198 | stringtype | boolean | null_object;
199 #endif
200
201 dict_begin = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
202 dict_end = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
203
204 array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
205 array_end = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
206
207 #ifdef USE_ASSIGN_ACTOR
208 object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
209 >> uint_p[push_back_a(pSelf->m_aUIntStack)]
210 >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
211 #else
212 object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
213 >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
214 >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
215 #endif
216 object_end = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
217
218 xref = str_p( "xref" ) >> uint_p >> uint_p
219 >> lexeme_d[
220 +( repeat_p(10)[digit_p]
221 >> blank_p
222 >> repeat_p(5)[digit_p]
223 >> blank_p
224 >> ( ch_p('n') | ch_p('f') )
225 >> repeat_p(2)[space_p]
226 ) ];
227
228 dict_element= dict_begin | comment | simple_type
229 | array_begin | array_end | dict_end;
230
231 object = object_begin
232 >> *dict_element
233 >> !stream
234 >> object_end;
235
236 trailer = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
237 >> *dict_element
238 >> str_p("startxref")
239 >> uint_p
240 >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
241
242 #ifdef USE_ASSIGN_ACTOR
243 pdfrule = ! (lexeme_d[
244 str_p( "%PDF-" )
245 >> uint_p[push_back_a(pSelf->m_aUIntStack)]
246 >> ch_p('.')
247 >> uint_p[push_back_a(pSelf->m_aUIntStack)]
248 >> *((~ch_p('\r') & ~ch_p('\n')))
249 >> eol_p
250 ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
251 >> *( comment | object | ( xref >> trailer ) );
252 #else
253 pdfrule = ! (lexeme_d[
254 str_p( "%PDF-" )
255 >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
256 >> ch_p('.')
257 >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
258 >> *((~ch_p('\r') & ~ch_p('\n')))
259 >> eol_p
260 ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
261 >> *( comment | object | ( xref >> trailer ) );
262 #endif
263 }
264 rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
265 objectref, array, value, dict_element, dict_begin, dict_end,
266 array_begin, array_end, object, object_begin, object_end,
267 xref, trailer, pdfrule;
268
startPDFGrammar::definition269 const rule< ScannerT >& start() const { return pdfrule; }
270 };
271
272 #ifndef USE_ASSIGN_ACTOR
push_back_action_uint(unsigned int i)273 void push_back_action_uint( unsigned int i )
274 {
275 m_aUIntStack.push_back( i );
276 }
assign_action_double(double d)277 void assign_action_double( double d )
278 {
279 m_fDouble = d;
280 }
281 #endif
282
parseError(const char * pMessage,iteratorT pLocation)283 void parseError( const char* pMessage, iteratorT pLocation )
284 {
285 throw_( pLocation, pMessage );
286 }
287
iteratorToString(iteratorT first,iteratorT last) const288 rtl::OString iteratorToString( iteratorT first, iteratorT last ) const
289 {
290 rtl::OStringBuffer aStr( 32 );
291 while( first != last )
292 {
293 aStr.append( *first );
294 ++first;
295 }
296 return aStr.makeStringAndClear();
297 }
298
haveFile(iteratorT pBegin,iteratorT)299 void haveFile( iteratorT pBegin, iteratorT /*pEnd*/ )
300 {
301 if( m_aObjectStack.empty() )
302 {
303 PDFFile* pFile = new PDFFile();
304 pFile->m_nMinor = m_aUIntStack.back();
305 m_aUIntStack.pop_back();
306 pFile->m_nMajor = m_aUIntStack.back();
307 m_aUIntStack.pop_back();
308 m_aObjectStack.push_back( pFile );
309 }
310 else
311 parseError( "found file header in unusual place", pBegin );
312 }
313
pushComment(iteratorT first,iteratorT last)314 void pushComment( iteratorT first, iteratorT last )
315 {
316 // add a comment to the current stack element
317 PDFComment* pComment =
318 new PDFComment(iteratorToString(first,last));
319 if( m_aObjectStack.empty() )
320 m_aObjectStack.push_back( new PDFPart() );
321 PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
322 if( pContainer == NULL )
323 parseError( "comment without container", first );
324 pContainer->m_aSubElements.push_back( pComment );
325 }
326
insertNewValue(PDFEntry * pNewValue,iteratorT pPos)327 void insertNewValue( PDFEntry* pNewValue, iteratorT pPos )
328 {
329 PDFContainer* pContainer = NULL;
330 const char* pMsg = NULL;
331 if( ! m_aObjectStack.empty() &&
332 (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != NULL )
333 {
334 if( dynamic_cast<PDFDict*>(pContainer) == NULL &&
335 dynamic_cast<PDFArray*>(pContainer) == NULL )
336 {
337 PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
338 if( pObj )
339 {
340 if( pObj->m_pObject == NULL )
341 pObj->m_pObject = pNewValue;
342 else
343 {
344 pMsg = "second value for object";
345 pContainer = NULL;
346 }
347 }
348 else if( dynamic_cast<PDFDict*>(pNewValue) )
349 {
350 PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
351 if( pTrailer )
352 {
353 if( pTrailer->m_pDict == NULL )
354 pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue);
355 else
356 pContainer = NULL;
357 }
358 else
359 pContainer = NULL;
360 }
361 else
362 pContainer = NULL;
363 }
364 }
365 if( pContainer )
366 pContainer->m_aSubElements.push_back( pNewValue );
367 else
368 {
369 if( ! pMsg )
370 {
371 if( dynamic_cast<PDFContainer*>(pNewValue) )
372 pMsg = "array without container";
373 else
374 pMsg = "value without container";
375 }
376 delete pNewValue;
377 parseError( pMsg, pPos );
378 }
379 }
380
pushName(iteratorT first,iteratorT last)381 void pushName( iteratorT first, iteratorT last )
382 {
383 insertNewValue( new PDFName(iteratorToString(first,last)), first );
384 }
385
pushDouble(iteratorT first,iteratorT)386 void pushDouble( iteratorT first, iteratorT /*last*/ )
387 {
388 insertNewValue( new PDFNumber(m_fDouble), first );
389 }
390
pushString(iteratorT first,iteratorT last)391 void pushString( iteratorT first, iteratorT last )
392 {
393 insertNewValue( new PDFString(iteratorToString(first,last)), first );
394 }
395
pushBool(iteratorT first,iteratorT last)396 void pushBool( iteratorT first, iteratorT last )
397 {
398 insertNewValue( new PDFBool( (last-first == 4) ), first );
399 }
400
pushNull(iteratorT first,iteratorT)401 void pushNull( iteratorT first, iteratorT )
402 {
403 insertNewValue( new PDFNull(), first );
404 }
405
406
beginObject(iteratorT first,iteratorT)407 void beginObject( iteratorT first, iteratorT /*last*/ )
408 {
409 if( m_aObjectStack.empty() )
410 m_aObjectStack.push_back( new PDFPart() );
411
412 unsigned int nGeneration = m_aUIntStack.back();
413 m_aUIntStack.pop_back();
414 unsigned int nObject = m_aUIntStack.back();
415 m_aUIntStack.pop_back();
416
417 PDFObject* pObj = new PDFObject( nObject, nGeneration );
418 pObj->m_nOffset = first - m_aGlobalBegin;
419
420 PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
421 if( pContainer &&
422 ( dynamic_cast<PDFFile*>(pContainer) ||
423 dynamic_cast<PDFPart*>(pContainer) ) )
424 {
425 pContainer->m_aSubElements.push_back( pObj );
426 m_aObjectStack.push_back( pObj );
427 }
428 else
429 parseError( "object in wrong place", first );
430 }
431
endObject(iteratorT first,iteratorT)432 void endObject( iteratorT first, iteratorT )
433 {
434 if( m_aObjectStack.empty() )
435 parseError( "endobj without obj", first );
436 else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL )
437 parseError( "spurious endobj", first );
438 else
439 m_aObjectStack.pop_back();
440 }
441
pushObjectRef(iteratorT first,iteratorT)442 void pushObjectRef( iteratorT first, iteratorT )
443 {
444 unsigned int nGeneration = m_aUIntStack.back();
445 m_aUIntStack.pop_back();
446 unsigned int nObject = m_aUIntStack.back();
447 m_aUIntStack.pop_back();
448 insertNewValue( new PDFObjectRef(nObject,nGeneration), first );
449 }
450
beginDict(iteratorT first,iteratorT)451 void beginDict( iteratorT first, iteratorT )
452 {
453 PDFDict* pDict = new PDFDict();
454 pDict->m_nOffset = first - m_aGlobalBegin;
455
456 insertNewValue( pDict, first );
457 // will not come here if insertion fails (exception)
458 m_aObjectStack.push_back( pDict );
459 }
endDict(iteratorT first,iteratorT)460 void endDict( iteratorT first, iteratorT )
461 {
462 PDFDict* pDict = NULL;
463 if( m_aObjectStack.empty() )
464 parseError( "dictionary end without begin", first );
465 else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == NULL )
466 parseError( "spurious dictionary end", first );
467 else
468 m_aObjectStack.pop_back();
469
470 PDFEntry* pOffender = pDict->buildMap();
471 if( pOffender )
472 {
473 StringEmitContext aCtx;
474 aCtx.write( "offending dictionary element: ", 30 );
475 pOffender->emit( aCtx );
476 m_aErrorString = aCtx.getString();
477 parseError( m_aErrorString.getStr(), first );
478 }
479 }
480
beginArray(iteratorT first,iteratorT)481 void beginArray( iteratorT first, iteratorT )
482 {
483 PDFArray* pArray = new PDFArray();
484 pArray->m_nOffset = first - m_aGlobalBegin;
485
486 insertNewValue( pArray, first );
487 // will not come here if insertion fails (exception)
488 m_aObjectStack.push_back( pArray );
489 }
490
endArray(iteratorT first,iteratorT)491 void endArray( iteratorT first, iteratorT )
492 {
493 if( m_aObjectStack.empty() )
494 parseError( "array end without begin", first );
495 else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL )
496 parseError( "spurious array end", first );
497 else
498 m_aObjectStack.pop_back();
499 }
500
emitStream(iteratorT first,iteratorT last)501 void emitStream( iteratorT first, iteratorT last )
502 {
503 if( m_aObjectStack.empty() )
504 parseError( "stream without object", first );
505 PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
506 if( pObj && pObj->m_pObject )
507 {
508 if( pObj->m_pStream )
509 parseError( "multiple streams in object", first );
510
511 PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
512 if( pDict )
513 {
514 PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
515
516 pObj->m_pStream = pStream;
517 pObj->m_aSubElements.push_back( pStream );
518 }
519 }
520 else
521 parseError( "stream without object", first );
522 }
523
beginTrailer(iteratorT first,iteratorT)524 void beginTrailer( iteratorT first, iteratorT )
525 {
526 if( m_aObjectStack.empty() )
527 m_aObjectStack.push_back( new PDFPart() );
528
529 PDFTrailer* pTrailer = new PDFTrailer();
530 pTrailer->m_nOffset = first - m_aGlobalBegin;
531
532 PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
533 if( pContainer &&
534 ( dynamic_cast<PDFFile*>(pContainer) ||
535 dynamic_cast<PDFPart*>(pContainer) ) )
536 {
537 pContainer->m_aSubElements.push_back( pTrailer );
538 m_aObjectStack.push_back( pTrailer );
539 }
540 else
541 parseError( "trailer in wrong place", first );
542 }
543
endTrailer(iteratorT first,iteratorT)544 void endTrailer( iteratorT first, iteratorT )
545 {
546 if( m_aObjectStack.empty() )
547 parseError( "%%EOF without trailer", first );
548 else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL )
549 parseError( "spurious %%EOF", first );
550 else
551 m_aObjectStack.pop_back();
552 }
553 };
554
read(const char * pBuffer,unsigned int nLen)555 PDFEntry* PDFReader::read( const char* pBuffer, unsigned int nLen )
556 {
557 PDFGrammar<const char*> aGrammar( pBuffer );
558
559 try
560 {
561 boost::spirit::parse_info<const char*> aInfo =
562 boost::spirit::parse( pBuffer,
563 pBuffer+nLen,
564 aGrammar,
565 boost::spirit::space_p );
566 #if OSL_DEBUG_LEVEL > 1
567 fprintf( stderr, "parseinfo: stop = %p (buff=%p, offset = %d), hit = %s, full = %s, length = %d\n",
568 aInfo.stop, pBuffer, aInfo.stop - pBuffer,
569 aInfo.hit ? "true" : "false",
570 aInfo.full ? "true" : "false",
571 (int)aInfo.length );
572 #endif
573 }
574 catch( parser_error<const char*, const char*>& rError )
575 {
576 #if OSL_DEBUG_LEVEL > 1
577 fprintf( stderr, "parse error: %s at buffer pos %u\nobject stack:\n",
578 rError.descriptor, rError.where - pBuffer );
579 unsigned int nElem = aGrammar.m_aObjectStack.size();
580 for( unsigned int i = 0; i < nElem; i++ )
581 {
582 fprintf( stderr, " %s\n", typeid( *(aGrammar.m_aObjectStack[i]) ).name() );
583 }
584 #endif
585 }
586
587 PDFEntry* pRet = NULL;
588 unsigned int nEntries = aGrammar.m_aObjectStack.size();
589 if( nEntries == 1 )
590 {
591 pRet = aGrammar.m_aObjectStack.back();
592 aGrammar.m_aObjectStack.pop_back();
593 }
594 #if OSL_DEBUG_LEVEL > 1
595 else if( nEntries > 1 )
596 fprintf( stderr, "error got %u stack objects in parse\n", nEntries );
597 #endif
598
599 return pRet;
600 }
601
read(const char * pFileName)602 PDFEntry* PDFReader::read( const char* pFileName )
603 {
604 #ifdef WIN32
605 /* #i106583#
606 since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
607 C++ stdlib istream_iterator does not allow "-" apparently
608 using spirit 2.0 doesn't work in our environment with the MSC
609
610 So for the time being bite the bullet and read the whole file.
611 FIXME: give Spirit 2.x another try when we upgrade boost again.
612 */
613 PDFEntry* pRet = NULL;
614 FILE* fp = fopen( pFileName, "rb" );
615 if( fp )
616 {
617 fseek( fp, 0, SEEK_END );
618 unsigned int nLen = (unsigned int)ftell( fp );
619 fseek( fp, 0, SEEK_SET );
620 char* pBuf = (char*)rtl_allocateMemory( nLen );
621 if( pBuf )
622 {
623 fread( pBuf, 1, nLen, fp );
624 pRet = read( pBuf, nLen );
625 rtl_freeMemory( pBuf );
626 }
627 fclose( fp );
628 }
629 return pRet;
630 #else
631 file_iterator<> file_start( pFileName );
632 if( ! file_start )
633 return NULL;
634 file_iterator<> file_end = file_start.make_end();
635 PDFGrammar< file_iterator<> > aGrammar( file_start );
636
637 try
638 {
639 boost::spirit::parse_info< file_iterator<> > aInfo =
640 boost::spirit::parse( file_start,
641 file_end,
642 aGrammar,
643 boost::spirit::space_p );
644 #if OSL_DEBUG_LEVEL > 1
645 fprintf( stderr, "parseinfo: stop at offset = %d, hit = %s, full = %s, length = %d\n",
646 aInfo.stop - file_start,
647 aInfo.hit ? "true" : "false",
648 aInfo.full ? "true" : "false",
649 (int)aInfo.length );
650 #endif
651 }
652 catch( parser_error< const char*, file_iterator<> >& rError )
653 {
654 #if OSL_DEBUG_LEVEL > 1
655 fprintf( stderr, "parse error: %s at buffer pos %u\nobject stack:\n",
656 rError.descriptor, rError.where - file_start );
657 unsigned int nElem = aGrammar.m_aObjectStack.size();
658 for( unsigned int i = 0; i < nElem; i++ )
659 {
660 fprintf( stderr, " %s\n", typeid( *(aGrammar.m_aObjectStack[i]) ).name() );
661 }
662 #endif
663 }
664
665 PDFEntry* pRet = NULL;
666 unsigned int nEntries = aGrammar.m_aObjectStack.size();
667 if( nEntries == 1 )
668 {
669 pRet = aGrammar.m_aObjectStack.back();
670 aGrammar.m_aObjectStack.pop_back();
671 }
672 #if OSL_DEBUG_LEVEL > 1
673 else if( nEntries > 1 )
674 {
675 fprintf( stderr, "error got %u stack objects in parse\n", nEntries );
676 for( unsigned int i = 0; i < nEntries; i++ )
677 {
678 fprintf( stderr, "%s\n", typeid(*aGrammar.m_aObjectStack[i]).name() );
679 PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
680 if( pObj )
681 fprintf( stderr, " -> object %d generation %d\n", pObj->m_nNumber, pObj->m_nGeneration );
682 else
683 fprintf( stderr, "(type %s)\n", typeid(*aGrammar.m_aObjectStack[i]).name() );
684 }
685 }
686 #endif
687 return pRet;
688 #endif // WIN32
689 }
690
691 #if defined __SUNPRO_CC
692 #pragma enable_warn
693 #elif defined _MSC_VER
694 #pragma warning(pop)
695 #endif
696
697
698