1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 //#include <stdlib.h> 29 //#include <sal/alloca.h> 30 31 #include <boost/scoped_ptr.hpp> 32 33 #include <osl/diagnose.h> 34 #include <rtl/ustrbuf.hxx> 35 36 #include <com/sun/star/lang/DisposedException.hpp> 37 #include <com/sun/star/xml/sax/XFastContextHandler.hpp> 38 #include <com/sun/star/xml/sax/SAXParseException.hpp> 39 #include <com/sun/star/xml/sax/FastToken.hpp> 40 41 #include "fastparser.hxx" 42 43 #include <string.h> 44 45 using ::rtl::OString; 46 using ::rtl::OUString; 47 using ::rtl::OUStringBuffer; 48 using namespace ::std; 49 using namespace ::osl; 50 using namespace ::cppu; 51 using namespace ::com::sun::star::uno; 52 using namespace ::com::sun::star::lang; 53 using namespace ::com::sun::star::xml::sax; 54 //using namespace ::com::sun::star::util; 55 using namespace ::com::sun::star::io; 56 57 namespace sax_fastparser { 58 59 // -------------------------------------------------------------------- 60 61 struct SaxContextImpl 62 { 63 Reference< XFastContextHandler > mxContext; 64 sal_uInt32 mnNamespaceCount; 65 sal_Int32 mnElementToken; 66 OUString maNamespace; 67 OUString maElementName; 68 69 SaxContextImpl() { mnNamespaceCount = 0; mnElementToken = 0; } 70 SaxContextImpl( const SaxContextImplPtr& p ) { mnNamespaceCount = p->mnNamespaceCount; mnElementToken = p->mnElementToken; maNamespace = p->maNamespace; } 71 }; 72 73 // -------------------------------------------------------------------- 74 75 struct NamespaceDefine 76 { 77 OString maPrefix; 78 sal_Int32 mnToken; 79 OUString maNamespaceURL; 80 81 NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {} 82 }; 83 84 // -------------------------------------------------------------------- 85 // FastLocatorImpl 86 // -------------------------------------------------------------------- 87 88 class FastSaxParser; 89 90 class FastLocatorImpl : public WeakImplHelper1< XLocator > 91 { 92 public: 93 FastLocatorImpl( FastSaxParser *p ) : mpParser(p) {} 94 95 void dispose() { mpParser = 0; } 96 void checkDispose() throw (RuntimeException) { if( !mpParser ) throw DisposedException(); } 97 98 //XLocator 99 virtual sal_Int32 SAL_CALL getColumnNumber(void) throw (RuntimeException); 100 virtual sal_Int32 SAL_CALL getLineNumber(void) throw (RuntimeException); 101 virtual OUString SAL_CALL getPublicId(void) throw (RuntimeException); 102 virtual OUString SAL_CALL getSystemId(void) throw (RuntimeException); 103 104 private: 105 FastSaxParser *mpParser; 106 }; 107 108 // -------------------------------------------------------------------- 109 // FastSaxParser 110 // -------------------------------------------------------------------- 111 112 //--------------------------------------------- 113 // the implementation part 114 //--------------------------------------------- 115 116 extern "C" { 117 118 static void call_callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts) 119 { 120 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData ); 121 pFastParser->callbackStartElement( name, atts ); 122 } 123 124 static void call_callbackEndElement(void *userData, const XML_Char *name) 125 { 126 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData ); 127 pFastParser->callbackEndElement( name ); 128 } 129 130 static void call_callbackCharacters( void *userData , const XML_Char *s , int nLen ) 131 { 132 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData ); 133 pFastParser->callbackCharacters( s, nLen ); 134 } 135 136 static int call_callbackExternalEntityRef( XML_Parser parser, 137 const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId ) 138 { 139 FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( XML_GetUserData( parser ) ); 140 return pFastParser->callbackExternalEntityRef( parser, openEntityNames, base, systemId, publicId ); 141 } 142 143 } // extern "C" 144 145 // -------------------------------------------------------------------- 146 // FastLocatorImpl implementation 147 // -------------------------------------------------------------------- 148 149 sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber(void) throw (RuntimeException) 150 { 151 checkDispose(); 152 return XML_GetCurrentColumnNumber( mpParser->getEntity().mpParser ); 153 } 154 155 // -------------------------------------------------------------------- 156 157 sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber(void) throw (RuntimeException) 158 { 159 checkDispose(); 160 return XML_GetCurrentLineNumber( mpParser->getEntity().mpParser ); 161 } 162 163 // -------------------------------------------------------------------- 164 165 OUString SAL_CALL FastLocatorImpl::getPublicId(void) throw (RuntimeException) 166 { 167 checkDispose(); 168 return mpParser->getEntity().maStructSource.sPublicId; 169 } 170 // -------------------------------------------------------------------- 171 172 OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException) 173 { 174 checkDispose(); 175 return mpParser->getEntity().maStructSource.sSystemId; 176 } 177 178 // -------------------------------------------------------------------- 179 180 ParserData::ParserData() 181 { 182 } 183 184 ParserData::~ParserData() 185 { 186 } 187 188 // -------------------------------------------------------------------- 189 190 Entity::Entity( const ParserData& rData ) : 191 ParserData( rData ) 192 { 193 // performance-Improvment. Reference is needed when calling the startTag callback. 194 // Handing out the same object with every call is allowed (see sax-specification) 195 mxAttributes.set( new FastAttributeList( mxTokenHandler ) ); 196 } 197 198 Entity::~Entity() 199 { 200 } 201 202 // -------------------------------------------------------------------- 203 // FastSaxParser implementation 204 // -------------------------------------------------------------------- 205 206 FastSaxParser::FastSaxParser() 207 { 208 mxDocumentLocator.set( new FastLocatorImpl( this ) ); 209 } 210 211 // -------------------------------------------------------------------- 212 213 FastSaxParser::~FastSaxParser() 214 { 215 if( mxDocumentLocator.is() ) 216 mxDocumentLocator->dispose(); 217 } 218 219 // -------------------------------------------------------------------- 220 221 void FastSaxParser::pushContext() 222 { 223 Entity& rEntity = getEntity(); 224 if( rEntity.maContextStack.empty() ) 225 { 226 rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl ) ); 227 DefineNamespace( OString("xml"), "http://www.w3.org/XML/1998/namespace"); 228 } 229 else 230 { 231 rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl( rEntity.maContextStack.top() ) ) ); 232 } 233 } 234 235 // -------------------------------------------------------------------- 236 237 void FastSaxParser::popContext() 238 { 239 Entity& rEntity = getEntity(); 240 OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::popContext(), pop without push?" ); 241 if( !rEntity.maContextStack.empty() ) 242 rEntity.maContextStack.pop(); 243 } 244 245 // -------------------------------------------------------------------- 246 247 void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNamespaceURL ) 248 { 249 Entity& rEntity = getEntity(); 250 OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::DefineNamespace(), I need a context!" ); 251 if( !rEntity.maContextStack.empty() ) 252 { 253 sal_uInt32 nOffset = rEntity.maContextStack.top()->mnNamespaceCount++; 254 255 if( rEntity.maNamespaceDefines.size() <= nOffset ) 256 rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 ); 257 258 const OUString aNamespaceURL( pNamespaceURL, strlen( pNamespaceURL ), RTL_TEXTENCODING_UTF8 ); 259 rEntity.maNamespaceDefines[nOffset].reset( new NamespaceDefine( rPrefix, GetNamespaceToken( aNamespaceURL ), aNamespaceURL ) ); 260 } 261 } 262 263 // -------------------------------------------------------------------- 264 265 sal_Int32 FastSaxParser::GetToken( const OString& rToken ) 266 { 267 Sequence< sal_Int8 > aSeq( (sal_Int8*)rToken.getStr(), rToken.getLength() ); 268 269 return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq ); 270 } 271 272 sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ ) 273 { 274 if( !nLen ) 275 nLen = strlen( pToken ); 276 277 Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen ); 278 279 return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq ); 280 } 281 282 // -------------------------------------------------------------------- 283 284 sal_Int32 FastSaxParser::GetTokenWithPrefix( const OString& rPrefix, const OString& rName ) throw (SAXException) 285 { 286 sal_Int32 nNamespaceToken = FastToken::DONTKNOW; 287 288 Entity& rEntity = getEntity(); 289 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount; 290 while( nNamespace-- ) 291 { 292 if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix ) 293 { 294 nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken; 295 break; 296 } 297 298 if( !nNamespace ) 299 throw SAXException(); // prefix that has no defined namespace url 300 } 301 302 if( nNamespaceToken != FastToken::DONTKNOW ) 303 { 304 sal_Int32 nNameToken = GetToken( rName.getStr(), rName.getLength() ); 305 if( nNameToken != FastToken::DONTKNOW ) 306 return nNamespaceToken | nNameToken; 307 } 308 309 return FastToken::DONTKNOW; 310 } 311 312 sal_Int32 FastSaxParser::GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (SAXException) 313 { 314 sal_Int32 nNamespaceToken = FastToken::DONTKNOW; 315 316 Entity& rEntity = getEntity(); 317 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount; 318 while( nNamespace-- ) 319 { 320 const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix ); 321 if( (rPrefix.getLength() == nPrefixLen) && 322 (strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) ) 323 { 324 nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken; 325 break; 326 } 327 328 if( !nNamespace ) 329 throw SAXException(); // prefix that has no defined namespace url 330 } 331 332 if( nNamespaceToken != FastToken::DONTKNOW ) 333 { 334 sal_Int32 nNameToken = GetToken( pName, nNameLen ); 335 if( nNameToken != FastToken::DONTKNOW ) 336 return nNamespaceToken | nNameToken; 337 } 338 339 return FastToken::DONTKNOW; 340 } 341 342 // -------------------------------------------------------------------- 343 344 sal_Int32 FastSaxParser::GetNamespaceToken( const OUString& rNamespaceURL ) 345 { 346 NamespaceMap::iterator aIter( maNamespaceMap.find( rNamespaceURL ) ); 347 if( aIter != maNamespaceMap.end() ) 348 return (*aIter).second; 349 else 350 return FastToken::DONTKNOW; 351 } 352 353 // -------------------------------------------------------------------- 354 355 OUString FastSaxParser::GetNamespaceURL( const OString& rPrefix ) throw (SAXException) 356 { 357 Entity& rEntity = getEntity(); 358 if( !rEntity.maContextStack.empty() ) 359 { 360 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount; 361 while( nNamespace-- ) 362 if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix ) 363 return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL; 364 } 365 366 throw SAXException(); // prefix that has no defined namespace url 367 } 368 369 OUString FastSaxParser::GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw(SAXException) 370 { 371 Entity& rEntity = getEntity(); 372 if( pPrefix && !rEntity.maContextStack.empty() ) 373 { 374 sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount; 375 while( nNamespace-- ) 376 { 377 const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix ); 378 if( (rPrefix.getLength() == nPrefixLen) && 379 (strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) ) 380 { 381 return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL; 382 } 383 } 384 } 385 386 throw SAXException(); // prefix that has no defined namespace url 387 } 388 389 // -------------------------------------------------------------------- 390 391 sal_Int32 FastSaxParser::GetTokenWithNamespaceURL( const OUString& rNamespaceURL, const sal_Char* pName, int nNameLen ) 392 { 393 sal_Int32 nNamespaceToken = GetNamespaceToken( rNamespaceURL ); 394 395 if( nNamespaceToken != FastToken::DONTKNOW ) 396 { 397 sal_Int32 nNameToken = GetToken( pName, nNameLen ); 398 if( nNameToken != FastToken::DONTKNOW ) 399 return nNamespaceToken | nNameToken; 400 } 401 402 return FastToken::DONTKNOW; 403 } 404 405 // -------------------------------------------------------------------- 406 407 void FastSaxParser::splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen ) 408 { 409 XML_Char *p; 410 for( p = const_cast< XML_Char* >( pwName ), rNameLen = 0, rPrefixLen = 0; *p; p++ ) 411 { 412 if( *p == ':' ) 413 { 414 rPrefixLen = p - pwName; 415 rNameLen = 0; 416 } 417 else 418 { 419 rNameLen++; 420 } 421 } 422 if( rPrefixLen ) 423 { 424 rpPrefix = pwName; 425 rpName = &pwName[ rPrefixLen + 1 ]; 426 } 427 else 428 { 429 rpPrefix = 0; 430 rpName = pwName; 431 } 432 } 433 434 /*************** 435 * 436 * parseStream does Parser-startup initializations. The FastSaxParser::parse() method does 437 * the file-specific initialization work. (During a parser run, external files may be opened) 438 * 439 ****************/ 440 void FastSaxParser::parseStream( const InputSource& maStructSource) throw (SAXException, IOException, RuntimeException) 441 { 442 // Only one text at one time 443 MutexGuard guard( maMutex ); 444 445 Entity entity( maData ); 446 entity.maStructSource = maStructSource; 447 448 if( !entity.maStructSource.aInputStream.is() ) 449 throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "No input source" ) ), Reference< XInterface >(), Any() ); 450 451 entity.maConverter.setInputStream( entity.maStructSource.aInputStream ); 452 if( entity.maStructSource.sEncoding.getLength() ) 453 entity.maConverter.setEncoding( OUStringToOString( entity.maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US ) ); 454 455 // create parser with proper encoding 456 entity.mpParser = XML_ParserCreate( 0 ); 457 if( !entity.mpParser ) 458 throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "Couldn't create parser" ) ), Reference< XInterface >(), Any() ); 459 460 // set all necessary C-Callbacks 461 XML_SetUserData( entity.mpParser, this ); 462 XML_SetElementHandler( entity.mpParser, call_callbackStartElement, call_callbackEndElement ); 463 XML_SetCharacterDataHandler( entity.mpParser, call_callbackCharacters ); 464 XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef ); 465 466 pushEntity( entity ); 467 try 468 { 469 // start the document 470 if( entity.mxDocumentHandler.is() ) 471 { 472 Reference< XLocator > xLoc( mxDocumentLocator.get() ); 473 entity.mxDocumentHandler->setDocumentLocator( xLoc ); 474 entity.mxDocumentHandler->startDocument(); 475 } 476 477 parse(); 478 479 // finish document 480 if( entity.mxDocumentHandler.is() ) 481 { 482 entity.mxDocumentHandler->endDocument(); 483 } 484 } 485 catch( SAXException & ) 486 { 487 popEntity(); 488 XML_ParserFree( entity.mpParser ); 489 throw; 490 } 491 catch( IOException & ) 492 { 493 popEntity(); 494 XML_ParserFree( entity.mpParser ); 495 throw; 496 } 497 catch( RuntimeException & ) 498 { 499 popEntity(); 500 XML_ParserFree( entity.mpParser ); 501 throw; 502 } 503 504 popEntity(); 505 XML_ParserFree( entity.mpParser ); 506 } 507 508 void FastSaxParser::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler ) throw (RuntimeException) 509 { 510 maData.mxDocumentHandler = Handler; 511 } 512 513 void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& Handler ) throw (RuntimeException) 514 { 515 maData.mxTokenHandler = Handler; 516 } 517 518 void SAL_CALL FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (IllegalArgumentException, RuntimeException) 519 { 520 if( NamespaceToken >= FastToken::NAMESPACE ) 521 { 522 if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW ) 523 { 524 maNamespaceMap[ NamespaceURL ] = NamespaceToken; 525 return; 526 } 527 } 528 throw IllegalArgumentException(); 529 } 530 531 void FastSaxParser::setErrorHandler(const Reference< XErrorHandler > & Handler) throw (RuntimeException) 532 { 533 maData.mxErrorHandler = Handler; 534 } 535 536 void FastSaxParser::setEntityResolver(const Reference < XEntityResolver > & Resolver) throw (RuntimeException) 537 { 538 maData.mxEntityResolver = Resolver; 539 } 540 541 void FastSaxParser::setLocale( const Locale & Locale ) throw (RuntimeException) 542 { 543 maData.maLocale = Locale; 544 } 545 546 Sequence< OUString > FastSaxParser::getSupportedServiceNames_Static(void) 547 { 548 Sequence<OUString> aRet(1); 549 aRet.getArray()[0] = ::rtl::OUString( RTL_CONSTASCII_USTRINGPARAM(PARSER_SERVICE_NAME) ); 550 return aRet; 551 } 552 553 // XServiceInfo 554 OUString FastSaxParser::getImplementationName() throw (RuntimeException) 555 { 556 return OUString::createFromAscii( PARSER_IMPLEMENTATION_NAME ); 557 } 558 559 // XServiceInfo 560 sal_Bool FastSaxParser::supportsService(const OUString& ServiceName) throw (RuntimeException) 561 { 562 Sequence< OUString > aSNL = getSupportedServiceNames(); 563 const OUString * pArray = aSNL.getConstArray(); 564 565 for( sal_Int32 i = 0; i < aSNL.getLength(); i++ ) 566 if( pArray[i] == ServiceName ) 567 return sal_True; 568 569 return sal_False; 570 } 571 572 // XServiceInfo 573 Sequence< OUString > FastSaxParser::getSupportedServiceNames(void) throw (RuntimeException) 574 { 575 576 Sequence<OUString> seq(1); 577 seq.getArray()[0] = OUString::createFromAscii( PARSER_SERVICE_NAME ); 578 return seq; 579 } 580 581 582 /*--------------------------------------- 583 * 584 * Helper functions and classes 585 * 586 *-------------------------------------------*/ 587 588 namespace { 589 590 OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int32 nLine ) 591 { 592 const sal_Char* pMessage = ""; 593 switch( xmlE ) 594 { 595 case XML_ERROR_NONE: pMessage = "No"; break; 596 case XML_ERROR_NO_MEMORY: pMessage = "no memory"; break; 597 case XML_ERROR_SYNTAX: pMessage = "syntax"; break; 598 case XML_ERROR_NO_ELEMENTS: pMessage = "no elements"; break; 599 case XML_ERROR_INVALID_TOKEN: pMessage = "invalid token"; break; 600 case XML_ERROR_UNCLOSED_TOKEN: pMessage = "unclosed token"; break; 601 case XML_ERROR_PARTIAL_CHAR: pMessage = "partial char"; break; 602 case XML_ERROR_TAG_MISMATCH: pMessage = "tag mismatch"; break; 603 case XML_ERROR_DUPLICATE_ATTRIBUTE: pMessage = "duplicate attribute"; break; 604 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: pMessage = "junk after doc element"; break; 605 case XML_ERROR_PARAM_ENTITY_REF: pMessage = "parameter entity reference"; break; 606 case XML_ERROR_UNDEFINED_ENTITY: pMessage = "undefined entity"; break; 607 case XML_ERROR_RECURSIVE_ENTITY_REF: pMessage = "recursive entity reference"; break; 608 case XML_ERROR_ASYNC_ENTITY: pMessage = "async entity"; break; 609 case XML_ERROR_BAD_CHAR_REF: pMessage = "bad char reference"; break; 610 case XML_ERROR_BINARY_ENTITY_REF: pMessage = "binary entity reference"; break; 611 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: pMessage = "attribute external entity reference"; break; 612 case XML_ERROR_MISPLACED_XML_PI: pMessage = "misplaced xml processing instruction"; break; 613 case XML_ERROR_UNKNOWN_ENCODING: pMessage = "unknown encoding"; break; 614 case XML_ERROR_INCORRECT_ENCODING: pMessage = "incorrect encoding"; break; 615 case XML_ERROR_UNCLOSED_CDATA_SECTION: pMessage = "unclosed cdata section"; break; 616 case XML_ERROR_EXTERNAL_ENTITY_HANDLING: pMessage = "external entity reference"; break; 617 case XML_ERROR_NOT_STANDALONE: pMessage = "not standalone"; break; 618 default:; 619 } 620 621 OUStringBuffer aBuffer( sal_Unicode( '[' ) ); 622 aBuffer.append( sSystemId ); 623 aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " line " ) ); 624 aBuffer.append( nLine ); 625 aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( "]: " ) ); 626 aBuffer.appendAscii( pMessage ); 627 aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " error" ) ); 628 return aBuffer.makeStringAndClear(); 629 } 630 631 } // namespace 632 633 // starts parsing with actual parser ! 634 void FastSaxParser::parse() 635 { 636 const int BUFFER_SIZE = 16 * 1024; 637 Sequence< sal_Int8 > seqOut( BUFFER_SIZE ); 638 639 Entity& rEntity = getEntity(); 640 int nRead = 0; 641 do 642 { 643 nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE ); 644 if( nRead <= 0 ) 645 { 646 XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), 0, 1 ); 647 break; 648 } 649 650 bool bContinue = XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), nRead, 0 ) != 0; 651 // callbacks used inside XML_Parse may have caught an exception 652 if( !bContinue || rEntity.maSavedException.hasValue() ) 653 { 654 // Error during parsing ! 655 XML_Error xmlE = XML_GetErrorCode( rEntity.mpParser ); 656 OUString sSystemId = mxDocumentLocator->getSystemId(); 657 sal_Int32 nLine = mxDocumentLocator->getLineNumber(); 658 659 SAXParseException aExcept( 660 lclGetErrorMessage( xmlE, sSystemId, nLine ), 661 Reference< XInterface >(), 662 Any( &rEntity.maSavedException, getCppuType( &rEntity.maSavedException ) ), 663 mxDocumentLocator->getPublicId(), 664 mxDocumentLocator->getSystemId(), 665 mxDocumentLocator->getLineNumber(), 666 mxDocumentLocator->getColumnNumber() 667 ); 668 669 // error handler is set, it may throw the exception 670 if( rEntity.mxErrorHandler.is() ) 671 rEntity.mxErrorHandler->fatalError( Any( aExcept ) ); 672 673 // error handler has not thrown, but parsing cannot go on, the 674 // exception MUST be thrown 675 throw aExcept; 676 } 677 } 678 while( nRead > 0 ); 679 } 680 681 //------------------------------------------ 682 // 683 // The C-Callbacks 684 // 685 //----------------------------------------- 686 687 namespace { 688 689 struct AttributeData 690 { 691 OString maPrefix; 692 OString maName; 693 OString maValue; 694 }; 695 696 } // namespace 697 698 void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char** awAttributes ) 699 { 700 Reference< XFastContextHandler > xParentContext; 701 Entity& rEntity = getEntity(); 702 if( !rEntity.maContextStack.empty() ) 703 { 704 xParentContext = rEntity.maContextStack.top()->mxContext; 705 if( !xParentContext.is() ) 706 { 707 // we ignore current elements, so no processing needed 708 pushContext(); 709 return; 710 } 711 } 712 713 pushContext(); 714 715 rEntity.mxAttributes->clear(); 716 717 // create attribute map and process namespace instructions 718 int i = 0; 719 sal_Int32 nNameLen, nPrefixLen; 720 const XML_Char *pName; 721 const XML_Char *pPrefix; 722 723 try 724 { 725 /* #158414# Each element may define new namespaces, also for attribues. 726 First, process all namespace attributes and cache other attributes in a 727 vector. Second, process the attributes after namespaces have been 728 initialized. */ 729 ::std::vector< AttributeData > aAttribs; 730 731 // #158414# first: get namespaces 732 for( ; awAttributes[i]; i += 2 ) 733 { 734 OSL_ASSERT( awAttributes[i+1] ); 735 736 splitName( awAttributes[i], pPrefix, nPrefixLen, pName, nNameLen ); 737 if( nPrefixLen ) 738 { 739 if( (nPrefixLen == 5) && (strncmp( pPrefix, "xmlns", 5 ) == 0) ) 740 { 741 DefineNamespace( OString( pName, nNameLen ), awAttributes[i+1] ); 742 } 743 else 744 { 745 aAttribs.resize( aAttribs.size() + 1 ); 746 aAttribs.back().maPrefix = OString( pPrefix, nPrefixLen ); 747 aAttribs.back().maName = OString( pName, nNameLen ); 748 aAttribs.back().maValue = OString( awAttributes[i+1] ); 749 } 750 } 751 else 752 { 753 if( (nNameLen == 5) && (strcmp( pName, "xmlns" ) == 0) ) 754 { 755 // namespace of the element found 756 rEntity.maContextStack.top()->maNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 ); 757 } 758 else 759 { 760 aAttribs.resize( aAttribs.size() + 1 ); 761 aAttribs.back().maName = OString( pName, nNameLen ); 762 aAttribs.back().maValue = OString( awAttributes[i+1] ); 763 } 764 } 765 } 766 767 // #158414# second: fill attribute list with other attributes 768 for( ::std::vector< AttributeData >::const_iterator aIt = aAttribs.begin(), aEnd = aAttribs.end(); aIt != aEnd; ++aIt ) 769 { 770 if( aIt->maPrefix.getLength() > 0 ) 771 { 772 sal_Int32 nAttributeToken = GetTokenWithPrefix( aIt->maPrefix, aIt->maName ); 773 if( nAttributeToken != FastToken::DONTKNOW ) 774 rEntity.mxAttributes->add( nAttributeToken, aIt->maValue ); 775 else 776 rEntity.mxAttributes->addUnknown( GetNamespaceURL( aIt->maPrefix ), aIt->maName, aIt->maValue ); 777 } 778 else 779 { 780 sal_Int32 nAttributeToken = GetToken( aIt->maName ); 781 if( nAttributeToken != FastToken::DONTKNOW ) 782 rEntity.mxAttributes->add( nAttributeToken, aIt->maValue ); 783 else 784 rEntity.mxAttributes->addUnknown( aIt->maName, aIt->maValue ); 785 } 786 } 787 788 sal_Int32 nElementToken; 789 splitName( pwName, pPrefix, nPrefixLen, pName, nNameLen ); 790 if( nPrefixLen > 0 ) 791 nElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen ); 792 else if( rEntity.maContextStack.top()->maNamespace.getLength() > 0 ) 793 nElementToken = GetTokenWithNamespaceURL( rEntity.maContextStack.top()->maNamespace, pName, nNameLen ); 794 else 795 nElementToken = GetToken( pName ); 796 rEntity.maContextStack.top()->mnElementToken = nElementToken; 797 798 Reference< XFastAttributeList > xAttr( rEntity.mxAttributes.get() ); 799 Reference< XFastContextHandler > xContext; 800 if( nElementToken == FastToken::DONTKNOW ) 801 { 802 if( nPrefixLen > 0 ) 803 rEntity.maContextStack.top()->maNamespace = GetNamespaceURL( pPrefix, nPrefixLen ); 804 805 const OUString aNamespace( rEntity.maContextStack.top()->maNamespace ); 806 const OUString aElementName( pPrefix, nPrefixLen, RTL_TEXTENCODING_UTF8 ); 807 rEntity.maContextStack.top()->maElementName = aElementName; 808 809 if( xParentContext.is() ) 810 xContext = xParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr ); 811 else 812 xContext = rEntity.mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr ); 813 814 if( xContext.is() ) 815 { 816 rEntity.maContextStack.top()->mxContext = xContext; 817 xContext->startUnknownElement( aNamespace, aElementName, xAttr ); 818 } 819 } 820 else 821 { 822 if( xParentContext.is() ) 823 xContext = xParentContext->createFastChildContext( nElementToken, xAttr ); 824 else 825 xContext = rEntity.mxDocumentHandler->createFastChildContext( nElementToken, xAttr ); 826 827 828 if( xContext.is() ) 829 { 830 rEntity.maContextStack.top()->mxContext = xContext; 831 xContext->startFastElement( nElementToken, xAttr ); 832 } 833 } 834 } 835 catch( Exception& e ) 836 { 837 rEntity.maSavedException <<= e; 838 } 839 } 840 841 void FastSaxParser::callbackEndElement( const XML_Char* ) 842 { 843 Entity& rEntity = getEntity(); 844 OSL_ENSURE( !rEntity.maContextStack.empty(), "FastSaxParser::callbackEndElement - no context" ); 845 if( !rEntity.maContextStack.empty() ) 846 { 847 SaxContextImplPtr pContext = rEntity.maContextStack.top(); 848 const Reference< XFastContextHandler >& xContext( pContext->mxContext ); 849 if( xContext.is() ) try 850 { 851 sal_Int32 nElementToken = pContext->mnElementToken; 852 if( nElementToken != FastToken::DONTKNOW ) 853 xContext->endFastElement( nElementToken ); 854 else 855 xContext->endUnknownElement( pContext->maNamespace, pContext->maElementName ); 856 } 857 catch( Exception& e ) 858 { 859 rEntity.maSavedException <<= e; 860 } 861 862 popContext(); 863 } 864 } 865 866 867 void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen ) 868 { 869 Entity& rEntity = getEntity(); 870 const Reference< XFastContextHandler >& xContext( rEntity.maContextStack.top()->mxContext ); 871 if( xContext.is() ) try 872 { 873 xContext->characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) ); 874 } 875 catch( Exception& e ) 876 { 877 rEntity.maSavedException <<= e; 878 } 879 } 880 881 int FastSaxParser::callbackExternalEntityRef( XML_Parser parser, 882 const XML_Char *context, const XML_Char * /*base*/, const XML_Char *systemId, const XML_Char *publicId ) 883 { 884 bool bOK = true; 885 InputSource source; 886 887 Entity& rCurrEntity = getEntity(); 888 Entity aNewEntity( rCurrEntity ); 889 890 if( rCurrEntity.mxEntityResolver.is() ) try 891 { 892 aNewEntity.maStructSource = rCurrEntity.mxEntityResolver->resolveEntity( 893 OUString( publicId, strlen( publicId ), RTL_TEXTENCODING_UTF8 ) , 894 OUString( systemId, strlen( systemId ), RTL_TEXTENCODING_UTF8 ) ); 895 } 896 catch( SAXParseException & e ) 897 { 898 rCurrEntity.maSavedException <<= e; 899 bOK = false; 900 } 901 catch( SAXException & e ) 902 { 903 rCurrEntity.maSavedException <<= SAXParseException( 904 e.Message, e.Context, e.WrappedException, 905 mxDocumentLocator->getPublicId(), 906 mxDocumentLocator->getSystemId(), 907 mxDocumentLocator->getLineNumber(), 908 mxDocumentLocator->getColumnNumber() ); 909 bOK = false; 910 } 911 912 if( aNewEntity.maStructSource.aInputStream.is() ) 913 { 914 aNewEntity.mpParser = XML_ExternalEntityParserCreate( parser, context, 0 ); 915 if( !aNewEntity.mpParser ) 916 { 917 return false; 918 } 919 920 aNewEntity.maConverter.setInputStream( aNewEntity.maStructSource.aInputStream ); 921 pushEntity( aNewEntity ); 922 try 923 { 924 parse(); 925 } 926 catch( SAXParseException & e ) 927 { 928 rCurrEntity.maSavedException <<= e; 929 bOK = false; 930 } 931 catch( IOException &e ) 932 { 933 SAXException aEx; 934 aEx.WrappedException <<= e; 935 rCurrEntity.maSavedException <<= aEx; 936 bOK = false; 937 } 938 catch( RuntimeException &e ) 939 { 940 SAXException aEx; 941 aEx.WrappedException <<= e; 942 rCurrEntity.maSavedException <<= aEx; 943 bOK = false; 944 } 945 946 popEntity(); 947 XML_ParserFree( aNewEntity.mpParser ); 948 } 949 950 return bOK; 951 } 952 953 } // namespace sax_fastparser 954