1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 //#include <stdlib.h>
29 //#include <sal/alloca.h>
30 
31 #include <boost/scoped_ptr.hpp>
32 
33 #include <osl/diagnose.h>
34 #include <rtl/ustrbuf.hxx>
35 
36 #include <com/sun/star/lang/DisposedException.hpp>
37 #include <com/sun/star/xml/sax/XFastContextHandler.hpp>
38 #include <com/sun/star/xml/sax/SAXParseException.hpp>
39 #include <com/sun/star/xml/sax/FastToken.hpp>
40 
41 #include "fastparser.hxx"
42 
43 #include <string.h>
44 
45 using ::rtl::OString;
46 using ::rtl::OUString;
47 using ::rtl::OUStringBuffer;
48 using namespace ::std;
49 using namespace ::osl;
50 using namespace ::cppu;
51 using namespace ::com::sun::star::uno;
52 using namespace ::com::sun::star::lang;
53 using namespace ::com::sun::star::xml::sax;
54 //using namespace ::com::sun::star::util;
55 using namespace ::com::sun::star::io;
56 
57 namespace sax_fastparser {
58 
59 // --------------------------------------------------------------------
60 
61 struct SaxContextImpl
62 {
63 	Reference< XFastContextHandler >	mxContext;
64 	sal_uInt32		mnNamespaceCount;
65 	sal_Int32		mnElementToken;
66 	OUString		maNamespace;
67 	OUString		maElementName;
68 
69 	SaxContextImpl() { mnNamespaceCount = 0; mnElementToken = 0; }
70 	SaxContextImpl( const SaxContextImplPtr& p ) { mnNamespaceCount = p->mnNamespaceCount; mnElementToken = p->mnElementToken; maNamespace = p->maNamespace; }
71 };
72 
73 // --------------------------------------------------------------------
74 
75 struct NamespaceDefine
76 {
77 	OString		maPrefix;
78 	sal_Int32	mnToken;
79 	OUString	maNamespaceURL;
80 
81 	NamespaceDefine( const OString& rPrefix, sal_Int32 nToken, const OUString& rNamespaceURL ) : maPrefix( rPrefix ), mnToken( nToken ), maNamespaceURL( rNamespaceURL ) {}
82 };
83 
84 // --------------------------------------------------------------------
85 // FastLocatorImpl
86 // --------------------------------------------------------------------
87 
88 class FastSaxParser;
89 
90 class FastLocatorImpl : public WeakImplHelper1< XLocator >
91 {
92 public:
93 	FastLocatorImpl( FastSaxParser *p ) : mpParser(p) {}
94 
95 	void dispose() { mpParser = 0; }
96 	void checkDispose() throw (RuntimeException) { if( !mpParser ) throw DisposedException(); }
97 
98 	//XLocator
99     virtual sal_Int32 SAL_CALL getColumnNumber(void) throw (RuntimeException);
100 	virtual sal_Int32 SAL_CALL getLineNumber(void) throw (RuntimeException);
101     virtual OUString SAL_CALL getPublicId(void) throw (RuntimeException);
102     virtual OUString SAL_CALL getSystemId(void) throw (RuntimeException);
103 
104 private:
105 	FastSaxParser *mpParser;
106 };
107 
108 // --------------------------------------------------------------------
109 // FastSaxParser
110 // --------------------------------------------------------------------
111 
112 //---------------------------------------------
113 // the implementation part
114 //---------------------------------------------
115 
116 extern "C" {
117 
118 static void call_callbackStartElement(void *userData, const XML_Char *name , const XML_Char **atts)
119 {
120     FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData );
121     pFastParser->callbackStartElement( name, atts );
122 }
123 
124 static void call_callbackEndElement(void *userData, const XML_Char *name)
125 {
126     FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData );
127     pFastParser->callbackEndElement( name );
128 }
129 
130 static void call_callbackCharacters( void *userData , const XML_Char *s , int nLen )
131 {
132     FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( userData );
133     pFastParser->callbackCharacters( s, nLen );
134 }
135 
136 static int call_callbackExternalEntityRef( XML_Parser parser,
137         const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId )
138 {
139     FastSaxParser* pFastParser = reinterpret_cast< FastSaxParser* >( XML_GetUserData( parser ) );
140     return pFastParser->callbackExternalEntityRef( parser, openEntityNames, base, systemId, publicId );
141 }
142 
143 } // extern "C"
144 
145 // --------------------------------------------------------------------
146 // FastLocatorImpl implementation
147 // --------------------------------------------------------------------
148 
149 sal_Int32 SAL_CALL FastLocatorImpl::getColumnNumber(void) throw (RuntimeException)
150 {
151 	checkDispose();
152 	return XML_GetCurrentColumnNumber( mpParser->getEntity().mpParser );
153 }
154 
155 // --------------------------------------------------------------------
156 
157 sal_Int32 SAL_CALL FastLocatorImpl::getLineNumber(void) throw (RuntimeException)
158 {
159 	checkDispose();
160 	return XML_GetCurrentLineNumber( mpParser->getEntity().mpParser );
161 }
162 
163 // --------------------------------------------------------------------
164 
165 OUString SAL_CALL FastLocatorImpl::getPublicId(void) throw (RuntimeException)
166 {
167 	checkDispose();
168 	return mpParser->getEntity().maStructSource.sPublicId;
169 }
170 // --------------------------------------------------------------------
171 
172 OUString SAL_CALL FastLocatorImpl::getSystemId(void) throw (RuntimeException)
173 {
174 	checkDispose();
175 	return mpParser->getEntity().maStructSource.sSystemId;
176 }
177 
178 // --------------------------------------------------------------------
179 
180 ParserData::ParserData()
181 {
182 }
183 
184 ParserData::~ParserData()
185 {
186 }
187 
188 // --------------------------------------------------------------------
189 
190 Entity::Entity( const ParserData& rData ) :
191     ParserData( rData )
192 {
193 	// performance-Improvment. Reference is needed when calling the startTag callback.
194 	// Handing out the same object with every call is allowed (see sax-specification)
195 	mxAttributes.set( new FastAttributeList( mxTokenHandler ) );
196 }
197 
198 Entity::~Entity()
199 {
200 }
201 
202 // --------------------------------------------------------------------
203 // FastSaxParser implementation
204 // --------------------------------------------------------------------
205 
206 FastSaxParser::FastSaxParser()
207 {
208 	mxDocumentLocator.set( new FastLocatorImpl( this ) );
209 }
210 
211 // --------------------------------------------------------------------
212 
213 FastSaxParser::~FastSaxParser()
214 {
215 	if( mxDocumentLocator.is() )
216 		mxDocumentLocator->dispose();
217 }
218 
219 // --------------------------------------------------------------------
220 
221 void FastSaxParser::pushContext()
222 {
223     Entity& rEntity = getEntity();
224 	if( rEntity.maContextStack.empty() )
225 	{
226         rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl ) );
227 		DefineNamespace( OString("xml"), "http://www.w3.org/XML/1998/namespace");
228 	}
229 	else
230 	{
231         rEntity.maContextStack.push( SaxContextImplPtr( new SaxContextImpl( rEntity.maContextStack.top() ) ) );
232 	}
233 }
234 
235 // --------------------------------------------------------------------
236 
237 void FastSaxParser::popContext()
238 {
239     Entity& rEntity = getEntity();
240 	OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::popContext(), pop without push?" );
241 	if( !rEntity.maContextStack.empty() )
242 		rEntity.maContextStack.pop();
243 }
244 
245 // --------------------------------------------------------------------
246 
247 void FastSaxParser::DefineNamespace( const OString& rPrefix, const sal_Char* pNamespaceURL )
248 {
249     Entity& rEntity = getEntity();
250 	OSL_ENSURE( !rEntity.maContextStack.empty(), "sax::FastSaxParser::DefineNamespace(), I need a context!" );
251 	if( !rEntity.maContextStack.empty() )
252 	{
253 		sal_uInt32 nOffset = rEntity.maContextStack.top()->mnNamespaceCount++;
254 
255 		if( rEntity.maNamespaceDefines.size() <= nOffset )
256 			rEntity.maNamespaceDefines.resize( rEntity.maNamespaceDefines.size() + 64 );
257 
258 		const OUString aNamespaceURL( pNamespaceURL, strlen( pNamespaceURL ), RTL_TEXTENCODING_UTF8 );
259 		rEntity.maNamespaceDefines[nOffset].reset( new NamespaceDefine( rPrefix, GetNamespaceToken( aNamespaceURL ), aNamespaceURL ) );
260 	}
261 }
262 
263 // --------------------------------------------------------------------
264 
265 sal_Int32 FastSaxParser::GetToken( const OString& rToken )
266 {
267     Sequence< sal_Int8 > aSeq( (sal_Int8*)rToken.getStr(), rToken.getLength() );
268 
269     return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq );
270 }
271 
272 sal_Int32 FastSaxParser::GetToken( const sal_Char* pToken, sal_Int32 nLen /* = 0 */ )
273 {
274 	if( !nLen )
275 		nLen = strlen( pToken );
276 
277 	Sequence< sal_Int8 > aSeq( (sal_Int8*)pToken, nLen );
278 
279 	return getEntity().mxTokenHandler->getTokenFromUTF8( aSeq );
280 }
281 
282 // --------------------------------------------------------------------
283 
284 sal_Int32 FastSaxParser::GetTokenWithPrefix( const OString& rPrefix, const OString& rName ) throw (SAXException)
285 {
286     sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
287 
288     Entity& rEntity = getEntity();
289     sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
290     while( nNamespace-- )
291     {
292         if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix )
293         {
294             nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken;
295             break;
296         }
297 
298         if( !nNamespace )
299             throw SAXException(); // prefix that has no defined namespace url
300     }
301 
302     if( nNamespaceToken != FastToken::DONTKNOW )
303     {
304         sal_Int32 nNameToken = GetToken( rName.getStr(), rName.getLength() );
305         if( nNameToken != FastToken::DONTKNOW )
306             return nNamespaceToken | nNameToken;
307     }
308 
309     return FastToken::DONTKNOW;
310 }
311 
312 sal_Int32 FastSaxParser::GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (SAXException)
313 {
314 	sal_Int32 nNamespaceToken = FastToken::DONTKNOW;
315 
316     Entity& rEntity = getEntity();
317 	sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
318 	while( nNamespace-- )
319 	{
320 		const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix );
321 		if( (rPrefix.getLength() == nPrefixLen) &&
322 			(strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) )
323 		{
324 			nNamespaceToken = rEntity.maNamespaceDefines[nNamespace]->mnToken;
325 			break;
326 		}
327 
328 		if( !nNamespace )
329 			throw SAXException(); // prefix that has no defined namespace url
330 	}
331 
332 	if( nNamespaceToken != FastToken::DONTKNOW )
333 	{
334 		sal_Int32 nNameToken = GetToken( pName, nNameLen );
335 		if( nNameToken != FastToken::DONTKNOW )
336 			return nNamespaceToken | nNameToken;
337 	}
338 
339 	return FastToken::DONTKNOW;
340 }
341 
342 // --------------------------------------------------------------------
343 
344 sal_Int32 FastSaxParser::GetNamespaceToken( const OUString& rNamespaceURL )
345 {
346 	NamespaceMap::iterator aIter( maNamespaceMap.find( rNamespaceURL ) );
347 	if( aIter != maNamespaceMap.end() )
348 		return (*aIter).second;
349 	else
350 		return FastToken::DONTKNOW;
351 }
352 
353 // --------------------------------------------------------------------
354 
355 OUString FastSaxParser::GetNamespaceURL( const OString& rPrefix ) throw (SAXException)
356 {
357     Entity& rEntity = getEntity();
358     if( !rEntity.maContextStack.empty() )
359     {
360         sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
361         while( nNamespace-- )
362             if( rEntity.maNamespaceDefines[nNamespace]->maPrefix == rPrefix )
363                 return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL;
364     }
365 
366     throw SAXException(); // prefix that has no defined namespace url
367 }
368 
369 OUString FastSaxParser::GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw(SAXException)
370 {
371     Entity& rEntity = getEntity();
372 	if( pPrefix && !rEntity.maContextStack.empty() )
373 	{
374 		sal_uInt32 nNamespace = rEntity.maContextStack.top()->mnNamespaceCount;
375 		while( nNamespace-- )
376 		{
377 			const OString& rPrefix( rEntity.maNamespaceDefines[nNamespace]->maPrefix );
378 			if( (rPrefix.getLength() == nPrefixLen) &&
379 				(strncmp( rPrefix.getStr(), pPrefix, nPrefixLen ) == 0 ) )
380 			{
381 				return rEntity.maNamespaceDefines[nNamespace]->maNamespaceURL;
382 			}
383 		}
384 	}
385 
386 	throw SAXException(); // prefix that has no defined namespace url
387 }
388 
389 // --------------------------------------------------------------------
390 
391 sal_Int32 FastSaxParser::GetTokenWithNamespaceURL( const OUString& rNamespaceURL, const sal_Char* pName, int nNameLen )
392 {
393 	sal_Int32 nNamespaceToken = GetNamespaceToken( rNamespaceURL );
394 
395 	if( nNamespaceToken != FastToken::DONTKNOW )
396 	{
397 		sal_Int32 nNameToken = GetToken( pName, nNameLen );
398 		if( nNameToken != FastToken::DONTKNOW )
399 			return nNamespaceToken | nNameToken;
400 	}
401 
402 	return FastToken::DONTKNOW;
403 }
404 
405 // --------------------------------------------------------------------
406 
407 void FastSaxParser::splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen )
408 {
409 	XML_Char *p;
410 	for( p = const_cast< XML_Char* >( pwName ), rNameLen = 0, rPrefixLen = 0; *p; p++ )
411 	{
412 		if( *p == ':' )
413 		{
414 			rPrefixLen = p - pwName;
415 			rNameLen = 0;
416 		}
417 		else
418 		{
419 			rNameLen++;
420 		}
421 	}
422 	if( rPrefixLen )
423 	{
424 		rpPrefix = pwName;
425 		rpName = &pwName[ rPrefixLen + 1 ];
426 	}
427 	else
428 	{
429 		rpPrefix = 0;
430 		rpName = pwName;
431 	}
432 }
433 
434 /***************
435 *
* parseStream does the parser start-up initialization. The FastSaxParser::parse() method
* reads and parses the current entity. (During a parser run, external entities may be opened.)
438 *
439 ****************/
440 void FastSaxParser::parseStream( const InputSource& maStructSource)	throw (SAXException, IOException, RuntimeException)
441 {
442 	// Only one text at one time
443 	MutexGuard guard( maMutex );
444 
445 	Entity entity( maData );
446 	entity.maStructSource = maStructSource;
447 
448 	if( !entity.maStructSource.aInputStream.is() )
449 		throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "No input source" ) ), Reference< XInterface >(), Any() );
450 
451 	entity.maConverter.setInputStream( entity.maStructSource.aInputStream );
452 	if( entity.maStructSource.sEncoding.getLength() )
453 		entity.maConverter.setEncoding(	OUStringToOString( entity.maStructSource.sEncoding, RTL_TEXTENCODING_ASCII_US ) );
454 
455 	// create parser with proper encoding
456 	entity.mpParser = XML_ParserCreate( 0 );
457 	if( !entity.mpParser )
458 		throw SAXException( OUString( RTL_CONSTASCII_USTRINGPARAM( "Couldn't create parser" ) ), Reference< XInterface >(), Any() );
459 
460 	// set all necessary C-Callbacks
461 	XML_SetUserData( entity.mpParser, this );
462 	XML_SetElementHandler( entity.mpParser,	call_callbackStartElement, call_callbackEndElement );
463 	XML_SetCharacterDataHandler( entity.mpParser, call_callbackCharacters );
464 	XML_SetExternalEntityRefHandler( entity.mpParser, call_callbackExternalEntityRef );
465 
466 	pushEntity( entity );
467 	try
468 	{
469 		// start the document
470 		if( entity.mxDocumentHandler.is() )
471 		{
472 			Reference< XLocator > xLoc( mxDocumentLocator.get() );
473 			entity.mxDocumentHandler->setDocumentLocator( xLoc );
474 			entity.mxDocumentHandler->startDocument();
475 		}
476 
477 		parse();
478 
479 		// finish document
480 		if( entity.mxDocumentHandler.is() )
481 		{
482 			entity.mxDocumentHandler->endDocument();
483 		}
484 	}
485 	catch( SAXException & )
486 	{
487 		popEntity();
488 		XML_ParserFree( entity.mpParser );
489   		throw;
490 	}
491 	catch( IOException & )
492 	{
493 		popEntity();
494 		XML_ParserFree( entity.mpParser );
495 		throw;
496 	}
497 	catch( RuntimeException & )
498 	{
499 		popEntity();
500 		XML_ParserFree( entity.mpParser );
501 		throw;
502 	}
503 
504 	popEntity();
505 	XML_ParserFree( entity.mpParser );
506 }
507 
508 void FastSaxParser::setFastDocumentHandler( const Reference< XFastDocumentHandler >& Handler ) throw (RuntimeException)
509 {
510 	maData.mxDocumentHandler = Handler;
511 }
512 
513 void SAL_CALL FastSaxParser::setTokenHandler( const Reference< XFastTokenHandler >& Handler ) throw (RuntimeException)
514 {
515 	maData.mxTokenHandler = Handler;
516 }
517 
518 void SAL_CALL FastSaxParser::registerNamespace( const OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (IllegalArgumentException, RuntimeException)
519 {
520 	if( NamespaceToken >= FastToken::NAMESPACE )
521 	{
522 		if( GetNamespaceToken( NamespaceURL ) == FastToken::DONTKNOW )
523 		{
524 			maNamespaceMap[ NamespaceURL ] = NamespaceToken;
525 			return;
526 		}
527 	}
528 	throw IllegalArgumentException();
529 }
530 
531 void FastSaxParser::setErrorHandler(const Reference< XErrorHandler > & Handler) throw (RuntimeException)
532 {
533 	maData.mxErrorHandler = Handler;
534 }
535 
536 void FastSaxParser::setEntityResolver(const Reference < XEntityResolver > & Resolver) throw (RuntimeException)
537 {
538 	maData.mxEntityResolver = Resolver;
539 }
540 
541 void FastSaxParser::setLocale( const Locale & Locale ) throw (RuntimeException)
542 {
543 	maData.maLocale = Locale;
544 }
545 
546 Sequence< OUString > FastSaxParser::getSupportedServiceNames_Static(void)
547 {
548 	Sequence<OUString> aRet(1);
549 	aRet.getArray()[0] = ::rtl::OUString( RTL_CONSTASCII_USTRINGPARAM(PARSER_SERVICE_NAME) );
550 	return aRet;
551 }
552 
553 // XServiceInfo
554 OUString FastSaxParser::getImplementationName() throw (RuntimeException)
555 {
556     return OUString::createFromAscii( PARSER_IMPLEMENTATION_NAME );
557 }
558 
559 // XServiceInfo
560 sal_Bool FastSaxParser::supportsService(const OUString& ServiceName) throw (RuntimeException)
561 {
562     Sequence< OUString > aSNL = getSupportedServiceNames();
563     const OUString * pArray = aSNL.getConstArray();
564 
565     for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
566         if( pArray[i] == ServiceName )
567             return sal_True;
568 
569     return sal_False;
570 }
571 
572 // XServiceInfo
573 Sequence< OUString > FastSaxParser::getSupportedServiceNames(void) throw (RuntimeException)
574 {
575 
576     Sequence<OUString> seq(1);
577     seq.getArray()[0] = OUString::createFromAscii( PARSER_SERVICE_NAME );
578     return seq;
579 }
580 
581 
582 /*---------------------------------------
583 *
584 * Helper functions and classes
585 *
586 *-------------------------------------------*/
587 
588 namespace {
589 
590 OUString lclGetErrorMessage( XML_Error xmlE, const OUString& sSystemId, sal_Int32 nLine )
591 {
592 	const sal_Char* pMessage = "";
593 	switch( xmlE )
594 	{
595         case XML_ERROR_NONE:                            pMessage = "No";                                    break;
596         case XML_ERROR_NO_MEMORY:                       pMessage = "no memory";                             break;
597         case XML_ERROR_SYNTAX:                          pMessage = "syntax";                                break;
598         case XML_ERROR_NO_ELEMENTS:                     pMessage = "no elements";                           break;
599         case XML_ERROR_INVALID_TOKEN:                   pMessage = "invalid token";                         break;
600         case XML_ERROR_UNCLOSED_TOKEN:                  pMessage = "unclosed token";                        break;
601         case XML_ERROR_PARTIAL_CHAR:                    pMessage = "partial char";                          break;
602         case XML_ERROR_TAG_MISMATCH:                    pMessage = "tag mismatch";                          break;
603         case XML_ERROR_DUPLICATE_ATTRIBUTE:             pMessage = "duplicate attribute";                   break;
604         case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:          pMessage = "junk after doc element";                break;
605         case XML_ERROR_PARAM_ENTITY_REF:                pMessage = "parameter entity reference";            break;
606         case XML_ERROR_UNDEFINED_ENTITY:                pMessage = "undefined entity";                      break;
607         case XML_ERROR_RECURSIVE_ENTITY_REF:            pMessage = "recursive entity reference";            break;
608         case XML_ERROR_ASYNC_ENTITY:                    pMessage = "async entity";                          break;
609         case XML_ERROR_BAD_CHAR_REF:                    pMessage = "bad char reference";                    break;
610         case XML_ERROR_BINARY_ENTITY_REF:               pMessage = "binary entity reference";               break;
611         case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:   pMessage = "attribute external entity reference";   break;
612         case XML_ERROR_MISPLACED_XML_PI:                pMessage = "misplaced xml processing instruction";  break;
613         case XML_ERROR_UNKNOWN_ENCODING:                pMessage = "unknown encoding";                      break;
614         case XML_ERROR_INCORRECT_ENCODING:              pMessage = "incorrect encoding";                    break;
615         case XML_ERROR_UNCLOSED_CDATA_SECTION:          pMessage = "unclosed cdata section";                break;
616         case XML_ERROR_EXTERNAL_ENTITY_HANDLING:        pMessage = "external entity reference";             break;
617         case XML_ERROR_NOT_STANDALONE:                  pMessage = "not standalone";                        break;
618         default:;
619     }
620 
621 	OUStringBuffer aBuffer( sal_Unicode( '[' ) );
622 	aBuffer.append( sSystemId );
623 	aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " line " ) );
624 	aBuffer.append( nLine );
625 	aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( "]: " ) );
626 	aBuffer.appendAscii( pMessage );
627 	aBuffer.appendAscii( RTL_CONSTASCII_STRINGPARAM( " error" ) );
628 	return aBuffer.makeStringAndClear();
629 }
630 
631 } // namespace
632 
633 // starts parsing with actual parser !
634 void FastSaxParser::parse()
635 {
636 	const int BUFFER_SIZE = 16 * 1024;
637 	Sequence< sal_Int8 > seqOut( BUFFER_SIZE );
638 
639     Entity& rEntity = getEntity();
640 	int nRead = 0;
641     do
642 	{
643 		nRead = rEntity.maConverter.readAndConvert( seqOut, BUFFER_SIZE );
644 		if( nRead <= 0 )
645 		{
646 			XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), 0, 1 );
647 			break;
648 		}
649 
650 		bool bContinue = XML_Parse( rEntity.mpParser, (const char*) seqOut.getConstArray(), nRead, 0 ) != 0;
651 		// callbacks used inside XML_Parse may have caught an exception
652 		if( !bContinue || rEntity.maSavedException.hasValue() )
653 		{
654 			// Error during parsing !
655 			XML_Error xmlE = XML_GetErrorCode( rEntity.mpParser );
656 			OUString sSystemId = mxDocumentLocator->getSystemId();
657 			sal_Int32 nLine = mxDocumentLocator->getLineNumber();
658 
659 			SAXParseException aExcept(
660 				lclGetErrorMessage( xmlE, sSystemId, nLine ),
661 				Reference< XInterface >(),
662 				Any( &rEntity.maSavedException, getCppuType( &rEntity.maSavedException ) ),
663 				mxDocumentLocator->getPublicId(),
664 				mxDocumentLocator->getSystemId(),
665 				mxDocumentLocator->getLineNumber(),
666 				mxDocumentLocator->getColumnNumber()
667 			);
668 
669             // error handler is set, it may throw the exception
670 			if( rEntity.mxErrorHandler.is() )
671 				rEntity.mxErrorHandler->fatalError( Any( aExcept ) );
672 
673 			// error handler has not thrown, but parsing cannot go on, the
674             // exception MUST be thrown
675 			throw aExcept;
676 		}
677 	}
678 	while( nRead > 0 );
679 }
680 
681 //------------------------------------------
682 //
683 // The C-Callbacks
684 //
685 //-----------------------------------------
686 
687 namespace {
688 
689 struct AttributeData
690 {
691     OString             maPrefix;
692     OString             maName;
693     OString             maValue;
694 };
695 
696 } // namespace
697 
698 void FastSaxParser::callbackStartElement( const XML_Char* pwName, const XML_Char** awAttributes )
699 {
700 	Reference< XFastContextHandler > xParentContext;
701 	Entity& rEntity = getEntity();
702 	if( !rEntity.maContextStack.empty() )
703 	{
704 		xParentContext = rEntity.maContextStack.top()->mxContext;
705 		if( !xParentContext.is() )
706 		{
707 			// we ignore current elements, so no processing needed
708 			pushContext();
709 			return;
710 		}
711 	}
712 
713 	pushContext();
714 
715 	rEntity.mxAttributes->clear();
716 
717 	// create attribute map and process namespace instructions
718 	int i = 0;
719 	sal_Int32 nNameLen, nPrefixLen;
720 	const XML_Char *pName;
721 	const XML_Char *pPrefix;
722 
723 	try
724 	{
725         /*  #158414# Each element may define new namespaces, also for attribues.
726             First, process all namespace attributes and cache other attributes in a
727             vector. Second, process the attributes after namespaces have been
728             initialized. */
729         ::std::vector< AttributeData > aAttribs;
730 
731         // #158414# first: get namespaces
732     	for( ; awAttributes[i]; i += 2 )
733     	{
734     		OSL_ASSERT( awAttributes[i+1] );
735 
736     		splitName( awAttributes[i], pPrefix, nPrefixLen, pName, nNameLen );
737     		if( nPrefixLen )
738     		{
739     			if( (nPrefixLen == 5) && (strncmp( pPrefix, "xmlns", 5 ) == 0) )
740     			{
741     				DefineNamespace( OString( pName, nNameLen ), awAttributes[i+1] );
742     			}
743     			else
744     			{
745                     aAttribs.resize( aAttribs.size() + 1 );
746                     aAttribs.back().maPrefix = OString( pPrefix, nPrefixLen );
747                     aAttribs.back().maName = OString( pName, nNameLen );
748                     aAttribs.back().maValue = OString( awAttributes[i+1] );
749                 }
750     		}
751     		else
752     		{
753     			if( (nNameLen == 5) && (strcmp( pName, "xmlns" ) == 0) )
754     			{
755     				// namespace of the element found
756     				rEntity.maContextStack.top()->maNamespace = OUString( awAttributes[i+1], strlen( awAttributes[i+1] ), RTL_TEXTENCODING_UTF8 );
757     			}
758     			else
759     			{
760                     aAttribs.resize( aAttribs.size() + 1 );
761                     aAttribs.back().maName = OString( pName, nNameLen );
762                     aAttribs.back().maValue = OString( awAttributes[i+1] );
763     			}
764     		}
765     	}
766 
767         // #158414# second: fill attribute list with other attributes
768         for( ::std::vector< AttributeData >::const_iterator aIt = aAttribs.begin(), aEnd = aAttribs.end(); aIt != aEnd; ++aIt )
769         {
770             if( aIt->maPrefix.getLength() > 0 )
771             {
772                 sal_Int32 nAttributeToken = GetTokenWithPrefix( aIt->maPrefix, aIt->maName );
773                 if( nAttributeToken != FastToken::DONTKNOW )
774                     rEntity.mxAttributes->add( nAttributeToken, aIt->maValue );
775                 else
776                     rEntity.mxAttributes->addUnknown( GetNamespaceURL( aIt->maPrefix ), aIt->maName, aIt->maValue );
777             }
778             else
779             {
780                 sal_Int32 nAttributeToken = GetToken( aIt->maName );
781                 if( nAttributeToken != FastToken::DONTKNOW )
782                     rEntity.mxAttributes->add( nAttributeToken, aIt->maValue );
783                 else
784                     rEntity.mxAttributes->addUnknown( aIt->maName, aIt->maValue );
785             }
786         }
787 
788     	sal_Int32 nElementToken;
789     	splitName( pwName, pPrefix, nPrefixLen, pName, nNameLen );
790     	if( nPrefixLen > 0 )
791     		nElementToken = GetTokenWithPrefix( pPrefix, nPrefixLen, pName, nNameLen );
792     	else if( rEntity.maContextStack.top()->maNamespace.getLength() > 0 )
793     		nElementToken = GetTokenWithNamespaceURL( rEntity.maContextStack.top()->maNamespace, pName, nNameLen );
794     	else
795     		nElementToken = GetToken( pName );
796     	rEntity.maContextStack.top()->mnElementToken = nElementToken;
797 
798 		Reference< XFastAttributeList > xAttr( rEntity.mxAttributes.get() );
799 		Reference< XFastContextHandler > xContext;
800 		if( nElementToken == FastToken::DONTKNOW )
801 		{
802 			if( nPrefixLen > 0 )
803 				rEntity.maContextStack.top()->maNamespace = GetNamespaceURL( pPrefix, nPrefixLen );
804 
805 			const OUString aNamespace( rEntity.maContextStack.top()->maNamespace );
806 			const OUString aElementName( pPrefix, nPrefixLen, RTL_TEXTENCODING_UTF8 );
807 			rEntity.maContextStack.top()->maElementName = aElementName;
808 
809 			if( xParentContext.is() )
810 				xContext = xParentContext->createUnknownChildContext( aNamespace, aElementName, xAttr );
811 			else
812 				xContext = rEntity.mxDocumentHandler->createUnknownChildContext( aNamespace, aElementName, xAttr );
813 
814 			if( xContext.is() )
815 			{
816 				rEntity.maContextStack.top()->mxContext = xContext;
817 				xContext->startUnknownElement( aNamespace, aElementName, xAttr );
818 			}
819 		}
820 		else
821 		{
822 			if( xParentContext.is() )
823 				xContext = xParentContext->createFastChildContext( nElementToken, xAttr );
824 			else
825 				xContext = rEntity.mxDocumentHandler->createFastChildContext( nElementToken, xAttr );
826 
827 
828 			if( xContext.is() )
829 			{
830 				rEntity.maContextStack.top()->mxContext = xContext;
831 				xContext->startFastElement( nElementToken, xAttr );
832 			}
833 		}
834 	}
835 	catch( Exception& e )
836 	{
837 		rEntity.maSavedException <<= e;
838 	}
839 }
840 
841 void FastSaxParser::callbackEndElement( const XML_Char* )
842 {
843     Entity& rEntity = getEntity();
844     OSL_ENSURE( !rEntity.maContextStack.empty(), "FastSaxParser::callbackEndElement - no context" );
845 	if( !rEntity.maContextStack.empty() )
846 	{
847 		SaxContextImplPtr pContext = rEntity.maContextStack.top();
848 		const Reference< XFastContextHandler >& xContext( pContext->mxContext );
849 		if( xContext.is() ) try
850 		{
851 			sal_Int32 nElementToken = pContext->mnElementToken;
852 			if( nElementToken != FastToken::DONTKNOW )
853 				xContext->endFastElement( nElementToken );
854 			else
855 				xContext->endUnknownElement( pContext->maNamespace, pContext->maElementName );
856 		}
857 		catch( Exception& e )
858 		{
859 			rEntity.maSavedException <<= e;
860 		}
861 
862 		popContext();
863 	}
864 }
865 
866 
867 void FastSaxParser::callbackCharacters( const XML_Char* s, int nLen )
868 {
869     Entity& rEntity = getEntity();
870 	const Reference< XFastContextHandler >& xContext( rEntity.maContextStack.top()->mxContext );
871 	if( xContext.is() ) try
872 	{
873 		xContext->characters( OUString( s, nLen, RTL_TEXTENCODING_UTF8 ) );
874 	}
875 	catch( Exception& e )
876 	{
877 		rEntity.maSavedException <<= e;
878 	}
879 }
880 
881 int FastSaxParser::callbackExternalEntityRef( XML_Parser parser,
882         const XML_Char *context, const XML_Char * /*base*/, const XML_Char *systemId, const XML_Char *publicId )
883 {
884 	bool bOK = true;
885 	InputSource source;
886 
887     Entity& rCurrEntity = getEntity();
888 	Entity aNewEntity( rCurrEntity );
889 
890 	if( rCurrEntity.mxEntityResolver.is() ) try
891 	{
892     	aNewEntity.maStructSource = rCurrEntity.mxEntityResolver->resolveEntity(
893 			OUString( publicId, strlen( publicId ), RTL_TEXTENCODING_UTF8 ) ,
894 			OUString( systemId, strlen( systemId ), RTL_TEXTENCODING_UTF8 ) );
895     }
896     catch( SAXParseException & e )
897 	{
898     	rCurrEntity.maSavedException <<= e;
899     	bOK = false;
900     }
901     catch( SAXException & e )
902 	{
903     	rCurrEntity.maSavedException <<= SAXParseException(
904 			e.Message, e.Context, e.WrappedException,
905 			mxDocumentLocator->getPublicId(),
906 			mxDocumentLocator->getSystemId(),
907 			mxDocumentLocator->getLineNumber(),
908 			mxDocumentLocator->getColumnNumber() );
909 		bOK = false;
910     }
911 
912 	if( aNewEntity.maStructSource.aInputStream.is() )
913 	{
914 		aNewEntity.mpParser = XML_ExternalEntityParserCreate( parser, context, 0 );
915 		if( !aNewEntity.mpParser )
916 		{
917 			return false;
918 		}
919 
920 		aNewEntity.maConverter.setInputStream( aNewEntity.maStructSource.aInputStream );
921 		pushEntity( aNewEntity );
922 		try
923 		{
924 			parse();
925 		}
926 		catch( SAXParseException & e )
927 		{
928 			rCurrEntity.maSavedException <<= e;
929 			bOK = false;
930 		}
931 		catch( IOException &e )
932 		{
933 			SAXException aEx;
934 			aEx.WrappedException <<= e;
935 			rCurrEntity.maSavedException <<= aEx;
936 			bOK = false;
937 		}
938 		catch( RuntimeException &e )
939 		{
940 			SAXException aEx;
941 			aEx.WrappedException <<= e;
942 			rCurrEntity.maSavedException <<= aEx;
943 			bOK = false;
944 		}
945 
946 		popEntity();
947 		XML_ParserFree( aNewEntity.mpParser );
948 	}
949 
950 	return bOK;
951 }
952 
953 } // namespace sax_fastparser
954