1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #ifndef _SAX_FASTPARSER_HXX_
29 #define _SAX_FASTPARSER_HXX_
30 
31 #include <vector>
32 #include <stack>
33 #include <hash_map>
34 #include <boost/shared_ptr.hpp>
35 #include <rtl/ref.hxx>
36 #include <com/sun/star/xml/sax/XFastParser.hpp>
37 #include <com/sun/star/xml/sax/XFastTokenHandler.hpp>
38 #include <com/sun/star/xml/sax/XFastDocumentHandler.hpp>
39 #include <com/sun/star/lang/XServiceInfo.hpp>
40 #include <cppuhelper/implbase2.hxx>
41 
42 #include <expat.h>
43 #include "xml2utf.hxx"
44 
45 #include <sax/fastattribs.hxx>
46 
// String identifiers of this component: the implementation name and the
// service name. The macros are only defined here; they are not used within
// this header.
// NOTE(review): presumably returned by getImplementationName() and
// getSupportedServiceNames() -- confirm in the implementation file.
47 #define PARSER_IMPLEMENTATION_NAME "com.sun.star.comp.extensions.xml.sax.FastParser"
48 #define PARSER_SERVICE_NAME        "com.sun.star.xml.sax.FastParser"
49 
50 namespace sax_fastparser {
51 
// Forward declarations only; the definitions are not part of this header.
52 class FastLocatorImpl;
53 struct NamespaceDefine;
54 struct SaxContextImpl;
55 
// Shared-ownership handles to the forward-declared helper structs. They are
// the element types of Entity::maContextStack and Entity::maNamespaceDefines.
56 typedef ::boost::shared_ptr< SaxContextImpl > SaxContextImplPtr;
57 typedef ::boost::shared_ptr< NamespaceDefine > NamespaceDefineRef;
58 
// Hash map from an OUString key to a sal_Int32 value; the type of
// FastSaxParser::maNamespaceMap.
// NOTE(review): presumably maps a namespace URL to its namespace token, as
// passed to registerNamespace() -- confirm in the implementation file.
59 typedef ::std::hash_map< ::rtl::OUString, sal_Int32,
60         ::rtl::OUStringHash, ::std::equal_to< ::rtl::OUString > > NamespaceMap;
61 
62 // --------------------------------------------------------------------
63 
// Bundle of handler references and the locale that make up the parser
// configuration. It is the base of Entity, and FastSaxParser keeps one
// instance (maData) as the cached configuration for the next parseStream().
64 struct ParserData
65 {
    // UNO references to the configured handlers.
    // NOTE(review): presumably assigned by the matching FastSaxParser setters
    // (setFastDocumentHandler, setTokenHandler, setErrorHandler,
    // setEntityResolver) -- confirm in the implementation file.
66     ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastDocumentHandler > mxDocumentHandler;
67     ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >    mxTokenHandler;
68     ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XErrorHandler >        mxErrorHandler;
69     ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XEntityResolver >      mxEntityResolver;
    // Locale value; presumably the one passed to FastSaxParser::setLocale().
70     ::com::sun::star::lang::Locale          maLocale;
71 
    // Constructor and destructor are declared here and defined elsewhere.
72     ParserData();
73     ~ParserData();
74 };
75 
76 // --------------------------------------------------------------------
77 
78 // Entity binds all information needed for a single file
// Extends the parser configuration (ParserData) with the per-input state.
// FastSaxParser stores Entity objects by value in a std::stack (maEntities).
// NOTE(review): pushEntity() takes a const Entity&, so the raw mpParser
// handle is copied along with the struct -- confirm who owns and frees it.
79 struct Entity : public ParserData
80 {
    // Input source this entity is read from.
81     ::com::sun::star::xml::sax::InputSource maStructSource;
    // Expat parser handle (XML_Parser comes from <expat.h>).
82     XML_Parser                              mpParser;
    // Converter declared in "xml2utf.hxx".
    // NOTE(review): presumably converts the input stream to UTF-8 before it
    // is handed to expat -- confirm.
83     ::sax_expatwrap::XMLFile2UTFConverter   maConverter;
    // Reference-counted attribute list.
    // NOTE(review): presumably reused for each start-element callback -- confirm.
84     ::rtl::Reference< FastAttributeList >   mxAttributes;
85 
86     // Exceptions cannot be thrown through the C-XmlParser (possible resource leaks),
87     // therefore the exception must be saved somewhere.
88     ::com::sun::star::uno::Any              maSavedException;
89 
    // Stack of shared context objects.
    // NOTE(review): presumably maintained by FastSaxParser::pushContext() and
    // popContext() -- confirm.
90     ::std::stack< SaxContextImplPtr >       maContextStack;
    // Namespace definitions collected for this entity.
    // NOTE(review): presumably filled by FastSaxParser::DefineNamespace() -- confirm.
91     ::std::vector< NamespaceDefineRef >     maNamespaceDefines;
92 
    // Builds an Entity from a parser configuration; explicit, so a ParserData
    // is never converted to an Entity implicitly.
93     explicit Entity( const ParserData& rData );
94     ~Entity();
95 };
96 
97 // --------------------------------------------------------------------
98 
99 // This class implements the external Parser interface
// UNO component implementing XFastParser and XServiceInfo by way of
// ::cppu::WeakImplHelper2. Parsing is done with expat (see the callback*
// members); per-input state lives in a stack of Entity objects.
100 class FastSaxParser : public ::cppu::WeakImplHelper2< ::com::sun::star::xml::sax::XFastParser, ::com::sun::star::lang::XServiceInfo >
101 {
102 public:
103     FastSaxParser();
104     virtual ~FastSaxParser();
105 
106     // The implementation details
    // Static counterpart of getSupportedServiceNames(); callable without an instance.
107     static ::com::sun::star::uno::Sequence< ::rtl::OUString > getSupportedServiceNames_Static(void);
108 
109     // XFastParser
    // parseStream() is the only member here declared to throw SAXException and
    // IOException; the other XFastParser members declare RuntimeException,
    // plus IllegalArgumentException for registerNamespace().
110     virtual void SAL_CALL parseStream( const ::com::sun::star::xml::sax::InputSource& aInputSource ) throw (::com::sun::star::xml::sax::SAXException, ::com::sun::star::io::IOException, ::com::sun::star::uno::RuntimeException);
111     virtual void SAL_CALL setFastDocumentHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastDocumentHandler >& Handler ) throw (::com::sun::star::uno::RuntimeException);
112     virtual void SAL_CALL setTokenHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XFastTokenHandler >& Handler ) throw (::com::sun::star::uno::RuntimeException);
113     virtual void SAL_CALL registerNamespace( const ::rtl::OUString& NamespaceURL, sal_Int32 NamespaceToken ) throw (::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException);
114     virtual void SAL_CALL setErrorHandler( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XErrorHandler >& Handler ) throw (::com::sun::star::uno::RuntimeException);
115     virtual void SAL_CALL setEntityResolver( const ::com::sun::star::uno::Reference< ::com::sun::star::xml::sax::XEntityResolver >& Resolver ) throw (::com::sun::star::uno::RuntimeException);
116     virtual void SAL_CALL setLocale( const ::com::sun::star::lang::Locale& rLocale ) throw (::com::sun::star::uno::RuntimeException);
117 
118     // XServiceInfo
119     virtual ::rtl::OUString SAL_CALL getImplementationName(  ) throw (::com::sun::star::uno::RuntimeException);
120     virtual sal_Bool SAL_CALL supportsService( const ::rtl::OUString& ServiceName ) throw (::com::sun::star::uno::RuntimeException);
121     virtual ::com::sun::star::uno::Sequence< ::rtl::OUString > SAL_CALL getSupportedServiceNames(  ) throw (::com::sun::star::uno::RuntimeException);
122 
123     // called by the C callbacks of the expat parser
    // These members are public so that the free C callback functions can reach
    // them. None of them declares an exception specification; Entity holds
    // maSavedException because exceptions cannot be thrown through the C parser.
124     void callbackStartElement( const XML_Char* name, const XML_Char** atts );
125     void callbackEndElement( const XML_Char* name );
126     void callbackCharacters( const XML_Char* s, int nLen );
    // NOTE(review): the parameter list looks like expat's
    // XML_ExternalEntityRefHandler -- confirm against the expat reference.
127     int callbackExternalEntityRef( XML_Parser parser, const XML_Char *openEntityNames, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId);
128 
    // Entity stack access. pushEntity() stores a copy of rEntity. popEntity()
    // and getEntity() forward directly to std::stack, so calling either on an
    // empty stack is undefined behaviour; callers must push first.
129     inline void pushEntity( const Entity& rEntity ) { maEntities.push( rEntity ); }
130     inline void popEntity()                         { maEntities.pop(); }
131     Entity& getEntity()                             { return maEntities.top(); }
132 
133 private:
    // NOTE(review): presumably runs the parse loop for the current entity -- confirm.
134     void parse();
135 
    // Token and namespace lookup helpers. Most come in two overloads: one
    // taking OString/OUString and one taking a raw character pointer plus length.
    // NOTE(review): a length of 0 (the default of nTokenLen and len)
    // presumably means "determine the length from the terminating NUL" -- confirm.
136     sal_Int32 GetToken( const ::rtl::OString& rToken );
137     sal_Int32 GetToken( const sal_Char* pToken, sal_Int32 nTokenLen = 0 );
138     sal_Int32 GetTokenWithPrefix( const ::rtl::OString& rPrefix, const ::rtl::OString& rName ) throw (::com::sun::star::xml::sax::SAXException);
139     sal_Int32 GetTokenWithPrefix( const sal_Char*pPrefix, int nPrefixLen, const sal_Char* pName, int nNameLen ) throw (::com::sun::star::xml::sax::SAXException);
140     ::rtl::OUString GetNamespaceURL( const ::rtl::OString& rPrefix ) throw (::com::sun::star::xml::sax::SAXException);
141     ::rtl::OUString GetNamespaceURL( const sal_Char*pPrefix, int nPrefixLen ) throw (::com::sun::star::xml::sax::SAXException);
142     sal_Int32 GetNamespaceToken( const ::rtl::OUString& rNamespaceURL );
143     sal_Int32 GetTokenWithNamespaceURL( const ::rtl::OUString& rNamespaceURL, const sal_Char* pName, int nNameLen );
144     void DefineNamespace( const ::rtl::OString& rPrefix, const sal_Char* pNamespaceURL );
145     sal_Int32 CreateCustomToken( const sal_Char* pToken, int len = 0 );
146 
    // NOTE(review): presumably push/pop on the current entity's maContextStack -- confirm.
147     void pushContext();
148     void popContext();
149 
    // Takes a name and reports prefix and local name through the four
    // reference parameters (pointer and length each).
    // NOTE(review): presumably splits a qualified name at the ':' -- confirm.
150     void splitName( const XML_Char *pwName, const XML_Char *&rpPrefix, sal_Int32 &rPrefixLen, const XML_Char *&rpName, sal_Int32 &rNameLen );
151 
    // Data members.
152 private:
    // NOTE(review): which members this mutex guards is not visible here -- confirm.
153     ::osl::Mutex maMutex;
154 
155     ::rtl::Reference< FastLocatorImpl >     mxDocumentLocator;
    // NOTE(review): presumably filled by registerNamespace() -- confirm.
156     NamespaceMap                            maNamespaceMap;
157 
158     ParserData maData;                      ///< Cached parser configuration for next call of parseStream().
159     ::std::stack< Entity > maEntities;      ///< Entity stack for each call of parseStream().
160 };
161 
162 }
163 
164 #endif // _SAX_FASTPARSER_HXX_
165