xref: /aoo42x/main/shell/source/all/xml_parser.cxx (revision cdf0e10c)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_shell.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #ifndef XML_PARSER_HXX_INCLUDED
32*cdf0e10cSrcweir #include "internal/xml_parser.hxx"
33*cdf0e10cSrcweir #endif
34*cdf0e10cSrcweir #include "internal/i_xml_parser_event_handler.hxx"
35*cdf0e10cSrcweir 
36*cdf0e10cSrcweir #include <assert.h>
37*cdf0e10cSrcweir 
38*cdf0e10cSrcweir namespace /* private */
39*cdf0e10cSrcweir {
40*cdf0e10cSrcweir 
41*cdf0e10cSrcweir 	//######################################################
42*cdf0e10cSrcweir 	/*  Extracts the local part of tag without
43*cdf0e10cSrcweir 		namespace decoration e.g. meta:creator -> creator */
44*cdf0e10cSrcweir 	const XML_Char COLON = (XML_Char)':';
45*cdf0e10cSrcweir 
46*cdf0e10cSrcweir 	const XML_Char* get_local_name(const XML_Char* rawname)
47*cdf0e10cSrcweir 	{
48*cdf0e10cSrcweir 		const XML_Char* p = rawname;
49*cdf0e10cSrcweir 
50*cdf0e10cSrcweir 		// go to the end
51*cdf0e10cSrcweir 		while (*p) p++;
52*cdf0e10cSrcweir 
53*cdf0e10cSrcweir 		// go back until the first ':'
54*cdf0e10cSrcweir 		while (*p != COLON && p > rawname)
55*cdf0e10cSrcweir 			p--;
56*cdf0e10cSrcweir 
57*cdf0e10cSrcweir 		// if we are on a colon one step forward
58*cdf0e10cSrcweir 		if (*p == COLON)
59*cdf0e10cSrcweir 			p++;
60*cdf0e10cSrcweir 
61*cdf0e10cSrcweir 		return p;
62*cdf0e10cSrcweir 	}
63*cdf0e10cSrcweir 
64*cdf0e10cSrcweir 	//################################################
65*cdf0e10cSrcweir 	inline xml_parser* get_parser_instance(void* data)
66*cdf0e10cSrcweir 	{
67*cdf0e10cSrcweir 		return reinterpret_cast<xml_parser*>(XML_GetUserData(
68*cdf0e10cSrcweir 			reinterpret_cast<XML_Parser>(data)));
69*cdf0e10cSrcweir 	}
70*cdf0e10cSrcweir 
71*cdf0e10cSrcweir     //################################################
72*cdf0e10cSrcweir     bool has_only_whitespaces(const XML_Char* s, int len)
73*cdf0e10cSrcweir     {
74*cdf0e10cSrcweir         const XML_Char* p = s;
75*cdf0e10cSrcweir         for (int i = 0; i < len; i++)
76*cdf0e10cSrcweir             if (*p++ != ' ') return false;
77*cdf0e10cSrcweir         return true;
78*cdf0e10cSrcweir     }
79*cdf0e10cSrcweir }
80*cdf0e10cSrcweir 
81*cdf0e10cSrcweir //###################################################
82*cdf0e10cSrcweir xml_parser::xml_parser(const XML_Char* EncodingName) :
83*cdf0e10cSrcweir 	document_handler_(0),
84*cdf0e10cSrcweir 	xml_parser_(XML_ParserCreate(EncodingName))
85*cdf0e10cSrcweir {
86*cdf0e10cSrcweir 	init();
87*cdf0e10cSrcweir }
88*cdf0e10cSrcweir 
89*cdf0e10cSrcweir //###################################################
90*cdf0e10cSrcweir xml_parser::~xml_parser()
91*cdf0e10cSrcweir {
92*cdf0e10cSrcweir 	XML_ParserFree(xml_parser_);
93*cdf0e10cSrcweir }
94*cdf0e10cSrcweir 
95*cdf0e10cSrcweir //###################################################
96*cdf0e10cSrcweir /* Callback functions will be called by the parser on
97*cdf0e10cSrcweir    different events */
98*cdf0e10cSrcweir 
99*cdf0e10cSrcweir //###################################################
100*cdf0e10cSrcweir extern "C"
101*cdf0e10cSrcweir {
102*cdf0e10cSrcweir 
103*cdf0e10cSrcweir static void xml_start_element_handler(void* UserData, const XML_Char* name, const XML_Char** atts)
104*cdf0e10cSrcweir {
105*cdf0e10cSrcweir 	assert(UserData != NULL);
106*cdf0e10cSrcweir 
107*cdf0e10cSrcweir 	xml_parser* pImpl  = get_parser_instance(UserData);
108*cdf0e10cSrcweir 
109*cdf0e10cSrcweir     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
110*cdf0e10cSrcweir 	if (pDocHdl)
111*cdf0e10cSrcweir 	{
112*cdf0e10cSrcweir 		xml_tag_attribute_container_t attributes;
113*cdf0e10cSrcweir 
114*cdf0e10cSrcweir 		int i = 0;
115*cdf0e10cSrcweir 
116*cdf0e10cSrcweir 		while(atts[i])
117*cdf0e10cSrcweir 		{
118*cdf0e10cSrcweir 			attributes[reinterpret_cast<const char_t*>(get_local_name(atts[i]))] = reinterpret_cast<const char_t*>(atts[i+1]);
119*cdf0e10cSrcweir 			i += 2; // skip to next pair
120*cdf0e10cSrcweir 		}
121*cdf0e10cSrcweir 
122*cdf0e10cSrcweir 		pDocHdl->start_element(
123*cdf0e10cSrcweir 			reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)), attributes);
124*cdf0e10cSrcweir 	}
125*cdf0e10cSrcweir }
126*cdf0e10cSrcweir 
127*cdf0e10cSrcweir //###################################################
128*cdf0e10cSrcweir static void xml_end_element_handler(void* UserData, const XML_Char* name)
129*cdf0e10cSrcweir {
130*cdf0e10cSrcweir 	assert(UserData);
131*cdf0e10cSrcweir 
132*cdf0e10cSrcweir 	xml_parser* pImpl  = get_parser_instance(UserData);
133*cdf0e10cSrcweir     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
134*cdf0e10cSrcweir 	if (pDocHdl)
135*cdf0e10cSrcweir 		pDocHdl->end_element(reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)));
136*cdf0e10cSrcweir }
137*cdf0e10cSrcweir 
138*cdf0e10cSrcweir //###################################################
139*cdf0e10cSrcweir static void xml_character_data_handler(void* UserData, const XML_Char* s, int len)
140*cdf0e10cSrcweir {
141*cdf0e10cSrcweir 	assert(UserData);
142*cdf0e10cSrcweir 
143*cdf0e10cSrcweir 	xml_parser* pImpl  = get_parser_instance(UserData);
144*cdf0e10cSrcweir     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
145*cdf0e10cSrcweir 	if (pDocHdl)
146*cdf0e10cSrcweir     {
147*cdf0e10cSrcweir         if (has_only_whitespaces(s,len))
148*cdf0e10cSrcweir             pDocHdl->ignore_whitespace(string_t(reinterpret_cast<const char_t*>(s), len));
149*cdf0e10cSrcweir         else
150*cdf0e10cSrcweir             pDocHdl->characters(string_t(reinterpret_cast<const char_t*>(s), len));
151*cdf0e10cSrcweir     }
152*cdf0e10cSrcweir }
153*cdf0e10cSrcweir 
154*cdf0e10cSrcweir //###################################################
155*cdf0e10cSrcweir static void xml_comment_handler(void* UserData, const XML_Char* Data)
156*cdf0e10cSrcweir {
157*cdf0e10cSrcweir 	assert(UserData);
158*cdf0e10cSrcweir 
159*cdf0e10cSrcweir 	xml_parser* pImpl  = get_parser_instance(UserData);
160*cdf0e10cSrcweir     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
161*cdf0e10cSrcweir 	if (pDocHdl)
162*cdf0e10cSrcweir 		pDocHdl->comment(reinterpret_cast<const char_t*>(Data));
163*cdf0e10cSrcweir }
164*cdf0e10cSrcweir 
165*cdf0e10cSrcweir } // extern "C"
166*cdf0e10cSrcweir 
167*cdf0e10cSrcweir //###################################################
168*cdf0e10cSrcweir void xml_parser::init()
169*cdf0e10cSrcweir {
170*cdf0e10cSrcweir 	XML_SetUserData(xml_parser_, this);
171*cdf0e10cSrcweir 
172*cdf0e10cSrcweir 	// we use the parser as handler argument,
173*cdf0e10cSrcweir 	// so we could use it if necessary, the
174*cdf0e10cSrcweir 	// UserData are usable anyway using
175*cdf0e10cSrcweir 	// XML_GetUserData(...)
176*cdf0e10cSrcweir 	XML_UseParserAsHandlerArg(xml_parser_);
177*cdf0e10cSrcweir 
178*cdf0e10cSrcweir 	XML_SetElementHandler(
179*cdf0e10cSrcweir 		xml_parser_,
180*cdf0e10cSrcweir 		xml_start_element_handler,
181*cdf0e10cSrcweir 		xml_end_element_handler);
182*cdf0e10cSrcweir 
183*cdf0e10cSrcweir 	XML_SetCharacterDataHandler(
184*cdf0e10cSrcweir 		xml_parser_,
185*cdf0e10cSrcweir 		xml_character_data_handler);
186*cdf0e10cSrcweir 
187*cdf0e10cSrcweir 	XML_SetCommentHandler(
188*cdf0e10cSrcweir 		xml_parser_,
189*cdf0e10cSrcweir 		xml_comment_handler);
190*cdf0e10cSrcweir }
191*cdf0e10cSrcweir 
192*cdf0e10cSrcweir //###################################################
193*cdf0e10cSrcweir void xml_parser::parse(const char* XmlData, size_t Length, bool IsFinal)
194*cdf0e10cSrcweir {
195*cdf0e10cSrcweir 	if (0 == XML_Parse(xml_parser_, XmlData, Length, IsFinal))
196*cdf0e10cSrcweir 		throw xml_parser_exception(
197*cdf0e10cSrcweir 			(char*)XML_ErrorString(XML_GetErrorCode(xml_parser_)),
198*cdf0e10cSrcweir 			(int)XML_GetErrorCode(xml_parser_),
199*cdf0e10cSrcweir 			XML_GetCurrentLineNumber(xml_parser_),
200*cdf0e10cSrcweir 			XML_GetCurrentColumnNumber(xml_parser_),
201*cdf0e10cSrcweir 			XML_GetCurrentByteIndex(xml_parser_));
202*cdf0e10cSrcweir }
203*cdf0e10cSrcweir 
204*cdf0e10cSrcweir //###################################################
205*cdf0e10cSrcweir void xml_parser::set_document_handler(
206*cdf0e10cSrcweir 	i_xml_parser_event_handler* event_handler)
207*cdf0e10cSrcweir {
208*cdf0e10cSrcweir 	document_handler_ = event_handler;
209*cdf0e10cSrcweir }
210*cdf0e10cSrcweir 
211*cdf0e10cSrcweir //###################################################
212*cdf0e10cSrcweir i_xml_parser_event_handler* xml_parser::get_document_handler() const
213*cdf0e10cSrcweir {
214*cdf0e10cSrcweir 	return document_handler_;
215*cdf0e10cSrcweir }
216