xref: /aoo42x/main/shell/source/all/xml_parser.cxx (revision f8e2c85a)
1*f8e2c85aSAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*f8e2c85aSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*f8e2c85aSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*f8e2c85aSAndrew Rist  * distributed with this work for additional information
6*f8e2c85aSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*f8e2c85aSAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*f8e2c85aSAndrew Rist  * "License"); you may not use this file except in compliance
9*f8e2c85aSAndrew Rist  * with the License.  You may obtain a copy of the License at
10*f8e2c85aSAndrew Rist  *
11*f8e2c85aSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*f8e2c85aSAndrew Rist  *
13*f8e2c85aSAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*f8e2c85aSAndrew Rist  * software distributed under the License is distributed on an
15*f8e2c85aSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*f8e2c85aSAndrew Rist  * KIND, either express or implied.  See the License for the
17*f8e2c85aSAndrew Rist  * specific language governing permissions and limitations
18*f8e2c85aSAndrew Rist  * under the License.
19*f8e2c85aSAndrew Rist  *
20*f8e2c85aSAndrew Rist  *************************************************************/
21*f8e2c85aSAndrew Rist 
22*f8e2c85aSAndrew Rist 
23cdf0e10cSrcweir 
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_shell.hxx"

#ifndef XML_PARSER_HXX_INCLUDED
#include "internal/xml_parser.hxx"
#endif
#include "internal/i_xml_parser_event_handler.hxx"

#include <assert.h>
#include <limits.h>
33cdf0e10cSrcweir 
34cdf0e10cSrcweir namespace /* private */
35cdf0e10cSrcweir {
36cdf0e10cSrcweir 
37cdf0e10cSrcweir 	//######################################################
38cdf0e10cSrcweir 	/*  Extracts the local part of tag without
39cdf0e10cSrcweir 		namespace decoration e.g. meta:creator -> creator */
40cdf0e10cSrcweir 	const XML_Char COLON = (XML_Char)':';
41cdf0e10cSrcweir 
get_local_name(const XML_Char * rawname)42cdf0e10cSrcweir 	const XML_Char* get_local_name(const XML_Char* rawname)
43cdf0e10cSrcweir 	{
44cdf0e10cSrcweir 		const XML_Char* p = rawname;
45cdf0e10cSrcweir 
46cdf0e10cSrcweir 		// go to the end
47cdf0e10cSrcweir 		while (*p) p++;
48cdf0e10cSrcweir 
49cdf0e10cSrcweir 		// go back until the first ':'
50cdf0e10cSrcweir 		while (*p != COLON && p > rawname)
51cdf0e10cSrcweir 			p--;
52cdf0e10cSrcweir 
53cdf0e10cSrcweir 		// if we are on a colon one step forward
54cdf0e10cSrcweir 		if (*p == COLON)
55cdf0e10cSrcweir 			p++;
56cdf0e10cSrcweir 
57cdf0e10cSrcweir 		return p;
58cdf0e10cSrcweir 	}
59cdf0e10cSrcweir 
60cdf0e10cSrcweir 	//################################################
get_parser_instance(void * data)61cdf0e10cSrcweir 	inline xml_parser* get_parser_instance(void* data)
62cdf0e10cSrcweir 	{
63cdf0e10cSrcweir 		return reinterpret_cast<xml_parser*>(XML_GetUserData(
64cdf0e10cSrcweir 			reinterpret_cast<XML_Parser>(data)));
65cdf0e10cSrcweir 	}
66cdf0e10cSrcweir 
67cdf0e10cSrcweir     //################################################
has_only_whitespaces(const XML_Char * s,int len)68cdf0e10cSrcweir     bool has_only_whitespaces(const XML_Char* s, int len)
69cdf0e10cSrcweir     {
70cdf0e10cSrcweir         const XML_Char* p = s;
71cdf0e10cSrcweir         for (int i = 0; i < len; i++)
72cdf0e10cSrcweir             if (*p++ != ' ') return false;
73cdf0e10cSrcweir         return true;
74cdf0e10cSrcweir     }
75cdf0e10cSrcweir }
76cdf0e10cSrcweir 
77cdf0e10cSrcweir //###################################################
xml_parser(const XML_Char * EncodingName)78cdf0e10cSrcweir xml_parser::xml_parser(const XML_Char* EncodingName) :
79cdf0e10cSrcweir 	document_handler_(0),
80cdf0e10cSrcweir 	xml_parser_(XML_ParserCreate(EncodingName))
81cdf0e10cSrcweir {
82cdf0e10cSrcweir 	init();
83cdf0e10cSrcweir }
84cdf0e10cSrcweir 
85cdf0e10cSrcweir //###################################################
~xml_parser()86cdf0e10cSrcweir xml_parser::~xml_parser()
87cdf0e10cSrcweir {
88cdf0e10cSrcweir 	XML_ParserFree(xml_parser_);
89cdf0e10cSrcweir }
90cdf0e10cSrcweir 
91cdf0e10cSrcweir //###################################################
92cdf0e10cSrcweir /* Callback functions will be called by the parser on
93cdf0e10cSrcweir    different events */
94cdf0e10cSrcweir 
95cdf0e10cSrcweir //###################################################
96cdf0e10cSrcweir extern "C"
97cdf0e10cSrcweir {
98cdf0e10cSrcweir 
xml_start_element_handler(void * UserData,const XML_Char * name,const XML_Char ** atts)99cdf0e10cSrcweir static void xml_start_element_handler(void* UserData, const XML_Char* name, const XML_Char** atts)
100cdf0e10cSrcweir {
101cdf0e10cSrcweir 	assert(UserData != NULL);
102cdf0e10cSrcweir 
103cdf0e10cSrcweir 	xml_parser* pImpl  = get_parser_instance(UserData);
104cdf0e10cSrcweir 
105cdf0e10cSrcweir     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
106cdf0e10cSrcweir 	if (pDocHdl)
107cdf0e10cSrcweir 	{
108cdf0e10cSrcweir 		xml_tag_attribute_container_t attributes;
109cdf0e10cSrcweir 
110cdf0e10cSrcweir 		int i = 0;
111cdf0e10cSrcweir 
112cdf0e10cSrcweir 		while(atts[i])
113cdf0e10cSrcweir 		{
114cdf0e10cSrcweir 			attributes[reinterpret_cast<const char_t*>(get_local_name(atts[i]))] = reinterpret_cast<const char_t*>(atts[i+1]);
115cdf0e10cSrcweir 			i += 2; // skip to next pair
116cdf0e10cSrcweir 		}
117cdf0e10cSrcweir 
118cdf0e10cSrcweir 		pDocHdl->start_element(
119cdf0e10cSrcweir 			reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)), attributes);
120cdf0e10cSrcweir 	}
121cdf0e10cSrcweir }
122cdf0e10cSrcweir 
123cdf0e10cSrcweir //###################################################
xml_end_element_handler(void * UserData,const XML_Char * name)124cdf0e10cSrcweir static void xml_end_element_handler(void* UserData, const XML_Char* name)
125cdf0e10cSrcweir {
126cdf0e10cSrcweir 	assert(UserData);
127cdf0e10cSrcweir 
128cdf0e10cSrcweir 	xml_parser* pImpl  = get_parser_instance(UserData);
129cdf0e10cSrcweir     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
130cdf0e10cSrcweir 	if (pDocHdl)
131cdf0e10cSrcweir 		pDocHdl->end_element(reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)));
132cdf0e10cSrcweir }
133cdf0e10cSrcweir 
134cdf0e10cSrcweir //###################################################
xml_character_data_handler(void * UserData,const XML_Char * s,int len)135cdf0e10cSrcweir static void xml_character_data_handler(void* UserData, const XML_Char* s, int len)
136cdf0e10cSrcweir {
137cdf0e10cSrcweir 	assert(UserData);
138cdf0e10cSrcweir 
139cdf0e10cSrcweir 	xml_parser* pImpl  = get_parser_instance(UserData);
140cdf0e10cSrcweir     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
141cdf0e10cSrcweir 	if (pDocHdl)
142cdf0e10cSrcweir     {
143cdf0e10cSrcweir         if (has_only_whitespaces(s,len))
144cdf0e10cSrcweir             pDocHdl->ignore_whitespace(string_t(reinterpret_cast<const char_t*>(s), len));
145cdf0e10cSrcweir         else
146cdf0e10cSrcweir             pDocHdl->characters(string_t(reinterpret_cast<const char_t*>(s), len));
147cdf0e10cSrcweir     }
148cdf0e10cSrcweir }
149cdf0e10cSrcweir 
150cdf0e10cSrcweir //###################################################
xml_comment_handler(void * UserData,const XML_Char * Data)151cdf0e10cSrcweir static void xml_comment_handler(void* UserData, const XML_Char* Data)
152cdf0e10cSrcweir {
153cdf0e10cSrcweir 	assert(UserData);
154cdf0e10cSrcweir 
155cdf0e10cSrcweir 	xml_parser* pImpl  = get_parser_instance(UserData);
156cdf0e10cSrcweir     i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler();
157cdf0e10cSrcweir 	if (pDocHdl)
158cdf0e10cSrcweir 		pDocHdl->comment(reinterpret_cast<const char_t*>(Data));
159cdf0e10cSrcweir }
160cdf0e10cSrcweir 
161cdf0e10cSrcweir } // extern "C"
162cdf0e10cSrcweir 
163cdf0e10cSrcweir //###################################################
init()164cdf0e10cSrcweir void xml_parser::init()
165cdf0e10cSrcweir {
166cdf0e10cSrcweir 	XML_SetUserData(xml_parser_, this);
167cdf0e10cSrcweir 
168cdf0e10cSrcweir 	// we use the parser as handler argument,
169cdf0e10cSrcweir 	// so we could use it if necessary, the
170cdf0e10cSrcweir 	// UserData are usable anyway using
171cdf0e10cSrcweir 	// XML_GetUserData(...)
172cdf0e10cSrcweir 	XML_UseParserAsHandlerArg(xml_parser_);
173cdf0e10cSrcweir 
174cdf0e10cSrcweir 	XML_SetElementHandler(
175cdf0e10cSrcweir 		xml_parser_,
176cdf0e10cSrcweir 		xml_start_element_handler,
177cdf0e10cSrcweir 		xml_end_element_handler);
178cdf0e10cSrcweir 
179cdf0e10cSrcweir 	XML_SetCharacterDataHandler(
180cdf0e10cSrcweir 		xml_parser_,
181cdf0e10cSrcweir 		xml_character_data_handler);
182cdf0e10cSrcweir 
183cdf0e10cSrcweir 	XML_SetCommentHandler(
184cdf0e10cSrcweir 		xml_parser_,
185cdf0e10cSrcweir 		xml_comment_handler);
186cdf0e10cSrcweir }
187cdf0e10cSrcweir 
188cdf0e10cSrcweir //###################################################
parse(const char * XmlData,size_t Length,bool IsFinal)189cdf0e10cSrcweir void xml_parser::parse(const char* XmlData, size_t Length, bool IsFinal)
190cdf0e10cSrcweir {
191cdf0e10cSrcweir 	if (0 == XML_Parse(xml_parser_, XmlData, Length, IsFinal))
192cdf0e10cSrcweir 		throw xml_parser_exception(
193cdf0e10cSrcweir 			(char*)XML_ErrorString(XML_GetErrorCode(xml_parser_)),
194cdf0e10cSrcweir 			(int)XML_GetErrorCode(xml_parser_),
195cdf0e10cSrcweir 			XML_GetCurrentLineNumber(xml_parser_),
196cdf0e10cSrcweir 			XML_GetCurrentColumnNumber(xml_parser_),
197cdf0e10cSrcweir 			XML_GetCurrentByteIndex(xml_parser_));
198cdf0e10cSrcweir }
199cdf0e10cSrcweir 
200cdf0e10cSrcweir //###################################################
set_document_handler(i_xml_parser_event_handler * event_handler)201cdf0e10cSrcweir void xml_parser::set_document_handler(
202cdf0e10cSrcweir 	i_xml_parser_event_handler* event_handler)
203cdf0e10cSrcweir {
204cdf0e10cSrcweir 	document_handler_ = event_handler;
205cdf0e10cSrcweir }
206cdf0e10cSrcweir 
207cdf0e10cSrcweir //###################################################
get_document_handler() const208cdf0e10cSrcweir i_xml_parser_event_handler* xml_parser::get_document_handler() const
209cdf0e10cSrcweir {
210cdf0e10cSrcweir 	return document_handler_;
211cdf0e10cSrcweir }
212