1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_shell.hxx" 30 31 #ifndef XML_PARSER_HXX_INCLUDED 32 #include "internal/xml_parser.hxx" 33 #endif 34 #include "internal/i_xml_parser_event_handler.hxx" 35 36 #include <assert.h> 37 38 namespace /* private */ 39 { 40 41 //###################################################### 42 /* Extracts the local part of tag without 43 namespace decoration e.g. meta:creator -> creator */ 44 const XML_Char COLON = (XML_Char)':'; 45 46 const XML_Char* get_local_name(const XML_Char* rawname) 47 { 48 const XML_Char* p = rawname; 49 50 // go to the end 51 while (*p) p++; 52 53 // go back until the first ':' 54 while (*p != COLON && p > rawname) 55 p--; 56 57 // if we are on a colon one step forward 58 if (*p == COLON) 59 p++; 60 61 return p; 62 } 63 64 //################################################ 65 inline xml_parser* get_parser_instance(void* data) 66 { 67 return reinterpret_cast<xml_parser*>(XML_GetUserData( 68 reinterpret_cast<XML_Parser>(data))); 69 } 70 71 //################################################ 72 bool has_only_whitespaces(const XML_Char* s, int len) 73 { 74 const XML_Char* p = s; 75 for (int i = 0; i < len; i++) 76 if (*p++ != ' ') return false; 77 return true; 78 } 79 } 80 81 //################################################### 82 xml_parser::xml_parser(const XML_Char* EncodingName) : 83 document_handler_(0), 84 xml_parser_(XML_ParserCreate(EncodingName)) 85 { 86 init(); 87 } 88 89 //################################################### 90 xml_parser::~xml_parser() 91 { 92 XML_ParserFree(xml_parser_); 93 } 94 95 //################################################### 96 /* Callback functions will be called by the parser on 97 different events */ 98 99 //################################################### 100 extern "C" 101 { 102 103 static void xml_start_element_handler(void* UserData, const XML_Char* name, const XML_Char** atts) 104 { 105 assert(UserData != NULL); 106 107 xml_parser* pImpl = get_parser_instance(UserData); 108 109 i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 110 if (pDocHdl) 111 { 112 xml_tag_attribute_container_t attributes; 113 114 int i = 0; 115 116 while(atts[i]) 117 { 118 attributes[reinterpret_cast<const char_t*>(get_local_name(atts[i]))] = reinterpret_cast<const char_t*>(atts[i+1]); 119 i += 2; // skip to next pair 120 } 121 122 pDocHdl->start_element( 123 reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name)), attributes); 124 } 125 } 126 127 //################################################### 128 static void xml_end_element_handler(void* UserData, const XML_Char* name) 129 { 130 assert(UserData); 131 132 xml_parser* pImpl = get_parser_instance(UserData); 133 i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 134 if (pDocHdl) 135 pDocHdl->end_element(reinterpret_cast<const char_t*>(name), reinterpret_cast<const char_t*>(get_local_name(name))); 136 } 137 138 //################################################### 139 static void xml_character_data_handler(void* UserData, const XML_Char* s, int len) 140 { 141 assert(UserData); 142 143 xml_parser* pImpl = get_parser_instance(UserData); 144 i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 145 if (pDocHdl) 146 { 147 if (has_only_whitespaces(s,len)) 148 pDocHdl->ignore_whitespace(string_t(reinterpret_cast<const char_t*>(s), len)); 149 else 150 pDocHdl->characters(string_t(reinterpret_cast<const char_t*>(s), len)); 151 } 152 } 153 154 //################################################### 155 static void xml_comment_handler(void* UserData, const XML_Char* Data) 156 { 157 assert(UserData); 158 159 xml_parser* pImpl = get_parser_instance(UserData); 160 i_xml_parser_event_handler* pDocHdl = pImpl->get_document_handler(); 161 if (pDocHdl) 162 pDocHdl->comment(reinterpret_cast<const char_t*>(Data)); 163 } 164 165 } // extern "C" 166 167 //################################################### 168 void xml_parser::init() 169 { 170 XML_SetUserData(xml_parser_, this); 171 172 // we use the parser as handler argument, 173 // so we could use it if necessary, the 174 // UserData are usable anyway using 175 // XML_GetUserData(...) 176 XML_UseParserAsHandlerArg(xml_parser_); 177 178 XML_SetElementHandler( 179 xml_parser_, 180 xml_start_element_handler, 181 xml_end_element_handler); 182 183 XML_SetCharacterDataHandler( 184 xml_parser_, 185 xml_character_data_handler); 186 187 XML_SetCommentHandler( 188 xml_parser_, 189 xml_comment_handler); 190 } 191 192 //################################################### 193 void xml_parser::parse(const char* XmlData, size_t Length, bool IsFinal) 194 { 195 if (0 == XML_Parse(xml_parser_, XmlData, Length, IsFinal)) 196 throw xml_parser_exception( 197 (char*)XML_ErrorString(XML_GetErrorCode(xml_parser_)), 198 (int)XML_GetErrorCode(xml_parser_), 199 XML_GetCurrentLineNumber(xml_parser_), 200 XML_GetCurrentColumnNumber(xml_parser_), 201 XML_GetCurrentByteIndex(xml_parser_)); 202 } 203 204 //################################################### 205 void xml_parser::set_document_handler( 206 i_xml_parser_event_handler* event_handler) 207 { 208 document_handler_ = event_handler; 209 } 210 211 //################################################### 212 i_xml_parser_event_handler* xml_parser::get_document_handler() const 213 { 214 return document_handler_; 215 } 216