xref: /trunk/main/l10ntools/inc/xmlparse.hxx (revision 983d4c8a)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #ifndef BOOTSTRP_XMLPARSE_HXX
25 #define BOOTSTRP_XMLPARSE_HXX
26 
27 #include <signal.h>
28 #include <expat.h>
29 #include <rtl/ustring.hxx>
30 #include <rtl/ustrbuf.hxx>
31 #include "tools/string.hxx"
32 #include "tools/list.hxx"
33 #define ENABLE_BYTESTRING_STREAM_OPERATORS
34 #include "tools/stream.hxx"
35 #include "tools/isofallback.hxx"
36 #include "export.hxx"
37 #include "xmlutil.hxx"
38 
39 #include <fstream>
40 #include <iostream>
41 
42 class XMLParentNode;
43 class XMLElement;
44 
45 
46 using namespace ::rtl;
47 using namespace std;
48 
49 #include <hash_map> /* std::hashmap*/
50 #include <deque>	/* std::deque*/
51 #include <iterator> /* std::iterator*/
52 #include <list>		/* std::list*/
53 #include <vector>	/* std::vector*/
/* Numeric tags for the kinds of XML nodes declared in this header.
 * Presumably these are the values returned by the GetNodeType() overrides;
 * the implementations are not in this header -- confirm in xmlparse.cxx. */
#define XML_NODE_TYPE_FILE      0x001
#define XML_NODE_TYPE_ELEMENT   0x002
#define XML_NODE_TYPE_DATA      0x003
#define XML_NODE_TYPE_COMMENT   0x004
#define XML_NODE_TYPE_DEFAULT   0x005

/* Capacity of the XMLUtil::isoArray language table. */
#define MAX_LANGUAGES           99
60 
61 
62 //#define TESTDRIVER		/* use xml2gsi testclass */
63 //-------------------------------------------------------------------------
64 
65 /** Holds data of Attributes
66  */
67 class XMLAttribute : public String
68 {
69 private:
70 	String sValue;
71 
72 public:
73 	/// creates an attribute
XMLAttribute(const String & rName,const String & rValue)74 	XMLAttribute(
75 		const String &rName, 	// attributes name
76 		const String &rValue	// attributes data
77 	)
78 				: String( rName ), sValue( rValue ) {}
79 
80     /// getting value of an attribue
GetValue()81 	const String &GetValue() { return sValue; }
82 
setValue(const String & rValue)83     void setValue(const String &rValue){sValue=rValue;}
84 
85 	/// returns true if two attributes are equal and have the same value
IsEqual(const XMLAttribute & rAttribute)86 	sal_Bool IsEqual(
87 		const XMLAttribute &rAttribute	// the attribute which has to be equal
88 	)
89 	{
90 		return (( rAttribute == *this ) && ( rAttribute.sValue == sValue ));
91 	}
92 };
93 
94 DECLARE_LIST( XMLAttributeList, XMLAttribute * )
95 
96 //-------------------------------------------------------------------------
97 
98 /** Virtual base to handle different kinds of XML nodes
99  */
100 class XMLNode
101 {
102 protected:
XMLNode()103 	XMLNode() {}
104 
105 public:
106 	virtual sal_uInt16 GetNodeType() = 0;
~XMLNode()107     virtual ~XMLNode() {}
108 };
109 
110 //-------------------------------------------------------------------------
111 
112 /** Virtual base to handle different kinds of child nodes
113  */
114 class XMLChildNode : public XMLNode
115 {
116 private:
117 	XMLParentNode *pParent;
118 
119 protected:
120 	XMLChildNode( XMLParentNode *pPar );
XMLChildNode()121     XMLChildNode():pParent( NULL ){};
122     XMLChildNode( const XMLChildNode& obj);
123     XMLChildNode& operator=(const XMLChildNode& obj);
124 public:
125 	virtual sal_uInt16 GetNodeType() = 0;
126 
127 	/// returns the parent of this node
GetParent()128 	XMLParentNode *GetParent() { return pParent; }
~XMLChildNode()129 	virtual ~XMLChildNode(){};
130 };
131 
132 DECLARE_LIST( XMLChildNodeList, XMLChildNode * )
133 
134 //-------------------------------------------------------------------------
135 
136 /** Virtual base to handle different kinds of parent nodes
137  */
138 class XMLData;
139 
140 class XMLParentNode : public XMLChildNode
141 {
142 private:
143 	XMLChildNodeList *pChildList;
144 	static int dbgcnt;
145     //int         nParentPos;
146 protected:
XMLParentNode(XMLParentNode * pPar)147 	XMLParentNode( XMLParentNode *pPar )
148 				: XMLChildNode( pPar ), pChildList( NULL )
149               {
150 			  }
XMLParentNode()151 	XMLParentNode(): pChildList(NULL){
152 	}
153     /// Copyconstructor
154     XMLParentNode( const XMLParentNode& );
155 
156     XMLParentNode& operator=(const XMLParentNode& obj);
157     virtual ~XMLParentNode();
158 
159 
160 public:
161 	virtual sal_uInt16 GetNodeType() = 0;
162 
163 	/// returns child list of this node
GetChildList()164 	XMLChildNodeList *GetChildList() { return pChildList; }
165 
166 	/// adds a new child
167 	void AddChild(
168 		XMLChildNode *pChild  	/// the new child
169 	);
170 
171     void AddChild(
172 		XMLChildNode *pChild , int pos 	/// the new child
173 	);
174 
175     virtual int GetPosition( ByteString id );
176     int RemoveChild( XMLElement *pRefElement );
177 	void RemoveAndDeleteAllChilds();
178 
179 	/// returns a child element which matches the given one
180 	XMLElement *GetChildElement(
181 		XMLElement *pRefElement	// the reference elelement
182 	);
183 };
184 
185 //-------------------------------------------------------------------------
186 
187 DECLARE_LIST( XMLStringList, XMLElement* )
188 
189 /// Mapping numeric Language code <-> XML Element
190 typedef std::hash_map< ByteString ,XMLElement* , hashByteString,equalByteString > LangHashMap;
191 
192 /// Mapping XML Element string identifier <-> Language Map
193 typedef std::hash_map<ByteString , LangHashMap* ,
194 					  hashByteString,equalByteString>					XMLHashMap;
195 
196 /// Mapping iso alpha string code <-> iso numeric code
197 typedef std::hash_map<ByteString, int, hashByteString,equalByteString>	HashMap;
198 
199 /// Mapping XML tag names <-> have localizable strings
200 typedef std::hash_map<ByteString , sal_Bool ,
201 					  hashByteString,equalByteString>					TagMap;
202 
203 /** Holds information of a XML file, is root node of tree
204  */
205 
206 
207 class XMLFile : public XMLParentNode
208 {
209 public:
210 	XMLFile() ;
211 	XMLFile(
212 				const String &rFileName // the file name, empty if created from memory stream
213 	);
214     XMLFile( const XMLFile& obj ) ;
215     ~XMLFile();
216 
217     ByteString*	GetGroupID(std::deque<ByteString> &groupid);
218 	void 		Print( XMLNode *pCur = NULL, sal_uInt16 nLevel = 0 );
219 	virtual void SearchL10NElements( XMLParentNode *pCur, int pos = 0 );
220 	void		Extract( XMLFile *pCur = NULL );
221 	void		View();
222 //	void static Signal_handler(int signo);//void*,oslSignalInfo * pInfo);
223 	void		showType(XMLParentNode* node);
224 
GetStrings()225 	XMLHashMap* GetStrings(){return XMLStrings;}
226 	sal_Bool 		Write( ByteString &rFilename );
227 	sal_Bool 		Write( ofstream &rStream , XMLNode *pCur = NULL );
228 
229     bool        CheckExportStatus( XMLParentNode *pCur = NULL );// , int pos = 0 );
230 
231     XMLFile&    operator=(const XMLFile& obj);
232 
233 	virtual sal_uInt16 	GetNodeType();
234 
235 	/// returns file name
GetName()236 	const String &GetName() { return sFileName; }
SetName(const String & rFilename)237     void          SetName( const String &rFilename ) { sFileName = rFilename; }
SetFullName(const String & rFullFilename)238     void          SetFullName( const String &rFullFilename ) { sFullName = rFullFilename; }
getOrder()239     const std::vector<ByteString> getOrder(){ return order; }
240 
241 protected:
242 	// writes a string as UTF8 with dos line ends to a given stream
243     void        WriteString( ofstream &rStream, const String &sString );
244 
245     // quotes the given text for writing to a file
246 	void 		QuotHTML( String &rString );
247 
248 	void		InsertL10NElement( XMLElement* pElement);
249 
250 	// DATA
251 	String 		sFileName;
252     String      sFullName;
253 
254 	const ByteString ID,OLDREF,XML_LANG;
255 
256 	TagMap		nodes_localize;
257 	XMLHashMap* XMLStrings;
258 
259     std::vector <ByteString> order;
260 };
261 
262 /// An Utility class for XML
263 /// See RFC 3066 / #i8252# for ISO codes
264 class XMLUtil{
265 
266 public:
267     /// Quot the XML characters and replace \n \t
268     static void         QuotHTML( String &rString );
269 
270     /// UnQuot the XML characters and restore \n \t
271     static void         UnQuotHTML  ( String &rString );
272 
273     /// Return the numeric iso language code
274     //sal_uInt16		        GetLangByIsoLang( const ByteString &rIsoLang );
275 
276     /// Return the alpha strings representation
277     ByteString	        GetIsoLangByIndex( sal_uInt16 nIndex );
278 
279     static XMLUtil&     Instance();
280     ~XMLUtil();
281 
282     void         dump();
283 
284 private:
285     /// Mapping iso alpha string code <-> iso numeric code
286     HashMap      lMap;
287 
288     /// Mapping iso numeric code      <-> iso alpha string code
289     ByteString	 isoArray[MAX_LANGUAGES];
290 
291     static void UnQuotData( String &rString );
292     static void UnQuotTags( String &rString );
293 
294 	XMLUtil();
295 	XMLUtil(const XMLUtil&);
296 
297 };
298 
299 
300 
301 //-------------------------------------------------------------------------
302 
303 /** Hold information of an element node
304  */
305 class XMLElement : public XMLParentNode
306 {
307 private:
308 	String sElementName;
309 	XMLAttributeList *pAttributes;
310 	ByteString 	 project,
311 			     filename,
312 			     id,
313 			     sOldRef,
314 			     resourceType,
315 			     languageId;
316     int          nPos;
317 
318 protected:
319 	void Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement);
320 public:
321 	/// create a element node
XMLElement()322 	XMLElement(){}
XMLElement(const String & rName,XMLParentNode * Parent)323     XMLElement(
324 		const String &rName, 	// the element name
325 		XMLParentNode *Parent 	// parent node of this element
326 	):			XMLParentNode( Parent ),
327 				sElementName( rName ),
328 				pAttributes( NULL ),
329 				project(""),
330 				filename(""),
331 				id(""),
332 				sOldRef(""),
333 				resourceType(""),
334 				languageId(""),
335                 nPos(0)
336    				{
337 				}
338 	~XMLElement();
339     XMLElement(const XMLElement&);
340 
341     XMLElement& operator=(const XMLElement& obj);
342 	/// returns node type XML_NODE_ELEMENT
343 	virtual sal_uInt16 GetNodeType();
344 
345 	/// returns element name
GetName()346 	const String &GetName() { return sElementName; }
347 
348 	/// returns list of attributes of this element
GetAttributeList()349 	XMLAttributeList *GetAttributeList() { return pAttributes; }
350 
351 	/// adds a new attribute to this element, typically used by parser
352 	void AddAttribute( const String &rAttribute, const String &rValue );
353 
354     void ChangeLanguageTag( const String &rValue );
355 	// Return a ASCII String representation of this object
356 	OString ToOString();
357 
358 	// Return a Unicode String representation of this object
359 	OUString ToOUString();
360 
361 	bool	Equals(OUString refStr);
362 
363 	/// returns a attribute
364 	XMLAttribute *GetAttribute(
365 		const String &rName	// the attribute name
366 	);
SetProject(ByteString prj)367 	void SetProject         ( ByteString prj        ){ project = prj;        }
SetFileName(ByteString fn)368 	void SetFileName        ( ByteString fn         ){ filename = fn;        }
SetId(ByteString theId)369 	void SetId              ( ByteString theId      ){ id = theId;           }
SetResourceType(ByteString rt)370 	void SetResourceType    ( ByteString rt         ){ resourceType = rt;    }
SetLanguageId(ByteString lid)371 	void SetLanguageId      ( ByteString lid        ){ languageId = lid;     }
SetPos(int nPos_in)372     void SetPos             ( int nPos_in           ){ nPos = nPos_in;       }
SetOldRef(ByteString sOldRef_in)373     void SetOldRef          ( ByteString sOldRef_in ){ sOldRef = sOldRef_in; }
374 
GetPos()375     virtual int        GetPos()         { return nPos;         }
GetProject()376     ByteString GetProject()     { return project;      }
GetFileName()377 	ByteString GetFileName()    { return filename;     }
GetId()378 	ByteString GetId()          { return id;           }
GetOldref()379 	ByteString GetOldref()      { return sOldRef;      }
GetResourceType()380 	ByteString GetResourceType(){ return resourceType; }
GetLanguageId()381 	ByteString GetLanguageId()  { return languageId;   }
382 
383 
384 };
385 //-------------------------------------------------------------------------
386 
387 
388 /** Holds character data
389  */
390 class XMLData : public XMLChildNode
391 {
392 private:
393 	String sData;
394     bool   isNewCreated;
395 
396 public:
397 	/// create a data node
XMLData(const String & rData,XMLParentNode * Parent)398 	XMLData(
399 		const String &rData, 	// the initial data
400 		XMLParentNode *Parent	// the parent node of this data, typically a element node
401 	)
402 				: XMLChildNode( Parent ), sData( rData ) , isNewCreated ( false ){}
XMLData(const String & rData,XMLParentNode * Parent,bool newCreated)403 	XMLData(
404 		const String &rData, 	// the initial data
405 		XMLParentNode *Parent,	// the parent node of this data, typically a element node
406         bool newCreated
407     )
408 				: XMLChildNode( Parent ), sData( rData ) , isNewCreated ( newCreated ){}
409 
410     XMLData(const XMLData& obj);
411 
412     XMLData& operator=(const XMLData& obj);
413 	virtual sal_uInt16 GetNodeType();
414 
415 	/// returns the data
GetData()416 	const String &GetData() { return sData; }
417 
isNew()418     bool isNew() { return isNewCreated; }
419     /// adds new character data to the existing one
420 	void AddData(
421 		const String &rData	// the new data
422 	);
423 
424 
425 
426 };
427 
428 //-------------------------------------------------------------------------
429 
430 /** Holds comments
431  */
432 class XMLComment : public XMLChildNode
433 {
434 private:
435 	String sComment;
436 
437 public:
438 	/// create a comment node
XMLComment(const String & rComment,XMLParentNode * Parent)439 	XMLComment(
440 		const String &rComment,	// the comment
441 		XMLParentNode *Parent	// the parent node of this comemnt, typically a element node
442 	)
443 				: XMLChildNode( Parent ), sComment( rComment ) {}
444 
445 	virtual sal_uInt16 GetNodeType();
446 
447     XMLComment( const XMLComment& obj );
448 
449     XMLComment& operator=(const XMLComment& obj);
450 
451     /// returns the comment
GetComment()452 	const String &GetComment()  { return sComment; }
453 };
454 
455 //-------------------------------------------------------------------------
456 
457 /** Holds additional file content like those for which no handler exists
458  */
459 class XMLDefault : public XMLChildNode
460 {
461 private:
462 	String sDefault;
463 
464 public:
465 	/// create a comment node
XMLDefault(const String & rDefault,XMLParentNode * Parent)466 	XMLDefault(
467 		const String &rDefault,	// the comment
468 		XMLParentNode *Parent	// the parent node of this comemnt, typically a element node
469 	)
470 				: XMLChildNode( Parent ), sDefault( rDefault ) {}
471 
472     XMLDefault(const XMLDefault& obj);
473 
474     XMLDefault& operator=(const XMLDefault& obj);
475 
476     /// returns node type XML_NODE_TYPE_COMMENT
477 	virtual sal_uInt16 GetNodeType();
478 
479 	/// returns the comment
GetDefault()480 	const String &GetDefault()  { return sDefault; }
481 };
482 
483 //-------------------------------------------------------------------------
484 
485 /** struct for error information, used by class SimpleXMLParser
486  */
487 struct XMLError {
488 	XML_Error eCode;	// the error code
489 	sal_uLong nLine;       	// error line number
490 	sal_uLong nColumn;		// error column number
491 	String sMessage;   	// readable error message
492 };
493 
494 //-------------------------------------------------------------------------
495 
496 /** validating xml parser, creates a document tree with xml nodes
497  */
498 
499 
500 class SimpleXMLParser
501 {
502 private:
503 	XML_Parser aParser;
504 	XMLError aErrorInformation;
505 
506 	XMLFile *pXMLFile;
507 	XMLParentNode *pCurNode;
508 	XMLData *pCurData;
509 
510 
511     static void StartElementHandler( void *userData, const XML_Char *name, const XML_Char **atts );
512 	static void EndElementHandler( void *userData, const XML_Char *name );
513 	static void CharacterDataHandler( void *userData, const XML_Char *s, int len );
514 	static void CommentHandler( void *userData, const XML_Char *data );
515 	static void DefaultHandler( void *userData, const XML_Char *s, int len );
516 
517 
518 	void StartElement( const XML_Char *name, const XML_Char **atts );
519 	void EndElement( const XML_Char *name );
520 	void CharacterData( const XML_Char *s, int len );
521 	void Comment( const XML_Char *data );
522 	void Default( const XML_Char *s, int len );
523 
524 
525 public:
526 	/// creates a new parser
527 	SimpleXMLParser();
528 	~SimpleXMLParser();
529 
530 	/// parse a file, returns NULL on criticall errors
531 	XMLFile *Execute(
532         const String &rFullFileName,
533         const String &rFileName,	// the file name
534         XMLFile *pXMLFileIn         // the XMLFile
535 	);
536 
537 	/// parse a memory stream, returns NULL on criticall errors
538 	XMLFile *Execute(
539 		SvMemoryStream *pStream	// the stream
540 	);
541 
542 	/// returns an error struct
GetError()543 	const XMLError &GetError() { return aErrorInformation; }
544 };
545 
546 #endif
547