1*ab595ff6SAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*ab595ff6SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*ab595ff6SAndrew Rist * or more contributor license agreements. See the NOTICE file 5*ab595ff6SAndrew Rist * distributed with this work for additional information 6*ab595ff6SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*ab595ff6SAndrew Rist * to you under the Apache License, Version 2.0 (the 8*ab595ff6SAndrew Rist * "License"); you may not use this file except in compliance 9*ab595ff6SAndrew Rist * with the License. You may obtain a copy of the License at 10*ab595ff6SAndrew Rist * 11*ab595ff6SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*ab595ff6SAndrew Rist * 13*ab595ff6SAndrew Rist * Unless required by applicable law or agreed to in writing, 14*ab595ff6SAndrew Rist * software distributed under the License is distributed on an 15*ab595ff6SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*ab595ff6SAndrew Rist * KIND, either express or implied. See the License for the 17*ab595ff6SAndrew Rist * specific language governing permissions and limitations 18*ab595ff6SAndrew Rist * under the License. 19*ab595ff6SAndrew Rist * 20*ab595ff6SAndrew Rist *************************************************************/ 21*ab595ff6SAndrew Rist 22*ab595ff6SAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir 25cdf0e10cSrcweir #include <parse.hxx> 26cdf0e10cSrcweir 27cdf0e10cSrcweir #include <string.h> 28cdf0e10cSrcweir #include <iostream> 29cdf0e10cSrcweir #include <xmlelem.hxx> 30cdf0e10cSrcweir 31cdf0e10cSrcweir #if (_MSC_VER >=1400) 32cdf0e10cSrcweir #pragma warning(disable:4365) 33cdf0e10cSrcweir #endif 34cdf0e10cSrcweir 35cdf0e10cSrcweir #ifdef UNX 36cdf0e10cSrcweir #define strnicmp strncasecmp 37cdf0e10cSrcweir #endif 38cdf0e10cSrcweir 39cdf0e10cSrcweir 40cdf0e10cSrcweir 41cdf0e10cSrcweir // NOT FULLY DEFINED SERVICES 42cdf0e10cSrcweir 43cdf0e10cSrcweir 44cdf0e10cSrcweir 45cdf0e10cSrcweir #define AssertionOf(x) \ 46cdf0e10cSrcweir {if (!(x)) {std::cerr << "Assertion failed: " << #x << __FILE__ << __LINE__ << std::endl; exit(3); }} 47cdf0e10cSrcweir 48cdf0e10cSrcweir 49cdf0e10cSrcweir 50cdf0e10cSrcweir X2CParser::X2CParser( XmlElement & o_rDocumentData ) 51cdf0e10cSrcweir : // sFileName, 52cdf0e10cSrcweir nFileLine(0), 53cdf0e10cSrcweir pDocumentData(&o_rDocumentData), 54cdf0e10cSrcweir // sWord, 55cdf0e10cSrcweir text(0) 56cdf0e10cSrcweir { 57cdf0e10cSrcweir } 58cdf0e10cSrcweir 59cdf0e10cSrcweir X2CParser::~X2CParser() 60cdf0e10cSrcweir { 61cdf0e10cSrcweir } 62cdf0e10cSrcweir 63cdf0e10cSrcweir 64cdf0e10cSrcweir bool 65cdf0e10cSrcweir X2CParser::LoadFile( const char * i_sFilename ) 66cdf0e10cSrcweir { 67cdf0e10cSrcweir sFileName = i_sFilename; 68cdf0e10cSrcweir nFileLine = 1; 69cdf0e10cSrcweir 70cdf0e10cSrcweir // Load file: 71cdf0e10cSrcweir if ( ! LoadXmlFile( aFile, i_sFilename ) ) 72cdf0e10cSrcweir return false; 73cdf0e10cSrcweir 74cdf0e10cSrcweir // Test correct end: 75cdf0e10cSrcweir const char * pLastTag = strrchr(aFile.operator const char *(),'<'); 76cdf0e10cSrcweir if (pLastTag == 0) 77cdf0e10cSrcweir return false; 78cdf0e10cSrcweir if ( strnicmp(pLastTag+2, pDocumentData->Name().str(), pDocumentData->Name().l()) != 0 79cdf0e10cSrcweir || strnicmp(pLastTag, "</", 2) != 0 ) 80cdf0e10cSrcweir return false; 81cdf0e10cSrcweir if (strchr(pLastTag,'>') == 0) 82cdf0e10cSrcweir return false; 83cdf0e10cSrcweir return true; 84cdf0e10cSrcweir } 85cdf0e10cSrcweir 86cdf0e10cSrcweir void 87cdf0e10cSrcweir X2CParser::Parse() 88cdf0e10cSrcweir { 89cdf0e10cSrcweir // Parse: 90cdf0e10cSrcweir text = aFile.operator const char *(); 91cdf0e10cSrcweir 92cdf0e10cSrcweir Parse_XmlDeclaration(); 93cdf0e10cSrcweir Parse_Doctype(); 94cdf0e10cSrcweir 95cdf0e10cSrcweir pDocumentData->Parse(*this); 96cdf0e10cSrcweir } 97cdf0e10cSrcweir 98cdf0e10cSrcweir bool 99cdf0e10cSrcweir X2CParser::Parse( const char * i_sFilename ) 100cdf0e10cSrcweir { 101cdf0e10cSrcweir bool ret = LoadFile(i_sFilename); 102cdf0e10cSrcweir if (ret) 103cdf0e10cSrcweir Parse(); 104cdf0e10cSrcweir return ret; 105cdf0e10cSrcweir } 106cdf0e10cSrcweir 107cdf0e10cSrcweir void 108cdf0e10cSrcweir X2CParser::Parse_XmlDeclaration() 109cdf0e10cSrcweir { 110cdf0e10cSrcweir Goto('<'); 111cdf0e10cSrcweir if ( IsText("<?xml") ) 112cdf0e10cSrcweir { 113cdf0e10cSrcweir Goto_And_Pass('>'); 114cdf0e10cSrcweir } 115cdf0e10cSrcweir } 116cdf0e10cSrcweir 117cdf0e10cSrcweir void 118cdf0e10cSrcweir X2CParser::Parse_Doctype() 119cdf0e10cSrcweir { 120cdf0e10cSrcweir Goto('<'); 121cdf0e10cSrcweir if ( IsText("<!DOCTYPE") ) 122cdf0e10cSrcweir Goto_And_Pass('>'); 123cdf0e10cSrcweir } 124cdf0e10cSrcweir 125cdf0e10cSrcweir void 126cdf0e10cSrcweir X2CParser::Parse_Sequence( DynamicList<XmlElement> & o_rElements, 127cdf0e10cSrcweir const Simstr & i_sElementName ) 128cdf0e10cSrcweir { 129cdf0e10cSrcweir CheckAndPassBeginTag(i_sElementName.str()); 130cdf0e10cSrcweir 131cdf0e10cSrcweir unsigned int i_max = o_rElements.size(); 132cdf0e10cSrcweir for (unsigned i = 0; i < i_max; ++i) 133cdf0e10cSrcweir { 134cdf0e10cSrcweir o_rElements[i]->Parse(*this); 135cdf0e10cSrcweir } // end for 136cdf0e10cSrcweir 137cdf0e10cSrcweir CheckAndPassEndTag(i_sElementName.str()); 138cdf0e10cSrcweir } 139cdf0e10cSrcweir 140cdf0e10cSrcweir void 141cdf0e10cSrcweir X2CParser::Parse_FreeChoice( DynamicList<XmlElement> & o_rElements ) 142cdf0e10cSrcweir { 143cdf0e10cSrcweir unsigned nSize = o_rElements.size(); 144cdf0e10cSrcweir 145cdf0e10cSrcweir for ( bool bBreak = false; !bBreak; ) 146cdf0e10cSrcweir { 147cdf0e10cSrcweir bBreak = true; 148cdf0e10cSrcweir for ( unsigned i = 0; i < nSize; ++i ) 149cdf0e10cSrcweir { 150cdf0e10cSrcweir Goto('<'); 151cdf0e10cSrcweir if ( IsBeginTag(o_rElements[i]->Name().str()) ) 152cdf0e10cSrcweir { 153cdf0e10cSrcweir o_rElements[i]->Parse(*this); 154cdf0e10cSrcweir bBreak = false; 155cdf0e10cSrcweir break; 156cdf0e10cSrcweir } 157cdf0e10cSrcweir } // end for i 158cdf0e10cSrcweir } // end for !bBreak 159cdf0e10cSrcweir } 160cdf0e10cSrcweir 161cdf0e10cSrcweir void 162cdf0e10cSrcweir X2CParser::Parse_List( ListElement & o_rListElem ) 163cdf0e10cSrcweir { 164cdf0e10cSrcweir 165cdf0e10cSrcweir for ( Goto('<'); IsBeginTag(o_rListElem.Name().str()); Goto('<') ) 166cdf0e10cSrcweir { 167cdf0e10cSrcweir XmlElement * pNew = o_rListElem.Create_and_Add_NewElement(); 168cdf0e10cSrcweir pNew->Parse(*this); 169cdf0e10cSrcweir } 170cdf0e10cSrcweir } 171cdf0e10cSrcweir 172cdf0e10cSrcweir void 173cdf0e10cSrcweir X2CParser::Parse_Text( Simstr & o_sText, 174cdf0e10cSrcweir const Simstr & i_sElementName, 175cdf0e10cSrcweir bool i_bReverseName ) 176cdf0e10cSrcweir { 177cdf0e10cSrcweir 178cdf0e10cSrcweir if ( ! CheckAndPassBeginTag(i_sElementName.str()) ) 179cdf0e10cSrcweir return; 180cdf0e10cSrcweir 181cdf0e10cSrcweir // Add new Element 182cdf0e10cSrcweir GetTextTill( o_sText, '<', i_bReverseName ); 183cdf0e10cSrcweir o_sText.remove_trailing_blanks(); 184cdf0e10cSrcweir 185cdf0e10cSrcweir CheckAndPassEndTag(i_sElementName.str()); 186cdf0e10cSrcweir } 187cdf0e10cSrcweir 188cdf0e10cSrcweir void 189cdf0e10cSrcweir X2CParser::Parse_MultipleText( List<Simstr> & o_rTexts, 190cdf0e10cSrcweir const Simstr & i_sElementName, 191cdf0e10cSrcweir bool i_bReverseName ) 192cdf0e10cSrcweir { 193cdf0e10cSrcweir for ( Goto('<'); IsBeginTag(i_sElementName.str()); Goto('<') ) 194cdf0e10cSrcweir { 195cdf0e10cSrcweir Simstr sNew; 196cdf0e10cSrcweir Parse_Text(sNew, i_sElementName, i_bReverseName); 197cdf0e10cSrcweir if (sNew.l() > 0) 198cdf0e10cSrcweir o_rTexts.push_back(sNew); 199cdf0e10cSrcweir } 200cdf0e10cSrcweir } 201cdf0e10cSrcweir 202cdf0e10cSrcweir void 203cdf0e10cSrcweir X2CParser::Parse_SglAttr( Simstr & o_sAttrValue, 204cdf0e10cSrcweir const Simstr & i_sElementName, 205cdf0e10cSrcweir const Simstr & i_sAttrName ) 206cdf0e10cSrcweir { 207cdf0e10cSrcweir Goto('<'); 208cdf0e10cSrcweir if ( !IsBeginTag(i_sElementName.str()) ) 209cdf0e10cSrcweir SyntaxError("unexpected element"); 210cdf0e10cSrcweir Move( i_sElementName.l() + 1 ); 211cdf0e10cSrcweir 212cdf0e10cSrcweir Pass_White(); 213cdf0e10cSrcweir if (*text == '>') 214cdf0e10cSrcweir SyntaxError("no attribute found, where one was expected"); 215cdf0e10cSrcweir Simstr sAttrName; 216cdf0e10cSrcweir Get_Attribute(o_sAttrValue, sAttrName); 217cdf0e10cSrcweir if (sAttrName != i_sAttrName) 218cdf0e10cSrcweir SyntaxError("unknown attribute found"); 219cdf0e10cSrcweir Pass_White(); 220cdf0e10cSrcweir if (strncmp(text,"/>",2) != 0) 221cdf0e10cSrcweir SyntaxError("missing \"/>\" at end of empty element"); 222cdf0e10cSrcweir Move(2); 223cdf0e10cSrcweir } 224cdf0e10cSrcweir 225cdf0e10cSrcweir void 226cdf0e10cSrcweir X2CParser::Parse_MultipleAttr( List<Simstr> & o_rAttrValues, 227cdf0e10cSrcweir const Simstr & i_sElementName, 228cdf0e10cSrcweir const List<Simstr> & i_rAttrNames ) 229cdf0e10cSrcweir { 230cdf0e10cSrcweir Goto('<'); 231cdf0e10cSrcweir if ( !IsBeginTag(i_sElementName.str()) ) 232cdf0e10cSrcweir SyntaxError("unexpected element"); 233cdf0e10cSrcweir Move( i_sElementName.l() + 1 ); 234cdf0e10cSrcweir Simstr sAttrName; 235cdf0e10cSrcweir Simstr sAttrValue; 236cdf0e10cSrcweir unsigned nSize = i_rAttrNames.size(); 237cdf0e10cSrcweir unsigned i; 238cdf0e10cSrcweir 239cdf0e10cSrcweir for ( Pass_White(); *text != '/'; Pass_White() ) 240cdf0e10cSrcweir { 241cdf0e10cSrcweir 242cdf0e10cSrcweir Get_Attribute(sAttrValue, sAttrName); 243cdf0e10cSrcweir 244cdf0e10cSrcweir for ( i = 0; i < nSize; ++i ) 245cdf0e10cSrcweir { 246cdf0e10cSrcweir if ( i_rAttrNames[i] == sAttrName ) 247cdf0e10cSrcweir { 248cdf0e10cSrcweir o_rAttrValues[i] = sAttrValue; 249cdf0e10cSrcweir break; 250cdf0e10cSrcweir } 251cdf0e10cSrcweir } 252cdf0e10cSrcweir if (i == nSize) 253cdf0e10cSrcweir SyntaxError("unknown attribute found"); 254cdf0e10cSrcweir } 255cdf0e10cSrcweir Move(2); 256cdf0e10cSrcweir } 257cdf0e10cSrcweir 258cdf0e10cSrcweir 259cdf0e10cSrcweir void 260cdf0e10cSrcweir X2CParser::Get_Attribute( Simstr & o_rAttrValue, 261cdf0e10cSrcweir Simstr & o_rAttrName ) 262cdf0e10cSrcweir { 263cdf0e10cSrcweir GetTextTill( o_rAttrName, '='); 264cdf0e10cSrcweir 265cdf0e10cSrcweir while (*(++text) != '"') 266cdf0e10cSrcweir { 267cdf0e10cSrcweir if (*text == '\0') 268cdf0e10cSrcweir SyntaxError("unexpected end of file"); 269cdf0e10cSrcweir } 270cdf0e10cSrcweir 271cdf0e10cSrcweir ++text; 272cdf0e10cSrcweir GetTextTill( o_rAttrValue, '"'); 273cdf0e10cSrcweir ++text; 274cdf0e10cSrcweir } 275cdf0e10cSrcweir 276cdf0e10cSrcweir bool 277cdf0e10cSrcweir X2CParser::IsText( const char * i_sComparedText ) 278cdf0e10cSrcweir { 279cdf0e10cSrcweir return strnicmp( text, i_sComparedText, strlen(i_sComparedText) ) == 0; 280cdf0e10cSrcweir } 281cdf0e10cSrcweir 282cdf0e10cSrcweir bool 283cdf0e10cSrcweir X2CParser::IsBeginTag( const char * i_sTagName ) 284cdf0e10cSrcweir { 285cdf0e10cSrcweir return strnicmp( text+1, i_sTagName, strlen(i_sTagName) ) == 0 286cdf0e10cSrcweir && *text == '<'; 287cdf0e10cSrcweir } 288cdf0e10cSrcweir 289cdf0e10cSrcweir bool 290cdf0e10cSrcweir X2CParser::IsEndTag( const char * i_sTagName ) 291cdf0e10cSrcweir { 292cdf0e10cSrcweir return strnicmp( text+2, i_sTagName, strlen(i_sTagName) ) == 0 293cdf0e10cSrcweir && strnicmp( text, "</", 2 ) == 0; 294cdf0e10cSrcweir } 295cdf0e10cSrcweir 296cdf0e10cSrcweir void 297cdf0e10cSrcweir X2CParser::Goto( char i_cNext ) 298cdf0e10cSrcweir { 299cdf0e10cSrcweir while (*text != i_cNext) 300cdf0e10cSrcweir { 301cdf0e10cSrcweir TestCurChar(); 302cdf0e10cSrcweir ++text; 303cdf0e10cSrcweir } 304cdf0e10cSrcweir } 305cdf0e10cSrcweir 306cdf0e10cSrcweir void 307cdf0e10cSrcweir X2CParser::Goto_And_Pass( char i_cNext ) 308cdf0e10cSrcweir { 309cdf0e10cSrcweir Goto(i_cNext); 310cdf0e10cSrcweir ++text; 311cdf0e10cSrcweir } 312cdf0e10cSrcweir 313cdf0e10cSrcweir void 314cdf0e10cSrcweir X2CParser::Move( int i_nForward ) 315cdf0e10cSrcweir { 316cdf0e10cSrcweir text += i_nForward; 317cdf0e10cSrcweir } 318cdf0e10cSrcweir 319cdf0e10cSrcweir void 320cdf0e10cSrcweir X2CParser::Pass_White() 321cdf0e10cSrcweir { 322cdf0e10cSrcweir while (*text <= 32) 323cdf0e10cSrcweir { 324cdf0e10cSrcweir TestCurChar(); 325cdf0e10cSrcweir ++text; 326cdf0e10cSrcweir } 327cdf0e10cSrcweir } 328cdf0e10cSrcweir 329cdf0e10cSrcweir void 330cdf0e10cSrcweir X2CParser::GetTextTill( Simstr & o_rText, 331cdf0e10cSrcweir char i_cEnd, 332cdf0e10cSrcweir bool i_bReverseName ) 333cdf0e10cSrcweir { 334cdf0e10cSrcweir char * pResult = &sWord[0]; 335cdf0e10cSrcweir char * pSet; 336cdf0e10cSrcweir 337cdf0e10cSrcweir for ( pSet = pResult; 338cdf0e10cSrcweir *text != i_cEnd; 339cdf0e10cSrcweir ++text ) 340cdf0e10cSrcweir { 341cdf0e10cSrcweir TestCurChar(); 342cdf0e10cSrcweir *pSet++ = *text; 343cdf0e10cSrcweir } 344cdf0e10cSrcweir 345cdf0e10cSrcweir while ( *pResult < 33 && *pResult > 0 ) 346cdf0e10cSrcweir ++pResult; 347cdf0e10cSrcweir while ( pSet > pResult ? *(pSet-1) < 33 : false ) 348cdf0e10cSrcweir pSet--; 349cdf0e10cSrcweir *pSet = '\0'; 350cdf0e10cSrcweir 351cdf0e10cSrcweir 352cdf0e10cSrcweir if (i_bReverseName) 353cdf0e10cSrcweir { 354cdf0e10cSrcweir const unsigned int nMaxLen = 1000; 355cdf0e10cSrcweir if (strlen(pResult) < nMaxLen) 356cdf0e10cSrcweir { 357cdf0e10cSrcweir char * sBreak = strrchr(pResult,'.'); 358cdf0e10cSrcweir if (sBreak != 0) 359cdf0e10cSrcweir { 360cdf0e10cSrcweir static char sScope[nMaxLen+10]; 361cdf0e10cSrcweir static char sName[nMaxLen+10]; 362cdf0e10cSrcweir 363cdf0e10cSrcweir unsigned nScopeLen = sBreak - pResult; 364cdf0e10cSrcweir strncpy ( sScope, pResult, nScopeLen ); // STRNCPY SAFE HERE 365cdf0e10cSrcweir sScope[nScopeLen] = '\0'; 366cdf0e10cSrcweir strcpy( sName, sBreak + 1 ); // STRCPY SAFE HERE 367cdf0e10cSrcweir strcat( sName, " in " ); // STRCAT SAFE HERE 368cdf0e10cSrcweir strcat( sName, sScope ); // STRCAT SAFE HERE 369cdf0e10cSrcweir 370cdf0e10cSrcweir o_rText = sName; 371cdf0e10cSrcweir return; 372cdf0e10cSrcweir } 373cdf0e10cSrcweir } 374cdf0e10cSrcweir } // endif (i_bReverseName) 375cdf0e10cSrcweir 376cdf0e10cSrcweir o_rText = &sWord[0]; 377cdf0e10cSrcweir } 378cdf0e10cSrcweir 379cdf0e10cSrcweir bool 380cdf0e10cSrcweir X2CParser::CheckAndPassBeginTag( const char * i_sElementName ) 381cdf0e10cSrcweir { 382cdf0e10cSrcweir bool ret = true; 383cdf0e10cSrcweir Goto('<'); 384cdf0e10cSrcweir if ( ! IsBeginTag(i_sElementName) ) 385cdf0e10cSrcweir SyntaxError( "unexpected element"); 386cdf0e10cSrcweir if (IsAbsoluteEmpty()) 387cdf0e10cSrcweir ret = false; 388cdf0e10cSrcweir Goto_And_Pass('>'); 389cdf0e10cSrcweir if (ret) 390cdf0e10cSrcweir Pass_White(); 391cdf0e10cSrcweir return ret; 392cdf0e10cSrcweir } 393cdf0e10cSrcweir 394cdf0e10cSrcweir void 395cdf0e10cSrcweir X2CParser::CheckAndPassEndTag( const char * i_sElementName ) 396cdf0e10cSrcweir { 397cdf0e10cSrcweir Pass_White(); 398cdf0e10cSrcweir if ( !IsEndTag(i_sElementName) ) 399cdf0e10cSrcweir SyntaxError("missing or not matching end tag"); 400cdf0e10cSrcweir Goto_And_Pass('>'); 401cdf0e10cSrcweir } 402cdf0e10cSrcweir 403cdf0e10cSrcweir bool 404cdf0e10cSrcweir X2CParser::IsAbsoluteEmpty() const 405cdf0e10cSrcweir { 406cdf0e10cSrcweir const char * pEnd = strchr(text+1, '>'); 407cdf0e10cSrcweir if (pEnd != 0) 408cdf0e10cSrcweir { 409cdf0e10cSrcweir if ( *(pEnd-1) == '/' ) 410cdf0e10cSrcweir { 411cdf0e10cSrcweir const char * pAttr = strchr(text+1, '"'); 412cdf0e10cSrcweir if (pAttr == 0) 413cdf0e10cSrcweir return true; 414cdf0e10cSrcweir else if ( (pAttr-text) > (pEnd-text) ) 415cdf0e10cSrcweir return true; 416cdf0e10cSrcweir } 417cdf0e10cSrcweir } 418cdf0e10cSrcweir return false; 419cdf0e10cSrcweir } 420cdf0e10cSrcweir 421cdf0e10cSrcweir void 422cdf0e10cSrcweir X2CParser::SyntaxError( const char * i_sText ) 423cdf0e10cSrcweir { 424cdf0e10cSrcweir std::cerr 425cdf0e10cSrcweir << "Syntax error " 426cdf0e10cSrcweir << i_sText 427cdf0e10cSrcweir << " in file: " 428cdf0e10cSrcweir << sFileName.str() 429cdf0e10cSrcweir << " in line " 430cdf0e10cSrcweir << nFileLine 431cdf0e10cSrcweir << "." 432cdf0e10cSrcweir << std::endl; 433cdf0e10cSrcweir 434cdf0e10cSrcweir exit(3); 435cdf0e10cSrcweir } 436cdf0e10cSrcweir 437cdf0e10cSrcweir void 438cdf0e10cSrcweir X2CParser::TestCurChar() 439cdf0e10cSrcweir { 440cdf0e10cSrcweir // if (*text == '\0') 441cdf0e10cSrcweir // SyntaxError("unexpected end of file"); 442cdf0e10cSrcweir // else 443cdf0e10cSrcweir 444cdf0e10cSrcweir if (*text == '\n') 445cdf0e10cSrcweir nFileLine++; 446cdf0e10cSrcweir } 447cdf0e10cSrcweir 448cdf0e10cSrcweir 449