xref: /aoo4110/main/l10ntools/source/tagtest.cxx (revision b1cdbd2c)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_l10ntools.hxx"
26 #include <tools/string.hxx>
27 #include "tagtest.hxx"
28 
29 #if OSL_DEBUG_LEVEL > 1
30 #include <stdio.h>
31 #endif
32 
33 #include "gsicheck.hxx"
34 
// Bit-flag helpers.
// Every macro argument is wrapped in parentheses so that a compound argument
// such as (FLAG_A | FLAG_B) is evaluated as one unit; without them
// RESET_FLAG( n, A | B ) would expand to  n &= ~A | B  and
// HAS_FLAG( n, A | B )   would expand to  ( n & A | B ) != 0.
#define HAS_FLAG( nFlags, nFlag )		( ( (nFlags) & (nFlag) ) != 0 )	// true if any bit of nFlag is set in nFlags
#define SET_FLAG( nFlags, nFlag )		( (nFlags) |= (nFlag) )			// switch on all bits of nFlag in nFlags
#define RESET_FLAG( nFlags, nFlag )		( (nFlags) &= ~(nFlag) )		// ~ = bitwise NOT; switch off all bits of nFlag
38 
39 
40 
TokenInfo(TokenId pnId,sal_uInt16 nP,String paStr,ParserMessageList & rErrorList)41 TokenInfo::TokenInfo( TokenId pnId, sal_uInt16 nP, String paStr, ParserMessageList &rErrorList )
42 : bClosed(sal_False)
43 , bCloseTag(sal_False)
44 , bIsBroken(sal_False)
45 , bHasBeenFixed(sal_False)
46 , bDone(sal_False)
47 , aTokenString( paStr )
48 , nId( pnId )
49 , nPos(nP)
50 {
51     if ( nId == TAG_COMMONSTART || nId == TAG_COMMONEND )
52         SplitTag( rErrorList );
53 }
54 
// States of the state machine that TokenInfo::SplitTag() uses to take a tag apart.
enum tagcheck
{
    TC_START,                   // initial state, the first portion becomes the tag name
    TC_HAS_TAG_NAME,
    TC_HAS_PROP_NAME_EQ,
    TC_HAS_PROP_NAME_EQ_SP,
    TC_HAS_PROP_NAME_SP,
    TC_INSIDE_STRING,           // collecting the value of a property
    TC_PROP_FINISHED,
    TC_CLOSED,
    TC_CLOSED_SPACE,
    TC_CLOSETAG,
    TC_CLOSETAG_HAS_TAG_NAME,
    TC_FINISHED,                // terminal state: tag parsed up to its closing '>'
    TC_ERROR                    // terminal state: unexpected delimiter
};
56 
57 /*
58                                                       \<  link  href  =  \"text\"  name  =  \"C\"  \>
59 START               ' ' ->  HAS_TAG_NAME
60 START               '/' ->  CLOSED
61 START               '/' ->  CLOSETAG    - no Portion (starting with /)
62 START               '>' ->  FINISHED
63 HAS_TAG_NAME        '=' ->  HAS_PROP_NAME_EQ
64 HAS_TAG_NAME        ' ' ->  HAS_PROP_NAME_SP
65 HAS_TAG_NAME        '/' ->  CLOSED
66 HAS_TAG_NAME        '>' ->  FINISHED
67 HAS_PROP_NAME_SP    '=' ->  HAS_PROP_NAME_EQ
68 HAS_PROP_NAME_EQ    ' ' ->  HAS_PROP_NAME_EQ_SP
69 HAS_PROP_NAME_EQ    '"' ->  INSIDE_STRING
70 HAS_PROP_NAME_EQ_SP '"' ->  INSIDE_STRING
71 INSIDE_STRING       ' ' ->  INSIDE_STRING
72 INSIDE_STRING       '=' ->  INSIDE_STRING
73 INSIDE_STRING       '>' ->  INSIDE_STRING
74 INSIDE_STRING       '"' ->  PROP_FINISHED
75 PROP_FINISHED       ' ' ->  HAS_TAG_NAME
76 PROP_FINISHED       '/' ->  CLOSED
77 PROP_FINISHED       '>' ->  FINISHED
78 CLOSED              ' ' ->  CLOSED_SPACE
79 CLOSED              '>' ->  FINISHED
80 CLOSED_SPACE        '>' ->  FINISHED
81 
82 CLOSETAG            ' ' ->  CLOSETAG_HAS_TAG_NAME
83 CLOSETAG            '>' ->  FINISHED
84 CLOSETAG_HAS_TAG_NAME  '>' ->  FINISHED
85 
86 */
SplitTag(ParserMessageList & rErrorList)87 void TokenInfo::SplitTag( ParserMessageList &rErrorList )
88 {
89     sal_uInt16 nLastPos = 2;    // skip initial  \<
90     sal_uInt16 nCheckPos = nLastPos;
91     String aDelims( String::CreateFromAscii( " \\=>/" ) );
92     String aPortion;
93     String aValue;      // store the value of a property
94     ByteString aName;   // store the name of a property/tag
95     sal_Bool bCheckName = sal_False;
96     sal_Bool bCheckEmpty = sal_False;
97     sal_Unicode cDelim;
98     tagcheck aState = TC_START;
99 
100     // skip blanks
101     while ( nLastPos < aTokenString.Len() && aTokenString.GetChar( nLastPos ) == ' ')
102         nLastPos++;
103 
104     nCheckPos = aTokenString.SearchChar( aDelims.GetBuffer(), nLastPos );
105     while ( nCheckPos != STRING_NOTFOUND && !( aState == TC_FINISHED || aState == TC_ERROR ) )
106     {
107         aPortion = aTokenString.Copy( nLastPos, nCheckPos-nLastPos );
108 
109         if ( aTokenString.GetChar( nCheckPos ) == '\\' )
110             nCheckPos++;
111 
112         cDelim = aTokenString.GetChar( nCheckPos );
113         nCheckPos++;
114 
115         switch ( aState )
116         {
117 //            START           ' ' ->  HAS_TAG_NAME
118 //            START           '/' ->  CLOSED
119 //            START           '>' ->  FINISHED
120             case TC_START:
121                 aTagName = aPortion;
122                 switch ( cDelim )
123                 {
124                     case ' ':  aState = TC_HAS_TAG_NAME;
125                                bCheckName = sal_True;
126                                break;
127                     case '/':
128                         {
129                             if ( aPortion.Len() == 0 )
130                             {
131                                 aState = TC_CLOSETAG;
132                             }
133                             else
134                             {
135                                 aState = TC_CLOSED;
136                                 bCheckName = sal_True;
137                             }
138                         }
139                         break;
140                     case '>':  aState = TC_FINISHED;
141                                bCheckName = sal_True;
142                                break;
143                     default:   aState = TC_ERROR;
144                 }
145                 break;
146 
147 //            HAS_TAG_NAME    '=' ->  HAS_PROP_NAME_EQ
148 //            HAS_TAG_NAME    ' ' ->  HAS_PROP_NAME_SP
149 //            HAS_TAG_NAME    '/' ->  CLOSED
150 //            HAS_TAG_NAME    '>' ->  FINISHED
151             case TC_HAS_TAG_NAME:
152                 switch ( cDelim )
153                 {
154                     case '=':  aState = TC_HAS_PROP_NAME_EQ;
155                                bCheckName = sal_True;
156                                break;
157                     case ' ':  aState = TC_HAS_PROP_NAME_SP;
158                                bCheckName = sal_True;
159                                break;
160                     case '/':  aState = TC_CLOSED;
161                                bCheckEmpty = sal_True;
162                                break;
163                     case '>':  aState = TC_FINISHED;
164                                bCheckEmpty = sal_True;
165                                break;
166                     default:   aState = TC_ERROR;
167                 }
168                 break;
169 
170 //            HAS_PROP_NAME_SP    '=' ->  HAS_PROP_NAME_EQ
171             case TC_HAS_PROP_NAME_SP:
172                 switch ( cDelim )
173                 {
174                     case '=':  aState = TC_HAS_PROP_NAME_EQ;
175                                bCheckEmpty = sal_True;
176                                break;
177                     default:   aState = TC_ERROR;
178                 }
179                 break;
180 
181 //            HAS_PROP_NAME_EQ    ' ' ->  HAS_PROP_NAME_EQ_SP
182 //            HAS_PROP_NAME_EQ    '"' ->  INSIDE_STRING
183             case TC_HAS_PROP_NAME_EQ:
184                 switch ( cDelim )
185                 {
186                     case ' ':  aState = TC_HAS_PROP_NAME_EQ_SP;
187                                bCheckEmpty = sal_True;
188                                break;
189                     case '\"': aState = TC_INSIDE_STRING;
190                                bCheckEmpty = sal_True;
191                                aValue.Erase();
192                                break;
193                     default:   aState = TC_ERROR;
194                 }
195                 break;
196 
197 //            HAS_PROP_NAME_EQ_SP '"' ->  INSIDE_STRING
198             case TC_HAS_PROP_NAME_EQ_SP:
199                 switch ( cDelim )
200                 {
201                     case '\"': aState = TC_INSIDE_STRING;
202                                bCheckEmpty = sal_True;
203                                aValue.Erase();
204                                break;
205                     default:   aState = TC_ERROR;
206                 }
207                 break;
208 
209 //            INSIDE_STRING    *  ->  INSIDE_STRING
210 //            INSIDE_STRING   '"' ->  PROP_FINISHED
211             case TC_INSIDE_STRING:
212                 switch ( cDelim )
213                 {
214                     case '\"':
215                         {
216                             aState = TC_PROP_FINISHED;
217                             aValue += aPortion;
218                             if ( aProperties.find( aName ) == aProperties.end() )
219                             {
220                                 if ( !IsPropertyValueValid( aName, aValue ) )
221                                 {
222                                     rErrorList.AddError( 25, ByteString("Property '").Append(aName).Append("' has invalid value '").Append(ByteString( aValue, RTL_TEXTENCODING_UTF8 )).Append("' "), *this );
223                                     bIsBroken = sal_True;
224                                 }
225                                 aProperties[ aName ] = aValue;
226                             }
227                             else
228                             {
229                                 rErrorList.AddError( 25, ByteString("Property '").Append(aName).Append("' defined twice "), *this );
230                                 bIsBroken = sal_True;
231                             }
232                         }
233                                break;
234                     default:
235                         {
236                             aState = TC_INSIDE_STRING;
237                             aValue += aPortion;
238                             aValue += cDelim;
239                         }
240                 }
241                 break;
242 
243 //            PROP_FINISHED   ' ' ->  HAS_TAG_NAME
244 //            PROP_FINISHED   '/' ->  CLOSED
245 //            PROP_FINISHED   '>' ->  FINISHED
246             case TC_PROP_FINISHED:
247                 switch ( cDelim )
248                 {
249                     case ' ': aState = TC_HAS_TAG_NAME;
250                                bCheckEmpty = sal_True;
251                                break;
252                     case '/': aState = TC_CLOSED;
253                                bCheckEmpty = sal_True;
254                                break;
255                     case '>': aState = TC_FINISHED;
256                                bCheckEmpty = sal_True;
257                                break;
258                     default:   aState = TC_ERROR;
259                 }
260                 break;
261 
262 //            CLOSED          ' ' ->  CLOSED_SPACE
263 //            CLOSED          '>' ->  FINISHED
264             case TC_CLOSED:
265                 switch ( cDelim )
266                 {
267                     case ' ': aState = TC_CLOSED_SPACE;
268                                bCheckEmpty = sal_True;
269                                bClosed = sal_True;
270                                break;
271                     case '>': aState = TC_FINISHED;
272                                bCheckEmpty = sal_True;
273                                break;
274                     default:   aState = TC_ERROR;
275                 }
276                 break;
277 
278 //            CLOSED_SPACE    '>' ->  FINISHED
279             case TC_CLOSED_SPACE:
280                 switch ( cDelim )
281                 {
282                     case '>': aState = TC_FINISHED;
283                                bCheckEmpty = sal_True;
284                                break;
285                     default:   aState = TC_ERROR;
286                 }
287                 break;
288 
289 // CLOSETAG            ' ' ->  CLOSETAG_HAS_TAG_NAME
290 // CLOSETAG            '>' ->  FINISHED
291             case TC_CLOSETAG:
292                 bCloseTag = sal_True;
293                 switch ( cDelim )
294                 {
295                     case ' ': aState = TC_CLOSETAG_HAS_TAG_NAME;
296                                aTagName = aPortion;
297                                bCheckName = sal_True;
298                                break;
299                     case '>': aState = TC_FINISHED;
300                                aTagName = aPortion;
301                                bCheckName = sal_True;
302                                break;
303                     default:   aState = TC_ERROR;
304                 }
305                 break;
306 
307 // CLOSETAG_HAS_TAG_NAME       '>' ->  FINISHED
308             case TC_CLOSETAG_HAS_TAG_NAME:
309                 switch ( cDelim )
310                 {
311                     case '>': aState = TC_FINISHED;
312                                bCheckEmpty = sal_True;
313                                break;
314                     default:   aState = TC_ERROR;
315                 }
316                 break;
317 
318 
319             default: rErrorList.AddError( 99, "Internal error Parsing Tag ", *this );
320                      bIsBroken = sal_True;
321 
322         }
323 
324         if ( bCheckName )
325         {
326             if ( aPortion.Len() == 0 )
327             {
328                 rErrorList.AddError( 25, "Tag/Property name missing ", *this );
329                 bIsBroken = sal_True;
330             }
331             else
332             {
333                 aName = ByteString( aPortion, RTL_TEXTENCODING_UTF8 );
334                 // "a-zA-Z_-.0-9"
335                 xub_StrLen nCount;
336                 sal_Bool bBroken = sal_False;
337                 const sal_Char* aBuf = aName.GetBuffer();
338                 for ( nCount = 0 ; !bBroken && nCount < aName.Len() ; nCount++ )
339                 {
340                     bBroken = ! (   ( aBuf[nCount] >= 'a' && aBuf[nCount] <= 'z' )
341                                 ||( aBuf[nCount] >= 'A' && aBuf[nCount] <= 'Z' )
342                                 ||( aBuf[nCount] >= '0' && aBuf[nCount] <= '9' )
343                                 ||( aBuf[nCount] == '_' )
344                                 ||( aBuf[nCount] == '-' )
345                                 ||( aBuf[nCount] == '.' )
346                                 );
347                 }
348 
349                 if ( bBroken )
350                 {
351                     rErrorList.AddError( 25, "Found illegal character in Tag/Property name ", *this );
352                     bIsBroken = sal_True;
353                 }
354             }
355 
356             bCheckName = sal_False;
357         }
358 
359         if ( bCheckEmpty )
360         {
361             if ( aPortion.Len() )
362             {
363                 rErrorList.AddError( 25, ByteString("Found displaced characters '").Append(ByteString( aPortion, RTL_TEXTENCODING_UTF8 )).Append("' in Tag "), *this );
364                 bIsBroken = sal_True;
365             }
366             bCheckEmpty = sal_False;
367         }
368 
369 
370         nLastPos = nCheckPos;
371 
372         // skip further blanks
373         if ( cDelim == ' ' && aState != TC_INSIDE_STRING )
374             while ( nLastPos < aTokenString.Len() && aTokenString.GetChar( nLastPos ) == ' ')
375                 nLastPos++;
376 
377         nCheckPos = aTokenString.SearchChar( aDelims.GetBuffer(), nLastPos );
378     }
379     if ( aState != TC_FINISHED )
380     {
381         rErrorList.AddError( 25, "Parsing error in Tag ", *this );
382         bIsBroken = sal_True;
383     }
384 }
385 
IsPropertyRelevant(const ByteString & aName,const String & aValue) const386 sal_Bool TokenInfo::IsPropertyRelevant( const ByteString &aName, const String &aValue ) const
387 {
388     if ( aTagName.EqualsAscii( "alt" ) && aName.Equals( "xml-lang" ) )
389         return sal_False;
390     if ( aTagName.EqualsAscii( "ahelp" ) && aName.Equals( "visibility" ) && aValue.EqualsAscii("visible") )
391         return sal_False;
392     if ( aTagName.EqualsAscii( "image" ) && (aName.Equals( "width" ) || aName.Equals( "height" )) )
393         return sal_False;
394 
395     return sal_True;
396 }
397 
IsPropertyValueValid(const ByteString & aName,const String & aValue) const398 sal_Bool TokenInfo::IsPropertyValueValid( const ByteString &aName, const String &aValue ) const
399 {
400 /*  removed due to i56740
401     if ( aTagName.EqualsAscii( "switchinline" ) && aName.Equals( "select" ) )
402     {
403         return aValue.EqualsAscii("sys") ||
404                aValue.EqualsAscii("appl") ||
405                aValue.EqualsAscii("distrib");
406     } */
407     if ( aTagName.EqualsAscii( "caseinline" ) && aName.Equals( "select" ) )
408     {
409         return /*!aValue.EqualsAscii("OS2") &&  removed due to i56740 */
410                !aValue.EqualsAscii("");
411     }
412 
413     // we don't know any better so we assume it to be OK
414     return sal_True;
415 }
416 
IsPropertyInvariant(const ByteString & aName,const String & aValue) const417 sal_Bool TokenInfo::IsPropertyInvariant( const ByteString &aName, const String &aValue ) const
418 {
419     if ( aTagName.EqualsAscii( "link" ) && aName.Equals( "name" ) )
420         return sal_False;
421     if ( aTagName.EqualsAscii( "link" ) && aName.Equals( "href" ) )
422     {   // check for external reference
423         if (  aValue.Copy( 0, 5 ).EqualsIgnoreCaseAscii( "http:" )
424            || aValue.Copy( 0, 6 ).EqualsIgnoreCaseAscii( "https:" )
425            || aValue.Copy( 0, 4 ).EqualsIgnoreCaseAscii( "ftp:" ) )
426             return sal_False;
427         else
428             return sal_True;
429     }
430     return sal_True;
431 }
432 
IsPropertyFixable(const ByteString & aName) const433 sal_Bool TokenInfo::IsPropertyFixable( const ByteString &aName ) const
434 {
435     // name everything that is allowed to be fixed automatically here
436     if ( (aTagName.EqualsAscii( "ahelp" ) && aName.Equals( "hid" ))
437       || (aTagName.EqualsAscii( "link" ) && aName.Equals( "href" ))
438       || (aTagName.EqualsAscii( "alt" ) && aName.Equals( "id" ))
439       || (aTagName.EqualsAscii( "variable" ) && aName.Equals( "id" ))
440       || (aTagName.EqualsAscii( "image" ) && aName.Equals( "src" ))
441       || (aTagName.EqualsAscii( "image" ) && aName.Equals( "id" ) ))
442         return sal_True;
443     return sal_False;
444 }
445 
MatchesTranslation(TokenInfo & rInfo,sal_Bool bGenErrors,ParserMessageList & rErrorList,sal_Bool bFixTags) const446 sal_Bool TokenInfo::MatchesTranslation( TokenInfo& rInfo, sal_Bool bGenErrors, ParserMessageList &rErrorList, sal_Bool bFixTags ) const
447 {
448     // check if tags are equal
449     // check if all existing properties are in the translation as well and
450     // wether they have a matching content (the same in most cases)
451 
452     if ( nId != rInfo.nId )
453         return sal_False;
454 
455     if ( !aTagName.Equals( rInfo.aTagName ) )
456         return sal_False;
457 
458     // If one of the tags has formating errors already it does make no sense to check here, so return right away
459     if ( bGenErrors && ( bIsBroken || rInfo.bIsBroken ) )
460         return sal_True;
461 
462 	StringHashMap::const_iterator iProp;
463 	for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp )
464     {
465         if ( rInfo.aProperties.find( iProp->first ) != rInfo.aProperties.end() )
466         {
467             if ( IsPropertyRelevant( iProp->first, iProp->second ) || IsPropertyRelevant( iProp->first, rInfo.aProperties.find( iProp->first )->second ) )
468             {
469                 if ( IsPropertyInvariant( iProp->first, iProp->second ) )
470                 {
471                     if ( !rInfo.aProperties.find( iProp->first )->second.Equals( iProp->second ) )
472                     {
473                         if ( bGenErrors )
474                         {
475                             if ( bFixTags && IsPropertyFixable( iProp->first ) )
476                             {
477                                 rInfo.aProperties.find( iProp->first )->second = iProp->second;
478                                 rInfo.SetHasBeenFixed();
479                                 rErrorList.AddWarning( 25, ByteString("Property '").Append(iProp->first).Append("': FIXED different value in Translation "), *this );
480                             }
481                             else
482                                 rErrorList.AddError( 25, ByteString("Property '").Append(iProp->first).Append("': value different in Translation "), *this );
483                         }
484                         else return sal_False;
485                     }
486                 }
487             }
488         }
489         else
490         {
491             if ( IsPropertyRelevant( iProp->first, iProp->second ) )
492             {
493                 if ( bGenErrors )
494                     rErrorList.AddError( 25, ByteString("Property '").Append(iProp->first).Append("' missing in Translation "), *this );
495                 else return sal_False;
496             }
497         }
498 	}
499 	for( iProp = rInfo.aProperties.begin() ; iProp != rInfo.aProperties.end(); ++iProp )
500     {
501         if ( aProperties.find( iProp->first ) == aProperties.end() )
502         {
503             if ( IsPropertyRelevant( iProp->first, iProp->second ) )
504             {
505                 if ( bGenErrors )
506                     rErrorList.AddError( 25, ByteString("Extra Property '").Append(iProp->first).Append("' in Translation "), rInfo );
507                 else return sal_False;
508             }
509         }
510 	}
511 
512     // if we reach here eather
513     //   the tags match completely or
514     //   the tags match but not the properties and we generated errors for that
515     return sal_True;
516 }
517 
GetTagName() const518 String TokenInfo::GetTagName() const
519 {
520     return aTagName;
521 }
522 
MakeTag() const523 String TokenInfo::MakeTag() const
524 {
525     String aRet;
526     aRet.AppendAscii("\\<");
527     if ( bCloseTag )
528         aRet.AppendAscii("/");
529     aRet.Append( GetTagName() );
530 	StringHashMap::const_iterator iProp;
531 
532 	for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp )
533     {
534         aRet.AppendAscii(" ");
535         aRet.Append( String( iProp->first, RTL_TEXTENCODING_UTF8 ) );
536         aRet.AppendAscii("=\\\"");
537         aRet.Append( iProp->second );
538         aRet.AppendAscii("\\\"");
539     }
540     if ( bClosed )
541         aRet.AppendAscii("/");
542     aRet.AppendAscii("\\>");
543     return aRet;
544 }
545 
546 
AddError(sal_uInt16 nErrorNr,ByteString aErrorText,const TokenInfo & rTag)547 void ParserMessageList::AddError( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag )
548 {
549     Insert( new ParserError( nErrorNr, aErrorText, rTag ), LIST_APPEND );
550 }
551 
AddWarning(sal_uInt16 nErrorNr,ByteString aErrorText,const TokenInfo & rTag)552 void ParserMessageList::AddWarning( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag )
553 {
554     Insert( new ParserWarning( nErrorNr, aErrorText, rTag ), LIST_APPEND );
555 }
556 
HasErrors()557 sal_Bool ParserMessageList::HasErrors()
558 {
559     sal_uInt16 i;
560     for ( i=0 ; i < Count() ; i++ )
561         if ( GetObject( i )->IsError() )
562             return sal_True;
563     return sal_False;
564 }
565 
566 struct Tag
567 {
GetNameTag568     String GetName() const { return String::CreateFromAscii( pName ); };
569 	const char* pName;
570 	TokenId nTag;
571 };
572 
573 
574 static const Tag aKnownTags[] =
575 {
576 /*  commenting oldstyle tags
577 //	{ "<#GROUP_FORMAT>", TAG_GROUP_FORMAT },
578 	{ "<#BOLD>", TAG_BOLDON },
579 	{ "<#/BOLD>", TAG_BOLDOFF },
580 	{ "<#ITALIC>", TAG_ITALICON },
581 	{ "<#/ITALIC>", TAG_ITALICOFF },
582 	{ "<#UNDER>", TAG_UNDERLINEON },
583 	{ "<#/UNDER>", TAG_UNDERLINEOFF },
584 
585 //	{ "<#GROUP_NOTALLOWED>", TAG_GROUP_NOTALLOWED },
586 	{ "<#HELPID>", TAG_HELPID },
587 	{ "<#MODIFY>", TAG_MODIFY },
588 	{ "<#REFNR>", TAG_REFNR },
589 
590 //	{ "<#GROUP_STRUCTURE>", TAG_GROUP_STRUCTURE },
591 	{ "<#NAME>", TAG_NAME },
592 	{ "<#HREF>", TAG_HREF },
593 	{ "<#AVIS>", TAG_AVIS },
594 	{ "<#AHID>", TAG_AHID },
595 	{ "<#AEND>", TAG_AEND },
596 
597 	{ "<#TITEL>", TAG_TITEL },
598 	{ "<#KEY>", TAG_KEY },
599 	{ "<#INDEX>", TAG_INDEX },
600 
601 	{ "<#REFSTART>", TAG_REFSTART },
602 
603 	{ "<#GRAPHIC>", TAG_GRAPHIC },
604 	{ "<#NEXTVERSION>", TAG_NEXTVERSION },
605 
606     //	{ "<#GROUP_SYSSWITCH>", TAG_GROUP_SYSSWITCH },
607 	{ "<#WIN>", TAG_WIN },
608 	{ "<#UNIX>", TAG_UNIX },
609 	{ "<#MAC>", TAG_MAC },
610 	{ "<#OS2>", TAG_OS2 },
611 
612 //	{ "<#GROUP_PROGSWITCH>", TAG_GROUP_PROGSWITCH },
613 	{ "<#WRITER>", TAG_WRITER },
614 	{ "<#CALC>", TAG_CALC },
615 	{ "<#DRAW>", TAG_DRAW },
616 	{ "<#IMPRESS>", TAG_IMPRESS },
617 	{ "<#SCHEDULE>", TAG_SCHEDULE },
618 	{ "<#IMAGE>", TAG_IMAGE },
619 	{ "<#MATH>", TAG_MATH },
620 	{ "<#CHART>", TAG_CHART },
621 	{ "<#OFFICE>", TAG_OFFICE },
622   */
623 //	{ "<#TAG_GROUP_META>", TAG_GROUP_META },
624 	{ "$[officefullname]", TAG_OFFICEFULLNAME },
625 	{ "$[officename]", TAG_OFFICENAME },
626 	{ "$[officepath]", TAG_OFFICEPATH },
627 	{ "$[officeversion]", TAG_OFFICEVERSION },
628 	{ "$[portalname]", TAG_PORTALNAME },
629 	{ "$[portalfullname]", TAG_PORTALFULLNAME },
630 	{ "$[portalpath]", TAG_PORTALPATH },
631 	{ "$[portalversion]", TAG_PORTALVERSION },
632 	{ "$[portalshortname]", TAG_PORTALSHORTNAME },
633 /*  commenting oldstyle tags
634 //	{ "<#TAG_GROUP_SINGLE>", TAG_GROUP_SINGLE },
635 	{ "<#REFINSERT>", TAG_REFINSERT },
636 
637 //	{ "<#GROUP_MULTI>", TAG_GROUP_MULTI },
638 	{ "<#END>", TAG_END },
639 	{ "<#ELSE>", TAG_ELSE },
640 	{ "<#VERSIONEND>", TAG_VERSIONEND },
641 	{ "<#ENDGRAPHIC>", TAG_ENDGRAPHIC },*/
642 	{ "<Common Tag>", TAG_COMMONSTART },
643 	{ "</Common Tag>", TAG_COMMONEND },
644 
645     { "<no more tags>", TAG_NOMORETAGS },
646 	{ "", TAG_UNKNOWN_TAG },
647 };
648 
649 
SimpleParser()650 SimpleParser::SimpleParser()
651 : nPos( 0 )
652 , aNextTag( TAG_NOMORETAGS, TOK_INVALIDPOS )
653 {
654 }
655 
Parse(String PaSource)656 void SimpleParser::Parse( String PaSource )
657 {
658     aSource = PaSource;
659 	nPos = 0;
660 	aLastToken.Erase();
661     aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
662 	aTokenList.Clear();
663 };
664 
GetNextToken(ParserMessageList & rErrorList)665 TokenInfo SimpleParser::GetNextToken( ParserMessageList &rErrorList )
666 {
667     TokenInfo aResult;
668     sal_uInt16 nTokenStartPos = 0;
669     if ( aNextTag.nId != TAG_NOMORETAGS )
670     {
671         aResult = aNextTag;
672         aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
673     }
674     else
675     {
676 	    aLastToken = GetNextTokenString( rErrorList, nTokenStartPos );
677 	    if ( aLastToken.Len() == 0 )
678 		    return TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
679 
680         // do we have a \< ... \> style tag?
681         if ( aLastToken.Copy(0,2).EqualsAscii( "\\<" ) )
682         {
683             // check for paired \" \"
684             bool bEven = true;
685             sal_uInt16 nQuotePos = 0;
686     	    sal_uInt16 nQuotedQuotesPos = aLastToken.SearchAscii( "\\\"" );
687     	    sal_uInt16 nQuotedBackPos = aLastToken.SearchAscii( "\\\\" );    // this is only to kick out quoted backslashes
688             while ( nQuotedQuotesPos != STRING_NOTFOUND )
689             {
690                 if ( nQuotedBackPos <= nQuotedQuotesPos )
691                     nQuotePos = nQuotedBackPos+2;
692                 else
693                 {
694                     nQuotePos = nQuotedQuotesPos+2;
695                     bEven = !bEven;
696                 }
697 		        nQuotedQuotesPos = aLastToken.SearchAscii( "\\\"", nQuotePos );
698     	        nQuotedBackPos = aLastToken.SearchAscii( "\\\\", nQuotePos );    // this is only to kick out quoted backslashes
699             }
700             if ( !bEven )
701             {
702         		rErrorList.AddError( 24, "Missing quotes ( \\\" ) in Tag", TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken ) );
703             }
704 
705             // check if we have an end-tag or a start-tag
706             sal_uInt16 nNonBlankStartPos,nNonBlankEndPos;
707             nNonBlankStartPos = 2;
708             while ( aLastToken.GetChar(nNonBlankStartPos) == ' ' )
709                 nNonBlankStartPos++;
710             if ( aLastToken.GetChar(nNonBlankStartPos) == '/' )
711                 aResult = TokenInfo( TAG_COMMONEND, nTokenStartPos, aLastToken, rErrorList );
712             else
713             {
714                 aResult = TokenInfo( TAG_COMMONSTART, nTokenStartPos, aLastToken, rErrorList );
715                 nNonBlankEndPos = aLastToken.Len() -3;
716                 while ( aLastToken.GetChar(nNonBlankEndPos) == ' ' )
717                     nNonBlankEndPos--;
718                 if ( aLastToken.GetChar( nNonBlankEndPos ) == '/' )
719                     aNextTag = TokenInfo( TAG_COMMONEND, nTokenStartPos, String::CreateFromAscii("\\</").Append(aResult.GetTagName()).AppendAscii("\\>"), rErrorList );
720             }
721         }
722         else
723         {
724 	        sal_uInt16 i = 0;
725 	        while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG &&
726 		        aLastToken != aKnownTags[i].GetName() )
727 		        i++;
728             aResult = TokenInfo( aKnownTags[i].nTag, nTokenStartPos );
729         }
730     }
731 
732     if ( aResult.nId == TAG_UNKNOWN_TAG )
733         aResult = TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken );
734 	aTokenList.Insert( aResult, LIST_APPEND );
735 	return aResult;
736 }
737 
GetNextTokenString(ParserMessageList & rErrorList,sal_uInt16 & rTagStartPos)738 String SimpleParser::GetNextTokenString( ParserMessageList &rErrorList, sal_uInt16 &rTagStartPos )
739 {
740 //	sal_uInt16 nStyle1StartPos = aSource.SearchAscii( "<#", nPos );
741 	sal_uInt16 nStyle2StartPos = aSource.SearchAscii( "$[", nPos );
742 	sal_uInt16 nStyle3StartPos = aSource.SearchAscii( "\\<", nPos );
743 	sal_uInt16 nStyle4StartPos = aSource.SearchAscii( "\\\\", nPos );    // this is only to kick out quoted backslashes
744 
745     rTagStartPos = 0;
746 
747 /* removing since a \<... is not likely
748     // check if the tag starts with a letter to avoid things like <> <= ... >
749     while ( STRING_NOTFOUND != nStyle3StartPos && !( aSource.Copy( nStyle3StartPos+2, 1 ).IsAlphaAscii() || aSource.GetChar( nStyle3StartPos+2 ) == '/' ) )
750     	nStyle3StartPos = aSource.SearchAscii( "\\<", nStyle3StartPos+1 );
751 */
752     if ( STRING_NOTFOUND == nStyle2StartPos && STRING_NOTFOUND == nStyle3StartPos )
753         return String();  // no more tokens
754 
755 	if ( nStyle4StartPos < nStyle2StartPos && nStyle4StartPos <= nStyle3StartPos )  // <= to make sure \\ is always handled first
756     {   // Skip quoted Backslash
757         nPos = nStyle4StartPos +2;
758         return GetNextTokenString( rErrorList, rTagStartPos );
759     }
760 
761 /*	if ( nStyle1StartPos < nStyle2StartPos && nStyle1StartPos <= nStyle3StartPos )  // <= to make sure our spechial tags are recognized before all others
762     {	// test for <# ... > style tokens
763 	    sal_uInt16 nEndPos = aSource.SearchAscii( ">", nStyle1StartPos );
764         if ( nEndPos == STRING_NOTFOUND )
765         {   // Token is incomplete. Skip start and search for better ones
766             nPos = nStyle1StartPos +2;
767             return GetNextTokenString( rErrorList, rTagStartPos );
768         }
769 	    nPos = nEndPos;
770         rTagStartPos = nStyle1StartPos;
771 	    return aSource.Copy( nStyle1StartPos, nEndPos-nStyle1StartPos +1 ).ToUpperAscii();
772     }
773     else*/ if ( nStyle2StartPos < nStyle3StartPos )
774 	{	// test for $[ ... ] style tokens
775 		sal_uInt16 nEndPos = aSource.SearchAscii( "]", nStyle2StartPos);
776         if ( nEndPos == STRING_NOTFOUND )
777         {   // Token is incomplete. Skip start and search for better ones
778             nPos = nStyle2StartPos +2;
779             return GetNextTokenString( rErrorList, rTagStartPos );
780         }
781 		nPos = nEndPos;
782         rTagStartPos = nStyle2StartPos;
783 		return aSource.Copy( nStyle2StartPos, nEndPos-nStyle2StartPos +1 );
784 	}
785     else
786 	{	// test for \< ... \> style tokens
787     	sal_uInt16 nEndPos = aSource.SearchAscii( "\\>", nStyle3StartPos);
788     	sal_uInt16 nQuotedBackPos = aSource.SearchAscii( "\\\\", nStyle3StartPos );    // this is only to kick out quoted backslashes
789         while ( nQuotedBackPos <= nEndPos && nQuotedBackPos != STRING_NOTFOUND )
790         {
791 		    nEndPos = aSource.SearchAscii( "\\>", nQuotedBackPos +2);
792     	    nQuotedBackPos = aSource.SearchAscii( "\\\\", nQuotedBackPos +2 );    // this is only to kick out quoted backslashes
793         }
794         if ( nEndPos == STRING_NOTFOUND )
795         {   // Token is incomplete. Skip start and search for better ones
796             nPos = nStyle3StartPos +2;
797 		    ByteString sTmp( "Tag Start '\\<' without Tag End '\\>': " );
798         	rErrorList.AddError( 24, "Tag Start '\\<' without Tag End '\\>'", TokenInfo( TAG_UNKNOWN_TAG, nStyle3StartPos, aSource.Copy( nStyle3StartPos-10, 20 ) ) );
799             return GetNextTokenString( rErrorList, rTagStartPos );
800         }
801         // check for paired quoted "    -->   \"sometext\"
802 
803         nPos = nEndPos;
804         rTagStartPos = nStyle3StartPos;
805 		return aSource.Copy( nStyle3StartPos, nEndPos-nStyle3StartPos +2 );
806 	}
807 }
808 
GetLexem(TokenInfo const & aToken)809 String SimpleParser::GetLexem( TokenInfo const &aToken )
810 {
811     if ( aToken.aTokenString.Len() )
812         return aToken.aTokenString;
813     else
814     {
815         sal_uInt16 i = 0;
816 	    while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG &&
817 		    aKnownTags[i].nTag != aToken.nId )
818 		    i++;
819 
820         return aKnownTags[i].GetName();
821     }
822 }
823 
TokenParser()824 TokenParser::TokenParser()
825 : pErrorList( NULL )
826 {}
827 
Parse(const String & aCode,ParserMessageList * pList)828 void TokenParser::Parse( const String &aCode, ParserMessageList* pList )
829 {
830     pErrorList = pList;
831 
832 	//Scanner initialisieren
833 	aParser.Parse( aCode );
834 
835 	//erstes Symbol holen
836 	aTag = aParser.GetNextToken( *pErrorList );
837 
838 	nPfCaseOptions = 0;
839 	nAppCaseOptions = 0;
840 	bPfCaseActive = sal_False;
841 	bAppCaseActive = sal_False;
842 
843 	nActiveRefTypes = 0;
844 
845     //Ausfuehren der Start-Produktion
846 	Paragraph();
847 
848 	//Es wurde nicht die ganze Kette abgearbeitet, bisher ist aber
849 	//kein Fehler aufgetreten
850 	//=> es wurde ein einleitendes Tag vergessen
851 	if ( aTag.nId != TAG_NOMORETAGS )
852 	{
853 		switch ( aTag.nId )
854 		{
855 			case TAG_END:
856 				{
857 					ParseError( 3, "Extra Tag <#END>. Switch or <#HREF> expected.", aTag );
858 				}
859 				break;
860 			case TAG_BOLDOFF:
861 				{
862 					ParseError( 4, "<#BOLD> expected before <#/BOLD>.", aTag );
863 				}
864 				break;
865 			case TAG_ITALICOFF:
866 				{
867 					ParseError( 5, "<#ITALIC> expected before <#/ITALIC>.", aTag );
868 				}
869 				break;
870 			case TAG_UNDERLINEOFF:
871 				{
872 					ParseError( 17, "<#UNDER> expected before <#/UNDER>.", aTag );
873 				}
874 				break;
875 /*			case TAG_MISSPARENTHESIS:
876 				{
877 					ParseError( 14, "missing closing parenthesis '>'", aTag );
878 				}
879 				break;*/
880 			case TAG_AEND:
881 				{
882 					ParseError( 5, "Extra Tag <#AEND>. <#AVIS> or <#AHID> expected.", aTag );
883 				}
884 				break;
885 			case TAG_ELSE:
886 				{
887 					ParseError( 16, "Application-tag or platform-tag expected before <#ELSE>.", aTag );
888 				}
889 				break;
890 			case TAG_UNKNOWN_TAG:
891 				{
892 					ParseError( 6, "unknown Tag", aTag );
893 				}
894 				break;
895 			default:
896 				{
897 					ParseError( 6, "unexpected Tag", aTag );
898 				}
899 		}
900 	}
901     pErrorList = NULL;
902 }
903 
Paragraph()904 void TokenParser::Paragraph()
905 {
906 	switch ( aTag.nId )
907 	{
908 		case TAG_GRAPHIC:
909 		case TAG_NEXTVERSION:
910 			{
911 				TagRef();
912 				Paragraph();
913 			}
914 			break;
915 		case TAG_AVIS:
916 		case TAG_AHID:
917 			{
918 				TagRef();
919 				Paragraph();
920 			}
921 			break;
922 		case TAG_HELPID:
923 			{
924 				SimpleTag();
925 				Paragraph();
926 			}
927 			break;
928 		case TAG_OFFICEFULLNAME:
929 		case TAG_OFFICENAME:
930 		case TAG_OFFICEPATH:
931 		case TAG_OFFICEVERSION:
932 		case TAG_PORTALNAME:
933 		case TAG_PORTALFULLNAME:
934 		case TAG_PORTALPATH:
935 		case TAG_PORTALVERSION:
936 		case TAG_PORTALSHORTNAME:
937 			{
938 				SimpleTag();
939 				Paragraph();
940 			}
941 			break;
942 		case TAG_REFINSERT:
943 			{
944 				SimpleTag();
945 				Paragraph();
946 			}
947 			break;
948 		case TAG_BOLDON:
949 		case TAG_ITALICON:
950 		case TAG_UNDERLINEON:
951 		case TAG_COMMONSTART:
952 			{
953 				TagPair();
954 				Paragraph();
955 			}
956 			break;
957 		case TAG_HREF:
958 		case TAG_NAME:
959 		case TAG_KEY:
960 		case TAG_INDEX:
961         case TAG_TITEL:
962         case TAG_REFSTART:
963             {
964 				TagRef();
965 				Paragraph();
966 			}
967 			break;
968 		case TAG_OS2:
969 		case TAG_WIN:
970 		case TAG_UNIX:
971 		case TAG_MAC: //...
972 			{
973 				if ( ! bPfCaseActive )
974 				{
975                     //PfCases duerfen nicht verschachtelt sein:
976 					bPfCaseActive = sal_True;
977 					PfCase();
978 
979 					//So jetzt kann wieder ein PfCase kommen:
980 					bPfCaseActive = sal_False;
981 					Paragraph();
982 				}
983 			}
984 			break;
985 		case TAG_WRITER:
986 		case TAG_CALC:
987 		case TAG_DRAW:
988 		case TAG_IMPRESS:
989 		case TAG_SCHEDULE:
990 		case TAG_IMAGE:
991 		case TAG_MATH:
992 		case TAG_CHART:
993 		case TAG_OFFICE:
994 			{
995 				if ( !bAppCaseActive )
996 				{
997                     //AppCases duerfen nicht verschachtelt sein:
998 					bAppCaseActive = sal_True;
999 					AppCase();
1000 
1001                     //jetzt koennen wieder AppCases kommen:
1002 					bAppCaseActive = sal_False;
1003 					Paragraph();
1004 				}
1005 			}
1006 			break;
1007 
1008 		//Case TAG_BOLDOFF, TAG_ITALICOFF, TAG_BUNDERLINE, TAG_END
1009 			//nichts tun wg. epsilon-Prod.
1010 	}
1011 }
1012 
PfCase()1013 void TokenParser::PfCase()
1014 {
1015 
1016 	//Produktion:
1017 	//PfCase -> PfCaseBegin Paragraph (PfCase | PfCaseEnd)
1018 
1019 	PfCaseBegin();
1020 
1021 	//Jetzt ist eine PfCase-Produktion aktiv:
1022 	Paragraph();
1023 	switch ( aTag.nId )
1024 	{
1025 		case TAG_ELSE:
1026 		case TAG_END:
1027 			{
1028 				CaseEnd();
1029 			}
1030 			break;
1031 		case TAG_OS2:
1032 		case TAG_WIN:
1033 		case TAG_UNIX:
1034 		case TAG_MAC: //First (PfBegin)
1035 			{
1036 				PfCase();
1037 			}
1038 			break;
1039 		default:
1040 			ParseError( 8, "<#ELSE> or <#END> or platform-tag expected.", aTag );
1041 	}
1042     //Die gemerkten Tags wieder loeschen fuer naechstes PfCase:
1043 	nPfCaseOptions = 0;
1044 }
1045 
PfCaseBegin()1046 void TokenParser::PfCaseBegin()
1047 {
1048 	switch ( aTag.nId )
1049 	{
1050 		case TAG_OS2:
1051 		case TAG_WIN:
1052 		case TAG_UNIX:
1053 		case TAG_MAC:
1054 			{
1055 				//Token darf noch nicht vorgekommen sein im
1056 				//aktuellen Plattform-Case:
1057 				if ( !HAS_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) ) )
1058 				{
1059 					SET_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) );
1060 					match( aTag, aTag );
1061 				}
1062 				else {
1063 					ParseError( 9, "Tag defined twice in the same platform-case", aTag );
1064 				}
1065 			}
1066 	}
1067 }
1068 
AppCase()1069 void TokenParser::AppCase()
1070 {
1071 
1072 	//Produktion:
1073 	//AppCase -> AppCaseBegin Paragraph (AppCase | AppCaseEnd)
1074 
1075 
1076 	AppCaseBegin();
1077 
1078 	Paragraph();
1079 
1080 	switch ( aTag.nId )
1081 	{
1082 		case TAG_ELSE:
1083 		case TAG_END:
1084 			{
1085 				CaseEnd();
1086 			}
1087 			break;
1088 		case TAG_WRITER:
1089 		case TAG_DRAW:
1090 		case TAG_CALC:
1091 		case TAG_IMAGE:
1092 		case TAG_MATH:
1093 		case TAG_CHART:
1094 		case TAG_OFFICE:
1095 		case TAG_IMPRESS:
1096 		case TAG_SCHEDULE: //First (AppBegin)
1097 			{
1098 				AppCase();
1099 			}
1100 			break;
1101 		default:
1102 			ParseError( 1, "<#ELSE> or <#END> or application-case-tag expected.", aTag );
1103 		}
1104 
1105     //Die gemerkten Tags wieder loeschen fuer naechstes AppCase:
1106 	nAppCaseOptions = 0;
1107 }
1108 
AppCaseBegin()1109 void TokenParser::AppCaseBegin()
1110 {
1111 	switch ( aTag.nId )
1112 	{
1113 		case TAG_WRITER:
1114 		case TAG_DRAW:
1115 		case TAG_CALC:
1116 		case TAG_IMAGE:
1117 		case TAG_MATH:
1118 		case TAG_CHART:
1119 		case TAG_OFFICE:
1120 		case TAG_IMPRESS:
1121 		case TAG_SCHEDULE:
1122 			{
1123 				//Token darf noch nicht vorgekommen sein im
1124 				//aktuellen Plattform-Case:
1125 				if ( !HAS_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) ) )
1126 				{
1127 					SET_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) );
1128 					match( aTag, aTag );
1129 				}
1130 				else {
1131 					ParseError( 13, "Tag defined twice in the same application-case.", aTag );
1132 				}
1133 			}
1134 	}
1135 }
1136 
CaseEnd()1137 void TokenParser::CaseEnd()
1138 {
1139 	//Produktion:
1140 	//CaseEnd -> <#ELSE> Paragraph <#END> | <#END>
1141 
1142 	switch ( aTag.nId )
1143 	{
1144 		case TAG_ELSE:
1145 		{
1146 			match( aTag, TAG_ELSE );
1147 			Paragraph();
1148 			match( aTag, TAG_END );
1149 		}
1150 		break;
1151 		case TAG_END:
1152 		{
1153 			match( aTag, TAG_END );
1154 		}
1155 		break;
1156 		default:
1157 			ParseError( 2, "<#ELSE> or <#END> expected.", aTag );
1158 	}
1159 }
1160 
SimpleTag()1161 void TokenParser::SimpleTag()
1162 {
1163 
1164 	switch ( aTag.nId )
1165 	{
1166 		case TAG_HELPID:
1167 			{
1168 				match( aTag, TAG_HELPID );
1169     		}
1170 			break;
1171 		case TAG_OFFICEFULLNAME:
1172 		case TAG_OFFICENAME:
1173 		case TAG_OFFICEPATH:
1174 		case TAG_OFFICEVERSION:
1175 		case TAG_PORTALNAME:
1176 		case TAG_PORTALFULLNAME:
1177 		case TAG_PORTALPATH:
1178 		case TAG_PORTALVERSION:
1179 		case TAG_PORTALSHORTNAME:
1180 
1181         case TAG_REFINSERT:
1182 			{
1183 				match( aTag, aTag );
1184     		}
1185 			break;
1186 		default:
1187 			ParseError( 15, "[<#SimpleTag>] expected.", aTag );
1188 	}
1189 }
1190 
TagPair()1191 void TokenParser::TagPair()
1192 {
1193 	switch ( aTag.nId )
1194 	{
1195 		case TAG_BOLDON:
1196 			{
1197 				match( aTag, TAG_BOLDON );
1198 				Paragraph();
1199 				match( aTag, TAG_BOLDOFF );
1200 			}
1201 			break;
1202 		case TAG_ITALICON:
1203 			{
1204 				match( aTag, TAG_ITALICON );
1205 				Paragraph();
1206 				match( aTag, TAG_ITALICOFF );
1207 			}
1208 			break;
1209 		case TAG_UNDERLINEON:
1210 			{
1211 				match( aTag, TAG_UNDERLINEON );
1212 				Paragraph();
1213 				match( aTag, TAG_UNDERLINEOFF );
1214 			}
1215 			break;
1216 		case TAG_COMMONSTART:
1217 			{
1218                 //remember tag so we can give the original tag in case of an error
1219                 TokenInfo aEndTag( aTag );
1220                 aEndTag.nId = TAG_COMMONEND;
1221 				match( aTag, TAG_COMMONSTART );
1222 				Paragraph();
1223 				match( aTag, aEndTag );
1224 			}
1225 			break;
1226 		default:
1227 			ParseError( 10, "<#BOLD>, <#ITALIC>, <#UNDER> expected.", aTag );
1228 	}
1229 }
1230 
1231 
TagRef()1232 void TokenParser::TagRef()
1233 {
1234 	switch ( aTag.nId )
1235 	{
1236 		case TAG_GRAPHIC:
1237 		case TAG_NEXTVERSION:
1238             {
1239 				if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1240 				{
1241 					TokenId aThisToken = aTag.nId;
1242 					SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1243 					match( aTag, aTag );
1244 					Paragraph();
1245                     if ( aThisToken == TAG_GRAPHIC )
1246     					match( aTag, TAG_ENDGRAPHIC );
1247                     else
1248     					match( aTag, TAG_VERSIONEND );
1249                     // don't reset since alowed only once per paragraph
1250 					// RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1251 				}
1252 				else
1253 				{
1254 					ParseError( 11, "Tags <#GRAPHIC>,<#NEXTVERSION> allowed only once per paragraph at", aTag );
1255 				}
1256             }
1257 			break;
1258 		case TAG_AVIS:
1259 		case TAG_AHID:
1260 			{
1261 				if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1262 				{
1263 					TokenId aThisToken = aTag.nId;
1264 					SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1265 					match( aTag, aTag );
1266 					Paragraph();
1267 					match( aTag, TAG_AEND );
1268 					RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1269 				}
1270 				else
1271 				{
1272 					ParseError( 11, "Nested <#AHID>,<#AVIS> not allowed.", aTag );
1273 				}
1274 			}
1275 			break;
1276 		case TAG_HREF:
1277 		case TAG_NAME:
1278 			{
1279 
1280 			}
1281 			// NOBREAK
1282 		case TAG_KEY:
1283 		case TAG_INDEX:
1284 		case TAG_TITEL:
1285         case TAG_REFSTART:
1286 			{
1287 				if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1288 				{
1289 					TokenId aThisToken = aTag.nId;
1290 					match( aTag, aTag );
1291 					if ( aThisToken != TAG_NAME )
1292 					{	// TAG_NAME has no TAG_END
1293 						SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1294 						Paragraph();
1295 						match( aTag, TAG_END );
1296 						RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1297 					}
1298 				}
1299 				else
1300 				{
1301 					ParseError( 11, "Nested <#HREF>,<#NAME> or <#KEY> not allowed.", aTag );
1302 				}
1303 			}
1304 			break;
1305 		default:
1306 			ParseError( 12, "<#HREF>,<#NAME> or <#KEY> expected.", aTag );
1307 	}
1308 }
1309 
match(const TokenInfo & aCurrentToken,const TokenId & aExpectedToken)1310 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenId &aExpectedToken )
1311 {
1312     return match( aCurrentToken, TokenInfo( aExpectedToken, TOK_INVALIDPOS ) );
1313 }
1314 
match(const TokenInfo & aCurrentToken,const TokenInfo & rExpectedToken)1315 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenInfo &rExpectedToken )
1316 {
1317     TokenInfo aExpectedToken( rExpectedToken );
1318 	if ( aCurrentToken.nId == aExpectedToken.nId )
1319 	{
1320         if ( ( aCurrentToken.nId == TAG_COMMONEND
1321                && aCurrentToken.GetTagName().Equals( aExpectedToken.GetTagName() ) )
1322              || aCurrentToken.nId != TAG_COMMONEND )
1323         {
1324 		    aTag = aParser.GetNextToken( *pErrorList );
1325 		    return sal_True;
1326         }
1327 	}
1328 
1329     if ( aExpectedToken.nId == TAG_COMMONEND )
1330     {
1331         aExpectedToken.aTokenString.Insert( String::CreateFromAscii( "Close tag for " ), 0 );
1332     }
1333 
1334     ByteString sTmp( "Expected Symbol" );
1335     if ( aCurrentToken.nId == TAG_NOMORETAGS )
1336     {
1337 		ParseError( 7, sTmp, aExpectedToken );
1338     }
1339     else
1340     {
1341 		sTmp += ": ";
1342 		sTmp += ByteString( aParser.GetLexem( aExpectedToken ), RTL_TEXTENCODING_UTF8 );
1343 		sTmp += " near ";
1344 		ParseError( 7, sTmp, aCurrentToken );
1345     }
1346 	return sal_False;
1347 }
1348 
ParseError(sal_uInt16 nErrNr,ByteString aErrMsg,const TokenInfo & rTag)1349 void TokenParser::ParseError( sal_uInt16 nErrNr, ByteString aErrMsg, const TokenInfo &rTag )
1350 {
1351 	pErrorList->AddError( nErrNr, aErrMsg, rTag);
1352 
1353     // Das Fehlerhafte Tag ueberspringen
1354 	aTag = aParser.GetNextToken( *pErrorList );
1355 }
1356 
1357 
ParserMessage(sal_uInt16 PnErrorNr,ByteString PaErrorText,const TokenInfo & rTag)1358 ParserMessage::ParserMessage( sal_uInt16 PnErrorNr, ByteString PaErrorText, const TokenInfo &rTag )
1359 		: nErrorNr( PnErrorNr )
1360 		, aErrorText( PaErrorText )
1361 		, nTagBegin( 0 )
1362 		, nTagLength( 0 )
1363 {
1364     String aLexem( SimpleParser::GetLexem( rTag ) );
1365     aErrorText.Append(": ");
1366 	aErrorText += ByteString( aLexem, RTL_TEXTENCODING_UTF8 );
1367     if ( rTag.nId == TAG_NOMORETAGS )
1368         aErrorText.Append(" at end of line ");
1369     else if ( rTag.nPos != TOK_INVALIDPOS )
1370     {
1371         aErrorText.Append(" at Position ");
1372         aErrorText.Append( ByteString::CreateFromInt32( rTag.nPos ) );
1373     }
1374     nTagBegin = rTag.nPos;
1375 	nTagLength = aLexem.Len();
1376 }
1377 
ParserError(sal_uInt16 ErrorNr,ByteString ErrorText,const TokenInfo & rTag)1378 ParserError::ParserError( sal_uInt16 ErrorNr, ByteString ErrorText, const TokenInfo &rTag )
1379 : ParserMessage( ErrorNr, ErrorText, rTag )
1380 {}
1381 
ParserWarning(sal_uInt16 ErrorNr,ByteString ErrorText,const TokenInfo & rTag)1382 ParserWarning::ParserWarning( sal_uInt16 ErrorNr, ByteString ErrorText, const TokenInfo &rTag )
1383 : ParserMessage( ErrorNr, ErrorText, rTag )
1384 {}
1385 
IsTagMandatory(TokenInfo const & aToken,TokenId & aMetaTokens)1386 sal_Bool LingTest::IsTagMandatory( TokenInfo const &aToken, TokenId &aMetaTokens )
1387 {
1388 	TokenId aTokenId = aToken.nId;
1389 	TokenId aTokenGroup = TAG_GROUP( aTokenId );
1390 	if ( TAG_GROUP_PROGSWITCH == aTokenGroup
1391 		|| TAG_REFINSERT == aTokenId
1392 		|| TAG_REFSTART == aTokenId
1393 		|| TAG_NAME == aTokenId
1394 		|| TAG_HREF == aTokenId
1395 		|| TAG_AVIS == aTokenId
1396 		|| TAG_AHID == aTokenId
1397 		|| TAG_GRAPHIC == aTokenId
1398 		|| TAG_NEXTVERSION == aTokenId
1399         || ( TAG_GROUP_META == aTokenGroup && (aMetaTokens & aTokenId) == aTokenId ) )
1400     {
1401         if ( TAG_GROUP_META == aTokenGroup )
1402             aMetaTokens |= aTokenId;
1403         return sal_True;
1404     }
1405     else if (   TAG_COMMONSTART == aTokenId
1406 		     || TAG_COMMONEND == aTokenId )
1407     {
1408         String aTagName = aToken.GetTagName();
1409         return !(aTagName.EqualsIgnoreCaseAscii( "comment" )
1410               || aTagName.EqualsIgnoreCaseAscii( "bookmark_value" )
1411               || aTagName.EqualsIgnoreCaseAscii( "emph" )
1412               || aTagName.EqualsIgnoreCaseAscii( "item" )
1413               || aTagName.EqualsIgnoreCaseAscii( "br" ) );
1414     }
1415     return sal_False;
1416 }
1417 
CheckTags(TokenList & aReference,TokenList & aTestee,sal_Bool bFixTags)1418 void LingTest::CheckTags( TokenList &aReference, TokenList &aTestee, sal_Bool bFixTags )
1419 {
1420 	sal_uLong i=0,j=0;
1421 	// Clean old Warnings
1422 	while ( aCompareWarningList.Count() )
1423 	{
1424 		delete aCompareWarningList.GetCurObject();
1425 		aCompareWarningList.Remove();
1426 	}
1427 
1428     /* in xml tags, do not require the following tags
1429         comment
1430         bookmark_value
1431         emph
1432         item
1433         br
1434     */
1435 
1436     // filter uninteresting Tags
1437     TokenId aMetaTokens = 0;
1438 	for ( i=0 ; i < aReference.Count() ; i++ )
1439 	{
1440 		if ( !IsTagMandatory( aReference.GetObject( i ), aMetaTokens ) )
1441 			aReference.GetObject( i ).SetDone();
1442 	}
1443 
1444     aMetaTokens = 0;
1445 	for ( i=0 ; i < aTestee.Count() ; i++ )
1446 	{
1447 		if ( !IsTagMandatory( aTestee.GetObject( i ), aMetaTokens ) )
1448 			aTestee.GetObject( i ).SetDone();
1449 	}
1450 
1451     // remove all matching tags
1452 	for ( i=0 ; i < aReference.Count() ; i++ )
1453     {
1454         if ( aReference.GetObject( i ).IsDone() )
1455             continue;
1456 
1457     	sal_Bool bTagFound = sal_False;
1458     	for ( j=0 ; j < aTestee.Count() && !bTagFound ; j++ )
1459         {
1460             if ( aTestee.GetObject( j ).IsDone() )
1461                 continue;
1462 
1463 		    if ( aReference.GetObject( i ).MatchesTranslation( aTestee.GetObject( j ), sal_False, aCompareWarningList ) )
1464             {
1465 			    aReference.GetObject( i ).SetDone();
1466 			    aTestee.GetObject( j ).SetDone();
1467                 bTagFound = sal_True;
1468             }
1469         }
1470     }
1471 
1472     sal_Bool bCanFix = sal_True;
1473 
1474     if ( bFixTags )
1475     {
1476         // we fix only if its a really simple case
1477         sal_uInt16 nTagCount = 0;
1478 	    for ( i=0 ; i < aReference.Count() ; i++ )
1479             if ( !aReference.GetObject( i ).IsDone() )
1480                 nTagCount++;
1481         if ( nTagCount > 1 )
1482             bCanFix = sal_False;
1483 
1484         nTagCount = 0;
1485 	    for ( i=0 ; i < aTestee.Count() ; i++ )
1486             if ( !aTestee.GetObject( i ).IsDone() )
1487                 nTagCount++;
1488         if ( nTagCount > 1 )
1489             bCanFix = sal_False;
1490     }
1491 
1492     // generate errors for tags that have differing attributes
1493 	for ( i=0 ; i < aReference.Count() ; i++ )
1494     {
1495         if ( aReference.GetObject( i ).IsDone() )
1496             continue;
1497 
1498     	sal_Bool bTagFound = sal_False;
1499     	for ( j=0 ; j < aTestee.Count() && !bTagFound ; j++ )
1500         {
1501             if ( aTestee.GetObject( j ).IsDone() )
1502                 continue;
1503 
1504 		    if ( aReference.GetObject( i ).MatchesTranslation( aTestee.GetObject( j ), sal_True, aCompareWarningList, bCanFix && bFixTags ) )
1505             {
1506 			    aReference.GetObject( i ).SetDone();
1507 			    aTestee.GetObject( j ).SetDone();
1508                 bTagFound = sal_True;
1509             }
1510         }
1511     }
1512 
1513     // list remaining tags as errors
1514 	for ( i=0 ; i < aReference.Count() ; i++ )
1515     {
1516         if ( aReference.GetObject( i ).IsDone() )
1517             continue;
1518 
1519         aCompareWarningList.AddError( 20, "Missing Tag in Translation", aReference.GetObject( i ) );
1520     }
1521 	for ( i=0 ; i < aTestee.Count() ; i++ )
1522     {
1523         if ( aTestee.GetObject( i ).IsDone() )
1524             continue;
1525 
1526 		aCompareWarningList.AddError( 21, "Extra Tag in Translation", aTestee.GetObject( i ) );
1527     }
1528 
1529 	for ( i=0 ; i < aReference.Count() ; i++ )
1530         aReference.GetObject( i ).SetDone( sal_False );
1531 
1532 	for ( i=0 ; i < aTestee.Count() ; i++ )
1533         aTestee.GetObject( i ).SetDone( sal_False );
1534 }
1535 
CheckReference(GSILine * aReference)1536 void LingTest::CheckReference( GSILine *aReference )
1537 {
1538 	aReferenceParser.Parse( aReference->GetUText(), aReference->GetMessageList() );
1539 }
1540 
CheckTestee(GSILine * aTestee,sal_Bool bHasSourceLine,sal_Bool bFixTags)1541 void LingTest::CheckTestee( GSILine *aTestee, sal_Bool bHasSourceLine, sal_Bool bFixTags )
1542 {
1543     aFixedTestee = aTestee->GetUText();
1544 	aTesteeParser.Parse( aFixedTestee, aTestee->GetMessageList() );
1545 
1546     if ( bHasSourceLine )
1547 	    CheckTags( aReferenceParser.GetTokenList(), aTesteeParser.GetTokenList(), bFixTags );
1548 
1549     if ( bFixTags )
1550     {
1551         TokenList& aTesteeTokens = aTesteeParser.GetTokenList();
1552         sal_Bool bFixesDone = sal_False;
1553         // count backwards to allow replacing from right to left
1554         int i;
1555 	    for ( i=aTesteeTokens.Count()-1 ; i>=0 ; i-- )
1556         {
1557             if ( aTesteeTokens.GetObject( i ).HasBeenFixed() )
1558             {
1559                 bFixesDone = sal_True;
1560                 aFixedTestee.Replace( aTesteeTokens.GetObject( i ).nPos, aTesteeTokens.GetObject( i ).aTokenString.Len(), aTesteeTokens.GetObject( i ).MakeTag() );
1561             }
1562         }
1563         if ( bFixesDone )
1564         {
1565             aTestee->SetUText( aFixedTestee );
1566             aTestee->SetFixed();
1567         }
1568     }
1569 }
1570 
1571