1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_l10ntools.hxx"
26 #include <tools/string.hxx>
27 #include "tagtest.hxx"
28
29 #if OSL_DEBUG_LEVEL > 1
30 #include <stdio.h>
31 #endif
32
33 #include "gsicheck.hxx"
34
// Bit-flag helpers.
// Every macro argument is wrapped in parentheses so that compound expressions
// (for example `A | B`) are treated as one operand; without them
// RESET_FLAG( f, A | B ) would expand to `f &= ~A | B`.
#define HAS_FLAG( nFlags, nFlag ) ( ( ( nFlags ) & ( nFlag ) ) != 0 )   // true if any bit of nFlag is set in nFlags
#define SET_FLAG( nFlags, nFlag ) ( ( nFlags ) |= ( nFlag ) )           // switch the bits of nFlag on
#define RESET_FLAG( nFlags, nFlag ) ( ( nFlags ) &= ~( nFlag ) )        // ~ = bitwise NOT; switch the bits of nFlag off
38
39
40
TokenInfo(TokenId pnId,sal_uInt16 nP,String paStr,ParserMessageList & rErrorList)41 TokenInfo::TokenInfo( TokenId pnId, sal_uInt16 nP, String paStr, ParserMessageList &rErrorList )
42 : bClosed(sal_False)
43 , bCloseTag(sal_False)
44 , bIsBroken(sal_False)
45 , bHasBeenFixed(sal_False)
46 , bDone(sal_False)
47 , aTokenString( paStr )
48 , nId( pnId )
49 , nPos(nP)
50 {
51 if ( nId == TAG_COMMONSTART || nId == TAG_COMMONEND )
52 SplitTag( rErrorList );
53 }
54
// States of the state machine that TokenInfo::SplitTag() uses to split a tag.
enum tagcheck
{
    TC_START,                   // nothing read yet
    TC_HAS_TAG_NAME,            // tag name (or a finished property) read, next property may follow
    TC_HAS_PROP_NAME_EQ,        // property name followed by '='
    TC_HAS_PROP_NAME_EQ_SP,     // property name, '=' and blanks
    TC_HAS_PROP_NAME_SP,        // property name followed by blanks
    TC_INSIDE_STRING,           // inside a quoted property value
    TC_PROP_FINISHED,           // closing quote of a property value read
    TC_CLOSED,                  // '/' read after the tag name or the properties
    TC_CLOSED_SPACE,            // blanks after the '/'
    TC_CLOSETAG,                // '/' read directly at the start (closing tag)
    TC_CLOSETAG_HAS_TAG_NAME,   // name of the closing tag read
    TC_FINISHED,                // final '>' read
    TC_ERROR                    // unexpected delimiter
};
56
57 /*
58 \< link href = \"text\" name = \"C\" \>
59 START ' ' -> HAS_TAG_NAME
60 START '/' -> CLOSED
61 START '/' -> CLOSETAG - no Portion (starting with /)
62 START '>' -> FINISHED
63 HAS_TAG_NAME '=' -> HAS_PROP_NAME_EQ
64 HAS_TAG_NAME ' ' -> HAS_PROP_NAME_SP
65 HAS_TAG_NAME '/' -> CLOSED
66 HAS_TAG_NAME '>' -> FINISHED
67 HAS_PROP_NAME_SP '=' -> HAS_PROP_NAME_EQ
68 HAS_PROP_NAME_EQ ' ' -> HAS_PROP_NAME_EQ_SP
69 HAS_PROP_NAME_EQ '"' -> INSIDE_STRING
70 HAS_PROP_NAME_EQ_SP '"' -> INSIDE_STRING
71 INSIDE_STRING ' ' -> INSIDE_STRING
72 INSIDE_STRING '=' -> INSIDE_STRING
73 INSIDE_STRING '>' -> INSIDE_STRING
74 INSIDE_STRING '"' -> PROP_FINISHED
75 PROP_FINISHED ' ' -> HAS_TAG_NAME
76 PROP_FINISHED '/' -> CLOSED
77 PROP_FINISHED '>' -> FINISHED
78 CLOSED ' ' -> CLOSED_SPACE
79 CLOSED '>' -> FINISHED
80 CLOSED_SPACE '>' -> FINISHED
81
82 CLOSETAG ' ' -> CLOSETAG_HAS_TAG_NAME
83 CLOSETAG '>' -> FINISHED
84 CLOSETAG_HAS_TAG_NAME '>' -> FINISHED
85
86 */
SplitTag(ParserMessageList & rErrorList)87 void TokenInfo::SplitTag( ParserMessageList &rErrorList )
88 {
89 sal_uInt16 nLastPos = 2; // skip initial \<
90 sal_uInt16 nCheckPos = nLastPos;
91 String aDelims( String::CreateFromAscii( " \\=>/" ) );
92 String aPortion;
93 String aValue; // store the value of a property
94 ByteString aName; // store the name of a property/tag
95 sal_Bool bCheckName = sal_False;
96 sal_Bool bCheckEmpty = sal_False;
97 sal_Unicode cDelim;
98 tagcheck aState = TC_START;
99
100 // skip blanks
101 while ( nLastPos < aTokenString.Len() && aTokenString.GetChar( nLastPos ) == ' ')
102 nLastPos++;
103
104 nCheckPos = aTokenString.SearchChar( aDelims.GetBuffer(), nLastPos );
105 while ( nCheckPos != STRING_NOTFOUND && !( aState == TC_FINISHED || aState == TC_ERROR ) )
106 {
107 aPortion = aTokenString.Copy( nLastPos, nCheckPos-nLastPos );
108
109 if ( aTokenString.GetChar( nCheckPos ) == '\\' )
110 nCheckPos++;
111
112 cDelim = aTokenString.GetChar( nCheckPos );
113 nCheckPos++;
114
115 switch ( aState )
116 {
117 // START ' ' -> HAS_TAG_NAME
118 // START '/' -> CLOSED
119 // START '>' -> FINISHED
120 case TC_START:
121 aTagName = aPortion;
122 switch ( cDelim )
123 {
124 case ' ': aState = TC_HAS_TAG_NAME;
125 bCheckName = sal_True;
126 break;
127 case '/':
128 {
129 if ( aPortion.Len() == 0 )
130 {
131 aState = TC_CLOSETAG;
132 }
133 else
134 {
135 aState = TC_CLOSED;
136 bCheckName = sal_True;
137 }
138 }
139 break;
140 case '>': aState = TC_FINISHED;
141 bCheckName = sal_True;
142 break;
143 default: aState = TC_ERROR;
144 }
145 break;
146
147 // HAS_TAG_NAME '=' -> HAS_PROP_NAME_EQ
148 // HAS_TAG_NAME ' ' -> HAS_PROP_NAME_SP
149 // HAS_TAG_NAME '/' -> CLOSED
150 // HAS_TAG_NAME '>' -> FINISHED
151 case TC_HAS_TAG_NAME:
152 switch ( cDelim )
153 {
154 case '=': aState = TC_HAS_PROP_NAME_EQ;
155 bCheckName = sal_True;
156 break;
157 case ' ': aState = TC_HAS_PROP_NAME_SP;
158 bCheckName = sal_True;
159 break;
160 case '/': aState = TC_CLOSED;
161 bCheckEmpty = sal_True;
162 break;
163 case '>': aState = TC_FINISHED;
164 bCheckEmpty = sal_True;
165 break;
166 default: aState = TC_ERROR;
167 }
168 break;
169
170 // HAS_PROP_NAME_SP '=' -> HAS_PROP_NAME_EQ
171 case TC_HAS_PROP_NAME_SP:
172 switch ( cDelim )
173 {
174 case '=': aState = TC_HAS_PROP_NAME_EQ;
175 bCheckEmpty = sal_True;
176 break;
177 default: aState = TC_ERROR;
178 }
179 break;
180
181 // HAS_PROP_NAME_EQ ' ' -> HAS_PROP_NAME_EQ_SP
182 // HAS_PROP_NAME_EQ '"' -> INSIDE_STRING
183 case TC_HAS_PROP_NAME_EQ:
184 switch ( cDelim )
185 {
186 case ' ': aState = TC_HAS_PROP_NAME_EQ_SP;
187 bCheckEmpty = sal_True;
188 break;
189 case '\"': aState = TC_INSIDE_STRING;
190 bCheckEmpty = sal_True;
191 aValue.Erase();
192 break;
193 default: aState = TC_ERROR;
194 }
195 break;
196
197 // HAS_PROP_NAME_EQ_SP '"' -> INSIDE_STRING
198 case TC_HAS_PROP_NAME_EQ_SP:
199 switch ( cDelim )
200 {
201 case '\"': aState = TC_INSIDE_STRING;
202 bCheckEmpty = sal_True;
203 aValue.Erase();
204 break;
205 default: aState = TC_ERROR;
206 }
207 break;
208
209 // INSIDE_STRING * -> INSIDE_STRING
210 // INSIDE_STRING '"' -> PROP_FINISHED
211 case TC_INSIDE_STRING:
212 switch ( cDelim )
213 {
214 case '\"':
215 {
216 aState = TC_PROP_FINISHED;
217 aValue += aPortion;
218 if ( aProperties.find( aName ) == aProperties.end() )
219 {
220 if ( !IsPropertyValueValid( aName, aValue ) )
221 {
222 rErrorList.AddError( 25, ByteString("Property '").Append(aName).Append("' has invalid value '").Append(ByteString( aValue, RTL_TEXTENCODING_UTF8 )).Append("' "), *this );
223 bIsBroken = sal_True;
224 }
225 aProperties[ aName ] = aValue;
226 }
227 else
228 {
229 rErrorList.AddError( 25, ByteString("Property '").Append(aName).Append("' defined twice "), *this );
230 bIsBroken = sal_True;
231 }
232 }
233 break;
234 default:
235 {
236 aState = TC_INSIDE_STRING;
237 aValue += aPortion;
238 aValue += cDelim;
239 }
240 }
241 break;
242
243 // PROP_FINISHED ' ' -> HAS_TAG_NAME
244 // PROP_FINISHED '/' -> CLOSED
245 // PROP_FINISHED '>' -> FINISHED
246 case TC_PROP_FINISHED:
247 switch ( cDelim )
248 {
249 case ' ': aState = TC_HAS_TAG_NAME;
250 bCheckEmpty = sal_True;
251 break;
252 case '/': aState = TC_CLOSED;
253 bCheckEmpty = sal_True;
254 break;
255 case '>': aState = TC_FINISHED;
256 bCheckEmpty = sal_True;
257 break;
258 default: aState = TC_ERROR;
259 }
260 break;
261
262 // CLOSED ' ' -> CLOSED_SPACE
263 // CLOSED '>' -> FINISHED
264 case TC_CLOSED:
265 switch ( cDelim )
266 {
267 case ' ': aState = TC_CLOSED_SPACE;
268 bCheckEmpty = sal_True;
269 bClosed = sal_True;
270 break;
271 case '>': aState = TC_FINISHED;
272 bCheckEmpty = sal_True;
273 break;
274 default: aState = TC_ERROR;
275 }
276 break;
277
278 // CLOSED_SPACE '>' -> FINISHED
279 case TC_CLOSED_SPACE:
280 switch ( cDelim )
281 {
282 case '>': aState = TC_FINISHED;
283 bCheckEmpty = sal_True;
284 break;
285 default: aState = TC_ERROR;
286 }
287 break;
288
289 // CLOSETAG ' ' -> CLOSETAG_HAS_TAG_NAME
290 // CLOSETAG '>' -> FINISHED
291 case TC_CLOSETAG:
292 bCloseTag = sal_True;
293 switch ( cDelim )
294 {
295 case ' ': aState = TC_CLOSETAG_HAS_TAG_NAME;
296 aTagName = aPortion;
297 bCheckName = sal_True;
298 break;
299 case '>': aState = TC_FINISHED;
300 aTagName = aPortion;
301 bCheckName = sal_True;
302 break;
303 default: aState = TC_ERROR;
304 }
305 break;
306
307 // CLOSETAG_HAS_TAG_NAME '>' -> FINISHED
308 case TC_CLOSETAG_HAS_TAG_NAME:
309 switch ( cDelim )
310 {
311 case '>': aState = TC_FINISHED;
312 bCheckEmpty = sal_True;
313 break;
314 default: aState = TC_ERROR;
315 }
316 break;
317
318
319 default: rErrorList.AddError( 99, "Internal error Parsing Tag ", *this );
320 bIsBroken = sal_True;
321
322 }
323
324 if ( bCheckName )
325 {
326 if ( aPortion.Len() == 0 )
327 {
328 rErrorList.AddError( 25, "Tag/Property name missing ", *this );
329 bIsBroken = sal_True;
330 }
331 else
332 {
333 aName = ByteString( aPortion, RTL_TEXTENCODING_UTF8 );
334 // "a-zA-Z_-.0-9"
335 xub_StrLen nCount;
336 sal_Bool bBroken = sal_False;
337 const sal_Char* aBuf = aName.GetBuffer();
338 for ( nCount = 0 ; !bBroken && nCount < aName.Len() ; nCount++ )
339 {
340 bBroken = ! ( ( aBuf[nCount] >= 'a' && aBuf[nCount] <= 'z' )
341 ||( aBuf[nCount] >= 'A' && aBuf[nCount] <= 'Z' )
342 ||( aBuf[nCount] >= '0' && aBuf[nCount] <= '9' )
343 ||( aBuf[nCount] == '_' )
344 ||( aBuf[nCount] == '-' )
345 ||( aBuf[nCount] == '.' )
346 );
347 }
348
349 if ( bBroken )
350 {
351 rErrorList.AddError( 25, "Found illegal character in Tag/Property name ", *this );
352 bIsBroken = sal_True;
353 }
354 }
355
356 bCheckName = sal_False;
357 }
358
359 if ( bCheckEmpty )
360 {
361 if ( aPortion.Len() )
362 {
363 rErrorList.AddError( 25, ByteString("Found displaced characters '").Append(ByteString( aPortion, RTL_TEXTENCODING_UTF8 )).Append("' in Tag "), *this );
364 bIsBroken = sal_True;
365 }
366 bCheckEmpty = sal_False;
367 }
368
369
370 nLastPos = nCheckPos;
371
372 // skip further blanks
373 if ( cDelim == ' ' && aState != TC_INSIDE_STRING )
374 while ( nLastPos < aTokenString.Len() && aTokenString.GetChar( nLastPos ) == ' ')
375 nLastPos++;
376
377 nCheckPos = aTokenString.SearchChar( aDelims.GetBuffer(), nLastPos );
378 }
379 if ( aState != TC_FINISHED )
380 {
381 rErrorList.AddError( 25, "Parsing error in Tag ", *this );
382 bIsBroken = sal_True;
383 }
384 }
385
IsPropertyRelevant(const ByteString & aName,const String & aValue) const386 sal_Bool TokenInfo::IsPropertyRelevant( const ByteString &aName, const String &aValue ) const
387 {
388 if ( aTagName.EqualsAscii( "alt" ) && aName.Equals( "xml-lang" ) )
389 return sal_False;
390 if ( aTagName.EqualsAscii( "ahelp" ) && aName.Equals( "visibility" ) && aValue.EqualsAscii("visible") )
391 return sal_False;
392 if ( aTagName.EqualsAscii( "image" ) && (aName.Equals( "width" ) || aName.Equals( "height" )) )
393 return sal_False;
394
395 return sal_True;
396 }
397
IsPropertyValueValid(const ByteString & aName,const String & aValue) const398 sal_Bool TokenInfo::IsPropertyValueValid( const ByteString &aName, const String &aValue ) const
399 {
400 /* removed due to i56740
401 if ( aTagName.EqualsAscii( "switchinline" ) && aName.Equals( "select" ) )
402 {
403 return aValue.EqualsAscii("sys") ||
404 aValue.EqualsAscii("appl") ||
405 aValue.EqualsAscii("distrib");
406 } */
407 if ( aTagName.EqualsAscii( "caseinline" ) && aName.Equals( "select" ) )
408 {
409 return /*!aValue.EqualsAscii("OS2") && removed due to i56740 */
410 !aValue.EqualsAscii("");
411 }
412
413 // we don't know any better so we assume it to be OK
414 return sal_True;
415 }
416
IsPropertyInvariant(const ByteString & aName,const String & aValue) const417 sal_Bool TokenInfo::IsPropertyInvariant( const ByteString &aName, const String &aValue ) const
418 {
419 if ( aTagName.EqualsAscii( "link" ) && aName.Equals( "name" ) )
420 return sal_False;
421 if ( aTagName.EqualsAscii( "link" ) && aName.Equals( "href" ) )
422 { // check for external reference
423 if ( aValue.Copy( 0, 5 ).EqualsIgnoreCaseAscii( "http:" )
424 || aValue.Copy( 0, 6 ).EqualsIgnoreCaseAscii( "https:" )
425 || aValue.Copy( 0, 4 ).EqualsIgnoreCaseAscii( "ftp:" ) )
426 return sal_False;
427 else
428 return sal_True;
429 }
430 return sal_True;
431 }
432
IsPropertyFixable(const ByteString & aName) const433 sal_Bool TokenInfo::IsPropertyFixable( const ByteString &aName ) const
434 {
435 // name everything that is allowed to be fixed automatically here
436 if ( (aTagName.EqualsAscii( "ahelp" ) && aName.Equals( "hid" ))
437 || (aTagName.EqualsAscii( "link" ) && aName.Equals( "href" ))
438 || (aTagName.EqualsAscii( "alt" ) && aName.Equals( "id" ))
439 || (aTagName.EqualsAscii( "variable" ) && aName.Equals( "id" ))
440 || (aTagName.EqualsAscii( "image" ) && aName.Equals( "src" ))
441 || (aTagName.EqualsAscii( "image" ) && aName.Equals( "id" ) ))
442 return sal_True;
443 return sal_False;
444 }
445
MatchesTranslation(TokenInfo & rInfo,sal_Bool bGenErrors,ParserMessageList & rErrorList,sal_Bool bFixTags) const446 sal_Bool TokenInfo::MatchesTranslation( TokenInfo& rInfo, sal_Bool bGenErrors, ParserMessageList &rErrorList, sal_Bool bFixTags ) const
447 {
448 // check if tags are equal
449 // check if all existing properties are in the translation as well and
450 // whether they have a matching content (the same in most cases)
451
452 if ( nId != rInfo.nId )
453 return sal_False;
454
455 if ( !aTagName.Equals( rInfo.aTagName ) )
456 return sal_False;
457
458 // If one of the tags has formatting errors already it does make no sense to check here, so return right away
459 if ( bGenErrors && ( bIsBroken || rInfo.bIsBroken ) )
460 return sal_True;
461
462 StringHashMap::const_iterator iProp;
463 for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp )
464 {
465 if ( rInfo.aProperties.find( iProp->first ) != rInfo.aProperties.end() )
466 {
467 if ( IsPropertyRelevant( iProp->first, iProp->second ) || IsPropertyRelevant( iProp->first, rInfo.aProperties.find( iProp->first )->second ) )
468 {
469 if ( IsPropertyInvariant( iProp->first, iProp->second ) )
470 {
471 if ( !rInfo.aProperties.find( iProp->first )->second.Equals( iProp->second ) )
472 {
473 if ( bGenErrors )
474 {
475 if ( bFixTags && IsPropertyFixable( iProp->first ) )
476 {
477 rInfo.aProperties.find( iProp->first )->second = iProp->second;
478 rInfo.SetHasBeenFixed();
479 rErrorList.AddWarning( 25, ByteString("Property '").Append(iProp->first).Append("': FIXED different value in Translation "), *this );
480 }
481 else
482 rErrorList.AddError( 25, ByteString("Property '").Append(iProp->first).Append("': value different in Translation "), *this );
483 }
484 else return sal_False;
485 }
486 }
487 }
488 }
489 else
490 {
491 if ( IsPropertyRelevant( iProp->first, iProp->second ) )
492 {
493 if ( bGenErrors )
494 rErrorList.AddError( 25, ByteString("Property '").Append(iProp->first).Append("' missing in Translation "), *this );
495 else return sal_False;
496 }
497 }
498 }
499 for( iProp = rInfo.aProperties.begin() ; iProp != rInfo.aProperties.end(); ++iProp )
500 {
501 if ( aProperties.find( iProp->first ) == aProperties.end() )
502 {
503 if ( IsPropertyRelevant( iProp->first, iProp->second ) )
504 {
505 if ( bGenErrors )
506 rErrorList.AddError( 25, ByteString("Extra Property '").Append(iProp->first).Append("' in Translation "), rInfo );
507 else return sal_False;
508 }
509 }
510 }
511
512 // if we reach here either
513 // the tags match completely or
514 // the tags match but not the properties and we generated errors for that
515 return sal_True;
516 }
517
GetTagName() const518 String TokenInfo::GetTagName() const
519 {
520 return aTagName;
521 }
522
MakeTag() const523 String TokenInfo::MakeTag() const
524 {
525 String aRet;
526 aRet.AppendAscii("\\<");
527 if ( bCloseTag )
528 aRet.AppendAscii("/");
529 aRet.Append( GetTagName() );
530 StringHashMap::const_iterator iProp;
531
532 for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp )
533 {
534 aRet.AppendAscii(" ");
535 aRet.Append( String( iProp->first, RTL_TEXTENCODING_UTF8 ) );
536 aRet.AppendAscii("=\\\"");
537 aRet.Append( iProp->second );
538 aRet.AppendAscii("\\\"");
539 }
540 if ( bClosed )
541 aRet.AppendAscii("/");
542 aRet.AppendAscii("\\>");
543 return aRet;
544 }
545
546
AddError(sal_uInt16 nErrorNr,ByteString aErrorText,const TokenInfo & rTag)547 void ParserMessageList::AddError( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag )
548 {
549 Insert( new ParserError( nErrorNr, aErrorText, rTag ), LIST_APPEND );
550 }
551
AddWarning(sal_uInt16 nErrorNr,ByteString aErrorText,const TokenInfo & rTag)552 void ParserMessageList::AddWarning( sal_uInt16 nErrorNr, ByteString aErrorText, const TokenInfo &rTag )
553 {
554 Insert( new ParserWarning( nErrorNr, aErrorText, rTag ), LIST_APPEND );
555 }
556
HasErrors()557 sal_Bool ParserMessageList::HasErrors()
558 {
559 sal_uInt16 i;
560 for ( i=0 ; i < Count() ; i++ )
561 if ( GetObject( i )->IsError() )
562 return sal_True;
563 return sal_False;
564 }
565
566 struct Tag
567 {
GetNameTag568 String GetName() const { return String::CreateFromAscii( pName ); };
569 const char* pName;
570 TokenId nTag;
571 };
572
573
574 static const Tag aKnownTags[] =
575 {
576 /* commenting oldstyle tags
577 // { "<#GROUP_FORMAT>", TAG_GROUP_FORMAT },
578 { "<#BOLD>", TAG_BOLDON },
579 { "<#/BOLD>", TAG_BOLDOFF },
580 { "<#ITALIC>", TAG_ITALICON },
581 { "<#/ITALIC>", TAG_ITALICOFF },
582 { "<#UNDER>", TAG_UNDERLINEON },
583 { "<#/UNDER>", TAG_UNDERLINEOFF },
584
585 // { "<#GROUP_NOTALLOWED>", TAG_GROUP_NOTALLOWED },
586 { "<#HELPID>", TAG_HELPID },
587 { "<#MODIFY>", TAG_MODIFY },
588 { "<#REFNR>", TAG_REFNR },
589
590 // { "<#GROUP_STRUCTURE>", TAG_GROUP_STRUCTURE },
591 { "<#NAME>", TAG_NAME },
592 { "<#HREF>", TAG_HREF },
593 { "<#AVIS>", TAG_AVIS },
594 { "<#AHID>", TAG_AHID },
595 { "<#AEND>", TAG_AEND },
596
597 { "<#TITEL>", TAG_TITEL },
598 { "<#KEY>", TAG_KEY },
599 { "<#INDEX>", TAG_INDEX },
600
601 { "<#REFSTART>", TAG_REFSTART },
602
603 { "<#GRAPHIC>", TAG_GRAPHIC },
604 { "<#NEXTVERSION>", TAG_NEXTVERSION },
605
606 // { "<#GROUP_SYSSWITCH>", TAG_GROUP_SYSSWITCH },
607 { "<#WIN>", TAG_WIN },
608 { "<#UNIX>", TAG_UNIX },
609 { "<#MAC>", TAG_MAC },
610 { "<#OS2>", TAG_OS2 },
611
612 // { "<#GROUP_PROGSWITCH>", TAG_GROUP_PROGSWITCH },
613 { "<#WRITER>", TAG_WRITER },
614 { "<#CALC>", TAG_CALC },
615 { "<#DRAW>", TAG_DRAW },
616 { "<#IMPRESS>", TAG_IMPRESS },
617 { "<#SCHEDULE>", TAG_SCHEDULE },
618 { "<#IMAGE>", TAG_IMAGE },
619 { "<#MATH>", TAG_MATH },
620 { "<#CHART>", TAG_CHART },
621 { "<#OFFICE>", TAG_OFFICE },
622 */
623 // { "<#TAG_GROUP_META>", TAG_GROUP_META },
624 { "$[officefullname]", TAG_OFFICEFULLNAME },
625 { "$[officename]", TAG_OFFICENAME },
626 { "$[officepath]", TAG_OFFICEPATH },
627 { "$[officeversion]", TAG_OFFICEVERSION },
628 { "$[portalname]", TAG_PORTALNAME },
629 { "$[portalfullname]", TAG_PORTALFULLNAME },
630 { "$[portalpath]", TAG_PORTALPATH },
631 { "$[portalversion]", TAG_PORTALVERSION },
632 { "$[portalshortname]", TAG_PORTALSHORTNAME },
633 /* commenting oldstyle tags
634 // { "<#TAG_GROUP_SINGLE>", TAG_GROUP_SINGLE },
635 { "<#REFINSERT>", TAG_REFINSERT },
636
637 // { "<#GROUP_MULTI>", TAG_GROUP_MULTI },
638 { "<#END>", TAG_END },
639 { "<#ELSE>", TAG_ELSE },
640 { "<#VERSIONEND>", TAG_VERSIONEND },
641 { "<#ENDGRAPHIC>", TAG_ENDGRAPHIC },*/
642 { "<Common Tag>", TAG_COMMONSTART },
643 { "</Common Tag>", TAG_COMMONEND },
644
645 { "<no more tags>", TAG_NOMORETAGS },
646 { "", TAG_UNKNOWN_TAG },
647 };
648
649
SimpleParser()650 SimpleParser::SimpleParser()
651 : nPos( 0 )
652 , aNextTag( TAG_NOMORETAGS, TOK_INVALIDPOS )
653 {
654 }
655
Parse(String PaSource)656 void SimpleParser::Parse( String PaSource )
657 {
658 aSource = PaSource;
659 nPos = 0;
660 aLastToken.Erase();
661 aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
662 aTokenList.Clear();
663 };
664
GetNextToken(ParserMessageList & rErrorList)665 TokenInfo SimpleParser::GetNextToken( ParserMessageList &rErrorList )
666 {
667 TokenInfo aResult;
668 sal_uInt16 nTokenStartPos = 0;
669 if ( aNextTag.nId != TAG_NOMORETAGS )
670 {
671 aResult = aNextTag;
672 aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
673 }
674 else
675 {
676 aLastToken = GetNextTokenString( rErrorList, nTokenStartPos );
677 if ( aLastToken.Len() == 0 )
678 return TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
679
680 // do we have a \< ... \> style tag?
681 if ( aLastToken.Copy(0,2).EqualsAscii( "\\<" ) )
682 {
683 // check for paired \" \"
684 bool bEven = true;
685 sal_uInt16 nQuotePos = 0;
686 sal_uInt16 nQuotedQuotesPos = aLastToken.SearchAscii( "\\\"" );
687 sal_uInt16 nQuotedBackPos = aLastToken.SearchAscii( "\\\\" ); // this is only to kick out quoted backslashes
688 while ( nQuotedQuotesPos != STRING_NOTFOUND )
689 {
690 if ( nQuotedBackPos <= nQuotedQuotesPos )
691 nQuotePos = nQuotedBackPos+2;
692 else
693 {
694 nQuotePos = nQuotedQuotesPos+2;
695 bEven = !bEven;
696 }
697 nQuotedQuotesPos = aLastToken.SearchAscii( "\\\"", nQuotePos );
698 nQuotedBackPos = aLastToken.SearchAscii( "\\\\", nQuotePos ); // this is only to kick out quoted backslashes
699 }
700 if ( !bEven )
701 {
702 rErrorList.AddError( 24, "Missing quotes ( \\\" ) in Tag", TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken ) );
703 }
704
705 // check if we have an end-tag or a start-tag
706 sal_uInt16 nNonBlankStartPos,nNonBlankEndPos;
707 nNonBlankStartPos = 2;
708 while ( aLastToken.GetChar(nNonBlankStartPos) == ' ' )
709 nNonBlankStartPos++;
710 if ( aLastToken.GetChar(nNonBlankStartPos) == '/' )
711 aResult = TokenInfo( TAG_COMMONEND, nTokenStartPos, aLastToken, rErrorList );
712 else
713 {
714 aResult = TokenInfo( TAG_COMMONSTART, nTokenStartPos, aLastToken, rErrorList );
715 nNonBlankEndPos = aLastToken.Len() -3;
716 while ( aLastToken.GetChar(nNonBlankEndPos) == ' ' )
717 nNonBlankEndPos--;
718 if ( aLastToken.GetChar( nNonBlankEndPos ) == '/' )
719 aNextTag = TokenInfo( TAG_COMMONEND, nTokenStartPos, String::CreateFromAscii("\\</").Append(aResult.GetTagName()).AppendAscii("\\>"), rErrorList );
720 }
721 }
722 else
723 {
724 sal_uInt16 i = 0;
725 while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG &&
726 aLastToken != aKnownTags[i].GetName() )
727 i++;
728 aResult = TokenInfo( aKnownTags[i].nTag, nTokenStartPos );
729 }
730 }
731
732 if ( aResult.nId == TAG_UNKNOWN_TAG )
733 aResult = TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken );
734 aTokenList.Insert( aResult, LIST_APPEND );
735 return aResult;
736 }
737
GetNextTokenString(ParserMessageList & rErrorList,sal_uInt16 & rTagStartPos)738 String SimpleParser::GetNextTokenString( ParserMessageList &rErrorList, sal_uInt16 &rTagStartPos )
739 {
740 // sal_uInt16 nStyle1StartPos = aSource.SearchAscii( "<#", nPos );
741 sal_uInt16 nStyle2StartPos = aSource.SearchAscii( "$[", nPos );
742 sal_uInt16 nStyle3StartPos = aSource.SearchAscii( "\\<", nPos );
743 sal_uInt16 nStyle4StartPos = aSource.SearchAscii( "\\\\", nPos ); // this is only to kick out quoted backslashes
744
745 rTagStartPos = 0;
746
747 /* removing since a \<... is not likely
748 // check if the tag starts with a letter to avoid things like <> <= ... >
749 while ( STRING_NOTFOUND != nStyle3StartPos && !( aSource.Copy( nStyle3StartPos+2, 1 ).IsAlphaAscii() || aSource.GetChar( nStyle3StartPos+2 ) == '/' ) )
750 nStyle3StartPos = aSource.SearchAscii( "\\<", nStyle3StartPos+1 );
751 */
752 if ( STRING_NOTFOUND == nStyle2StartPos && STRING_NOTFOUND == nStyle3StartPos )
753 return String(); // no more tokens
754
755 if ( nStyle4StartPos < nStyle2StartPos && nStyle4StartPos <= nStyle3StartPos ) // <= to make sure \\ is always handled first
756 { // Skip quoted Backslash
757 nPos = nStyle4StartPos +2;
758 return GetNextTokenString( rErrorList, rTagStartPos );
759 }
760
761 /* if ( nStyle1StartPos < nStyle2StartPos && nStyle1StartPos <= nStyle3StartPos ) // <= to make sure our special tags are recognized before all others
762 { // test for <# ... > style tokens
763 sal_uInt16 nEndPos = aSource.SearchAscii( ">", nStyle1StartPos );
764 if ( nEndPos == STRING_NOTFOUND )
765 { // Token is incomplete. Skip start and search for better ones
766 nPos = nStyle1StartPos +2;
767 return GetNextTokenString( rErrorList, rTagStartPos );
768 }
769 nPos = nEndPos;
770 rTagStartPos = nStyle1StartPos;
771 return aSource.Copy( nStyle1StartPos, nEndPos-nStyle1StartPos +1 ).ToUpperAscii();
772 }
773 else*/ if ( nStyle2StartPos < nStyle3StartPos )
774 { // test for $[ ... ] style tokens
775 sal_uInt16 nEndPos = aSource.SearchAscii( "]", nStyle2StartPos);
776 if ( nEndPos == STRING_NOTFOUND )
777 { // Token is incomplete. Skip start and search for better ones
778 nPos = nStyle2StartPos +2;
779 return GetNextTokenString( rErrorList, rTagStartPos );
780 }
781 nPos = nEndPos;
782 rTagStartPos = nStyle2StartPos;
783 return aSource.Copy( nStyle2StartPos, nEndPos-nStyle2StartPos +1 );
784 }
785 else
786 { // test for \< ... \> style tokens
787 sal_uInt16 nEndPos = aSource.SearchAscii( "\\>", nStyle3StartPos);
788 sal_uInt16 nQuotedBackPos = aSource.SearchAscii( "\\\\", nStyle3StartPos ); // this is only to kick out quoted backslashes
789 while ( nQuotedBackPos <= nEndPos && nQuotedBackPos != STRING_NOTFOUND )
790 {
791 nEndPos = aSource.SearchAscii( "\\>", nQuotedBackPos +2);
792 nQuotedBackPos = aSource.SearchAscii( "\\\\", nQuotedBackPos +2 ); // this is only to kick out quoted backslashes
793 }
794 if ( nEndPos == STRING_NOTFOUND )
795 { // Token is incomplete. Skip start and search for better ones
796 nPos = nStyle3StartPos +2;
797 ByteString sTmp( "Tag Start '\\<' without Tag End '\\>': " );
798 rErrorList.AddError( 24, "Tag Start '\\<' without Tag End '\\>'", TokenInfo( TAG_UNKNOWN_TAG, nStyle3StartPos, aSource.Copy( nStyle3StartPos-10, 20 ) ) );
799 return GetNextTokenString( rErrorList, rTagStartPos );
800 }
801 // check for paired quoted " --> \"sometext\"
802
803 nPos = nEndPos;
804 rTagStartPos = nStyle3StartPos;
805 return aSource.Copy( nStyle3StartPos, nEndPos-nStyle3StartPos +2 );
806 }
807 }
808
GetLexem(TokenInfo const & aToken)809 String SimpleParser::GetLexem( TokenInfo const &aToken )
810 {
811 if ( aToken.aTokenString.Len() )
812 return aToken.aTokenString;
813 else
814 {
815 sal_uInt16 i = 0;
816 while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG &&
817 aKnownTags[i].nTag != aToken.nId )
818 i++;
819
820 return aKnownTags[i].GetName();
821 }
822 }
823
TokenParser()824 TokenParser::TokenParser()
825 : pErrorList( NULL )
826 {}
827
Parse(const String & aCode,ParserMessageList * pList)828 void TokenParser::Parse( const String &aCode, ParserMessageList* pList )
829 {
830 pErrorList = pList;
831
832 //Scanner initialisieren
833 aParser.Parse( aCode );
834
835 //erstes Symbol holen
836 aTag = aParser.GetNextToken( *pErrorList );
837
838 nPfCaseOptions = 0;
839 nAppCaseOptions = 0;
840 bPfCaseActive = sal_False;
841 bAppCaseActive = sal_False;
842
843 nActiveRefTypes = 0;
844
845 //Ausfuehren der Start-Produktion
846 Paragraph();
847
848 //Es wurde nicht die ganze Kette abgearbeitet, bisher ist aber
849 //kein Fehler aufgetreten
850 //=> es wurde ein einleitendes Tag vergessen
851 if ( aTag.nId != TAG_NOMORETAGS )
852 {
853 switch ( aTag.nId )
854 {
855 case TAG_END:
856 {
857 ParseError( 3, "Extra Tag <#END>. Switch or <#HREF> expected.", aTag );
858 }
859 break;
860 case TAG_BOLDOFF:
861 {
862 ParseError( 4, "<#BOLD> expected before <#/BOLD>.", aTag );
863 }
864 break;
865 case TAG_ITALICOFF:
866 {
867 ParseError( 5, "<#ITALIC> expected before <#/ITALIC>.", aTag );
868 }
869 break;
870 case TAG_UNDERLINEOFF:
871 {
872 ParseError( 17, "<#UNDER> expected before <#/UNDER>.", aTag );
873 }
874 break;
875 /* case TAG_MISSPARENTHESIS:
876 {
877 ParseError( 14, "missing closing parenthesis '>'", aTag );
878 }
879 break;*/
880 case TAG_AEND:
881 {
882 ParseError( 5, "Extra Tag <#AEND>. <#AVIS> or <#AHID> expected.", aTag );
883 }
884 break;
885 case TAG_ELSE:
886 {
887 ParseError( 16, "Application-tag or platform-tag expected before <#ELSE>.", aTag );
888 }
889 break;
890 case TAG_UNKNOWN_TAG:
891 {
892 ParseError( 6, "unknown Tag", aTag );
893 }
894 break;
895 default:
896 {
897 ParseError( 6, "unexpected Tag", aTag );
898 }
899 }
900 }
901 pErrorList = NULL;
902 }
903
Paragraph()904 void TokenParser::Paragraph()
905 {
906 switch ( aTag.nId )
907 {
908 case TAG_GRAPHIC:
909 case TAG_NEXTVERSION:
910 {
911 TagRef();
912 Paragraph();
913 }
914 break;
915 case TAG_AVIS:
916 case TAG_AHID:
917 {
918 TagRef();
919 Paragraph();
920 }
921 break;
922 case TAG_HELPID:
923 {
924 SimpleTag();
925 Paragraph();
926 }
927 break;
928 case TAG_OFFICEFULLNAME:
929 case TAG_OFFICENAME:
930 case TAG_OFFICEPATH:
931 case TAG_OFFICEVERSION:
932 case TAG_PORTALNAME:
933 case TAG_PORTALFULLNAME:
934 case TAG_PORTALPATH:
935 case TAG_PORTALVERSION:
936 case TAG_PORTALSHORTNAME:
937 {
938 SimpleTag();
939 Paragraph();
940 }
941 break;
942 case TAG_REFINSERT:
943 {
944 SimpleTag();
945 Paragraph();
946 }
947 break;
948 case TAG_BOLDON:
949 case TAG_ITALICON:
950 case TAG_UNDERLINEON:
951 case TAG_COMMONSTART:
952 {
953 TagPair();
954 Paragraph();
955 }
956 break;
957 case TAG_HREF:
958 case TAG_NAME:
959 case TAG_KEY:
960 case TAG_INDEX:
961 case TAG_TITEL:
962 case TAG_REFSTART:
963 {
964 TagRef();
965 Paragraph();
966 }
967 break;
968 case TAG_OS2:
969 case TAG_WIN:
970 case TAG_UNIX:
971 case TAG_MAC: //...
972 {
973 if ( ! bPfCaseActive )
974 {
975 //PfCases duerfen nicht verschachtelt sein:
976 bPfCaseActive = sal_True;
977 PfCase();
978
979 //So jetzt kann wieder ein PfCase kommen:
980 bPfCaseActive = sal_False;
981 Paragraph();
982 }
983 }
984 break;
985 case TAG_WRITER:
986 case TAG_CALC:
987 case TAG_DRAW:
988 case TAG_IMPRESS:
989 case TAG_SCHEDULE:
990 case TAG_IMAGE:
991 case TAG_MATH:
992 case TAG_CHART:
993 case TAG_OFFICE:
994 {
995 if ( !bAppCaseActive )
996 {
997 //AppCases duerfen nicht verschachtelt sein:
998 bAppCaseActive = sal_True;
999 AppCase();
1000
1001 //jetzt koennen wieder AppCases kommen:
1002 bAppCaseActive = sal_False;
1003 Paragraph();
1004 }
1005 }
1006 break;
1007
1008 //Case TAG_BOLDOFF, TAG_ITALICOFF, TAG_BUNDERLINE, TAG_END
1009 //nichts tun wg. epsilon-Prod.
1010 }
1011 }
1012
PfCase()1013 void TokenParser::PfCase()
1014 {
1015
1016 //Produktion:
1017 //PfCase -> PfCaseBegin Paragraph (PfCase | PfCaseEnd)
1018
1019 PfCaseBegin();
1020
1021 //Jetzt ist eine PfCase-Produktion aktiv:
1022 Paragraph();
1023 switch ( aTag.nId )
1024 {
1025 case TAG_ELSE:
1026 case TAG_END:
1027 {
1028 CaseEnd();
1029 }
1030 break;
1031 case TAG_OS2:
1032 case TAG_WIN:
1033 case TAG_UNIX:
1034 case TAG_MAC: //First (PfBegin)
1035 {
1036 PfCase();
1037 }
1038 break;
1039 default:
1040 ParseError( 8, "<#ELSE> or <#END> or platform-tag expected.", aTag );
1041 }
1042 //Die gemerkten Tags wieder loeschen fuer naechstes PfCase:
1043 nPfCaseOptions = 0;
1044 }
1045
PfCaseBegin()1046 void TokenParser::PfCaseBegin()
1047 {
1048 switch ( aTag.nId )
1049 {
1050 case TAG_OS2:
1051 case TAG_WIN:
1052 case TAG_UNIX:
1053 case TAG_MAC:
1054 {
1055 //Token darf noch nicht vorgekommen sein im
1056 //aktuellen Plattform-Case:
1057 if ( !HAS_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) ) )
1058 {
1059 SET_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) );
1060 match( aTag, aTag );
1061 }
1062 else {
1063 ParseError( 9, "Tag defined twice in the same platform-case", aTag );
1064 }
1065 }
1066 }
1067 }
1068
AppCase()1069 void TokenParser::AppCase()
1070 {
1071
1072 //Produktion:
1073 //AppCase -> AppCaseBegin Paragraph (AppCase | AppCaseEnd)
1074
1075
1076 AppCaseBegin();
1077
1078 Paragraph();
1079
1080 switch ( aTag.nId )
1081 {
1082 case TAG_ELSE:
1083 case TAG_END:
1084 {
1085 CaseEnd();
1086 }
1087 break;
1088 case TAG_WRITER:
1089 case TAG_DRAW:
1090 case TAG_CALC:
1091 case TAG_IMAGE:
1092 case TAG_MATH:
1093 case TAG_CHART:
1094 case TAG_OFFICE:
1095 case TAG_IMPRESS:
1096 case TAG_SCHEDULE: //First (AppBegin)
1097 {
1098 AppCase();
1099 }
1100 break;
1101 default:
1102 ParseError( 1, "<#ELSE> or <#END> or application-case-tag expected.", aTag );
1103 }
1104
1105 //Die gemerkten Tags wieder loeschen fuer naechstes AppCase:
1106 nAppCaseOptions = 0;
1107 }
1108
AppCaseBegin()1109 void TokenParser::AppCaseBegin()
1110 {
1111 switch ( aTag.nId )
1112 {
1113 case TAG_WRITER:
1114 case TAG_DRAW:
1115 case TAG_CALC:
1116 case TAG_IMAGE:
1117 case TAG_MATH:
1118 case TAG_CHART:
1119 case TAG_OFFICE:
1120 case TAG_IMPRESS:
1121 case TAG_SCHEDULE:
1122 {
1123 //Token darf noch nicht vorgekommen sein im
1124 //aktuellen Plattform-Case:
1125 if ( !HAS_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) ) )
1126 {
1127 SET_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) );
1128 match( aTag, aTag );
1129 }
1130 else {
1131 ParseError( 13, "Tag defined twice in the same application-case.", aTag );
1132 }
1133 }
1134 }
1135 }
1136
CaseEnd()1137 void TokenParser::CaseEnd()
1138 {
1139 //Produktion:
1140 //CaseEnd -> <#ELSE> Paragraph <#END> | <#END>
1141
1142 switch ( aTag.nId )
1143 {
1144 case TAG_ELSE:
1145 {
1146 match( aTag, TAG_ELSE );
1147 Paragraph();
1148 match( aTag, TAG_END );
1149 }
1150 break;
1151 case TAG_END:
1152 {
1153 match( aTag, TAG_END );
1154 }
1155 break;
1156 default:
1157 ParseError( 2, "<#ELSE> or <#END> expected.", aTag );
1158 }
1159 }
1160
SimpleTag()1161 void TokenParser::SimpleTag()
1162 {
1163
1164 switch ( aTag.nId )
1165 {
1166 case TAG_HELPID:
1167 {
1168 match( aTag, TAG_HELPID );
1169 }
1170 break;
1171 case TAG_OFFICEFULLNAME:
1172 case TAG_OFFICENAME:
1173 case TAG_OFFICEPATH:
1174 case TAG_OFFICEVERSION:
1175 case TAG_PORTALNAME:
1176 case TAG_PORTALFULLNAME:
1177 case TAG_PORTALPATH:
1178 case TAG_PORTALVERSION:
1179 case TAG_PORTALSHORTNAME:
1180
1181 case TAG_REFINSERT:
1182 {
1183 match( aTag, aTag );
1184 }
1185 break;
1186 default:
1187 ParseError( 15, "[<#SimpleTag>] expected.", aTag );
1188 }
1189 }
1190
TagPair()1191 void TokenParser::TagPair()
1192 {
1193 switch ( aTag.nId )
1194 {
1195 case TAG_BOLDON:
1196 {
1197 match( aTag, TAG_BOLDON );
1198 Paragraph();
1199 match( aTag, TAG_BOLDOFF );
1200 }
1201 break;
1202 case TAG_ITALICON:
1203 {
1204 match( aTag, TAG_ITALICON );
1205 Paragraph();
1206 match( aTag, TAG_ITALICOFF );
1207 }
1208 break;
1209 case TAG_UNDERLINEON:
1210 {
1211 match( aTag, TAG_UNDERLINEON );
1212 Paragraph();
1213 match( aTag, TAG_UNDERLINEOFF );
1214 }
1215 break;
1216 case TAG_COMMONSTART:
1217 {
1218 // remember tag so we can give the original tag in case of an error
1219 TokenInfo aEndTag( aTag );
1220 aEndTag.nId = TAG_COMMONEND;
1221 match( aTag, TAG_COMMONSTART );
1222 Paragraph();
1223 match( aTag, aEndTag );
1224 }
1225 break;
1226 default:
1227 ParseError( 10, "<#BOLD>, <#ITALIC>, <#UNDER> expected.", aTag );
1228 }
1229 }
1230
1231
TagRef()1232 void TokenParser::TagRef()
1233 {
1234 switch ( aTag.nId )
1235 {
1236 case TAG_GRAPHIC:
1237 case TAG_NEXTVERSION:
1238 {
1239 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1240 {
1241 TokenId aThisToken = aTag.nId;
1242 SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1243 match( aTag, aTag );
1244 Paragraph();
1245 if ( aThisToken == TAG_GRAPHIC )
1246 match( aTag, TAG_ENDGRAPHIC );
1247 else
1248 match( aTag, TAG_VERSIONEND );
1249 // don't reset since allowed only once per paragraph
1250 // RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1251 }
1252 else
1253 {
1254 ParseError( 11, "Tags <#GRAPHIC>,<#NEXTVERSION> allowed only once per paragraph at", aTag );
1255 }
1256 }
1257 break;
1258 case TAG_AVIS:
1259 case TAG_AHID:
1260 {
1261 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1262 {
1263 TokenId aThisToken = aTag.nId;
1264 SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1265 match( aTag, aTag );
1266 Paragraph();
1267 match( aTag, TAG_AEND );
1268 RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1269 }
1270 else
1271 {
1272 ParseError( 11, "Nested <#AHID>,<#AVIS> not allowed.", aTag );
1273 }
1274 }
1275 break;
1276 case TAG_HREF:
1277 case TAG_NAME:
1278 {
1279
1280 }
1281 // NOBREAK
1282 case TAG_KEY:
1283 case TAG_INDEX:
1284 case TAG_TITEL:
1285 case TAG_REFSTART:
1286 {
1287 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1288 {
1289 TokenId aThisToken = aTag.nId;
1290 match( aTag, aTag );
1291 if ( aThisToken != TAG_NAME )
1292 { // TAG_NAME has no TAG_END
1293 SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1294 Paragraph();
1295 match( aTag, TAG_END );
1296 RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1297 }
1298 }
1299 else
1300 {
1301 ParseError( 11, "Nested <#HREF>,<#NAME> or <#KEY> not allowed.", aTag );
1302 }
1303 }
1304 break;
1305 default:
1306 ParseError( 12, "<#HREF>,<#NAME> or <#KEY> expected.", aTag );
1307 }
1308 }
1309
match(const TokenInfo & aCurrentToken,const TokenId & aExpectedToken)1310 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenId &aExpectedToken )
1311 {
1312 return match( aCurrentToken, TokenInfo( aExpectedToken, TOK_INVALIDPOS ) );
1313 }
1314
match(const TokenInfo & aCurrentToken,const TokenInfo & rExpectedToken)1315 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenInfo &rExpectedToken )
1316 {
1317 TokenInfo aExpectedToken( rExpectedToken );
1318 if ( aCurrentToken.nId == aExpectedToken.nId )
1319 {
1320 if ( ( aCurrentToken.nId == TAG_COMMONEND
1321 && aCurrentToken.GetTagName().Equals( aExpectedToken.GetTagName() ) )
1322 || aCurrentToken.nId != TAG_COMMONEND )
1323 {
1324 aTag = aParser.GetNextToken( *pErrorList );
1325 return sal_True;
1326 }
1327 }
1328
1329 if ( aExpectedToken.nId == TAG_COMMONEND )
1330 {
1331 aExpectedToken.aTokenString.Insert( String::CreateFromAscii( "Close tag for " ), 0 );
1332 }
1333
1334 ByteString sTmp( "Expected Symbol" );
1335 if ( aCurrentToken.nId == TAG_NOMORETAGS )
1336 {
1337 ParseError( 7, sTmp, aExpectedToken );
1338 }
1339 else
1340 {
1341 sTmp += ": ";
1342 sTmp += ByteString( aParser.GetLexem( aExpectedToken ), RTL_TEXTENCODING_UTF8 );
1343 sTmp += " near ";
1344 ParseError( 7, sTmp, aCurrentToken );
1345 }
1346 return sal_False;
1347 }
1348
ParseError(sal_uInt16 nErrNr,ByteString aErrMsg,const TokenInfo & rTag)1349 void TokenParser::ParseError( sal_uInt16 nErrNr, ByteString aErrMsg, const TokenInfo &rTag )
1350 {
1351 pErrorList->AddError( nErrNr, aErrMsg, rTag);
1352
1353 // Das Fehlerhafte Tag ueberspringen
1354 aTag = aParser.GetNextToken( *pErrorList );
1355 }
1356
1357
ParserMessage(sal_uInt16 PnErrorNr,ByteString PaErrorText,const TokenInfo & rTag)1358 ParserMessage::ParserMessage( sal_uInt16 PnErrorNr, ByteString PaErrorText, const TokenInfo &rTag )
1359 : nErrorNr( PnErrorNr )
1360 , aErrorText( PaErrorText )
1361 , nTagBegin( 0 )
1362 , nTagLength( 0 )
1363 {
1364 String aLexem( SimpleParser::GetLexem( rTag ) );
1365 aErrorText.Append(": ");
1366 aErrorText += ByteString( aLexem, RTL_TEXTENCODING_UTF8 );
1367 if ( rTag.nId == TAG_NOMORETAGS )
1368 aErrorText.Append(" at end of line ");
1369 else if ( rTag.nPos != TOK_INVALIDPOS )
1370 {
1371 aErrorText.Append(" at Position ");
1372 aErrorText.Append( ByteString::CreateFromInt32( rTag.nPos ) );
1373 }
1374 nTagBegin = rTag.nPos;
1375 nTagLength = aLexem.Len();
1376 }
1377
ParserError(sal_uInt16 ErrorNr,ByteString ErrorText,const TokenInfo & rTag)1378 ParserError::ParserError( sal_uInt16 ErrorNr, ByteString ErrorText, const TokenInfo &rTag )
1379 : ParserMessage( ErrorNr, ErrorText, rTag )
1380 {}
1381
ParserWarning(sal_uInt16 ErrorNr,ByteString ErrorText,const TokenInfo & rTag)1382 ParserWarning::ParserWarning( sal_uInt16 ErrorNr, ByteString ErrorText, const TokenInfo &rTag )
1383 : ParserMessage( ErrorNr, ErrorText, rTag )
1384 {}
1385
IsTagMandatory(TokenInfo const & aToken,TokenId & aMetaTokens)1386 sal_Bool LingTest::IsTagMandatory( TokenInfo const &aToken, TokenId &aMetaTokens )
1387 {
1388 TokenId aTokenId = aToken.nId;
1389 TokenId aTokenGroup = TAG_GROUP( aTokenId );
1390 if ( TAG_GROUP_PROGSWITCH == aTokenGroup
1391 || TAG_REFINSERT == aTokenId
1392 || TAG_REFSTART == aTokenId
1393 || TAG_NAME == aTokenId
1394 || TAG_HREF == aTokenId
1395 || TAG_AVIS == aTokenId
1396 || TAG_AHID == aTokenId
1397 || TAG_GRAPHIC == aTokenId
1398 || TAG_NEXTVERSION == aTokenId
1399 || ( TAG_GROUP_META == aTokenGroup && (aMetaTokens & aTokenId) == aTokenId ) )
1400 {
1401 if ( TAG_GROUP_META == aTokenGroup )
1402 aMetaTokens |= aTokenId;
1403 return sal_True;
1404 }
1405 else if ( TAG_COMMONSTART == aTokenId
1406 || TAG_COMMONEND == aTokenId )
1407 {
1408 String aTagName = aToken.GetTagName();
1409 return !(aTagName.EqualsIgnoreCaseAscii( "comment" )
1410 || aTagName.EqualsIgnoreCaseAscii( "bookmark_value" )
1411 || aTagName.EqualsIgnoreCaseAscii( "emph" )
1412 || aTagName.EqualsIgnoreCaseAscii( "item" )
1413 || aTagName.EqualsIgnoreCaseAscii( "br" ) );
1414 }
1415 return sal_False;
1416 }
1417
CheckTags(TokenList & aReference,TokenList & aTestee,sal_Bool bFixTags)1418 void LingTest::CheckTags( TokenList &aReference, TokenList &aTestee, sal_Bool bFixTags )
1419 {
1420 sal_uLong i=0,j=0;
1421 // Clean old Warnings
1422 while ( aCompareWarningList.Count() )
1423 {
1424 delete aCompareWarningList.GetCurObject();
1425 aCompareWarningList.Remove();
1426 }
1427
1428 /* in xml tags, do not require the following tags
1429 comment
1430 bookmark_value
1431 emph
1432 item
1433 br
1434 */
1435
1436 // filter uninteresting Tags
1437 TokenId aMetaTokens = 0;
1438 for ( i=0 ; i < aReference.Count() ; i++ )
1439 {
1440 if ( !IsTagMandatory( aReference.GetObject( i ), aMetaTokens ) )
1441 aReference.GetObject( i ).SetDone();
1442 }
1443
1444 aMetaTokens = 0;
1445 for ( i=0 ; i < aTestee.Count() ; i++ )
1446 {
1447 if ( !IsTagMandatory( aTestee.GetObject( i ), aMetaTokens ) )
1448 aTestee.GetObject( i ).SetDone();
1449 }
1450
1451 // remove all matching tags
1452 for ( i=0 ; i < aReference.Count() ; i++ )
1453 {
1454 if ( aReference.GetObject( i ).IsDone() )
1455 continue;
1456
1457 sal_Bool bTagFound = sal_False;
1458 for ( j=0 ; j < aTestee.Count() && !bTagFound ; j++ )
1459 {
1460 if ( aTestee.GetObject( j ).IsDone() )
1461 continue;
1462
1463 if ( aReference.GetObject( i ).MatchesTranslation( aTestee.GetObject( j ), sal_False, aCompareWarningList ) )
1464 {
1465 aReference.GetObject( i ).SetDone();
1466 aTestee.GetObject( j ).SetDone();
1467 bTagFound = sal_True;
1468 }
1469 }
1470 }
1471
1472 sal_Bool bCanFix = sal_True;
1473
1474 if ( bFixTags )
1475 {
1476 // we fix only if its a really simple case
1477 sal_uInt16 nTagCount = 0;
1478 for ( i=0 ; i < aReference.Count() ; i++ )
1479 if ( !aReference.GetObject( i ).IsDone() )
1480 nTagCount++;
1481 if ( nTagCount > 1 )
1482 bCanFix = sal_False;
1483
1484 nTagCount = 0;
1485 for ( i=0 ; i < aTestee.Count() ; i++ )
1486 if ( !aTestee.GetObject( i ).IsDone() )
1487 nTagCount++;
1488 if ( nTagCount > 1 )
1489 bCanFix = sal_False;
1490 }
1491
1492 // generate errors for tags that have differing attributes
1493 for ( i=0 ; i < aReference.Count() ; i++ )
1494 {
1495 if ( aReference.GetObject( i ).IsDone() )
1496 continue;
1497
1498 sal_Bool bTagFound = sal_False;
1499 for ( j=0 ; j < aTestee.Count() && !bTagFound ; j++ )
1500 {
1501 if ( aTestee.GetObject( j ).IsDone() )
1502 continue;
1503
1504 if ( aReference.GetObject( i ).MatchesTranslation( aTestee.GetObject( j ), sal_True, aCompareWarningList, bCanFix && bFixTags ) )
1505 {
1506 aReference.GetObject( i ).SetDone();
1507 aTestee.GetObject( j ).SetDone();
1508 bTagFound = sal_True;
1509 }
1510 }
1511 }
1512
1513 // list remaining tags as errors
1514 for ( i=0 ; i < aReference.Count() ; i++ )
1515 {
1516 if ( aReference.GetObject( i ).IsDone() )
1517 continue;
1518
1519 aCompareWarningList.AddError( 20, "Missing Tag in Translation", aReference.GetObject( i ) );
1520 }
1521 for ( i=0 ; i < aTestee.Count() ; i++ )
1522 {
1523 if ( aTestee.GetObject( i ).IsDone() )
1524 continue;
1525
1526 aCompareWarningList.AddError( 21, "Extra Tag in Translation", aTestee.GetObject( i ) );
1527 }
1528
1529 for ( i=0 ; i < aReference.Count() ; i++ )
1530 aReference.GetObject( i ).SetDone( sal_False );
1531
1532 for ( i=0 ; i < aTestee.Count() ; i++ )
1533 aTestee.GetObject( i ).SetDone( sal_False );
1534 }
1535
CheckReference(GSILine * aReference)1536 void LingTest::CheckReference( GSILine *aReference )
1537 {
1538 aReferenceParser.Parse( aReference->GetUText(), aReference->GetMessageList() );
1539 }
1540
CheckTestee(GSILine * aTestee,sal_Bool bHasSourceLine,sal_Bool bFixTags)1541 void LingTest::CheckTestee( GSILine *aTestee, sal_Bool bHasSourceLine, sal_Bool bFixTags )
1542 {
1543 aFixedTestee = aTestee->GetUText();
1544 aTesteeParser.Parse( aFixedTestee, aTestee->GetMessageList() );
1545
1546 if ( bHasSourceLine )
1547 CheckTags( aReferenceParser.GetTokenList(), aTesteeParser.GetTokenList(), bFixTags );
1548
1549 if ( bFixTags )
1550 {
1551 TokenList& aTesteeTokens = aTesteeParser.GetTokenList();
1552 sal_Bool bFixesDone = sal_False;
1553 // count backwards to allow replacing from right to left
1554 int i;
1555 for ( i=aTesteeTokens.Count()-1 ; i>=0 ; i-- )
1556 {
1557 if ( aTesteeTokens.GetObject( i ).HasBeenFixed() )
1558 {
1559 bFixesDone = sal_True;
1560 aFixedTestee.Replace( aTesteeTokens.GetObject( i ).nPos, aTesteeTokens.GetObject( i ).aTokenString.Len(), aTesteeTokens.GetObject( i ).MakeTag() );
1561 }
1562 }
1563 if ( bFixesDone )
1564 {
1565 aTestee->SetUText( aFixedTestee );
1566 aTestee->SetFixed();
1567 }
1568 }
1569 }
1570
1571
1572