xref: /trunk/main/l10ntools/source/gsicheck.cxx (revision 3cd96b95)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_l10ntools.hxx"
26 #include <stdio.h>
27 #include <tools/fsys.hxx>
28 #include <tools/stream.hxx>
29 #include <tools/list.hxx>
30 
31 // local includes
32 #include "tagtest.hxx"
33 #include "gsicheck.hxx"
34 
35 #define MAX_GID_LID_LEN 250
36 
37 /*****************************************************************************/
38 void PrintMessage( ByteString aType, ByteString aMsg, ByteString aPrefix,
39 	ByteString aContext, sal_Bool bPrintContext, sal_uLong nLine, ByteString aUniqueId = ByteString() )
40 /*****************************************************************************/
41 {
42 	fprintf( stdout, "%s %s, Line %lu", aType.GetBuffer(), aPrefix.GetBuffer(), nLine );
43 	if ( aUniqueId.Len() )
44 		fprintf( stdout, ", UniqueID %s", aUniqueId.GetBuffer() );
45 	fprintf( stdout, ": %s", aMsg.GetBuffer() );
46 
47 	if ( bPrintContext )
48 		fprintf( stdout, "  \"%s\"", aContext.GetBuffer() );
49 	fprintf( stdout, "\n" );
50 }
51 
52 /*****************************************************************************/
53 void PrintError( ByteString aMsg, ByteString aPrefix,
54 	ByteString aContext, sal_Bool bPrintContext, sal_uLong nLine, ByteString aUniqueId = ByteString() )
55 /*****************************************************************************/
56 {
57     PrintMessage( "Error:", aMsg, aPrefix, aContext, bPrintContext, nLine, aUniqueId );
58 }
59 
60 sal_Bool LanguageOK( ByteString aLang )
61 {
62     if ( !aLang.Len() )
63         return sal_False;
64 
65     if ( aLang.IsNumericAscii() )
66         return sal_True;
67 
68     if ( aLang.GetTokenCount( '-' ) == 1 )
69         return aLang.IsAlphaAscii() && aLang.IsLowerAscii();
70     else if ( aLang.GetTokenCount( '-' ) == 2 )
71     {
72         ByteString aTok0( aLang.GetToken( 0, '-' ) );
73         ByteString aTok1( aLang.GetToken( 1, '-' ) );
74         return  aTok0.Len() && aTok0.IsAlphaAscii() && aTok0.IsLowerAscii()
75              && aTok1.Len() && aTok1.IsAlphaAscii() && aTok1.IsUpperAscii()
76              && !aTok1.EqualsIgnoreCaseAscii( aTok0 );
77     }
78 
79     return sal_False;
80 }
81 
82 
83 //
84 // class LazySvFileStream
85 //
86 
87 
88 class LazySvFileStream : public SvFileStream
89 {
90 
91 private:
92     String aFileName;
93     sal_Bool bOpened;
94     StreamMode eOpenMode;
95 
96 public:
97     LazySvFileStream()
98     : aFileName()
99     , bOpened( sal_False )
100     , eOpenMode( 0 )
101     {};
102 
103     void SetOpenParams( const String& rFileName, StreamMode eOpenModeP )
104     {
105         aFileName = rFileName;
106         eOpenMode = eOpenModeP;
107     };
108 
109     void LazyOpen();
110 };
111 
112 void LazySvFileStream::LazyOpen()
113 {
114     if ( !bOpened )
115     {
116         Open( aFileName, eOpenMode );
117 	    if ( !IsOpen())
118 	    {
119 		    fprintf( stderr, "\nERROR: Could not open Output-File %s!\n\n", ByteString( aFileName, RTL_TEXTENCODING_ASCII_US ).GetBuffer() );
120 		    exit ( 4 );
121 	    }
122         bOpened = sal_True;
123     }
124 }
125 
126 
127 //
128 // class GSILine
129 //
130 
131 /*****************************************************************************/
132 GSILine::GSILine( const ByteString &rLine, sal_uLong nLine )
133 /*****************************************************************************/
134 				: ByteString( rLine )
135 				, nLineNumber( nLine )
136 				, bOK( sal_True )
137                 , bFixed ( sal_False )
138 {
139     if ( rLine.GetTokenCount( '\t' ) == 15 )
140     {
141         aFormat = FORMAT_SDF;
142         aUniqId = rLine.GetToken( 0, '\t' );
143         aUniqId.Append("/").Append( rLine.GetToken( 1, '\t' ) ).Append("/").Append( rLine.GetToken( 3, '\t' ) ).Append("/").Append( rLine.GetToken( 4, '\t' ) ).Append("/").Append( rLine.GetToken( 5, '\t' ) ).Append("/").Append( rLine.GetToken( 6, '\t' ) ).Append("/").Append( rLine.GetToken( 7, '\t' ) );
144         aLineType = "";
145         aLangId = rLine.GetToken( 9, '\t' );
146         aText = rLine.GetToken( 10, '\t' );
147         aQuickHelpText = rLine.GetToken( 12, '\t' );
148         aTitle = rLine.GetToken( 13, '\t' );
149 
150         // do some more format checks here
151         if ( !rLine.GetToken( 8, '\t' ).IsNumericAscii() )
152         {
153 		    PrintError( "The length field does not contain a number!", "Line format", rLine.GetToken( 8, '\t' ), sal_True, GetLineNumber(), GetUniqId() );
154 		    NotOK();
155         }
156         if ( !LanguageOK( aLangId ) )
157         {
158 		    PrintError( "The Language is invalid!", "Line format", aLangId, sal_True, GetLineNumber(), GetUniqId() );
159 		    NotOK();
160         }
161         // limit GID and LID to MAX_GID_LID_LEN chars each for database conformity, see #137575#
162         if ( rLine.GetToken( 4, '\t' ).Len() > MAX_GID_LID_LEN || rLine.GetToken( 5, '\t' ).Len() > MAX_GID_LID_LEN )
163         {
164 			PrintError( ByteString("GID and LID may only be ").Append( ByteString::CreateFromInt32(MAX_GID_LID_LEN) ).Append( " chars long each!" ), "Line format", aLangId, sal_True, GetLineNumber(), GetUniqId() );
165 		    NotOK();
166         }
167     }
168     else    // allow tabs in gsi files
169     {
170         aFormat = FORMAT_GSI;
171         ByteString sTmp( rLine );
172         sal_uInt16 nPos = sTmp.Search( "($$)" );
173         sal_uInt16 nStart = 0;
174         if ( nPos != STRING_NOTFOUND )
175         {
176         	aUniqId = sTmp.Copy( nStart, nPos - nStart );
177             nStart = nPos + 4;  // + length of the delemiter
178             nPos = sTmp.Search( "($$)", nStart );
179         }
180         if ( nPos != STRING_NOTFOUND )
181         {
182         	aLineType = sTmp.Copy( nStart, nPos - nStart );
183             nStart = nPos + 4;  // + length of the delemiter
184             nPos = sTmp.Search( "($$)", nStart );
185             aUniqId.Append( "/" );
186             aUniqId.Append( aLineType );
187         }
188         if ( nPos != STRING_NOTFOUND )
189         {
190         	aLangId = sTmp.Copy( nStart, nPos - nStart );
191             nStart = nPos + 4;  // + length of the delemiter
192             nPos = sTmp.Search( "($$)", nStart );
193         }
194         if ( nPos != STRING_NOTFOUND )
195         {
196 //        	ByteString aStatus = sTmp.Copy( nStart, nPos - nStart );     // ext int ...
197             nStart = nPos + 4;  // + length of the delemiter
198         }
199         if ( nPos != STRING_NOTFOUND )
200         	aText = sTmp.Copy( nStart );
201         else
202             aFormat = FORMAT_UNKNOWN;
203     }
204 
205     if ( FORMAT_UNKNOWN == GetLineFormat() )
206         NotOK();
207 }
208 
209 /*****************************************************************************/
210 void GSILine::NotOK()
211 /*****************************************************************************/
212 {
213     bOK = sal_False;
214 }
215 
216 /*****************************************************************************/
217 void GSILine::ReassembleLine()
218 /*****************************************************************************/
219 {
220     ByteString aReassemble;
221     if ( GetLineFormat() == FORMAT_SDF )
222     {
223         sal_uInt16 i;
224         for ( i = 0 ; i < 10 ; i++ )
225         {
226             aReassemble.Append( GetToken( i, '\t' ) );
227             aReassemble.Append( "\t" );
228         }
229         aReassemble.Append( aText );
230         aReassemble.Append( "\t" );
231         aReassemble.Append( GetToken( 11, '\t' ) ); // should be empty but there are some places in sc. Not reflected to sources!!
232         aReassemble.Append( "\t" );
233         aReassemble.Append( aQuickHelpText );
234         aReassemble.Append( "\t" );
235         aReassemble.Append( aTitle );
236         for ( i = 14 ; i < 15 ; i++ )
237         {
238             aReassemble.Append( "\t" );
239             aReassemble.Append( GetToken( i, '\t' ) );
240         }
241         *(ByteString*)this = aReassemble;
242     }
243     else if ( GetLineFormat() == FORMAT_GSI )
244     {
245         sal_uInt16 nPos = Search( "($$)" );
246         sal_uInt16 nStart = 0;
247         if ( nPos != STRING_NOTFOUND )
248         {
249             nStart = nPos + 4;  // + length of the delemiter
250             nPos = Search( "($$)", nStart );
251         }
252         if ( nPos != STRING_NOTFOUND )
253         {
254             nStart = nPos + 4;  // + length of the delemiter
255             nPos = Search( "($$)", nStart );
256         }
257         if ( nPos != STRING_NOTFOUND )
258         {
259             nStart = nPos + 4;  // + length of the delemiter
260             nPos = Search( "($$)", nStart );
261         }
262         if ( nPos != STRING_NOTFOUND )
263         {
264             nStart = nPos + 4;  // + length of the delemiter
265         }
266         if ( nPos != STRING_NOTFOUND )
267         {
268             aReassemble = Copy( 0, nStart );
269             aReassemble += aText;
270             *(ByteString*)this = aReassemble;
271         }
272         else
273             PrintError( "Cannot reassemble GSI line (internal Error).", "Line format", "", sal_False, GetLineNumber(), GetUniqId() );
274     }
275     else
276         PrintError( "Cannot reassemble line of unknown type (internal Error).", "Line format", "", sal_False, GetLineNumber(), GetUniqId() );
277 }
278 
279 //
280 // class GSIBlock
281 //
282 /*****************************************************************************/
283 GSIBlock::GSIBlock( sal_Bool PbPrintContext, sal_Bool bSource, sal_Bool bTrans, sal_Bool bRef, sal_Bool bAllowKID, sal_Bool bAllowSusp )
284 /*****************************************************************************/
285             : pSourceLine( NULL )
286             , pReferenceLine( NULL )
287             , bPrintContext( PbPrintContext )
288             , bCheckSourceLang( bSource )
289             , bCheckTranslationLang( bTrans )
290             , bReference( bRef )
291             , bAllowKeyIDs( bAllowKID )
292             , bAllowSuspicious( bAllowSusp )
293             , bHasBlockError( sal_False )
294 {
295 }
296 
297 /*****************************************************************************/
298 GSIBlock::~GSIBlock()
299 /*****************************************************************************/
300 {
301 	delete pSourceLine;
302 	delete pReferenceLine;
303 
304 	for ( sal_uLong i = 0; i < Count(); i++ )
305 		delete ( GetObject( i ));
306 }
307 
308 /*****************************************************************************/
309 void GSIBlock::InsertLine( GSILine* pLine, ByteString aSourceLang)
310 /*****************************************************************************/
311 {
312 	if ( pLine->GetLanguageId().Equals( aSourceLang ) )
313     {
314         if ( pSourceLine )
315         {
316             PrintError( "Source Language entry double. Treating as Translation.", "File format", "", pLine->GetLineNumber(), pLine->GetUniqId() );
317             bHasBlockError = sal_True;
318             pSourceLine->NotOK();
319             pLine->NotOK();
320         }
321         else
322         {
323 		    pSourceLine = pLine;
324             return;
325         }
326     }
327 	sal_uLong nPos = 0;
328 
329     if ( aSourceLang.Len() ) // only check blockstructure if source lang is given
330     {
331 		while ( nPos < Count() )
332         {
333             if ( GetObject( nPos )->GetLanguageId().Equals( pLine->GetLanguageId() ) )
334             {
335                 PrintError( "Translation Language entry double. Checking both.", "File format", "", pLine->GetLineNumber(), pLine->GetUniqId() );
336                 bHasBlockError = sal_True;
337                 GetObject( nPos )->NotOK();
338                 pLine->NotOK();
339             }
340 			nPos++;
341         }
342     }
343 	Insert( pLine, LIST_APPEND );
344 }
345 
346 /*****************************************************************************/
347 void GSIBlock::SetReferenceLine( GSILine* pLine )
348 /*****************************************************************************/
349 {
350     pReferenceLine = pLine;
351 }
352 
353 /*****************************************************************************/
354 void GSIBlock::PrintMessage( ByteString aType, ByteString aMsg, ByteString aPrefix,
355 	ByteString aContext, sal_uLong nLine, ByteString aUniqueId )
356 /*****************************************************************************/
357 {
358     ::PrintMessage( aType, aMsg, aPrefix, aContext, bPrintContext, nLine, aUniqueId );
359 }
360 
361 /*****************************************************************************/
362 void GSIBlock::PrintError( ByteString aMsg, ByteString aPrefix,
363 	ByteString aContext, sal_uLong nLine, ByteString aUniqueId )
364 /*****************************************************************************/
365 {
366     PrintMessage( "Error:", aMsg, aPrefix, aContext, nLine, aUniqueId );
367 }
368 
369 /*****************************************************************************/
370 void GSIBlock::PrintList( ParserMessageList *pList, ByteString aPrefix,
371 	GSILine *pLine )
372 /*****************************************************************************/
373 {
374 	sal_uLong i;
375 	for ( i = 0 ; i < pList->Count() ; i++ )
376 	{
377 		ParserMessage *pMsg = pList->GetObject( i );
378 		ByteString aContext;
379 		if ( bPrintContext )
380 		{
381 			if ( pMsg->GetTagBegin() == STRING_NOTFOUND )
382 				aContext = pLine->GetText().Copy( 0, 300 );
383 			else
384 				aContext = pLine->Copy( pMsg->GetTagBegin()-150, 300 );
385 			aContext.EraseTrailingChars(' ');
386 			aContext.EraseLeadingChars(' ');
387 		}
388 
389         PrintMessage( pMsg->Prefix(), pMsg->GetErrorText(), aPrefix, aContext, pLine->GetLineNumber(), pLine->GetUniqId() );
390 	}
391 }
392 
393 /*****************************************************************************/
394 sal_Bool GSIBlock::IsUTF8( const ByteString &aTestee, sal_Bool bFixTags, sal_uInt16 &nErrorPos, ByteString &aErrorMsg, sal_Bool &bHasBeenFixed, ByteString &aFixed ) const
395 /*****************************************************************************/
396 {
397     String aUTF8Tester( aTestee, RTL_TEXTENCODING_UTF8 );
398     if ( STRING_MATCH != (nErrorPos = ByteString( aUTF8Tester, RTL_TEXTENCODING_UTF8 ).Match( aTestee )) )
399     {
400         aUTF8Tester = String( aTestee.GetBuffer(), nErrorPos, RTL_TEXTENCODING_UTF8 );
401         nErrorPos = aUTF8Tester.Len();
402         aErrorMsg = ByteString( "UTF8 Encoding seems to be broken" );
403         return sal_False;
404     }
405 
406     nErrorPos = aUTF8Tester.SearchChar( String::CreateFromAscii( "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0b\x0c\x0e\x0f"
407                 "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f" ).GetBuffer() );
408     if ( nErrorPos != STRING_NOTFOUND )
409     {
410         aErrorMsg = ByteString( "String contains illegal character" );
411         return sal_False;
412     }
413 
414     if ( bFixTags )
415     {
416         bHasBeenFixed = sal_False;
417         aFixed.Erase();
418     }
419 
420     if ( !bAllowKeyIDs )
421     {
422         sal_Bool bIsKeyID = sal_False;
423         sal_Bool bNewId = sal_False;
424         ByteString aID( aTestee );
425 		sal_uInt16 nAfterID = 0;
426 
427 		if ( aTestee.Equals( "{&", 0, 2 ) )
428         {   // check for strings from instset_native like "{&Tahoma8}335795.Installation Wiza ..."
429             sal_uInt16 nTagEnd = aTestee.Search( '}' );
430             if ( nTagEnd != STRING_NOTFOUND )
431             {
432                 if ( bFixTags )
433                     aFixed = aTestee.Copy( 0, nTagEnd+1 );
434                 nErrorPos = nTagEnd+1;
435                 aID = aTestee.Copy( nTagEnd+1 );
436 				nAfterID = nTagEnd+1;
437             }
438         }
439 
440 		ByteString aDelimiter( (String)String( sal_Unicode(0x2016) ), RTL_TEXTENCODING_UTF8 );
441 
442         if ( aID.Equals( aDelimiter, 6, aDelimiter.Len() ) )
443         {   // New KeyId     6 Letters, digits and spechial chars followed by delimiter
444             bNewId = sal_True;
445             nErrorPos = 1;
446             aID = aID.Copy( 0, 6 );
447 			nAfterID += 6;
448 			nAfterID = nAfterID + aDelimiter.Len();
449         }
450         else if ( ( aID.GetChar(6) == '*' ) && aID.Equals( aDelimiter, 7, aDelimiter.Len() ) )
451         {   // New KeyId     6 Letters, digits and spechial chars followed by '*delimiter' to indicate translation in progress
452             bNewId = sal_True;
453             nErrorPos = 1;
454             aID = aID.Copy( 0, 6 );
455 			nAfterID += 7;
456 			nAfterID = nAfterID + aDelimiter.Len();
457         }
458         else if ( aID.GetTokenCount( '.' ) > 1 )
459         {	// test for old KeyIDs       5 to 6 digits followed by a dot   '44373.'
460             bNewId = sal_False;
461             nErrorPos = 1;
462             aID = aID.GetToken( 0, '.' );
463 			nAfterID = nAfterID + aID.Len();
464         }
465 		else
466 		{
467 			aID.Erase();
468 		}
469 
470         if ( bNewId )
471             {
472                 if ( aID.Len() == 6 )
473                 {
474                     bIsKeyID = sal_True;
475                     ByteString aDigits("0123456789abcdefghijklmnopqrstuvwxyz+-<=>");
476                     for ( sal_uInt16 i=0 ; i < aID.Len() ;i++ )
477                     {
478                         if ( aDigits.Search( aID.GetChar(i) ) == STRING_NOTFOUND )
479                             bIsKeyID = sal_False;
480                     }
481                 }
482             }
483         else
484         {
485             if ( aID.Len() > 0 && aID.GetChar(aID.Len()-1) == '*' )
486                 aID.Erase( aID.Len()-1 );
487 
488             if ( aID.IsNumericAscii() && aID.Len() >= 5 )
489                 bIsKeyID = sal_True;
490         }
491 
492         if ( bIsKeyID )
493         {
494             aErrorMsg = ByteString( "String contains KeyID" );
495             if ( bFixTags )
496             {
497                 aFixed += aTestee.Copy( nAfterID );
498                 bHasBeenFixed = sal_True;
499                 aErrorMsg = ByteString( "FIXED String containing KeyID" );
500             }
501             else
502                 aErrorMsg = ByteString( "String contains KeyID" );
503             return sal_False;
504         }
505     }
506 
507     return sal_True;
508 }
509 
510 /*****************************************************************************/
511 sal_Bool GSIBlock::TestUTF8( GSILine* pTestee, sal_Bool bFixTags )
512 /*****************************************************************************/
513 {
514     sal_uInt16 nErrorPos = 0;
515     ByteString aErrorMsg;
516     sal_Bool bError = sal_False;
517     ByteString aFixed;
518     sal_Bool bHasBeenFixed = sal_False;
519     if ( !IsUTF8( pTestee->GetText(), bFixTags, nErrorPos, aErrorMsg, bHasBeenFixed, aFixed ) )
520     {
521         ByteString aContext( pTestee->GetText().Copy( nErrorPos, 20 ) );
522         PrintError( aErrorMsg.Append(" in Text at Position " ).Append( ByteString::CreateFromInt32( nErrorPos ) ), "Text format", aContext, pTestee->GetLineNumber(), pTestee->GetUniqId() );
523         bError = sal_True;
524         if ( bHasBeenFixed )
525         {
526             pTestee->SetText( aFixed );
527             pTestee->SetFixed();
528         }
529     }
530     if ( !IsUTF8( pTestee->GetQuickHelpText(), bFixTags, nErrorPos, aErrorMsg, bHasBeenFixed, aFixed ) )
531     {
532         ByteString aContext( pTestee->GetQuickHelpText().Copy( nErrorPos, 20 ) );
533         PrintError( aErrorMsg.Append(" in QuickHelpText at Position " ).Append( ByteString::CreateFromInt32( nErrorPos ) ), "Text format", aContext, pTestee->GetLineNumber(), pTestee->GetUniqId() );
534         bError = sal_True;
535         if ( bHasBeenFixed )
536         {
537             pTestee->SetQuickHelpText( aFixed );
538             pTestee->SetFixed();
539         }
540     }
541     if ( !IsUTF8( pTestee->GetTitle(), bFixTags, nErrorPos, aErrorMsg, bHasBeenFixed, aFixed ) )
542     {
543         ByteString aContext( pTestee->GetTitle().Copy( nErrorPos, 20 ) );
544         PrintError( aErrorMsg.Append(" in Title at Position " ).Append( ByteString::CreateFromInt32( nErrorPos ) ), "Text format", aContext, pTestee->GetLineNumber(), pTestee->GetUniqId() );
545         bError = sal_True;
546         if ( bHasBeenFixed )
547         {
548             pTestee->SetTitle( aFixed );
549             pTestee->SetFixed();
550         }
551     }
552     if ( bError )
553         pTestee->NotOK();
554     return !bError;
555 }
556 
557 
558 /*****************************************************************************/
559 sal_Bool GSIBlock::HasSuspiciousChars( GSILine* pTestee, GSILine* pSource )
560 /*****************************************************************************/
561 {
562     sal_uInt16 nPos = 0;
563     if ( !bAllowSuspicious && ( nPos = pTestee->GetText().Search("??")) != STRING_NOTFOUND )
564         if ( pSource->GetText().Search("??") == STRING_NOTFOUND )
565         {
566             String aUTF8Tester = String( pTestee->GetText(), 0, nPos, RTL_TEXTENCODING_UTF8 );
567             sal_uInt16 nErrorPos = aUTF8Tester.Len();
568             ByteString aContext( pTestee->GetText().Copy( nPos, 20 ) );
569             PrintError( ByteString("Found double questionmark in translation only. Looks like an encoding problem at Position " ).Append( ByteString::CreateFromInt32( nErrorPos ) ), "Text format", aContext, pTestee->GetLineNumber(), pTestee->GetUniqId() );
570             pTestee->NotOK();
571             return sal_True;
572         }
573 
574     return sal_False;
575 }
576 
577 
578 /*****************************************************************************/
579 sal_Bool GSIBlock::CheckSyntax( sal_uLong nLine, sal_Bool bRequireSourceLine, sal_Bool bFixTags )
580 /*****************************************************************************/
581 {
582 	static LingTest aTester;
583     sal_Bool bHasError = sal_False;
584 
585 	if ( !pSourceLine )
586 	{
587         if ( bRequireSourceLine )
588         {
589     		PrintError( "No source language entry defined!", "File format", "", nLine );
590             bHasBlockError = sal_True;
591         }
592 	}
593 	else
594 	{
595         aTester.CheckReference( pSourceLine );
596         if ( pSourceLine->HasMessages() )
597 		{
598 			PrintList( pSourceLine->GetMessageList(), "ReferenceString", pSourceLine );
599 			pSourceLine->NotOK();
600             bHasError = sal_True;
601 		}
602 	}
603     if ( bReference )
604     {
605         if ( !pReferenceLine )
606         {
607             GSILine *pSource;
608             if ( pSourceLine )
609                 pSource = pSourceLine;
610             else
611                 pSource = GetObject( 0 );   // get some other line
612             if ( pSource )
613                 PrintError( "No reference line found. Entry is new in source file", "File format", "", pSource->GetLineNumber(), pSource->GetUniqId() );
614             else
615                 PrintError( "No reference line found. Entry is new in source file", "File format", "", nLine );
616             bHasBlockError = sal_True;
617 	    }
618 	    else
619 	    {
620 		    if ( pSourceLine && !pSourceLine->Equals( *pReferenceLine ) )
621 		    {
622                 xub_StrLen nPos = pSourceLine->Match( *pReferenceLine );
623                 ByteString aContext( pReferenceLine->Copy( nPos - 5, 15) );
624                 aContext.Append( "\" --> \"" ).Append( pSourceLine->Copy( nPos - 5, 15) );
625                 PrintError( "Source Language Entry has changed.", "File format", aContext, pSourceLine->GetLineNumber(), pSourceLine->GetUniqId() );
626 			    pSourceLine->NotOK();
627                 bHasError = sal_True;
628 		    }
629 	    }
630     }
631 
632     if ( pSourceLine )
633         bHasError |= !TestUTF8( pSourceLine, bFixTags );
634 
635 	sal_uLong i;
636 	for ( i = 0; i < Count(); i++ )
637 	{
638 		aTester.CheckTestee( GetObject( i ), pSourceLine != NULL, bFixTags );
639 		if ( GetObject( i )->HasMessages() || aTester.HasCompareWarnings() )
640         {
641             if ( GetObject( i )->HasMessages() || aTester.GetCompareWarnings().HasErrors() )
642 			    GetObject( i )->NotOK();
643             bHasError = sal_True;
644 			PrintList( GetObject( i )->GetMessageList(), "Translation", GetObject( i ) );
645 			PrintList( &(aTester.GetCompareWarnings()), "Translation Tag Missmatch", GetObject( i ) );
646 		}
647         bHasError |= !TestUTF8( GetObject( i ), bFixTags );
648         if ( pSourceLine )
649             bHasError |= HasSuspiciousChars( GetObject( i ), pSourceLine );
650 	}
651 
652 	return bHasError || bHasBlockError;
653 }
654 
655 void GSIBlock::WriteError( LazySvFileStream &aErrOut, sal_Bool bRequireSourceLine  )
656 {
657     if ( pSourceLine && pSourceLine->IsOK() && bCheckSourceLang && !bHasBlockError )
658         return;
659 
660 	sal_Bool bHasError = sal_False;
661 	sal_Bool bCopyAll = ( !pSourceLine && bRequireSourceLine ) || ( pSourceLine && !pSourceLine->IsOK() && !bCheckTranslationLang ) || bHasBlockError;
662 	sal_uLong i;
663 	for ( i = 0; i < Count(); i++ )
664 	{
665 		if ( !GetObject( i )->IsOK() || bCopyAll )
666 		{
667 			bHasError = sal_True;
668             aErrOut.LazyOpen();
669 			aErrOut.WriteLine( *GetObject( i ) );
670 		}
671 	}
672 
673 	if ( pSourceLine && ( bHasError || !pSourceLine->IsOK() ) && !( !bHasError && bCheckTranslationLang ) )
674     {
675         aErrOut.LazyOpen();
676 		aErrOut.WriteLine( *pSourceLine );
677     }
678 }
679 
680 void GSIBlock::WriteCorrect( LazySvFileStream &aOkOut, sal_Bool bRequireSourceLine )
681 {
682 	if ( ( !pSourceLine && bRequireSourceLine ) || ( pSourceLine && !pSourceLine->IsOK() && !bCheckTranslationLang ) )
683 		return;
684 
685 	sal_Bool bHasOK = sal_False;
686 	sal_uLong i;
687 	for ( i = 0; i < Count(); i++ )
688 	{
689 		if ( ( GetObject( i )->IsOK() || bCheckSourceLang ) && !bHasBlockError )
690 		{
691 			bHasOK = sal_True;
692             aOkOut.LazyOpen();
693 			aOkOut.WriteLine( *GetObject( i ) );
694 		}
695 	}
696 
697 	if ( ( pSourceLine && pSourceLine->IsOK() && ( Count() || !bCheckTranslationLang ) ) || ( bHasOK && bCheckTranslationLang ) )
698     {
699         aOkOut.LazyOpen();
700 		aOkOut.WriteLine( *pSourceLine );
701     }
702 }
703 
704 void GSIBlock::WriteFixed( LazySvFileStream &aFixOut, sal_Bool /*bRequireSourceLine*/ )
705 {
706     if ( pSourceLine && !pSourceLine->IsFixed() && bCheckSourceLang )
707         return;
708 
709 	sal_Bool bHasFixes = sal_False;
710 	sal_uLong i;
711 	for ( i = 0; i < Count(); i++ )
712 	{
713 		if ( GetObject( i )->IsFixed() )
714 		{
715 			bHasFixes = sal_True;
716             aFixOut.LazyOpen();
717 			aFixOut.WriteLine( *GetObject( i ) );
718 		}
719 	}
720 
721 	if ( pSourceLine && ( bHasFixes || pSourceLine->IsFixed() ) )
722     {
723         aFixOut.LazyOpen();
724 		aFixOut.WriteLine( *pSourceLine );
725     }
726 }
727 
728 
729 /*****************************************************************************/
730 /*****************************************************************************/
731 /*****************************************************************************/
732 /*****************************************************************************/
733 /*****************************************************************************/
734 /*****************************************************************************/
735 /*****************************************************************************/
736 
737 /*****************************************************************************/
738 void Help()
739 /*****************************************************************************/
740 {
741 	fprintf( stdout, "\n" );
742 	fprintf( stdout, "gsicheck Version 1.9.0 (c)1999 - 2006 by SUN Microsystems\n" );
743 	fprintf( stdout, "=========================================================\n" );
744 	fprintf( stdout, "\n" );
745 	fprintf( stdout, "gsicheck checks the syntax of tags in GSI-Files and SDF-Files\n" );
746 	fprintf( stdout, "         checks for inconsistencies and malicious UTF8 encoding\n" );
747 	fprintf( stdout, "         checks tags in Online Help\n" );
748 	fprintf( stdout, "         checks for *new* KeyIDs and relax GID/LID length to %s\n", ByteString::CreateFromInt32(MAX_GID_LID_LEN).GetBuffer() );
749 	fprintf( stdout, "\n" );
750 	fprintf( stdout, "Syntax: gsicheck [ -c ] [-f] [ -we ] [ -wef ErrorFilename ] [ -wc ]\n" );
751 	fprintf( stdout, "                 [ -wcf CorrectFilename ] [ -s | -t ] [ -l LanguageID ]\n" );
752 	fprintf( stdout, "                 [ -r ReferenceFile ] filename\n" );
753 	fprintf( stdout, "\n" );
754 	fprintf( stdout, "-c    Add context to error message (Print the line containing the error)\n" );
755 	fprintf( stdout, "-f    try to fix errors. See also -wf -wff \n" );
756 	fprintf( stdout, "-wf   Write File containing all fixed parts\n" );
757 	fprintf( stdout, "-wff  Same as above but give own filename\n" );
758 	fprintf( stdout, "-we   Write File containing all errors\n" );
759 	fprintf( stdout, "-wef  Same as above but give own filename\n" );
760 	fprintf( stdout, "-wc   Write File containing all correct parts\n" );
761 	fprintf( stdout, "-wcf  Same as above but give own filename\n" );
762 	fprintf( stdout, "-s    Check only source language. Should be used before handing out to vendor.\n" );
763 	fprintf( stdout, "-t    Check only Translation language(s). Should be used before merging.\n" );
764 	fprintf( stdout, "-k    Allow KeyIDs to be present in strings\n" );
765     fprintf( stdout, "-e    disable encoding checks. E.g.: double questionmark \'??\' which may be the\n" );
766     fprintf( stdout, "      result of false conversions\n" );
767 	fprintf( stdout, "-l    ISO Languagecode or numerical 2 digits Identifier of the source language.\n" );
768 	fprintf( stdout, "      Default is en-US. Use \"\" (empty string) or 'none'\n" );
769 	fprintf( stdout, "      to disable source language dependent checks\n" );
770 	fprintf( stdout, "-r    Reference filename to check that source language entries\n" );
771 	fprintf( stdout, "      have not been changed\n" );
772    	fprintf( stdout, "\n" );
773 }
774 
775 /*****************************************************************************/
776 #if defined(UNX) || defined(OS2)
777 int main( int argc, char *argv[] )
778 #else
779 int _cdecl main( int argc, char *argv[] )
780 #endif
781 /*****************************************************************************/
782 {
783 
784 	sal_Bool bError = sal_False;
785 	sal_Bool bPrintContext = sal_False;
786 	sal_Bool bCheckSourceLang = sal_False;
787     sal_Bool bCheckTranslationLang = sal_False;
788     sal_Bool bWriteError = sal_False;
789 	sal_Bool bWriteCorrect = sal_False;
790     sal_Bool bWriteFixed = sal_False;
791     sal_Bool bFixTags = sal_False;
792     sal_Bool bAllowKID = sal_False;
793     sal_Bool bAllowSuspicious = sal_False;
794     String aErrorFilename;
795 	String aCorrectFilename;
796     String aFixedFilename;
797     sal_Bool bFileHasError = sal_False;
798     ByteString aSourceLang( "en-US" );     // English is default
799 	ByteString aFilename;
800     ByteString aReferenceFilename;
801     sal_Bool bReferenceFile = sal_False;
802 	for ( sal_uInt16 i = 1 ; i < argc ; i++ )
803 	{
804 		if ( *argv[ i ] == '-' )
805 		{
806 			switch (*(argv[ i ]+1))
807 			{
808 				case 'c':bPrintContext = sal_True;
809 					break;
810 				case 'w':
811 					{
812 						if ( (*(argv[ i ]+2)) == 'e' )
813                         {
814                             if ( (*(argv[ i ]+3)) == 'f' )
815                                 if ( (i+1) < argc )
816                                 {
817                                     aErrorFilename = String( argv[ i+1 ], RTL_TEXTENCODING_ASCII_US );
818         							bWriteError = sal_True;
819                                     i++;
820                                 }
821                                 else
822                                 {
823 					                fprintf( stderr, "\nERROR: Switch %s requires parameter!\n\n", argv[ i ] );
824 					                bError = sal_True;
825                                 }
826                             else
827        							bWriteError = sal_True;
828                         }
829 						else if ( (*(argv[ i ]+2)) == 'c' )
830                             if ( (*(argv[ i ]+3)) == 'f' )
831                                 if ( (i+1) < argc )
832                                 {
833                                     aCorrectFilename = String( argv[ i+1 ], RTL_TEXTENCODING_ASCII_US );
834         							bWriteCorrect = sal_True;
835                                     i++;
836                                 }
837                                 else
838                                 {
839 					                fprintf( stderr, "\nERROR: Switch %s requires parameter!\n\n", argv[ i ] );
840 					                bError = sal_True;
841                                 }
842                             else
843        							bWriteCorrect = sal_True;
844 						else if ( (*(argv[ i ]+2)) == 'f' )
845                             if ( (*(argv[ i ]+3)) == 'f' )
846                                 if ( (i+1) < argc )
847                                 {
848                                     aFixedFilename = String( argv[ i+1 ], RTL_TEXTENCODING_ASCII_US );
849         							bWriteFixed = sal_True;
850                                     bFixTags = sal_True;
851                                     i++;
852                                 }
853                                 else
854                                 {
855 					                fprintf( stderr, "\nERROR: Switch %s requires parameter!\n\n", argv[ i ] );
856 					                bError = sal_True;
857                                 }
858                             else
859                             {
860        							bWriteFixed = sal_True;
861                                 bFixTags = sal_True;
862                             }
863 						else
864 						{
865 							fprintf( stderr, "\nERROR: Unknown Switch %s!\n\n", argv[ i ] );
866 							bError = sal_True;
867 						}
868 					}
869 					break;
870 				case 's':bCheckSourceLang = sal_True;
871 					break;
872 				case 't':bCheckTranslationLang = sal_True;
873 					break;
874 				case 'l':
875                     {
876                         if ( (i+1) < argc )
877                         {
878                             aSourceLang = ByteString( argv[ i+1 ] );
879                             if ( aSourceLang.EqualsIgnoreCaseAscii( "none" ) )
880                                 aSourceLang.Erase();
881                             i++;
882                         }
883                         else
884                         {
885 					        fprintf( stderr, "\nERROR: Switch %s requires parameter!\n\n", argv[ i ] );
886 					        bError = sal_True;
887                         }
888                     }
889 					break;
890 				case 'r':
891                     {
892                         if ( (i+1) < argc )
893                         {
894                             aReferenceFilename = argv[ i+1 ];
895                             bReferenceFile = sal_True;
896                             i++;
897                         }
898                         else
899                         {
900 					        fprintf( stderr, "\nERROR: Switch %s requires parameter!\n\n", argv[ i ] );
901 					        bError = sal_True;
902                         }
903                     }
904 					break;
905 				case 'f':
906                     {
907                         bFixTags = sal_True;
908                     }
909 					break;
910 				case 'k':
911                     {
912                         bAllowKID = sal_True;
913                     }
914 					break;
915 				case 'e':
916                     {
917                         bAllowSuspicious = sal_True;
918                     }
919 					break;
920 				default:
921 					fprintf( stderr, "\nERROR: Unknown Switch %s!\n\n", argv[ i ] );
922 					bError = sal_True;
923 			}
924 		}
925 		else
926 		{
927 			if  ( !aFilename.Len())
928 				aFilename = ByteString( argv[ i ] );
929 			else
930 			{
931 				fprintf( stderr, "\nERROR: Only one filename may be specified!\n\n");
932 				bError = sal_True;
933 			}
934 		}
935 	}
936 
937 
938 	if ( !aFilename.Len() || bError )
939 	{
940 		Help();
941 		exit ( 0 );
942 	}
943 
944     if ( aSourceLang.Len() && !LanguageOK( aSourceLang ) )
945     {
946 	    fprintf( stderr, "\nERROR: The Language '%s' is invalid!\n\n", aSourceLang.GetBuffer() );
947 		Help();
948 		exit ( 1 );
949     }
950 
951 	if ( bCheckSourceLang && bCheckTranslationLang )
952     {
953 	    fprintf( stderr, "\nERROR: The Options -s and -t are mutually exclusive.\nUse only one of them.\n\n" );
954 		Help();
955 		exit ( 1 );
956     }
957 
958 
959 
960 	DirEntry aSource = DirEntry( String( aFilename, RTL_TEXTENCODING_ASCII_US ));
961 	if ( !aSource.Exists()) {
962 		fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", aFilename.GetBuffer() );
963 		exit ( 2 );
964 	}
965 
966 	SvFileStream aGSI( String( aFilename, RTL_TEXTENCODING_ASCII_US ), STREAM_STD_READ );
967 	if ( !aGSI.IsOpen()) {
968 		fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", aFilename.GetBuffer() );
969 		exit ( 3 );
970 	}
971 
972     SvFileStream aReferenceGSI;
973 	if ( bReferenceFile )
974     {
975         DirEntry aReferenceSource = DirEntry( String( aReferenceFilename, RTL_TEXTENCODING_ASCII_US ));
976 	    if ( !aReferenceSource.Exists()) {
977 		    fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", aFilename.GetBuffer() );
978 		    exit ( 2 );
979 	    }
980 
981 	    aReferenceGSI.Open( String( aReferenceFilename, RTL_TEXTENCODING_ASCII_US ), STREAM_STD_READ );
982 	    if ( !aReferenceGSI.IsOpen()) {
983 		    fprintf( stderr, "\nERROR: Could not open Input-File %s!\n\n", aFilename.GetBuffer() );
984 		    exit ( 3 );
985 	    }
986     }
987 
988 	LazySvFileStream aOkOut;
989 	String aBaseName = aSource.GetBase();
990 	if ( bWriteCorrect )
991 	{
992     	if ( !aCorrectFilename.Len() )
993         {
994 		    String sTmpBase( aBaseName );
995 		    sTmpBase += String( "_ok", RTL_TEXTENCODING_ASCII_US );
996 		    aSource.SetBase( sTmpBase );
997 		    aCorrectFilename = aSource.GetFull();
998         }
999 		aOkOut.SetOpenParams( aCorrectFilename , STREAM_STD_WRITE | STREAM_TRUNC );
1000 	}
1001 
1002 	LazySvFileStream aErrOut;
1003 	if ( bWriteError )
1004 	{
1005     	if ( !aErrorFilename.Len() )
1006         {
1007 		    String sTmpBase( aBaseName );
1008 		    sTmpBase += String( "_err", RTL_TEXTENCODING_ASCII_US );
1009 		    aSource.SetBase( sTmpBase );
1010 		    aErrorFilename = aSource.GetFull();
1011         }
1012 		aErrOut.SetOpenParams( aErrorFilename , STREAM_STD_WRITE | STREAM_TRUNC );
1013 	}
1014 
1015 	LazySvFileStream aFixOut;
1016 	if ( bWriteFixed )
1017 	{
1018     	if ( !aFixedFilename.Len() )
1019         {
1020 		    String sTmpBase( aBaseName );
1021 		    sTmpBase += String( "_fix", RTL_TEXTENCODING_ASCII_US );
1022 		    aSource.SetBase( sTmpBase );
1023 		    aFixedFilename = aSource.GetFull();
1024         }
1025 		aFixOut.SetOpenParams( aFixedFilename , STREAM_STD_WRITE | STREAM_TRUNC );
1026 	}
1027 
1028 
1029     ByteString sReferenceLine;
1030 	GSILine* pReferenceLine = NULL;
1031 	ByteString aOldReferenceId("No Valid ID");   // just set to something which can never be an ID
1032 	sal_uLong nReferenceLine = 0;
1033 
1034 	ByteString sGSILine;
1035 	GSILine* pGSILine = NULL;
1036 	ByteString aOldId("No Valid ID");   // just set to something which can never be an ID
1037 	GSIBlock *pBlock = NULL;
1038 	sal_uLong nLine = 0;
1039 
1040 	while ( !aGSI.IsEof() )
1041     {
1042 		aGSI.ReadLine( sGSILine );
1043 		nLine++;
1044         pGSILine = new GSILine( sGSILine, nLine );
1045         sal_Bool bDelete = sal_True;
1046 
1047 
1048 		if ( pGSILine->Len() )
1049         {
1050             if ( FORMAT_UNKNOWN == pGSILine->GetLineFormat() )
1051 	        {
1052 		        PrintError( "Format of line is unknown. Ignoring!", "Line format", pGSILine->Copy( 0,40 ), bPrintContext, pGSILine->GetLineNumber() );
1053 		        pGSILine->NotOK();
1054 				if ( bWriteError )
1055                 {
1056 					bFileHasError = sal_True;
1057                     aErrOut.LazyOpen();
1058                     aErrOut.WriteLine( *pGSILine );
1059                 }
1060 	        }
1061             else if ( pGSILine->GetLineType().EqualsIgnoreCaseAscii("res-comment") )
1062             {   // ignore comment lines, but write them to Correct Items File
1063 			    if ( bWriteCorrect )
1064                 {
1065                     aOkOut.LazyOpen();
1066                		aOkOut.WriteLine( *pGSILine );
1067                 }
1068             }
1069             else
1070             {
1071                 ByteString aId = pGSILine->GetUniqId();
1072 			    if ( aId != aOldId )
1073                 {
1074 				    if ( pBlock )
1075 				    {
1076 					    bFileHasError |= pBlock->CheckSyntax( nLine, aSourceLang.Len() != 0, bFixTags );
1077 
1078 					    if ( bWriteError )
1079 						    pBlock->WriteError( aErrOut, aSourceLang.Len() != 0 );
1080 					    if ( bWriteCorrect )
1081 						    pBlock->WriteCorrect( aOkOut, aSourceLang.Len() != 0 );
1082 					    if ( bWriteFixed )
1083 						    pBlock->WriteFixed( aFixOut, aSourceLang.Len() != 0 );
1084 
1085 					    delete pBlock;
1086 				    }
1087 				    pBlock = new GSIBlock( bPrintContext, bCheckSourceLang, bCheckTranslationLang, bReferenceFile, bAllowKID, bAllowSuspicious );
1088 
1089 				    aOldId = aId;
1090 
1091 
1092                     // find corresponding line in reference file
1093                     if ( bReferenceFile )
1094                     {
1095                         sal_Bool bContinueSearching = sal_True;
1096                         while ( ( !aReferenceGSI.IsEof() || pReferenceLine ) && bContinueSearching )
1097                         {
1098                             if ( !pReferenceLine )
1099                             {
1100 		                        aReferenceGSI.ReadLine( sReferenceLine );
1101 		                        nReferenceLine++;
1102                                 pReferenceLine = new GSILine( sReferenceLine, nReferenceLine );
1103                             }
1104                             if ( pReferenceLine->GetLineFormat() != FORMAT_UNKNOWN )
1105                             {
1106                                 if ( pReferenceLine->GetUniqId() == aId && pReferenceLine->GetLanguageId().Equals( aSourceLang ) )
1107                                 {
1108                                     pBlock->SetReferenceLine( pReferenceLine );
1109                                     pReferenceLine = NULL;
1110                                 }
1111                                 else if ( pReferenceLine->GetUniqId() > aId )
1112                                 {
1113 //                                    if ( pGSILine->GetLanguageId() == aSourceLang )
1114 //                    		            PrintError( "No reference line found. Entry is new in source file", "File format", "", bPrintContext, pGSILine->GetLineNumber(), aId );
1115                                     bContinueSearching = sal_False;
1116                                 }
1117                                 else
1118                                 {
1119                                     if ( pReferenceLine->GetUniqId() < aId  && pReferenceLine->GetLanguageId().Equals( aSourceLang ) )
1120                 		                PrintError( "No Entry in source file found. Entry has been removed from source file", "File format", "", bPrintContext, pGSILine->GetLineNumber(), pReferenceLine->GetUniqId() );
1121                                     delete pReferenceLine;
1122                                     pReferenceLine = NULL;
1123                                 }
1124                             }
1125                             else
1126                             {
1127                                 delete pReferenceLine;
1128                                 pReferenceLine = NULL;
1129                             }
1130 
1131                         }
1132                     }
1133 
1134     		    }
1135 
1136 			    pBlock->InsertLine( pGSILine, aSourceLang );
1137                 bDelete = sal_False;
1138             }
1139 		}
1140         if ( bDelete )
1141             delete pGSILine;
1142 
1143 	}
1144 	if ( pBlock )
1145 	{
1146 		bFileHasError |= pBlock->CheckSyntax( nLine, aSourceLang.Len() != 0, bFixTags );
1147 
1148 		if ( bWriteError )
1149 			pBlock->WriteError( aErrOut, aSourceLang.Len() != 0 );
1150 		if ( bWriteCorrect )
1151 			pBlock->WriteCorrect( aOkOut, aSourceLang.Len() != 0 );
1152 		if ( bWriteFixed )
1153 			pBlock->WriteFixed( aFixOut, aSourceLang.Len() != 0 );
1154 
1155 		delete pBlock;
1156 	}
1157 	aGSI.Close();
1158 
1159 	if ( bWriteError )
1160 		aErrOut.Close();
1161 	if ( bWriteCorrect )
1162 		aOkOut.Close();
1163 	if ( bWriteFixed )
1164 		aFixOut.Close();
1165 
1166     if ( bFileHasError )
1167         return 55;
1168     else
1169 	    return 0;
1170 }
1171