1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_l10ntools.hxx"
26 #include <stdio.h>
27 #include <tools/fsys.hxx>
28 #include <tools/stream.hxx>
29
30 // local includes
31 #include "utf8conv.hxx"
32
// File-type codes produced by GetGSIFileType() and consumed by the GSI line helpers below.
// UNKNOWN: no non-empty line could be read from the stream.
33 #define GSI_FILE_UNKNOWN 0x0000
// OLDSTYLE: the first non-empty line contains the "($$)" field separator.
34 #define GSI_FILE_OLDSTYLE 0x0001
// L10NFRAMEWORK: the first non-empty line has no "($$)"; its fields are read as tab-separated tokens.
35 #define GSI_FILE_L10NFRAMEWORK 0x0002
36
37 /*****************************************************************************/
GetGSIFileType(SvStream & rStream)38 sal_uInt16 GetGSIFileType( SvStream &rStream )
39 /*****************************************************************************/
40 {
41 sal_uInt16 nFileType = GSI_FILE_UNKNOWN;
42
43 sal_uLong nPos( rStream.Tell());
44 rStream.Seek( STREAM_SEEK_TO_BEGIN );
45
46 ByteString sLine;
47 while( !rStream.IsEof() && !sLine.Len())
48 rStream.ReadLine( sLine );
49
50 if( sLine.Len()) {
51 if( sLine.Search( "($$)" ) != STRING_NOTFOUND )
52 nFileType = GSI_FILE_OLDSTYLE;
53 else
54 nFileType = GSI_FILE_L10NFRAMEWORK;
55 }
56
57 rStream.Seek( nPos );
58
59 return nFileType;
60 }
61
62 /*****************************************************************************/
GetGSILineId(const ByteString & rLine,sal_uInt16 nFileType)63 ByteString GetGSILineId( const ByteString &rLine, sal_uInt16 nFileType )
64 /*****************************************************************************/
65 {
66 ByteString sId;
67 switch ( nFileType ) {
68 case GSI_FILE_OLDSTYLE:
69 sId = rLine;
70 sId.SearchAndReplaceAll( "($$)", "\t" );
71 sId = sId.GetToken( 0, '\t' );
72 break;
73
74 case GSI_FILE_L10NFRAMEWORK:
75 sId = rLine.GetToken( 0, '\t' );
76 sId += "\t";
77 sId += rLine.GetToken( 1, '\t' );
78 sId += "\t";
79 sId += rLine.GetToken( 4, '\t' );
80 sId += "\t";
81 sId += rLine.GetToken( 5, '\t' );
82 break;
83 }
84 return sId;
85 }
86
87 /*****************************************************************************/
GetGSILineLangId(const ByteString & rLine,sal_uInt16 nFileType)88 ByteString GetGSILineLangId( const ByteString &rLine, sal_uInt16 nFileType )
89 /*****************************************************************************/
90 {
91 ByteString sLangId;
92 switch ( nFileType ) {
93 case GSI_FILE_OLDSTYLE:
94 sLangId = rLine;
95 sLangId.SearchAndReplaceAll( "($$)", "\t" );
96 sLangId = sLangId.GetToken( 2, '\t' );
97 break;
98
99 case GSI_FILE_L10NFRAMEWORK:
100 sLangId = rLine.GetToken( 9, '\t' );
101 break;
102 }
103 return sLangId;
104 }
105
106 /*****************************************************************************/
ConvertGSILine(sal_Bool bToUTF8,ByteString & rLine,rtl_TextEncoding nEncoding,sal_uInt16 nFileType)107 void ConvertGSILine( sal_Bool bToUTF8, ByteString &rLine,
108 rtl_TextEncoding nEncoding, sal_uInt16 nFileType )
109 /*****************************************************************************/
110 {
111 switch ( nFileType ) {
112 case GSI_FILE_OLDSTYLE:
113 if ( bToUTF8 )
114 rLine = UTF8Converter::ConvertToUTF8( rLine, nEncoding );
115 else
116 rLine = UTF8Converter::ConvertFromUTF8( rLine, nEncoding );
117 break;
118
119 case GSI_FILE_L10NFRAMEWORK: {
120 ByteString sConverted;
121 for ( sal_uInt16 i = 0; i < rLine.GetTokenCount( '\t' ); i++ ) {
122 ByteString sToken = rLine.GetToken( i, '\t' );
123 if (( i > 9 ) && ( i < 14 )) {
124 if( bToUTF8 )
125 sToken = UTF8Converter::ConvertToUTF8( sToken, nEncoding );
126 else
127 sToken = UTF8Converter::ConvertFromUTF8( sToken, nEncoding );
128 }
129 if ( i )
130 sConverted += "\t";
131 sConverted += sToken;
132 }
133 rLine = sConverted;
134 }
135 break;
136 }
137 }
138
/*****************************************************************************/
// Prints the gsiconv usage text (syntax, switches, accepted charsets and
// language ids) to stdout.
//
// The charset list includes MS_1257 and UTF8, both of which the command line
// parser in main() accepts.
void Help()
/*****************************************************************************/
{
    static const char sHelpText[] =
        "\n"
        "gsiconv (c)1999 by StarOffice Entwicklungs GmbH\n"
        "===============================================\n"
        "\n"
        "gsiconv converts strings in GSI-Files (Gutschmitt Interface) from or to UTF-8\n"
        "\n"
        "Syntax: gsiconv (-t|-f langid charset)|(-p n) filename\n"
        "Switches: -t => conversion from charset to UTF-8\n"
        " -f => conversion from UTF-8 to charset\n"
        " -p n => creates several files with ca. n lines\n"
        "\n"
        "Allowed charsets:\n"
        " MS_932 => Japanese\n"
        " MS_936 => Chinese Simplified\n"
        " MS_949 => Korean\n"
        " MS_950 => Chinese Traditional\n"
        " MS_1250 => East Europe\n"
        " MS_1251 => Cyrillic\n"
        " MS_1252 => West Europe\n"
        " MS_1253 => Greek\n"
        " MS_1254 => Turkish\n"
        " MS_1255 => Hebrew\n"
        " MS_1256 => Arabic\n"
        " MS_1257 => Baltic\n"
        " UTF8 => UTF-8\n"
        "\n"
        "Allowed langids:\n"
        " 1 => ENGLISH_US\n"
        " 3 => PORTUGUESE \n"
        " 4 => GERMAN_DE (new german style)\n"
        " 7 => RUSSIAN\n"
        " 30 => GREEK\n"
        " 31 => DUTCH\n"
        " 33 => FRENCH\n"
        " 34 => SPANISH\n"
        " 35 => FINNISH\n"
        " 36 => HUNGARIAN\n"
        " 39 => ITALIAN\n"
        " 42 => CZECH\n"
        " 44 => ENGLISH (UK)\n"
        " 45 => DANISH\n"
        " 46 => SWEDISH\n"
        " 47 => NORWEGIAN\n"
        " 49 => GERMAN (old german style)\n"
        " 55 => PORTUGUESE_BRAZILIAN\n"
        " 81 => JAPANESE\n"
        " 82 => KOREAN\n"
        " 86 => CHINESE_SIMPLIFIED\n"
        " 88 => CHINESE_TRADITIONAL\n"
        " 90 => TURKISH\n"
        " 96 => ARABIC\n"
        " 97 => HEBREW\n"
        "\n";

    // "%s" keeps the text out of the format-string position.
    fprintf( stdout, "%s", sHelpText );
}
195
196 /*****************************************************************************/
197 #if defined(UNX) || defined(OS2)
main(int argc,char * argv[])198 int main( int argc, char *argv[] )
199 #else
200 int _cdecl main( int argc, char *argv[] )
201 #endif
202 /*****************************************************************************/
203 {
204 if (( argc != 5 ) && ( argc != 4 )) {
205 Help();
206 exit ( 0 );
207 }
208
209 if ( argc == 4 ) {
210 if ( ByteString( argv[ 1 ] ) == "-p" ) {
211
212 DirEntry aSource = DirEntry( String( argv[ 3 ], RTL_TEXTENCODING_ASCII_US ));
213 if ( !aSource.Exists()) {
214 fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
215 exit ( 2 );
216 }
217
218 DirEntry aOutput( aSource );
219
220 String sBase = aOutput.GetBase();
221 String sExt = aOutput.GetExtension();
222
223 String sGSI( argv[ 3 ], RTL_TEXTENCODING_ASCII_US );
224 SvFileStream aGSI( sGSI, STREAM_STD_READ );
225 if ( !aGSI.IsOpen()) {
226 fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
227 exit ( 3 );
228 }
229
230 sal_uInt16 nFileType( GetGSIFileType( aGSI ));
231
232 sal_uLong nMaxLines = (sal_uLong) ByteString( argv[ 2 ] ).ToInt64();
233 if ( !nMaxLines ) {
234 fprintf( stderr, "\nERROR: Linecount must be at least 1!\n\n" );
235 exit ( 3 );
236 }
237
238 ByteString sGSILine;
239 ByteString sOldId;
240 sal_uLong nLine = 0;
241 sal_uLong nOutputFile = 1;
242
243 String sOutput( sBase );
244 sOutput += String( "_", RTL_TEXTENCODING_ASCII_US );
245 sOutput += String::CreateFromInt64( nOutputFile );
246 if ( sExt.Len()) {
247 sOutput += String( ".", RTL_TEXTENCODING_ASCII_US );
248 sOutput += sExt;
249 }
250 nOutputFile ++;
251
252 aOutput.SetName( sOutput );
253 SvFileStream aOutputStream( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC );
254
255 while ( !aGSI.IsEof()) {
256
257 aGSI.ReadLine( sGSILine );
258 ByteString sId( GetGSILineId( sGSILine, nFileType ));
259
260 nLine++;
261
262 if (( nLine >= nMaxLines ) && ( sId != sOldId )) {
263 aOutputStream.Close();
264
265 ByteString sText( aOutput.GetFull(), gsl_getSystemTextEncoding());
266 sText += " with ";
267 sText += ByteString::CreateFromInt64( nLine );
268 sText += " lines written.";
269
270 fprintf( stdout, "%s\n", sText.GetBuffer());
271 String sOutput1( sBase );
272 sOutput1 += String( "_", RTL_TEXTENCODING_ASCII_US );
273 sOutput1 += String::CreateFromInt64( nOutputFile );
274 if ( sExt.Len()) {
275 sOutput1 += String( ".", RTL_TEXTENCODING_ASCII_US );
276 sOutput1 += sExt;
277 }
278 nOutputFile ++;
279
280 aOutput.SetName( sOutput1 );
281
282 aOutputStream.Open( aOutput.GetFull(), STREAM_STD_WRITE | STREAM_TRUNC );
283 nLine = 0;
284 }
285
286 aOutputStream.WriteLine( sGSILine );
287
288 sOldId = sId;
289 }
290
291 aGSI.Close();
292 aOutputStream.Close();
293
294 ByteString sText( aOutput.GetFull(), RTL_TEXTENCODING_ASCII_US );
295 sText += " with ";
296 sText += ByteString::CreateFromInt64( nLine );
297 sText += " lines written.";
298 }
299 else {
300 Help();
301 exit( 1 );
302 }
303 }
304 else {
305 if ( ByteString( argv[ 1 ] ) == "-t" || ByteString( argv[ 1 ] ) == "-f" ) {
306 rtl_TextEncoding nEncoding;
307
308 ByteString sCurLangId( argv[ 2 ] );
309
310 ByteString sCharset( argv[ 3 ] );
311 sCharset.ToUpperAscii();
312
313 if ( sCharset == "MS_932" ) nEncoding = RTL_TEXTENCODING_MS_932;
314 else if ( sCharset == "MS_936" ) nEncoding = RTL_TEXTENCODING_MS_936;
315 else if ( sCharset == "MS_949" ) nEncoding = RTL_TEXTENCODING_MS_949;
316 else if ( sCharset == "MS_950" ) nEncoding = RTL_TEXTENCODING_MS_950;
317 else if ( sCharset == "MS_1250" ) nEncoding = RTL_TEXTENCODING_MS_1250;
318 else if ( sCharset == "MS_1251" ) nEncoding = RTL_TEXTENCODING_MS_1251;
319 else if ( sCharset == "MS_1252" ) nEncoding = RTL_TEXTENCODING_MS_1252;
320 else if ( sCharset == "MS_1253" ) nEncoding = RTL_TEXTENCODING_MS_1253;
321 else if ( sCharset == "MS_1254" ) nEncoding = RTL_TEXTENCODING_MS_1254;
322 else if ( sCharset == "MS_1255" ) nEncoding = RTL_TEXTENCODING_MS_1255;
323 else if ( sCharset == "MS_1256" ) nEncoding = RTL_TEXTENCODING_MS_1256;
324 else if ( sCharset == "MS_1257" ) nEncoding = RTL_TEXTENCODING_MS_1257;
325 else if ( sCharset == "UTF8" ) nEncoding = RTL_TEXTENCODING_UTF8;
326
327 else {
328 Help();
329 exit ( 1 );
330 }
331
332 DirEntry aSource = DirEntry( String( argv[ 4 ], RTL_TEXTENCODING_ASCII_US ));
333 if ( !aSource.Exists()) {
334 fprintf( stderr, "\nERROR: GSI-File %s not found!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
335 exit ( 2 );
336 }
337
338 String sGSI( argv[ 4 ], RTL_TEXTENCODING_ASCII_US );
339 SvFileStream aGSI( sGSI, STREAM_STD_READ );
340 if ( !aGSI.IsOpen()) {
341 fprintf( stderr, "\nERROR: Could not open GSI-File %s!\n\n", ByteString( argv[ 3 ] ).GetBuffer());
342 exit ( 3 );
343 }
344 sal_uInt16 nFileType( GetGSIFileType( aGSI ));
345
346 ByteString sGSILine;
347 while ( !aGSI.IsEof()) {
348
349 aGSI.ReadLine( sGSILine );
350 ByteString sLangId( GetGSILineLangId( sGSILine, nFileType ));
351 if ( sLangId == sCurLangId )
352 ConvertGSILine(( ByteString( argv[ 1 ] ) == "-t" ), sGSILine, nEncoding, nFileType );
353
354 fprintf( stdout, "%s\n", sGSILine.GetBuffer());
355 }
356
357 aGSI.Close();
358 }
359 else {
360 Help();
361 exit( 1 );
362 }
363 }
364 return 0;
365 }
366