1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_i18npool.hxx"
26
27 #include <stdio.h>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <sal/main.h>
31 #include <sal/types.h>
32 #include <rtl/strbuf.hxx>
33 #include <rtl/ustring.hxx>
34
35 #include <vector>
36
37 using namespace ::rtl;
38
// Table generators, defined further down in this file. Each one reads
// dictionary text from sfp and writes generated C++ source to cfp.
void make_hhc_char(FILE* sfp, FILE* cfp);   // Hangul/Hanja character conversion
void make_stc_char(FILE* sfp, FILE* cfp);   // Simplified/Traditional Chinese character conversion
void make_stc_word(FILE* sfp, FILE* cfp);   // Simplified/Traditional Chinese word conversion
42
43 /* Main Procedure */
44
SAL_IMPLEMENT_MAIN_WITH_ARGS(argc,argv)45 SAL_IMPLEMENT_MAIN_WITH_ARGS(argc, argv)
46 {
47 FILE *sfp, *cfp;
48
49 if (argc < 4) exit(-1);
50
51
52 sfp = fopen(argv[2], "rb"); // open the source file for read;
53 if (sfp == NULL)
54 {
55 printf("Open the dictionary source file failed.");
56 return -1;
57 }
58
59 // create the C source file to write
60 cfp = fopen(argv[3], "wb");
61 if (cfp == NULL) {
62 fclose(sfp);
63 printf("Can't create the C source file.");
64 return -1;
65 }
66
67 fprintf(cfp, "/*\n");
68 fprintf(cfp, " * Copyright(c) 1999 - 2000, Sun Microsystems, Inc.\n");
69 fprintf(cfp, " * All Rights Reserved.\n");
70 fprintf(cfp, " */\n\n");
71 fprintf(cfp, "/* !!!The file is generated automatically. DONOT edit the file manually!!! */\n\n");
72 fprintf(cfp, "#include <sal/types.h>\n");
73 fprintf(cfp, "#include <textconversion.hxx>\n");
74 fprintf(cfp, "\nextern \"C\" {\n");
75
76 if (strcmp(argv[1], "hhc_char") == 0)
77 make_hhc_char(sfp, cfp);
78 else if (strcmp(argv[1], "stc_char") == 0)
79 make_stc_char(sfp, cfp);
80 else if (strcmp(argv[1], "stc_word") == 0)
81 make_stc_word(sfp, cfp);
82
83 fprintf (cfp, "}\n");
84
85 fclose(sfp);
86 fclose(cfp);
87
88 return 0;
89 } // end of main
90
91 // Hangul/Hanja character conversion
make_hhc_char(FILE * sfp,FILE * cfp)92 void make_hhc_char(FILE *sfp, FILE *cfp)
93 {
94 sal_Int32 count, address, i, j, k;
95 sal_Unicode Hanja2HangulData[0x10000];
96 for (i = 0; i < 0x10000; i++) {
97 Hanja2HangulData[i] = 0;
98 }
99 sal_uInt16 Hangul2HanjaData[10000][3];
100
101 // generate main dict. data array
102 fprintf(cfp, "\nstatic const sal_Unicode Hangul2HanjaData[] = {");
103
104 sal_Char Cstr[1024];
105 count = 0;
106 address = 0;
107 while (fgets(Cstr, 1024, sfp)) {
108 // input file is in UTF-8 encoding (Hangul:Hanja)
109 // don't convert last new line character to Ostr.
110 OUString Ostr((const sal_Char *)Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8);
111 const sal_Unicode *Ustr = Ostr.getStr();
112 sal_Int32 len = Ostr.getLength();
113
114 Hangul2HanjaData[count][0] = Ustr[0];
115 Hangul2HanjaData[count][1] = sal::static_int_cast<sal_uInt16>( address );
116 Hangul2HanjaData[count][2] = sal::static_int_cast<sal_uInt16>( len - 2 );
117 count++;
118
119 for (i = 2; i < len; i++) {
120 Hanja2HangulData[Ustr[i]] = Ustr[0];
121 if (address++ % 16 == 0)
122 fprintf(cfp, "\n\t");
123 fprintf(cfp, "0x%04x, ", Ustr[i]);
124 }
125 }
126 fprintf(cfp, "\n};\n");
127
128 fprintf(cfp, "\nstatic const com::sun::star::i18n::Hangul_Index Hangul2HanjaIndex[] = {\n");
129 for (i = 0; i < count; i++)
130 fprintf(cfp, "\t{ 0x%04x, 0x%04x, 0x%02x },\n",
131 Hangul2HanjaData[i][0],
132 Hangul2HanjaData[i][1],
133 Hangul2HanjaData[i][2]);
134 fprintf(cfp, "};\n");
135
136 fprintf(cfp, "\nstatic const sal_uInt16 Hanja2HangulIndex[] = {");
137
138 address=0;
139 for (i = 0; i < 0x10; i++) {
140 fprintf(cfp, "\n\t");
141 for (j = 0; j < 0x10; j++) {
142 for (k = 0; k < 0x100; k++) {
143 if (Hanja2HangulData[((i*0x10)+j)*0x100+k] != 0)
144 break;
145 }
146 fprintf(
147 cfp, "0x%04lx, ",
148 sal::static_int_cast< unsigned long >(
149 k < 0x100 ? (address++)*0x100 : 0xFFFF));
150 }
151 }
152 fprintf(cfp, "\n};\n");
153
154 fprintf(cfp, "\nstatic const sal_Unicode Hanja2HangulData[] = {");
155
156 for (i = 0; i < 0x100; i++) {
157 for (j = 0; j < 0x100; j++) {
158 if (Hanja2HangulData[i*0x100+j] != 0)
159 break;
160 }
161 if (j < 0x100) {
162 for (j = 0; j < 0x10; j++) {
163 fprintf(cfp, "\n\t");
164 for (k = 0; k < 0x10; k++) {
165 sal_Unicode c = Hanja2HangulData[((i*0x10+j)*0x10)+k];
166 fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF);
167 }
168 }
169 }
170 }
171 fprintf(cfp, "\n};\n");
172
173 // create function to return arrays
174 fprintf (cfp, "\tconst sal_Unicode* getHangul2HanjaData() { return Hangul2HanjaData; }\n");
175 fprintf (cfp, "\tconst com::sun::star::i18n::Hangul_Index* getHangul2HanjaIndex() { return Hangul2HanjaIndex; }\n");
176 fprintf (cfp, "\tsal_Int16 getHangul2HanjaIndexCount() { return sizeof(Hangul2HanjaIndex) / sizeof(com::sun::star::i18n::Hangul_Index); }\n");
177 fprintf (cfp, "\tconst sal_uInt16* getHanja2HangulIndex() { return Hanja2HangulIndex; }\n");
178 fprintf (cfp, "\tconst sal_Unicode* getHanja2HangulData() { return Hanja2HangulData; }\n");
179 }
180
181 // Simplified/Traditional Chinese character conversion
make_stc_char(FILE * sfp,FILE * cfp)182 void make_stc_char(FILE *sfp, FILE *cfp)
183 {
184 sal_Int32 address, i, j, k;
185 sal_Unicode SChinese2TChineseData[0x10000];
186 sal_Unicode SChinese2VChineseData[0x10000];
187 sal_Unicode TChinese2SChineseData[0x10000];
188 for (i = 0; i < 0x10000; i++) {
189 SChinese2TChineseData[i] = 0;
190 SChinese2VChineseData[i] = 0;
191 TChinese2SChineseData[i] = 0;
192 }
193
194 sal_Char Cstr[1024];
195 while (fgets(Cstr, 1024, sfp)) {
196 // input file is in UTF-8 encoding (SChinese:TChinese)
197 // don't convert last new line character to Ostr.
198 OUString Ostr((const sal_Char *)Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8);
199 const sal_Unicode *Ustr = Ostr.getStr();
200 sal_Int32 len = Ostr.getLength();
201 if (Ustr[1] == sal_Unicode('v'))
202 SChinese2VChineseData[Ustr[0]] = Ustr[2];
203 else {
204 SChinese2TChineseData[Ustr[0]] = Ustr[2];
205 if (SChinese2VChineseData[Ustr[0]] == 0)
206 SChinese2VChineseData[Ustr[0]] = Ustr[2];
207 }
208 for (i = 2; i < len; i++)
209 TChinese2SChineseData[Ustr[i]] = Ustr[0];
210 }
211
212 fprintf(cfp, "\nstatic const sal_uInt16 STC_CharIndex_S2T[] = {");
213
214 address=0;
215 for (i = 0; i < 0x10; i++) {
216 fprintf(cfp, "\n\t");
217 for (j = 0; j < 0x10; j++) {
218 for (k = 0; k < 0x100; k++) {
219 if (SChinese2TChineseData[((i*0x10)+j)*0x100+k] != 0)
220 break;
221 }
222 fprintf(
223 cfp, "0x%04lx, ",
224 sal::static_int_cast< unsigned long >(
225 k < 0x100 ? (address++)*0x100 : 0xFFFF));
226 }
227 }
228 fprintf(cfp, "\n};\n");
229
230 fprintf(cfp, "\nstatic const sal_Unicode STC_CharData_S2T[] = {");
231
232 for (i = 0; i < 0x100; i++) {
233 for (j = 0; j < 0x100; j++) {
234 if (SChinese2TChineseData[i*0x100+j] != 0)
235 break;
236 }
237 if (j < 0x100) {
238 for (j = 0; j < 0x10; j++) {
239 fprintf(cfp, "\n\t");
240 for (k = 0; k < 0x10; k++) {
241 sal_Unicode c = SChinese2TChineseData[((i*0x10+j)*0x10)+k];
242 fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF);
243 }
244 }
245 }
246 }
247 fprintf(cfp, "\n};\n");
248
249 fprintf(cfp, "\nstatic const sal_uInt16 STC_CharIndex_S2V[] = {");
250
251 address=0;
252 for (i = 0; i < 0x10; i++) {
253 fprintf(cfp, "\n\t");
254 for (j = 0; j < 0x10; j++) {
255 for (k = 0; k < 0x100; k++) {
256 if (SChinese2VChineseData[((i*0x10)+j)*0x100+k] != 0)
257 break;
258 }
259 fprintf(
260 cfp, "0x%04lx, ",
261 sal::static_int_cast< unsigned long >(
262 k < 0x100 ? (address++)*0x100 : 0xFFFF));
263 }
264 }
265 fprintf(cfp, "\n};\n");
266
267 fprintf(cfp, "\nstatic const sal_Unicode STC_CharData_S2V[] = {");
268
269 for (i = 0; i < 0x100; i++) {
270 for (j = 0; j < 0x100; j++) {
271 if (SChinese2VChineseData[i*0x100+j] != 0)
272 break;
273 }
274 if (j < 0x100) {
275 for (j = 0; j < 0x10; j++) {
276 fprintf(cfp, "\n\t");
277 for (k = 0; k < 0x10; k++) {
278 sal_Unicode c = SChinese2VChineseData[((i*0x10+j)*0x10)+k];
279 fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF);
280 }
281 }
282 }
283 }
284 fprintf(cfp, "\n};\n");
285
286 fprintf(cfp, "\nstatic const sal_uInt16 STC_CharIndex_T2S[] = {");
287
288 address=0;
289 for (i = 0; i < 0x10; i++) {
290 fprintf(cfp, "\n\t");
291 for (j = 0; j < 0x10; j++) {
292 for (k = 0; k < 0x100; k++) {
293 if (TChinese2SChineseData[((i*0x10)+j)*0x100+k] != 0)
294 break;
295 }
296 fprintf(
297 cfp, "0x%04lx, ",
298 sal::static_int_cast< unsigned long >(
299 k < 0x100 ? (address++)*0x100 : 0xFFFF));
300 }
301 }
302 fprintf(cfp, "\n};\n");
303
304 fprintf(cfp, "\nstatic const sal_Unicode STC_CharData_T2S[] = {");
305
306 for (i = 0; i < 0x100; i++) {
307 for (j = 0; j < 0x100; j++) {
308 if (TChinese2SChineseData[i*0x100+j] != 0)
309 break;
310 }
311 if (j < 0x100) {
312 for (j = 0; j < 0x10; j++) {
313 fprintf(cfp, "\n\t");
314 for (k = 0; k < 0x10; k++) {
315 sal_Unicode c = TChinese2SChineseData[((i*0x10+j)*0x10)+k];
316 fprintf(cfp, "0x%04x, ", c ? c : 0xFFFF);
317 }
318 }
319 }
320 }
321 fprintf(cfp, "\n};\n");
322
323 // create function to return arrays
324 fprintf (cfp, "\tconst sal_uInt16* getSTC_CharIndex_S2T() { return STC_CharIndex_S2T; }\n");
325 fprintf (cfp, "\tconst sal_Unicode* getSTC_CharData_S2T() { return STC_CharData_S2T; }\n");
326 fprintf (cfp, "\tconst sal_uInt16* getSTC_CharIndex_S2V() { return STC_CharIndex_S2V; }\n");
327 fprintf (cfp, "\tconst sal_Unicode* getSTC_CharData_S2V() { return STC_CharData_S2V; }\n");
328 fprintf (cfp, "\tconst sal_uInt16* getSTC_CharIndex_T2S() { return STC_CharIndex_T2S; }\n");
329 fprintf (cfp, "\tconst sal_Unicode* getSTC_CharData_T2S() { return STC_CharData_T2S; }\n");
330 }
331
332
333 typedef struct {
334 sal_uInt16 address;
335 sal_Int32 len;
336 sal_Unicode *data;
337 } Index;
338
339 extern "C" {
Index_comp(const void * s1,const void * s2)340 int Index_comp(const void* s1, const void* s2)
341 {
342 Index *p1 = (Index*)s1, *p2 = (Index*)s2;
343 int result = p1->len - p2->len;
344 for (int i = 0; result == 0 && i < p1->len; i++)
345 result = *(p1->data+i) - *(p2->data+i);
346 return result;
347 }
348 }
349
350 // Simplified/Traditional Chinese word conversion
make_stc_word(FILE * sfp,FILE * cfp)351 void make_stc_word(FILE *sfp, FILE *cfp)
352 {
353 sal_Int32 count, i, length;
354 sal_Unicode STC_WordData[0x10000];
355 std::vector<Index> STC_WordEntry_S2T(0x10000);
356 std::vector<Index> STC_WordEntry_T2S(0x10000);
357 sal_Int32 count_S2T = 0, count_T2S = 0;
358 sal_Int32 line = 0, char_total = 0;
359 sal_Char Cstr[1024];
360
361 while (fgets(Cstr, 1024, sfp)) {
362 // input file is in UTF-8 encoding (SChinese:TChinese)
363 // don't convert last new line character to Ostr.
364 OUString Ostr((const sal_Char *)Cstr, strlen(Cstr) - 1, RTL_TEXTENCODING_UTF8);
365 sal_Int32 len = Ostr.getLength();
366 if (char_total + len + 1 > 0xFFFF) {
367 fprintf(stderr, "Word Dictionary stc_word.dic is too big (line %ld)", sal::static_int_cast< long >(line));
368 return;
369 }
370 sal_Int32 sep=-1, eq=-1, gt=-1, lt=-1;
371 if (((sep = eq = Ostr.indexOf(sal_Unicode('='))) > 0) ||
372 ((sep = gt = Ostr.indexOf(sal_Unicode('>'))) > 0) ||
373 ((sep = lt = Ostr.indexOf(sal_Unicode('<'))) > 0)) {
374
375 if (eq > 0 || gt > 0) {
376 STC_WordEntry_S2T[count_S2T].address = sal::static_int_cast<sal_uInt16>( char_total );
377 STC_WordEntry_S2T[count_S2T].len = sep;
378 STC_WordEntry_S2T[count_S2T++].data = &STC_WordData[char_total];
379 }
380 if (eq > 0 || lt > 0) {
381 STC_WordEntry_T2S[count_T2S].address = sal::static_int_cast<sal_uInt16>( char_total + sep + 1 );
382 STC_WordEntry_T2S[count_T2S].len = len - sep - 1;
383 STC_WordEntry_T2S[count_T2S++].data = &STC_WordData[char_total + sep + 1];
384 }
385 for (i = 0; i < len; i++)
386 STC_WordData[char_total++] = (i == sep) ? 0 : Ostr[i];
387 STC_WordData[char_total++] = 0;
388 } else {
389 fprintf(stderr, "Invalid entry in stc_word.dic (line %ld)", sal::static_int_cast< long >(line));
390 return;
391 }
392 line++;
393 }
394
395 if (char_total > 0) {
396 fprintf(cfp, "\nstatic const sal_Unicode STC_WordData[] = {");
397 for (i = 0; i < char_total; i++) {
398 if (i % 32 == 0) fprintf(cfp, "\n\t");
399 fprintf(cfp, "0x%04x, ", STC_WordData[i]);
400 }
401 fprintf(cfp, "\n};\n");
402
403 fprintf(cfp, "\nstatic sal_Int32 STC_WordData_Count = %ld;\n", sal::static_int_cast< long >(char_total));
404
405 // create function to return arrays
406 fprintf (cfp, "\tconst sal_Unicode* getSTC_WordData(sal_Int32& count) { count = STC_WordData_Count; return STC_WordData; }\n");
407 } else {
408 fprintf (cfp, "\tconst sal_Unicode* getSTC_WordData(sal_Int32& count) { count = 0; return NULL; }\n");
409 }
410
411 sal_uInt16 STC_WordIndex[0x100];
412
413 if (count_S2T > 0) {
414 qsort(&STC_WordEntry_S2T[0], count_S2T, sizeof(Index), Index_comp);
415
416 fprintf(cfp, "\nstatic const sal_uInt16 STC_WordEntry_S2T[] = {");
417 count = 0;
418 length = 0;
419 for (i = 0; i < count_S2T; i++) {
420 if (i % 32 == 0) fprintf(cfp, "\n\t");
421 fprintf(cfp, "0x%04x, ", STC_WordEntry_S2T[i].address);
422 if (STC_WordEntry_S2T[i].len != length) {
423 length = STC_WordEntry_S2T[i].len;
424 while (count <= length)
425 STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i);
426 }
427 }
428 fprintf(cfp, "\n};\n");
429 STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i);
430
431 fprintf(cfp, "\nstatic const sal_uInt16 STC_WordIndex_S2T[] = {");
432 for (i = 0; i < count; i++) {
433 if (i % 16 == 0) fprintf(cfp, "\n\t");
434 fprintf(cfp, "0x%04x, ", STC_WordIndex[i]);
435 }
436 fprintf(cfp, "\n};\n");
437
438 fprintf(cfp, "\nstatic sal_Int32 STC_WordIndex_S2T_Count = %ld;\n", sal::static_int_cast< long >(length));
439 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_S2T() { return STC_WordEntry_S2T; }\n");
440 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_S2T(sal_Int32& count) { count = STC_WordIndex_S2T_Count; return STC_WordIndex_S2T; }\n");
441 } else {
442 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_S2T() { return NULL; }\n");
443 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_S2T(sal_Int32& count) { count = 0; return NULL; }\n");
444 }
445
446 if (count_T2S > 0) {
447 qsort(&STC_WordEntry_T2S[0], count_T2S, sizeof(Index), Index_comp);
448
449 fprintf(cfp, "\nstatic const sal_uInt16 STC_WordEntry_T2S[] = {");
450 count = 0;
451 length = 0;
452 for (i = 0; i < count_T2S; i++) {
453 if (i % 32 == 0) fprintf(cfp, "\n\t");
454 fprintf(cfp, "0x%04x, ", STC_WordEntry_T2S[i].address);
455 if (STC_WordEntry_T2S[i].len != length) {
456 length = STC_WordEntry_T2S[i].len;
457 while (count <= length)
458 STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i);
459 }
460 }
461 STC_WordIndex[count++] = sal::static_int_cast<sal_uInt16>(i);
462 fprintf(cfp, "\n};\n");
463
464 fprintf(cfp, "\nstatic const sal_uInt16 STC_WordIndex_T2S[] = {");
465 for (i = 0; i < count; i++) {
466 if (i % 16 == 0) fprintf(cfp, "\n\t");
467 fprintf(cfp, "0x%04x, ", STC_WordIndex[i]);
468 }
469 fprintf(cfp, "\n};\n");
470
471 fprintf(cfp, "\nstatic sal_Int32 STC_WordIndex_T2S_Count = %ld;\n\n", sal::static_int_cast< long >(length));
472 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_T2S() { return STC_WordEntry_T2S; }\n");
473 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_T2S(sal_Int32& count) { count = STC_WordIndex_T2S_Count; return STC_WordIndex_T2S; }\n");
474 } else {
475 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordEntry_T2S() { return NULL; }\n");
476 fprintf (cfp, "\tconst sal_uInt16* getSTC_WordIndex_T2S(sal_Int32& count) { count = 0; return NULL; }\n");
477 }
478 }
479
480