1#!/usr/bin/perl
2#*************************************************************************
3#
4# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5#
6# Copyright 2000, 2010 Oracle and/or its affiliates.
7#
8# OpenOffice.org - a multi-platform office productivity suite
9#
10# This file is part of OpenOffice.org.
11#
12# OpenOffice.org is free software: you can redistribute it and/or modify
13# it under the terms of the GNU Lesser General Public License version 3
14# only, as published by the Free Software Foundation.
15#
16# OpenOffice.org is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU Lesser General Public License version 3 for more details
20# (a copy is included in the LICENSE file that accompanied this code).
21#
22# You should have received a copy of the GNU Lesser General Public License
23# version 3 along with OpenOffice.org.  If not, see
24# <http://www.openoffice.org/license.html>
25# for a copy of the LGPLv3 License.
26#
27#*************************************************************************
28
29# The following files must be available in a ./input subdir:
30
31# <http://www.unicode.org/Public/UNIDATA/Unihan.txt>:
32#  "Unicode version: 3.1.1    Table version: 1.1    Date: 28 June 2001"
33#  contains descriptions for:
34#   U+3400..4DFF CJK Unified Ideographs Extension A
35#   U+4E00..9FFF CJK Unified Ideographs
36#   U+F900..FAFF CJK Compatibility Ideographs
37#   U+20000..2F7FF CJK Unified Ideographs Extension B
38#   U+2F800..2FFFF CJK Compatibility Ideographs Supplement
39
40# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/CNS11643.TXT>:
41#  "Unicode version: 1.1    Table version: 0.0d1    Date: 21 October 1994"
42#  contains mappings for CNS 11643-1986
43
44# <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/ftp/CJKtable/Uni2CNS.Z>:
45#  "Unicode version: 1.1    Table version: 0.49    Date: 26 March 1998"
46#  contains mappings for CNS 11643-1992 that are incompatible with
47#   CNS11643.TXT
48
# Base name of this table: lc($id) names the generated ".tab" file and the
# ".pl" file the licence header is copied from, and $id itself is embedded in
# the names of the C arrays that are written out.
$id = 'Cns116431992';
50
# Returns true if the given number is a code point this script accepts:
# within 0..0x10FFFF, not in 0xD800..0xDFFF, not in 0xFDD0..0xFDEF, and with
# low 16 bits below 0xFFFE.
sub isValidUtf32
{
    my ($code_point) = @_;
    return !(   $code_point < 0
             || $code_point > 0x10FFFF
             || ($code_point >= 0xD800 && $code_point <= 0xDFFF)
             || ($code_point >= 0xFDD0 && $code_point <= 0xFDEF)
             || ($code_point & 0xFFFF) >= 0xFFFE);
}
59
# Formats a code point as "U+" followed by at least four upper-case
# hexadecimal digits.  Note that the "U+" prefix is part of the result.
sub printUtf32
{
    my ($code_point) = @_;
    my $hex_digits = sprintf("%04X", $code_point);
    return "U+" . $hex_digits;
}
65
# Returns true if (plane, row, column) lies inside the accepted CNS 11643-1992
# code space: plane 1..16, row 1..94, column 1..94.
sub isValidCns116431992
{
    my ($plane, $row, $column) = @_;
    return !(   $plane  < 1 || $plane  > 16
             || $row    < 1 || $row    > 94
             || $column < 1 || $column > 94);
}
75
# Formats a CNS 11643-1992 position as "plane-row/column", with row and
# column zero-padded to two decimal digits.
sub printCns116431992
{
    my ($plane, $row, $column) = @_;
    my $position = sprintf("%02d/%02d", $row, $column);
    return sprintf("%d", $plane) . "-" . $position;
}
83
# Formats a usage statistic as "USED/SPACE bytes (PERCENT%)", with the
# percentage printed to one decimal place.
#
#   $_[0]  number of bytes actually used
#   $_[1]  number of bytes available
#
# When the available space is zero (for example because no mapping was read
# and every sum stayed at 0) the percentage is reported as 0.0 instead of
# aborting the script with "Illegal division by zero".
sub printStats
{
    my ($used, $space) = @_;
    my $percentage = $space ? $used * 100 / $space : 0;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percentage);
}
93
# Returns the blank padding needed to start a table line in the middle of a
# row of cells, so that the first printed cell lines up under its column.
#
#   $_[0]  width of one cell in characters
#   $_[1]  number of cells per output line
#   $_[2]  index of the first cell that will actually be printed
#
# The result consists of one cell width of spaces for every cell between the
# start of the line containing $_[2] and $_[2] itself.  The arguments are
# expected to be integers, as they are at every call site in this script.
#
# Only lexical variables are used, so calling this function no longer
# overwrites the package globals $output, $i and $j.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;
    my $line_start = int($end / $columns_per_line) * $columns_per_line;
    # Nested repetition yields "" for any non-positive count, just like the
    # loops it replaces.
    return (" " x $column_width) x ($end - $line_start);
}
111
# Per-input-file counters of the mappings each file was the first to supply;
# they are reported in the summary line printed at the end of the run.
($count_Unihan_txt, $count_CNS11643_TXT, $count_Uni2CNS) = (0, 0, 0);
115
# Read the kCNS1992 and kIRG_TSource fields of input/Unihan.txt into @cns_map
# (indexed by plane, row, column; the value is the Unicode code point), mark
# each plane that received a mapping in @cns_plane_used, and count newly
# added mappings in $count_Unihan_txt.
#
# Both fields are expected as "P-RRCC" in upper-case hexadecimal, with row
# and column stored with an offset of 0x20.  If a position already has a
# different mapping, a kCNS1992 line is fatal, whereas a kIRG_TSource line
# only prints a warning and the earlier mapping is kept.  A kCNS1992 line
# whose value has any other form is fatal ("Bad format").
if (1)
{
    $filename = "Unihan.txt";
    open(my $unihan, '<', "input/" . $filename)
        or die "Cannot read " . $filename . ": " . $!;
    while (<$unihan>)
    {
        if (/^U\+([0-9A-F]+)\t(kCNS1992|kIRG_TSource)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/)
        {
            # Copy the captures right away; they do not survive a later match.
            my $field = $2;
            my $utf32 = oct("0x" . $1);
            my $cns_plane = oct("0x" . $3);
            my $cns_row = oct("0x" . $4) - 0x20;
            my $cns_column = oct("0x" . $5) - 0x20;

            # printUtf32 already supplies the "U+" prefix.
            isValidUtf32($utf32)
                or die "Bad UTF32 char " . printUtf32($utf32);
            isValidCns116431992($cns_plane, $cns_row, $cns_column)
                or die "Bad CNS11643-1992 char "
                           . printCns116431992($cns_plane,
                                               $cns_row,
                                               $cns_column);

            my $known = $cns_map[$cns_plane][$cns_row][$cns_column];
            if (!defined($known))
            {
                $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
                $cns_plane_used[$cns_plane] = 1;
                ++$count_Unihan_txt;
            }
            elsif ($known != $utf32)
            {
                my $conflict = "Mapping "
                                   . printCns116431992($cns_plane,
                                                       $cns_row,
                                                       $cns_column)
                                   . " to "
                                   . printUtf32($known)
                                   . ", NOT "
                                   . printUtf32($utf32);
                die $conflict if $field eq "kCNS1992";
                print "WARNING!  ", $conflict, "\n";
            }
        }
        elsif (/^U\+([0-9A-F]+)\tkCNS1992\t.*$/)
        {
            die "Bad format";
        }
    }
    close($unihan);
}
198
# Read input/CNS11643.TXT and add its mappings for planes 1 and 2 to @cns_map
# (plane, row, column -> Unicode code point), marking the plane in
# @cns_plane_used and counting newly added mappings in $count_CNS11643_TXT.
#
# Every matching line is validated, whatever its plane, but only planes <= 2
# are recorded.  A mapping that contradicts one already present is fatal.
if (1)
{
    $filename = "CNS11643.TXT";
    open(my $cns_txt, '<', "input/" . $filename)
        or die "Cannot read " . $filename . ": " . $!;
    while (<$cns_txt>)
    {
        next unless /0x([0-9A-F])([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t0x([0-9A-F]+)\t\#.*$/;

        my $utf32 = oct("0x" . $4);
        my $cns_plane = oct("0x" . $1);
        # Row and column are stored with an offset of 0x20.
        my $cns_row = oct("0x" . $2) - 0x20;
        my $cns_column = oct("0x" . $3) - 0x20;

        # printUtf32 already supplies the "U+" prefix.
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        isValidCns116431992($cns_plane, $cns_row, $cns_column)
            or die "Bad CNS11643-1992 char "
                       . printCns116431992($cns_plane,
                                           $cns_row,
                                           $cns_column);

        next if $cns_plane > 2;

        my $known = $cns_map[$cns_plane][$cns_row][$cns_column];
        if (!defined($known))
        {
            $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
            $cns_plane_used[$cns_plane] = 1;
            ++$count_CNS11643_TXT;
        }
        elsif ($known != $utf32)
        {
            die "Mapping "
                    . printCns116431992($cns_plane, $cns_row, $cns_column)
                    . " to "
                    . printUtf32($known)
                    . ", NOT "
                    . printUtf32($utf32);
        }
    }
    close($cns_txt);
}
245
# Disabled by "if (0)": read input/Uni2CNS and add every mapping for a
# position that is still unmapped to @cns_map, counting the additions in
# $count_Uni2CNS.  Each plane 1 position found in the file is also printed.
if (0)
{
    $filename = "Uni2CNS";
    open(my $uni2cns, '<', "input/" . $filename)
        or die "Cannot read " . $filename . ": " . $!;
    while (<$uni2cns>)
    {
        next unless /([0-9A-F]+)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t.*$/;

        my $utf32 = oct("0x" . $1);
        my $cns_plane = oct("0x" . $2);
        # Row and column are stored with an offset of 0x20.
        my $cns_row = oct("0x" . $3) - 0x20;
        my $cns_column = oct("0x" . $4) - 0x20;

        # printUtf32 already supplies the "U+" prefix.
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        isValidCns116431992($cns_plane, $cns_row, $cns_column)
            or die "Bad CNS11643-1992 char "
                       . printCns116431992($cns_plane,
                                           $cns_row,
                                           $cns_column);

        if (!defined($cns_map[$cns_plane][$cns_row][$cns_column]))
        {
            $cns_map[$cns_plane][$cns_row][$cns_column] = $utf32;
            $cns_plane_used[$cns_plane] = 1;
            ++$count_Uni2CNS;
        }
        else
        {
            # The consistency check against the existing mapping is
            # intentionally left disabled:
#           ($cns_map[$cns_plane][$cns_row][$cns_column] == $utf32)
#               or die "Mapping "
#                          . printCns116431992($cns_plane,
#                                              $cns_row,
#                                              $cns_column)
#                          . " to "
#                          . printUtf32($cns_map[$cns_plane]
#                                               [$cns_row]
#                                               [$cns_column])
#                          . ", NOT "
#                          . printUtf32($utf32);
        }

        if ($cns_plane == 1)
        {
            print printCns116431992($cns_plane, $cns_row, $cns_column),
                  "\n";
        }
    }
    close($uni2cns);
}
294
# Build the reverse table @uni_map from @cns_map.  A code point is split into
# plane (bits 16 and up), page (bits 8..15) and index (bits 0..7); the stored
# value packs the CNS position as (plane << 16) | (row << 8) | column.
# @uni_plane_used and @uni_page_used record which planes and pages received
# at least one entry.
#
# If two CNS positions map to the same code point, the one that was seen
# first is kept, except that positions in the Fictitious Character Set
# Extensions (Lunde, p. 131) -- plane 3 from row 66, column 39 onwards -- are
# avoided whenever possible.  Every such collision is reported on stdout.
my $in_fictitious_extension = sub
{
    my ($plane, $row, $column) = @_;
    return $plane == 3 && ($row == 66 && $column > 38 || $row > 66);
};

for ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane)
{
    next unless defined($cns_plane_used[$cns_plane]);

    for ($cns_row = 1; $cns_row <= 94; ++$cns_row)
    {
        for ($cns_column = 1; $cns_column <= 94; ++$cns_column)
        {
            next unless defined($cns_map[$cns_plane][$cns_row][$cns_column]);

            $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
            $uni_plane = $utf32 >> 16;
            $uni_page = ($utf32 >> 8) & 0xFF;
            $uni_index = $utf32 & 0xFF;
            my $packed_cns
                = ($cns_plane << 16) | ($cns_row << 8) | $cns_column;

            # First mapping for this code point: simply record it.
            unless (defined($uni_plane_used[$uni_plane])
                    && defined($uni_page_used[$uni_plane][$uni_page])
                    && defined($uni_map[$uni_plane][$uni_page][$uni_index]))
            {
                $uni_map[$uni_plane][$uni_page][$uni_index] = $packed_cns;
                $uni_plane_used[$uni_plane] = 1;
                $uni_page_used[$uni_plane][$uni_page] = 1;
                next;
            }

            # Collision: unpack the position that is already recorded.
            $cns1 = $uni_map[$uni_plane][$uni_page][$uni_index];
            $cns1_plane = $cns1 >> 16;
            $cns1_row = ($cns1 >> 8) & 0xFF;
            $cns1_column = $cns1 & 0xFF;

            my $new_position
                = printCns116431992($cns_plane, $cns_row, $cns_column);
            my $old_position
                = printCns116431992($cns1_plane, $cns1_row, $cns1_column);

            if ($in_fictitious_extension->($cns_plane,
                                           $cns_row,
                                           $cns_column))
            {
                # The newcomer is the fictitious one; keep the old entry.
                print " (" . printUtf32($utf32) . " to fictious "
                          . $new_position . " ignored, favouring "
                          . $old_position . ")\n";
            }
            elsif ($in_fictitious_extension->($cns1_plane,
                                              $cns1_row,
                                              $cns1_column))
            {
                # The recorded entry is the fictitious one; replace it.
                $uni_map[$uni_plane][$uni_page][$uni_index] = $packed_cns;
                print " (" . printUtf32($utf32) . " to fictious "
                          . $old_position . " ignored, favouring "
                          . $new_position . ")\n";
            }
            else
            {
                print "WARNING!  Mapping " . printUtf32($utf32) . " to "
                          . $old_position . ", NOT " . $new_position . "\n";
            }
        }
    }
}
# Sanity check: no code point in the range U+0000..U+007F may have received a
# mapping in @uni_map.  The script aborts and names the first offender.
#
# The lookup is indexed by the loop variable $utf32.  It used to be indexed
# by $uni_index, a leftover from the loop that filled @uni_map, so the same
# unrelated slot was tested 128 times and a real mapping could go unnoticed.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for ($utf32 = 0; $utf32 <= 0x7F; ++$utf32)
    {
        if (defined($uni_map[0][0][$utf32]))
        {
            $cns = $uni_map[0][0][$utf32];
            die "Mapping "
                    . printUtf32($utf32)
                    . " to "
                    . printCns116431992($cns >> 16,
                                        ($cns >> 8) & 0xFF,
                                        $cns & 0xFF);
        }
    }
}
403
# Create (or truncate) the generated table file lc($id) . ".tab"; everything
# printed to OUT from here on ends up in it.  The three-argument form of open
# keeps the mode separate from the file name, and the system error is
# included in the failure message.
$filename = lc($id) . ".tab";
open(OUT, '>', $filename) or die "Cannot write " . $filename . ": " . $!;
406
# Copy the leading comment block of the file lc($id) . ".pl" to OUT, turned
# into a C comment:
#   - a "#!" line is skipped;
#   - the first "#***..." line becomes "/***..." and opens the comment;
#   - any later "#***..." line becomes " ***.../" (its last character is
#     replaced by "/") and closes it;
#   - any other "#" line becomes " *" followed by its text, where a single
#     blank directly after the "#" is dropped.
# Copying stops at the first line that does not start with "#".
#
# The input handle is lexical and is closed explicitly, and the loop is left
# with "last" rather than with a goto.
{
    $filename = lc($id) . ".pl";
    open(my $script, '<', $filename)
        or die "Cannot read " . $filename . ": " . $!;
    my $first_rule = 1;
    while (<$script>)
    {
        if (/^\#!.*$/)
        {
            # Interpreter line: nothing to copy.
        }
        elsif (/^\#(\*.*)$/)
        {
            if ($first_rule)
            {
                print OUT "/", $1, "\n";
                $first_rule = 0;
            }
            else
            {
                print OUT " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif (/^\# ?(.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        else
        {
            last;
        }
    }
    close($script);
}
443
# Emit the guarded include of sal/types.h, framed by one blank line on
# either side.
print OUT <<'END_OF_INCLUDE';

#ifndef _SAL_TYPES_H_
#include "sal/types.h"
#endif

END_OF_INCLUDE
449
# Emit the aImpl<id>ToUnicodeData array of 16-bit values, and record for every
# row of every used plane where its data starts (@cns_data_offsets, -1 for a
# row without mappings), how many bytes it occupies (@cns_data_space) and how
# many of those bytes carry information (@cns_data_used).
#
# Layout of one row that has mappings:
#   1. one entry holding  first | (last << 8)  -- the first and last mapped
#      column of the row;
#   2. one entry per column from first to last: the code point itself if it
#      is <= 0xFFFF, the high surrogate if it is larger, 0xffff if the
#      column is unmapped;
#   3. only if the row contains code points above 0xFFFF: one entry holding
#      the first such column, followed by one entry per column from the first
#      to the last such column: the low surrogate, or 0 where the column is
#      unmapped or its code point is <= 0xFFFF.
print OUT "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
# Number of array entries emitted so far, i.e. the index of the next entry.
$cns_data_index = 0;
for ($cns_plane = 1; $cns_plane <= 16; ++$cns_plane)
{
    if (defined($cns_plane_used[$cns_plane]))
    {
        # Per-plane totals, reported on stdout after the plane is done.
        $cns_rows = 0;
        $cns_chars = 0;
        for ($cns_row = 1; $cns_row <= 94; ++$cns_row)
        {
            # Find the first and last mapped column of this row; -1 means
            # that the row has no mapping at all.
            $cns_row_first = -1;
            for ($cns_column = 1; $cns_column <= 94; ++$cns_column)
            {
                if (defined($cns_map[$cns_plane][$cns_row][$cns_column]))
                {
                    if ($cns_row_first == -1)
                    {
                        $cns_row_first = $cns_column;
                    }
                    $cns_row_last = $cns_column;
                }
            }
            if ($cns_row_first != -1)
            {
                $cns_data_offsets[$cns_plane][$cns_row] = $cns_data_index;
                ++$cns_rows;
                print OUT " /* plane ", $cns_plane, ", row ", $cns_row,
                          " */\n";

                # Bookkeeping for code points above 0xFFFF in this row.
                $cns_row_surrogates_first = -1;
                $cns_row_chars = 0;
                $cns_row_surrogates = 0;

                # Part 1: the header entry.
                print OUT "  ", $cns_row_first, " | (", $cns_row_last,
                          " << 8), /* first, last */\n";
                ++$cns_data_index;

                # Part 2: one entry per column.  Ten entries go on a line
                # (a line ends after a column with column % 10 == 9); the
                # first line is padded so that the entries line up.
                print OUT "  ", printSpaces(7, 10, $cns_row_first);
                # $bol is 1 when the output is at the beginning of a line.
                $bol = 0;
                for ($cns_column = $cns_row_first;
                     $cns_column <= $cns_row_last;
                     ++$cns_column)
                {
                    if ($bol == 1)
                    {
                        print OUT "  ";
                        $bol = 0;
                    }
                    if (defined($cns_map[$cns_plane][$cns_row][$cns_column]))
                    {
                        $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
                        ++$cns_row_chars;
                        if ($utf32 <= 0xFFFF)
                        {
                            printf OUT "0x%04X,", $utf32;
                        }
                        else
                        {
                            # Above 0xFFFF: emit the high surrogate here and
                            # remember the column range for part 3.
                            ++$cns_row_surrogates;
                            printf OUT "0x%04X,",
                                       (0xD800 | (($utf32 - 0x10000) >> 10));
                            if ($cns_row_surrogates_first == -1)
                            {
                                $cns_row_surrogates_first = $cns_column;
                            }
                            $cns_row_surrogates_last = $cns_column;
                        }
                    }
                    else
                    {
                        # Unmapped column inside the first..last range.
                        printf OUT "0xffff,";
                    }
                    ++$cns_data_index;
                    if ($cns_column % 10 == 9)
                    {
                        print OUT "\n";
                        $bol = 1;
                    }
                }
                if ($bol == 0)
                {
                    print OUT "\n";
                }

                # Part 3: the low surrogates, only if the row needs them.
                if ($cns_row_surrogates_first != -1)
                {
                    print OUT "  ", $cns_row_surrogates_first,
                              ", /* first low-surrogate */\n";
                    ++$cns_data_index;

                    print OUT "  ",
                              printSpaces(7, 10, $cns_row_surrogates_first);
                    $bol = 0;
                    for ($cns_column = $cns_row_surrogates_first;
                         $cns_column <= $cns_row_surrogates_last;
                         ++$cns_column)
                    {
                        if ($bol == 1)
                        {
                            print OUT "  ";
                            $bol = 0;
                        }
                        # Treat an unmapped column like a code point
                        # <= 0xFFFF: it gets a 0 entry.
                        $utf32 = 0;
                        if (defined($cns_map[$cns_plane]
                                            [$cns_row]
                                            [$cns_column]))
                        {
                            $utf32
                                = $cns_map[$cns_plane][$cns_row][$cns_column];
                        }
                        if ($utf32 <= 0xFFFF)
                        {
                            printf OUT "     0,";
                        }
                        else
                        {
                            printf OUT "0x%04X,",
                                       (0xDC00
                                            | (($utf32 - 0x10000) & 0x3FF));
                        }
                        ++$cns_data_index;
                        if ($cns_column % 10 == 9)
                        {
                            print OUT "\n";
                            $bol = 1;
                        }
                    }
                    if ($bol == 0)
                    {
                        print OUT "\n";
                    }
                }

                $cns_chars += $cns_row_chars;
                # Space: every entry emitted for this row, two bytes each.
                $cns_data_space[$cns_plane][$cns_row]
                    = ($cns_data_index
                           - $cns_data_offsets[$cns_plane][$cns_row]) * 2;
                # Used: the header entry, one entry per mapped column, and,
                # if surrogates occur, the entry naming their first column
                # plus one entry per low surrogate -- two bytes each.
                $cns_data_used[$cns_plane][$cns_row]
                    = (1 + $cns_row_chars
                           + ($cns_row_surrogates == 0 ?
                                  0 : 1 + $cns_row_surrogates)) * 2;
            }
            else
            {
                print OUT " /* plane ", $cns_plane, ", row ", $cns_row,
                          ": --- */\n";
                $cns_data_offsets[$cns_plane][$cns_row] = -1;
            }
        }
        # Progress report on stdout (not part of the generated file).
        print "cns plane ",
              $cns_plane,
              ": rows = ",
              $cns_rows,
              ", chars = ",
              $cns_chars,
              "\n";
    }
}
print OUT "};\n\n";
609
# Emit the aImpl<id>ToUnicodeRowOffsets array: for every used plane, one entry
# per row giving the row's start index in the ToUnicodeData array (-1 for a
# row without mappings), annotated with the row's usage statistics.  Unused
# planes contribute only a comment.  @cns_rowoffsets_used collects, per
# plane, four bytes for every row that has data.
print OUT "static sal_Int32 const aImpl" . $id . "ToUnicodeRowOffsets[] = {\n";
foreach my $plane (1 .. 16)
{
    unless (defined($cns_plane_used[$plane]))
    {
        print OUT "  /* plane " . $plane . ": --- */\n";
        next;
    }

    $cns_rowoffsets_used[$plane] = 0;
    foreach my $row (1 .. 94)
    {
        my $where = "plane " . $plane . ", row " . $row;
        if ($cns_data_offsets[$plane][$row] == -1)
        {
            print OUT "  -1, /* " . $where . " */\n";
            next;
        }

        my $stats = printStats($cns_data_used[$plane][$row],
                               $cns_data_space[$plane][$row]);
        print OUT "  " . $cns_data_offsets[$plane][$row]
                      . ", /* " . $where . "; " . $stats . " */\n";
        $cns_rowoffsets_used[$plane] += 4;
    }
}
print OUT "};\n\n";
648
# Emit the aImpl<id>ToUnicodePlaneOffsets array: one entry per plane 1..16.
# The n-th used plane (counting from 0) gets "n * 94", annotated with how
# much of its 94 four-byte row offsets is in use; unused planes get -1.
print OUT "static sal_Int32 const aImpl" . $id . "ToUnicodePlaneOffsets[] = {\n";
my $used_cns_planes = 0;
foreach my $plane (1 .. 16)
{
    unless (defined($cns_plane_used[$plane]))
    {
        print OUT "  -1, /* plane " . $plane . " */\n";
        next;
    }

    my $stats = printStats($cns_rowoffsets_used[$plane], 94 * 4);
    print OUT "  " . $used_cns_planes . " * 94, /* plane " . $plane
                  . "; " . $stats . " */\n";
    ++$used_cns_planes;
}
print OUT "};\n\n";
671
# Emit the aImplUnicodeTo<id>Data byte array and record, for every page of
# every used Unicode plane, where its data starts (@uni_data_offsets, -1 for
# an unused page), how many bytes it occupies (@uni_data_space) and how many
# of them carry information (@uni_data_used).
#
# A used page consists of two bytes "first, last" (the first and last mapped
# index of the page) followed by three bytes -- CNS plane, row, column -- for
# every index from first to last; unmapped indices are written as 0, 0, 0.
# Eight triples go on a line, the first line being padded for alignment.
print OUT "static sal_uInt8 const aImplUnicodeTo", $id, "Data[] = {\n";
$uni_data_index = 0;
foreach my $plane (0 .. 16)
{
    unless (defined($uni_plane_used[$plane]))
    {
        print OUT " /* plane ", $plane, ": --- */\n";
        next;
    }

    foreach my $page (0 .. 255)
    {
        unless (defined($uni_page_used[$plane][$page]))
        {
            $uni_data_offsets[$plane][$page] = -1;
            print OUT " /* plane ", $plane, ", page ", $page, ": --- */\n";
            next;
        }

        $uni_data_offsets[$plane][$page] = $uni_data_index;
        print OUT " /* plane ", $plane, ", page ", $page, " */\n";

        # Determine the range of mapped indices within this page.
        $uni_page_first = -1;
        foreach my $index (0 .. 255)
        {
            next unless defined($uni_map[$plane][$page][$index]);
            $uni_page_first = $index if $uni_page_first == -1;
            $uni_page_last = $index;
        }

        # The two header bytes.
        print OUT "  ", $uni_page_first, ", ", $uni_page_last,
                  ", /* first, last */\n";
        $uni_data_index += 2;
        $uni_data_used[$plane][$page] = 2;

        # The triples.
        print OUT "  ", printSpaces(9, 8, $uni_page_first);
        my $at_line_start = 0;
        foreach my $index ($uni_page_first .. $uni_page_last)
        {
            if ($at_line_start)
            {
                print OUT "  ";
                $at_line_start = 0;
            }

            my $packed_cns = $uni_map[$plane][$page][$index];
            if (defined($packed_cns))
            {
                printf OUT "%2d,%2d,%2d,",
                           $packed_cns >> 16,
                           ($packed_cns >> 8) & 0xFF,
                           $packed_cns & 0xFF;
                $uni_data_used[$plane][$page] += 3;
            }
            else
            {
                print OUT " 0, 0, 0,";
            }
            $uni_data_index += 3;

            if ($index % 8 == 7)
            {
                print OUT "\n";
                $at_line_start = 1;
            }
        }
        print OUT "\n" unless $at_line_start;

        $uni_data_space[$plane][$page]
            = $uni_data_index - $uni_data_offsets[$plane][$page];
    }
}
print OUT "};\n\n";
760
# Emit the aImplUnicodeTo<id>PageOffsets array: for every used Unicode plane,
# one entry per page giving the page's start index in the UnicodeTo...Data
# array (-1 for an unused page), annotated with the page's usage statistics.
# Unused planes contribute only a comment.  Per plane, the bytes used by the
# page offsets (@uni_pageoffsets_used) and the totals of used and occupied
# data bytes (@uni_data_used_sum, @uni_data_space_sum) are accumulated.
print OUT "static sal_Int32 const aImplUnicodeTo" . $id . "PageOffsets[] = {\n";
foreach my $plane (0 .. 16)
{
    unless (defined($uni_plane_used[$plane]))
    {
        print OUT "  /* plane " . $plane . ": --- */\n";
        next;
    }

    $uni_pageoffsets_used[$plane] = 0;
    $uni_data_used_sum[$plane] = 0;
    $uni_data_space_sum[$plane] = 0;
    foreach my $page (0 .. 255)
    {
        my $where = "plane " . $plane . ", page " . $page;
        my $page_offset = $uni_data_offsets[$plane][$page];
        if ($page_offset == -1)
        {
            print OUT "  -1, /* " . $where . " */\n";
            next;
        }

        my $used = $uni_data_used[$plane][$page];
        my $space = $uni_data_space[$plane][$page];
        print OUT "  " . $page_offset . ", /* " . $where . "; "
                      . printStats($used, $space) . " */\n";
        $uni_pageoffsets_used[$plane] += 4;
        $uni_data_used_sum[$plane] += $used;
        $uni_data_space_sum[$plane] += $space;
    }
}
print OUT "};\n\n";
806
# Emit the aImplUnicodeTo<id>PlaneOffsets array: one entry per Unicode plane
# 0..16.  The n-th used plane (counting from 0) gets "n * 256", annotated
# with the usage of its page offsets and of its data; unused planes get -1.
# A closing comment sums up the usage of the plane offsets, the page offsets
# and the data over all planes.
print OUT "static sal_Int32 const aImplUnicodeTo" . $id . "PlaneOffsets[] = {\n";
my $used_uni_planes = 0;
my $plane_offsets_bytes = 0;
my ($page_offsets_used_total, $page_offsets_space_total) = (0, 0);
my ($data_used_total, $data_space_total) = (0, 0);
foreach my $plane (0 .. 16)
{
    unless (defined($uni_plane_used[$plane]))
    {
        print OUT "  -1, /* plane " . $plane . " */\n";
        next;
    }

    my $page_stats = printStats($uni_pageoffsets_used[$plane], 256 * 4);
    my $data_stats = printStats($uni_data_used_sum[$plane],
                                $uni_data_space_sum[$plane]);
    print OUT "  " . $used_uni_planes . " * 256, /* plane " . $plane . "; "
                  . $page_stats . ", " . $data_stats . " */\n";
    ++$used_uni_planes;

    $plane_offsets_bytes += 4;
    $page_offsets_used_total += $uni_pageoffsets_used[$plane];
    $page_offsets_space_total += 256 * 4;
    $data_used_total += $uni_data_used_sum[$plane];
    $data_space_total += $uni_data_space_sum[$plane];
}
print OUT " /* " . printStats($plane_offsets_bytes, 17 * 4)
              . ", "
              . printStats($page_offsets_used_total,
                           $page_offsets_space_total)
              . ", "
              . printStats($data_used_total, $data_space_total)
              . " */\n};\n";
848
# Finish the generated file.  Buffered write errors (a full disk, for
# instance) only surface when the handle is closed, so a failing close is
# fatal rather than silently leaving a truncated table behind.
close(OUT) or die "Cannot close " . lc($id) . ".tab: " . $!;

# Summary on stdout: how many mappings each input file contributed.
print "Unihan.txt = ", $count_Unihan_txt,
      ", CNS11643.TXT = ", $count_CNS11643_TXT,
      ", Uni2CNS = ", $count_Uni2CNS,
      ", total = ",
          ($count_Unihan_txt + $count_CNS11643_TXT + $count_Uni2CNS),
      "\n";
857