1#!/usr/bin/perl
2#**************************************************************
3#
4#  Licensed to the Apache Software Foundation (ASF) under one
5#  or more contributor license agreements.  See the NOTICE file
6#  distributed with this work for additional information
7#  regarding copyright ownership.  The ASF licenses this file
8#  to you under the Apache License, Version 2.0 (the
9#  "License"); you may not use this file except in compliance
10#  with the License.  You may obtain a copy of the License at
11#
12#    http://www.apache.org/licenses/LICENSE-2.0
13#
14#  Unless required by applicable law or agreed to in writing,
15#  software distributed under the License is distributed on an
16#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17#  KIND, either express or implied.  See the License for the
18#  specific language governing permissions and limitations
19#  under the License.
20#
21#**************************************************************
22
23
24
25# The following files must be available in a ./input subdir:
26
27# <http://www.info.gov.hk/digital21/eng/hkscs/download/big5-iso.txt>
28
29# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT>:
30#  "Unicode version: 1.1    Table version: 0.0d3    Date: 11 February 1994"
31#  Only used to track Unicode characters that are mapped from both Big5 and
32#  HKSCS.
33
34# <http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT>:
35#  "Unicode version: 2.0    Table version: 2.01    Date: 1/7/2000"
36#  Only used to track Unicode characters that are mapped from both CP950 and
37#  HKSCS.
38
39$surrogates = 0; # set to 1 to allow mappings to Unicode beyond Plane 0
40
41$id = "Big5Hkscs2001";
42
# Returns true if the given integer is a Unicode scalar value that is also
# not a noncharacter: it must lie in 0..0x10FFFF, must not be a surrogate
# (U+D800..U+DFFF), must not be in U+FDD0..U+FDEF, and must not be one of the
# last two code points (xxFFFE, xxFFFF) of any plane.
sub isValidUtf32
{
    my ($code_point) = @_;

    my $in_range = $code_point >= 0 && $code_point <= 0x10FFFF;
    my $is_surrogate = $code_point >= 0xD800 && $code_point <= 0xDFFF;
    my $is_noncharacter
        = ($code_point >= 0xFDD0 && $code_point <= 0xFDEF)
          || ($code_point & 0xFFFF) >= 0xFFFE;

    return $in_range && !$is_surrogate && !$is_noncharacter;
}
51
# Formats a code point in the conventional "U+XXXX" notation (upper-case hex,
# zero-padded to at least four digits).
sub printUtf32
{
    my ($code_point) = @_;
    return "U+" . sprintf("%04X", $code_point);
}
57
# Returns true if the given 16-bit value is a structurally valid Big5 code:
# lead byte in 0x81..0xFE and trail byte in 0x40..0x7E or 0xA1..0xFE.
sub isValidBig5
{
    my ($code) = @_;
    my $lead = $code >> 8;
    my $trail = $code & 0xFF;

    my $lead_ok = $lead >= 0x81 && $lead <= 0xFE;
    my $trail_ok = ($trail >= 0x40 && $trail <= 0x7E)
                   || ($trail >= 0xA1 && $trail <= 0xFE);

    return $lead_ok && $trail_ok;
}
67
# Formats a Big5 code as upper-case hex, zero-padded to at least four digits.
sub printBig5
{
    my ($code) = @_;
    my $text = sprintf("%04X", $code);
    return $text;
}
73
# Formats a usage statistic as "<used>/<space> bytes (<percent>%)".
#
# Parameters:
#   $_[0]  number of bytes actually used
#   $_[1]  number of bytes available
#
# Returns the formatted string.  When the available space is zero the
# percentage is reported as 0.0 instead of dying with a division by zero
# (which could otherwise abort table generation for an empty table).
sub printStats
{
    my ($used, $space) = @_;
    my $percent = $space ? $used * 100 / $space : 0;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percent);
}
83
# Returns the blank padding needed so that the first cell of a table row
# lines up under the column it would occupy in a full line.
#
# Parameters:
#   $_[0]  width of one cell in characters
#   $_[1]  number of cells per output line
#   $_[2]  index of the first cell that will actually be printed
#
# The result consists of $column_width spaces for each cell position between
# the start of the line containing $end and $end itself.  All working
# variables are lexical, so calling this helper no longer overwrites the
# package variables $output, $i and $j.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;

    # Number of skipped cell positions on the line that contains $end.
    my $skipped = $end - int($end / $columns_per_line) * $columns_per_line;
    $skipped = 0 if $skipped < 0;

    return " " x ($column_width * $skipped);
}
101
# Records a Unicode -> Big5 mapping in the three-level @uni_map table
# (plane, page, index within page).
#
# Parameters:
#   $_[0]  Unicode code point
#   $_[1]  Big5 code the code point maps to
#   $_[2]  compatibility class: -1 for an ordinary mapping, otherwise the
#          index into @compat whose counter is incremented (the callers use
#          0 for 1993, 1 for 2000 and 2 for 2001)
#
# If the code point is not mapped yet, the mapping is stored, the plane and
# page are marked as used, and the matching @compat counter is bumped.  If the
# code point already has a mapping, the existing one is kept and a warning is
# printed to standard output.
sub addMapping
{
	my ($utf32, $big5, $comp) = @_;

	# These three stay package variables on purpose: top-level code in
	# this file shares the names (one later check reads $uni_index), so
	# turning them into lexicals here would change what that code sees.
	$uni_plane = $utf32 >> 16;
	$uni_page = ($utf32 >> 8) & 0xFF;
	$uni_index = $utf32 & 0xFF;

	if (!defined($uni_plane_used[$uni_plane])
		|| !defined($uni_page_used[$uni_plane][$uni_page])
		|| !defined($uni_map[$uni_plane][$uni_page][$uni_index]))
	{
		$uni_map[$uni_plane][$uni_page][$uni_index] = $big5;
		$uni_plane_used[$uni_plane] = 1;
		$uni_page_used[$uni_plane][$uni_page] = 1;
		if ($comp != -1)
		{
			++$compat[$comp];
		}
	}
	else
	{
		my $existing = $uni_map[$uni_plane][$uni_page][$uni_index];
		# $comp is -1 (a true value) for ordinary mappings and 0 (a false
		# value) for 1993 compatibility mappings, so it must be compared
		# against -1 rather than tested for truth.
		print "WARNING!  Mapping ", printUtf32($utf32), " to ",
		      printBig5($existing), ", NOT ",
		      ($comp != -1 ? "compat " : ""),
		      printBig5($big5), "\n";
	}
}
132
133# Build mappings to track Unicode characters that are mapped from both Big5/
134# CP950 and HKSCS:
{
	# Each entry: input file name, array that receives the Unicode -> Big5
	# mapping of that file (indexed by code point), and whether lines that
	# map to U+FFFD are ignored for that file.
	my @tables = (["BIG5.TXT", \@underlying_big5, 1],
	              ["CP950.TXT", \@underlying_cp950, 0]);

	for my $table (@tables)
	{
		my ($name, $underlying, $skip_fffd) = @$table;

		open my $in, '<', "input/" . $name
			or die "Cannot read " . $name . ": $!";
		while (<$in>)
		{
			# Expected line shape: 0xBBBB <ws> 0xUUUU <ws> # comment
			next unless /(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/;

			my $big5 = oct($1);
			my $utf32 = oct($2);
			isValidBig5($big5)
				or die "Bad Big5 char " . printBig5($big5);
			isValidUtf32($utf32)
				or die "Bad UTF32 char " . printUtf32($utf32);
			next if $skip_fffd && $utf32 == 0xFFFD;

			if (defined($underlying->[$utf32]))
			{
				# Only the first Big5 code seen for a code point is kept.
				print "WARNING!  In ", $name, ", both ",
				      printBig5($underlying->[$utf32]), " and ",
				      printBig5($big5), " map to ", printUtf32($utf32),
				      "\n";
			}
			else
			{
				$underlying->[$utf32] = $big5;
			}
		}
		close $in;
	}
}
192
193# The following are mapped by the underlying RTL_TEXTENCODING_BIG5 to some
194# nonstandard Unicode points, so they are explicitly mentioned here to map
195# to the standard Unicode PUA points.  (In the other direction, the unofficial
196# mappings from Unicode to RTL_TEXTENCODING_BIG5 C6A1--C7FE are harmless,
197# since all Unicode characters involved are already covered by the official
198# Big5-HKSCS mappings.)
# Explicit Big5 <-> Unicode PUA pairs, registered in both directions.
for my $pair ([0xC6CF, 0xF6E0], [0xC6D3, 0xF6E4], [0xC6D5, 0xF6E6],
              [0xC6D7, 0xF6E8], [0xC6DE, 0xF6EF], [0xC6DF, 0xF6F0]) {
    my ($big5_code, $pua_code) = @$pair;
    $big5_map[$big5_code >> 8][$big5_code & 0xFF] = $pua_code;
    addMapping($pua_code, $big5_code, -1);
}
205
206# The following implements the mapping of Big5-HKSCS compatibility points
207# (GCCS characters unified with other HKSCS characters) to Unicode.  In the
208# other direction, characters from Unicode's PUA will map to these Big5-HKSCS
209# compatibility points.  (See the first list in <http://www.info.gov.hk/
210# digital21/eng/hkscs/download/big5cmp.txt>.)
# Big5-HKSCS compatibility point => Unicode code point.  These are entered
# into @big5_map only (Big5 -> Unicode direction).
my @compat_point_pairs = (
    0x8E69 => 0x7BB8, 0x8E6F => 0x7C06, 0x8E7E => 0x7CCE, 0x8EAB => 0x7DD2,
    0x8EB4 => 0x7E1D, 0x8ECD => 0x8005, 0x8ED0 => 0x8028,
    0x8F57 => 0x83C1, 0x8F69 => 0x84A8, 0x8F6E => 0x840F, 0x8FCB => 0x89A6,
    0x8FCC => 0x89A9, 0x8FFE => 0x8D77,
    0x906D => 0x90FD, 0x907A => 0x92B9, 0x90DC => 0x975C, 0x90F1 => 0x97FF,
    0x91BF => 0x9F16,
    0x9244 => 0x8503, 0x92AF => 0x5159, 0x92B0 => 0x515B, 0x92B1 => 0x515D,
    0x92B2 => 0x515E, 0x92C8 => 0x936E, 0x92D1 => 0x7479,
    0x9447 => 0x6D67, 0x94CA => 0x799B,
    0x95D9 => 0x9097,
    0x9644 => 0x975D, 0x96ED => 0x701E, 0x96FC => 0x5B28,
    0x9B76 => 0x7201, 0x9B78 => 0x77D7, 0x9B7B => 0x7E87, 0x9BC6 => 0x99D6,
    0x9BDE => 0x91D4, 0x9BEC => 0x60DE, 0x9BF6 => 0x6FB6,
    0x9C42 => 0x8F36, 0x9C53 => 0x4FBB, 0x9C62 => 0x71DF, 0x9C68 => 0x9104,
    0x9C6B => 0x9DF0, 0x9C77 => 0x83CF, 0x9CBC => 0x5C10, 0x9CBD => 0x79E3,
    0x9CD0 => 0x5A67,
    0x9D57 => 0x8F0B, 0x9D5A => 0x7B51, 0x9DC4 => 0x62D0,
    0x9EA9 => 0x6062, 0x9EEF => 0x75F9, 0x9EFD => 0x6C4A,
    0x9F60 => 0x9B2E, 0x9F66 => 0x9F17, 0x9FCB => 0x50ED, 0x9FD8 => 0x5F0C,
    0xA063 => 0x880F, 0xA077 => 0x62CE, 0xA0D5 => 0x7468, 0xA0DF => 0x7162,
    0xA0E4 => 0x7250,
    0xFA5F => 0x5029, 0xFA66 => 0x507D, 0xFABD => 0x5305, 0xFAC5 => 0x5344,
    0xFAD5 => 0x537F,
    0xFB48 => 0x5605, 0xFBB8 => 0x5A77, 0xFBF3 => 0x5E75, 0xFBF9 => 0x5ED0,
    0xFC4F => 0x5F58, 0xFC6C => 0x60A4, 0xFCB9 => 0x6490, 0xFCE2 => 0x6674,
    0xFCF1 => 0x675E,
    0xFDB7 => 0x6C9C, 0xFDB8 => 0x6E1D, 0xFDBB => 0x6E2F, 0xFDF1 => 0x716E,
    0xFE52 => 0x732A, 0xFE6F => 0x745C, 0xFEAA => 0x74E9, 0xFEDD => 0x7809,
);
while (my ($compat_code, $unified) = splice(@compat_point_pairs, 0, 2)) {
    $big5_map[$compat_code >> 8][$compat_code & 0xFF] = $unified;
}
295
# Read the official HKSCS table (big5-iso.txt).  Each data line carries a
# Big5 code followed by three Unicode code points; the script labels them
# 1993, 2000 and 2001 (see the @compat counters below).  The primary mapping
# is the 2001 column when $surrogates is set and the 2000 column otherwise;
# the remaining columns that differ are added as Unicode -> Big5 compatibility
# mappings only.
$pua = 0;
$compat[0] = 0; # 1993
$compat[1] = 0; # 2000
$compat[2] = 0; # 2001

$filename = "big5-iso.txt";
open my $hkscs_in, '<', "input/" . $filename
    or die "Cannot read " . $filename . ": $!";
while (<$hkscs_in>)
{
    # Lines that are not four space-separated upper-case hex fields are
    # silently skipped.
    next unless /^([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+)$/;

    my ($big5, $utf32_1993, $utf32_2000, $utf32_2001)
        = map { hex($_) } ($1, $2, $3, $4);

    isValidBig5($big5)
        or die "Bad Big5 char " . printBig5($big5);
    isValidUtf32($utf32_1993)
        or die "Bad UTF32 char " . printUtf32($utf32_1993);
    isValidUtf32($utf32_2000)
        or die "Bad UTF32 char " . printUtf32($utf32_2000);
    isValidUtf32($utf32_2001)
        or die "Bad UTF32 char " . printUtf32($utf32_2001);

    my $utf32 = $surrogates ? $utf32_2001 : $utf32_2000;

    # Report code points that the underlying Big5/CP950 tables already map
    # from a different Big5 code than HKSCS does.
    my $in_big5 = $underlying_big5[$utf32];
    my $in_cp950 = $underlying_cp950[$utf32];
    if (defined($in_big5) || defined($in_cp950))
    {
        # Consistent means: every underlying table that knows this code
        # point maps it from the same Big5 code as HKSCS.
        my $consistent = (!defined($in_big5) || $in_big5 == $big5)
                         && (!defined($in_cp950) || $in_cp950 == $big5);
        if ($consistent)
        {
            # ignore

            # Depending on real underlying mapping (cf.
            # ../convertbig5hkscs.tab), it would be possible to save some
            # table space by dropping those HKSCS code points that are
            # already covered by the underlying mapping.
        }
        else
        {
            print "XXX mapping underlying";
            if (defined($in_big5) && defined($in_cp950)
                && $in_big5 == $in_cp950)
            {
                print " Big5/CP950 ", printBig5($in_big5);
            }
            else
            {
                if (defined($in_big5))
                {
                    print " Big5 ", printBig5($in_big5);
                }
                if (defined($in_cp950))
                {
                    print " CP950 ", printBig5($in_cp950);
                }
            }
            print " and HKSCS ", printBig5($big5), " to ",
                  printUtf32($utf32), "\n";
        }
    }

    if ($utf32 >= 0xE000 && $utf32 <= 0xF8FF)
    {
        ++$pua;
    }

    # Each Big5 code may be defined only once, including the entries that
    # were put into @big5_map before this file was read.
    my $big5_row = $big5 >> 8;
    my $big5_column = $big5 & 0xFF;
    if (defined($big5_map[$big5_row][$big5_column]))
    {
        die "Bad Big5 mapping " . printBig5($big5);
    }
    $big5_map[$big5_row][$big5_column] = $utf32;

    addMapping($utf32, $big5, -1);

    # Add the other columns as compatibility mappings, each distinct code
    # point at most once, preferring the newest column's label.
    if ($utf32_2001 != $utf32)
    {
        addMapping($utf32_2001, $big5, 2);
    }
    if ($utf32_2000 != $utf32 && $utf32_2000 != $utf32_2001)
    {
        addMapping($utf32_2000, $big5, 1);
    }
    if ($utf32_1993 != $utf32 && $utf32_1993 != $utf32_2000
        && $utf32_1993 != $utf32_2001)
    {
        addMapping($utf32_1993, $big5, 0);
    }
}
close $hkscs_in;

print $pua, " mappings to PUA\n";
print $compat[0], " 1993 compatibility mappings\n" if ($compat[0] != 0);
print $compat[1], " 2000 compatibility mappings\n" if ($compat[1] != 0);
print $compat[2], " 2001 compatibility mappings\n" if ($compat[2] != 0);
406
# Sanity check: no code point in U+0000..U+007F may have received a
# Unicode -> Big5 mapping.  Abort with the offending pair if one has.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for my $ascii (0 .. 0x7F)
    {
        # Index by the code point under test.  (Indexing by $uni_index would
        # examine whichever slot the most recent addMapping call touched.)
        my $mapped = $uni_map[0][0][$ascii];
        if (defined($mapped))
        {
            die "Mapping " . printUtf32($ascii) . " to " . printBig5($mapped);
        }
    }
}
418
# Open the generated table file.  OUT stays a package filehandle because the
# rest of the script prints to it.
$filename = lc($id) . ".tab";
open OUT, '>', $filename or die "Cannot write " . $filename . ": $!";

# Copy the leading comment block of the generator script (<id>.pl) into the
# output as a C comment: the first "#***" line opens the comment, every later
# "#***" line closes one, other "#" lines become " *" lines, and the "#!" line
# is dropped.  Copying stops at the first line that is not a "#" comment.
{
    my $script = lc($id) . ".pl";
    open my $in, '<', $script or die "Cannot read " . $script . ": $!";
    my $first = 1;
    while (defined(my $line = <$in>))
    {
        if ($line =~ /^\#!.*$/)
        {
            # interpreter line: not copied
        }
        elsif ($line =~ /^\#(\*.*)$/)
        {
            if ($first == 1)
            {
                print OUT "/", $1, "\n";
                $first = 0;
            }
            else
            {
                # Drop the final character of the captured text so that the
                # trailing "/" keeps the line at its original length.
                print OUT " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        else
        {
            last;
        }
    }
    close $in;
}
458
# Blank line, the guarded include of sal/types.h, and another blank line.
print OUT join("\n",
               "",
               "#ifndef _SAL_TYPES_H_",
               "#include \"sal/types.h\"",
               "#endif",
               "",
               "");
464
# Emit the Big5 -> Unicode data array.  For every Big5 row (lead byte) that
# has at least one mapping the array holds:
#   - one entry "first | (last << 8)" with the first and last mapped column,
#   - one entry per column in first..last: the code point, the high surrogate
#     for code points above U+FFFF, or 0xffff for an unmapped column,
#   - only if the row has code points above U+FFFF: one entry with the first
#     such column, followed by one entry per column up to the last such
#     column holding the low surrogate (0 where there is none).
# Along the way the start offset, allocated size and used size of every row
# are recorded in @big5_data_offsets, @big5_data_space and @big5_data_used.
print OUT "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
$big5_data_index = 0;
$big5_rows = 0;
$big5_chars = 0;

# Lays out preformatted cells ten columns per line: the first line is padded
# so that the first cell sits under its own column, later lines are indented
# by two spaces, and the text always ends with a newline.
my $layout_big5_cells = sub {
    my ($first_column, @cells) = @_;
    my $text = "  " . printSpaces(7, 10, $first_column);
    my $column = $first_column;
    my $at_line_start = 0;
    for my $cell (@cells) {
        $text .= "  " if $at_line_start;
        $text .= $cell;
        $at_line_start = ($column % 10 == 9);
        $text .= "\n" if $at_line_start;
        ++$column;
    }
    $text .= "\n" unless $at_line_start;
    return $text;
};

for my $row (0 .. 255) {
    my @mapped = grep { defined($big5_map[$row][$_]) } 0 .. 255;
    if (!@mapped) {
        print OUT " /* row ", $row, ": --- */\n";
        $big5_data_offsets[$row] = -1;
        next;
    }

    my ($first, $last) = @mapped[0, -1];
    $big5_data_offsets[$row] = $big5_data_index;
    ++$big5_rows;
    print OUT " /* row ", $row, " */\n";
    print OUT "  ", $first, " | (", $last, " << 8), /* first, last */\n";
    ++$big5_data_index;

    # Code points or high surrogates for every column in first..last.
    my @high_cells;
    for my $column ($first .. $last) {
        my $code_point = $big5_map[$row][$column];
        if (!defined($code_point)) {
            push @high_cells, "0xffff,";
        }
        elsif ($code_point <= 0xFFFF) {
            push @high_cells, sprintf("0x%04X,", $code_point);
        }
        else {
            push @high_cells,
                 sprintf("0x%04X,",
                         (0xD800 | (($code_point - 0x10000) >> 10)));
        }
    }
    print OUT $layout_big5_cells->($first, @high_cells);
    $big5_data_index += scalar(@high_cells);

    # Low surrogates, only for rows that map beyond Plane 0.
    my @beyond_bmp = grep { $big5_map[$row][$_] > 0xFFFF } @mapped;
    if (@beyond_bmp) {
        my ($low_first, $low_last) = @beyond_bmp[0, -1];
        print OUT "  ", $low_first, ", /* first low-surrogate */\n";
        ++$big5_data_index;

        my @low_cells;
        for my $column ($low_first .. $low_last) {
            my $code_point = $big5_map[$row][$column];
            if (defined($code_point) && $code_point > 0xFFFF) {
                push @low_cells,
                     sprintf("0x%04X,",
                             (0xDC00 | (($code_point - 0x10000) & 0x3FF)));
            }
            else {
                push @low_cells, "     0,";
            }
        }
        print OUT $layout_big5_cells->($low_first, @low_cells);
        $big5_data_index += scalar(@low_cells);
    }

    $big5_chars += scalar(@mapped);
    $big5_data_space[$row]
        = ($big5_data_index - $big5_data_offsets[$row]) * 2;
    $big5_data_used[$row]
        = (1 + scalar(@mapped)
           + (@beyond_bmp ? 1 + scalar(@beyond_bmp) : 0)) * 2;
}
print OUT "};\n\n";
print "big5 rows = ", $big5_rows, ", chars = ", $big5_chars, "\n";
604
# Emit, for each of the 256 Big5 rows, the start offset of that row inside
# the ToUnicodeData array, or -1 for a row without mappings.  Used rows are
# annotated with their space statistics.
print OUT "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
$big5_rowoffsets_used = 0;
for my $row (0 .. 255) {
    my $row_offset = $big5_data_offsets[$row];
    if ($row_offset == -1) {
        print OUT "  -1, /* row ", $row, " */\n";
        next;
    }
    my $stats = printStats($big5_data_used[$row], $big5_data_space[$row]);
    print OUT "  ", $row_offset, ", /* row ", $row, "; ", $stats, " */\n";
    $big5_rowoffsets_used += 4;
}
print OUT "};\n\n";
627
# Emit the Unicode -> Big5 data array.  For every used page (256 code points)
# of every used plane the array holds one entry "first | (last << 8)" with the
# first and last mapped index, followed by one entry per index in first..last:
# the Big5 code, or 0 for an unmapped index.  The start offset, allocated size
# and used size of each page are recorded in @uni_data_offsets,
# @uni_data_space and @uni_data_used.
print OUT "static sal_uInt16 const aImplUnicodeTo", $id, "Data[] = {\n";
$uni_data_index = 0;
for my $plane (0 .. 16) {
    if (!defined($uni_plane_used[$plane])) {
        print OUT " /* plane ", $plane, ": --- */\n";
        next;
    }

    for my $page (0 .. 255) {
        if (!defined($uni_page_used[$plane][$page])) {
            $uni_data_offsets[$plane][$page] = -1;
            print OUT " /* plane ", $plane, ", page ", $page, ": --- */\n";
            next;
        }

        $uni_data_offsets[$plane][$page] = $uni_data_index;
        print OUT " /* plane ", $plane, ", page ", $page, " */\n";

        my @mapped
            = grep { defined($uni_map[$plane][$page][$_]) } 0 .. 255;
        my ($first, $last) = @mapped[0, -1];

        print OUT "  ", $first, " | (", $last, " << 8), /* first, last */\n";
        ++$uni_data_index;

        # Ten cells per line; the first line is padded so that the first
        # cell sits under its own column, later lines get a two-space indent.
        my $text = "  " . printSpaces(7, 10, $first);
        for my $index ($first .. $last) {
            my $big5_code = $uni_map[$plane][$page][$index];
            $text .= defined($big5_code)
                         ? sprintf("0x%04X,", $big5_code)
                         : "     0,";
            if ($index % 10 == 9) {
                $text .= "\n";
                $text .= "  " if $index < $last;
            }
        }
        $text .= "\n" if $last % 10 != 9;
        print OUT $text;
        $uni_data_index += $last - $first + 1;

        # Two bytes for the first/last entry plus two per mapped index.
        $uni_data_used[$plane][$page] = 2 + 2 * scalar(@mapped);
        $uni_data_space[$plane][$page]
            = ($uni_data_index - $uni_data_offsets[$plane][$page]) * 2;
    }
}
print OUT "};\n\n";
713
# Emit, for each used plane, 256 entries giving the start offset of every
# page inside the UnicodeTo...Data array (-1 for an unused page).  Unused
# planes contribute only a comment.  Per-plane totals are accumulated in
# @uni_pageoffsets_used, @uni_data_used_sum and @uni_data_space_sum.
print OUT "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
for my $plane (0 .. 16) {
    if (!defined($uni_plane_used[$plane])) {
        print OUT "  /* plane ", $plane, ": --- */\n";
        next;
    }

    $uni_pageoffsets_used[$plane] = 0;
    $uni_data_used_sum[$plane] = 0;
    $uni_data_space_sum[$plane] = 0;
    for my $page (0 .. 255) {
        my $page_offset = $uni_data_offsets[$plane][$page];
        if ($page_offset == -1) {
            print OUT "  -1, /* plane ", $plane, ", page ", $page, " */\n";
            next;
        }

        my $used = $uni_data_used[$plane][$page];
        my $space = $uni_data_space[$plane][$page];
        print OUT "  ", $page_offset, ", /* plane ", $plane, ", page ", $page,
                  "; ", printStats($used, $space), " */\n";
        $uni_pageoffsets_used[$plane] += 4;
        $uni_data_used_sum[$plane] += $used;
        $uni_data_space_sum[$plane] += $space;
    }
}
print OUT "};\n\n";
759
# Emit, for each of the 17 Unicode planes, the start of that plane's block of
# 256 entries inside the PageOffsets array (written as "<n> * 256"), or -1 for
# an unused plane.  Used planes are numbered consecutively because unused
# planes occupy no entries in PageOffsets.  A closing comment summarizes the
# space statistics of all three Unicode -> Big5 arrays.
print OUT "static sal_Int32 const aImplUnicodeTo",
          $id,
          "PlaneOffsets[] = {\n";
my $next_page_block = 0;
my $planeoffsets_used = 0;
my $pageoffsets_used_total = 0;
my $pageoffsets_space_total = 0;
my $data_used_total = 0;
my $data_space_total = 0;
for my $plane (0 .. 16)
{
    if (defined($uni_plane_used[$plane]))
    {
        print OUT "  ",
                  $next_page_block++,
                  " * 256, /* plane ",
                  $plane,
                  "; ",
                  printStats($uni_pageoffsets_used[$plane], 256 * 4),
                  ", ",
                  printStats($uni_data_used_sum[$plane],
                             $uni_data_space_sum[$plane]),
                  " */\n";
        $planeoffsets_used += 4;
        $pageoffsets_used_total += $uni_pageoffsets_used[$plane];
        $pageoffsets_space_total += 256 * 4;
        $data_used_total += $uni_data_used_sum[$plane];
        $data_space_total += $uni_data_space_sum[$plane];
    }
    else
    {
        print OUT "  -1, /* plane ", $plane, " */\n";
    }
}
print OUT " /* ",
          printStats($planeoffsets_used, 17 * 4),
          ", ",
          printStats($pageoffsets_used_total, $pageoffsets_space_total),
          ", ",
          printStats($data_used_total, $data_space_total),
          " */\n};\n";

# Buffered write errors (for example a full disk) only surface when the
# handle is closed, so the result of close must be checked.
close OUT or die "Cannot write " . lc($id) . ".tab: $!";
803