1*b1cdbd2cSJim Jagielski#!/usr/bin/perl
2*b1cdbd2cSJim Jagielski#**************************************************************
3*b1cdbd2cSJim Jagielski#
4*b1cdbd2cSJim Jagielski#  Licensed to the Apache Software Foundation (ASF) under one
5*b1cdbd2cSJim Jagielski#  or more contributor license agreements.  See the NOTICE file
6*b1cdbd2cSJim Jagielski#  distributed with this work for additional information
7*b1cdbd2cSJim Jagielski#  regarding copyright ownership.  The ASF licenses this file
8*b1cdbd2cSJim Jagielski#  to you under the Apache License, Version 2.0 (the
9*b1cdbd2cSJim Jagielski#  "License"); you may not use this file except in compliance
10*b1cdbd2cSJim Jagielski#  with the License.  You may obtain a copy of the License at
11*b1cdbd2cSJim Jagielski#
12*b1cdbd2cSJim Jagielski#    http://www.apache.org/licenses/LICENSE-2.0
13*b1cdbd2cSJim Jagielski#
14*b1cdbd2cSJim Jagielski#  Unless required by applicable law or agreed to in writing,
15*b1cdbd2cSJim Jagielski#  software distributed under the License is distributed on an
16*b1cdbd2cSJim Jagielski#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17*b1cdbd2cSJim Jagielski#  KIND, either express or implied.  See the License for the
18*b1cdbd2cSJim Jagielski#  specific language governing permissions and limitations
19*b1cdbd2cSJim Jagielski#  under the License.
20*b1cdbd2cSJim Jagielski#
21*b1cdbd2cSJim Jagielski#**************************************************************
22*b1cdbd2cSJim Jagielski
23*b1cdbd2cSJim Jagielski
24*b1cdbd2cSJim Jagielski
25*b1cdbd2cSJim Jagielski# The following files must be available in a ./input subdir:
26*b1cdbd2cSJim Jagielski
27*b1cdbd2cSJim Jagielski# <http://www.info.gov.hk/digital21/eng/hkscs/download/big5-iso.txt>
28*b1cdbd2cSJim Jagielski
29*b1cdbd2cSJim Jagielski# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT>:
30*b1cdbd2cSJim Jagielski#  "Unicode version: 1.1    Table version: 0.0d3    Date: 11 February 1994"
31*b1cdbd2cSJim Jagielski#  Only used to track Unicode characters that are mapped from both Big5 and
32*b1cdbd2cSJim Jagielski#  HKSCS.
33*b1cdbd2cSJim Jagielski
34*b1cdbd2cSJim Jagielski# <http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT>:
35*b1cdbd2cSJim Jagielski#  "Unicode version: 2.0    Table version: 2.01    Date: 1/7/2000"
36*b1cdbd2cSJim Jagielski#  Only used to track Unicode characters that are mapped from both CP950 and
37*b1cdbd2cSJim Jagielski#  HKSCS.
38*b1cdbd2cSJim Jagielski
# Set to 1 to allow mappings to Unicode beyond Plane 0; the reader of
# big5-iso.txt then takes the 2001 column as the primary mapping instead of
# the 2000 column.
$surrogates = 0;

# Identifier used to derive the output file name and the names of the
# generated C tables.
$id = "Big5Hkscs2001";
42*b1cdbd2cSJim Jagielski
# Returns true if the given number is a Unicode scalar value that may be
# mapped: within 0..0x10FFFF, not a surrogate (D800..DFFF), not in
# FDD0..FDEF, and not one of the xxFFFE/xxFFFF values of any plane.
sub isValidUtf32
{
    my ($code) = @_;
    my $in_range     = $code >= 0 && $code <= 0x10FFFF;
    my $is_surrogate = $code >= 0xD800 && $code <= 0xDFFF;
    my $is_excluded  = ($code >= 0xFDD0 && $code <= 0xFDEF)
                       || ($code & 0xFFFF) >= 0xFFFE;
    return $in_range && !$is_surrogate && !$is_excluded;
}
51*b1cdbd2cSJim Jagielski
# Formats a Unicode code point as "U+" followed by at least four upper-case
# hexadecimal digits.
sub printUtf32
{
    my ($code_point) = @_;
    return "U+" . sprintf("%04X", $code_point);
}
57*b1cdbd2cSJim Jagielski
# Returns true if the given 16-bit value is a well-formed Big5 code: the
# high byte (row) is in 81..FE and the low byte (column) is in 40..7E or
# A1..FE.
sub isValidBig5
{
    my ($code) = @_;
    my ($row, $column) = ($code >> 8, $code & 0xFF);
    my $row_ok    = $row >= 0x81 && $row <= 0xFE;
    my $column_ok = ($column >= 0x40 && $column <= 0x7E)
                    || ($column >= 0xA1 && $column <= 0xFE);
    return $row_ok && $column_ok;
}
67*b1cdbd2cSJim Jagielski
# Formats a Big5 code as at least four upper-case hexadecimal digits
# (no prefix).
sub printBig5
{
    return sprintf("%04X", shift);
}
73*b1cdbd2cSJim Jagielski
# Formats a usage statistic as "USED/SPACE bytes (P%)", where P is USED as a
# percentage of SPACE with one decimal place.
#   $_[0]  number of bytes used
#   $_[1]  number of bytes available (must be non-zero)
sub printStats
{
    my ($used, $space) = @_;
    my $percentage = $used * 100 / $space;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percentage);
}
83*b1cdbd2cSJim Jagielski
# Returns the padding needed so that a table row which starts at column
# index $end lines up with rows that start at a multiple of
# $columns_per_line.
#   $_[0]  width, in characters, of one printed column
#   $_[1]  number of columns printed per output line
#   $_[2]  index of the first column that will actually be printed
# The result is $column_width spaces for every column between the start of
# the output line and $end, i.e. (end mod columns_per_line) columns.  All
# arguments are expected to be non-negative integers.
#
# Only lexical variables are used, so the caller's package variables
# ($i, $j, $output) are left untouched.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;

    # First column index of the output line that contains $end.
    my $line_start = int($end / $columns_per_line) * $columns_per_line;
    my $skipped_columns = $end - $line_start;

    return "" if $skipped_columns <= 0 || $column_width <= 0;
    return " " x ($column_width * $skipped_columns);
}
101*b1cdbd2cSJim Jagielski
# Records a mapping from a Unicode code point to a Big5 code in the
# Unicode-to-Big5 tables (@uni_map, @uni_plane_used, @uni_page_used).
#   $_[0]  Unicode code point
#   $_[1]  Big5 code the code point maps to
#   $_[2]  -1 for a regular mapping; otherwise the index into @compat of the
#          compatibility counter to bump when the mapping is recorded
# The first mapping recorded for a code point wins.  A later mapping for the
# same code point is not stored; a warning naming the retained and the
# rejected Big5 code is printed instead.
sub addMapping
{
    my ($utf32, $big5, $comp) = @_;

    # These remain package variables, as in the original, because code
    # outside this sub may read them.
    $uni_plane = $utf32 >> 16;
    $uni_page = ($utf32 >> 8) & 0xFF;
    $uni_index = $utf32 & 0xFF;

    if (!defined($uni_plane_used[$uni_plane])
        || !defined($uni_page_used[$uni_plane][$uni_page])
        || !defined($uni_map[$uni_plane][$uni_page][$uni_index]))
    {
        $uni_map[$uni_plane][$uni_page][$uni_index] = $big5;
        $uni_plane_used[$uni_plane] = 1;
        $uni_page_used[$uni_plane][$uni_page] = 1;
        if ($comp != -1)
        {
            ++$compat[$comp];
        }
    }
    else
    {
        $big5_1 = $uni_map[$uni_plane][$uni_page][$uni_index];
        # $comp is -1 (true in boolean context) for regular mappings and 0
        # for the first compatibility slot, so it has to be compared with
        # -1 explicitly to tag only compatibility mappings.
        print "WARNING!  Mapping ", printUtf32($utf32), " to ",
              printBig5($big5_1), ", NOT ",
              ($comp != -1 ? "compat " : ""),
              printBig5($big5), "\n";
    }
}
132*b1cdbd2cSJim Jagielski
133*b1cdbd2cSJim Jagielski# Build mappings to track Unicode characters that are mapped from both Big5/
134*b1cdbd2cSJim Jagielski# CP950 and HKSCS:
{
    # One entry per reference table:
    #   [ file in ./input, array to fill (indexed by Unicode code point),
    #     Unicode value whose lines are skipped, or undef for none ]
    # Lines mapping to U+FFFD are skipped for BIG5.TXT only.
    my @underlying_tables = (
        [ "BIG5.TXT",  \@underlying_big5,  0xFFFD ],
        [ "CP950.TXT", \@underlying_cp950, undef ],
    );

    foreach my $table (@underlying_tables)
    {
        my ($filename, $underlying, $skipped_utf32) = @$table;

        open(my $in, "<", "input/" . $filename)
            or die "Cannot read " . $filename . ": " . $!;
        while (my $line = <$in>)
        {
            # Data lines look like "0xBBBB <ws> 0xUUUU <ws> # comment".
            next unless $line =~ /(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/;

            my $big5 = oct($1);
            my $utf32 = oct($2);
            isValidBig5($big5)
                or die "Bad Big5 char " . printBig5($big5);
            isValidUtf32($utf32)
                or die "Bad UTF32 char " . printUtf32($utf32);

            next if defined($skipped_utf32) && $utf32 == $skipped_utf32;

            if (defined($underlying->[$utf32]))
            {
                # Keep the first Big5 code seen for this Unicode value and
                # only report the later one.
                print "WARNING!  In ", $filename, ", both ",
                      printBig5($underlying->[$utf32]), " and ",
                      printBig5($big5), " map to ", printUtf32($utf32),
                      "\n";
            }
            else
            {
                $underlying->[$utf32] = $big5;
            }
        }
        close $in;
    }
}
192*b1cdbd2cSJim Jagielski
193*b1cdbd2cSJim Jagielski# The following are mapped by the underlying RTL_TEXTENCODING_BIG5 to some
194*b1cdbd2cSJim Jagielski# nonstandard Unicode points, so they are explicitly mentioned here to map
195*b1cdbd2cSJim Jagielski# to the standard Unicode PUA points.  (In the other direction, the unofficial
196*b1cdbd2cSJim Jagielski# mappings from Unicode to RTL_TEXTENCODING_BIG5 C6A1--C7FE are harmless,
197*b1cdbd2cSJim Jagielski# since all Unicode characters involved are already covered by the official
198*b1cdbd2cSJim Jagielski# Big5-HKSCS mappings.)
# Each pair is [ Big5 code, Unicode PUA code point ]; the pair is entered
# into the Big5-to-Unicode table and registered as a regular (non-compat)
# mapping in the other direction.
foreach my $override ([0xC6CF, 0xF6E0], [0xC6D3, 0xF6E4], [0xC6D5, 0xF6E6],
                      [0xC6D7, 0xF6E8], [0xC6DE, 0xF6EF], [0xC6DF, 0xF6F0])
{
    my ($code, $pua_point) = @$override;
    $big5_map[$code >> 8][$code & 0xFF] = $pua_point;
    addMapping($pua_point, $code, -1);
}
205*b1cdbd2cSJim Jagielski
206*b1cdbd2cSJim Jagielski# The following implements the mapping of Big5-HKSCS compatibility points
207*b1cdbd2cSJim Jagielski# (GCCS characters unified with other HKSCS characters) to Unicode.  In the
208*b1cdbd2cSJim Jagielski# other direction, characters from Unicode's PUA will map to these Big5-HKSCS
209*b1cdbd2cSJim Jagielski# compatibility points.  (See the first list in <http://www.info.gov.hk/
210*b1cdbd2cSJim Jagielski# digital21/eng/hkscs/download/big5cmp.txt>.)
# Big5 code => Unicode code point, in ascending Big5 order.  Only the
# Big5-to-Unicode table is filled here; no reverse mapping is registered.
{
    my @compat_points = (
        0x8E69 => 0x7BB8, 0x8E6F => 0x7C06, 0x8E7E => 0x7CCE,
        0x8EAB => 0x7DD2, 0x8EB4 => 0x7E1D, 0x8ECD => 0x8005,
        0x8ED0 => 0x8028, 0x8F57 => 0x83C1, 0x8F69 => 0x84A8,
        0x8F6E => 0x840F, 0x8FCB => 0x89A6, 0x8FCC => 0x89A9,
        0x8FFE => 0x8D77, 0x906D => 0x90FD, 0x907A => 0x92B9,
        0x90DC => 0x975C, 0x90F1 => 0x97FF, 0x91BF => 0x9F16,
        0x9244 => 0x8503, 0x92AF => 0x5159, 0x92B0 => 0x515B,
        0x92B1 => 0x515D, 0x92B2 => 0x515E, 0x92C8 => 0x936E,
        0x92D1 => 0x7479, 0x9447 => 0x6D67, 0x94CA => 0x799B,
        0x95D9 => 0x9097, 0x9644 => 0x975D, 0x96ED => 0x701E,
        0x96FC => 0x5B28, 0x9B76 => 0x7201, 0x9B78 => 0x77D7,
        0x9B7B => 0x7E87, 0x9BC6 => 0x99D6, 0x9BDE => 0x91D4,
        0x9BEC => 0x60DE, 0x9BF6 => 0x6FB6, 0x9C42 => 0x8F36,
        0x9C53 => 0x4FBB, 0x9C62 => 0x71DF, 0x9C68 => 0x9104,
        0x9C6B => 0x9DF0, 0x9C77 => 0x83CF, 0x9CBC => 0x5C10,
        0x9CBD => 0x79E3, 0x9CD0 => 0x5A67, 0x9D57 => 0x8F0B,
        0x9D5A => 0x7B51, 0x9DC4 => 0x62D0, 0x9EA9 => 0x6062,
        0x9EEF => 0x75F9, 0x9EFD => 0x6C4A, 0x9F60 => 0x9B2E,
        0x9F66 => 0x9F17, 0x9FCB => 0x50ED, 0x9FD8 => 0x5F0C,
        0xA063 => 0x880F, 0xA077 => 0x62CE, 0xA0D5 => 0x7468,
        0xA0DF => 0x7162, 0xA0E4 => 0x7250, 0xFA5F => 0x5029,
        0xFA66 => 0x507D, 0xFABD => 0x5305, 0xFAC5 => 0x5344,
        0xFAD5 => 0x537F, 0xFB48 => 0x5605, 0xFBB8 => 0x5A77,
        0xFBF3 => 0x5E75, 0xFBF9 => 0x5ED0, 0xFC4F => 0x5F58,
        0xFC6C => 0x60A4, 0xFCB9 => 0x6490, 0xFCE2 => 0x6674,
        0xFCF1 => 0x675E, 0xFDB7 => 0x6C9C, 0xFDB8 => 0x6E1D,
        0xFDBB => 0x6E2F, 0xFDF1 => 0x716E, 0xFE52 => 0x732A,
        0xFE6F => 0x745C, 0xFEAA => 0x74E9, 0xFEDD => 0x7809,
    );

    # Consume the flat list two items at a time: (Big5 code, Unicode).
    while (my ($code, $unicode) = splice(@compat_points, 0, 2))
    {
        $big5_map[$code >> 8][$code & 0xFF] = $unicode;
    }
}
295*b1cdbd2cSJim Jagielski
# Counters reported after the table has been read.
$pua = 0;
$compat[0] = 0; # 1993
$compat[1] = 0; # 2000
$compat[2] = 0; # 2001

# Read the HKSCS table.  Every data line holds four hexadecimal fields: the
# Big5 code followed by the Unicode values of the 1993, 2000 and 2001
# columns.  Lines of any other shape are ignored.
$filename = "big5-iso.txt";
{
    open(my $in, "<", "input/" . $filename)
        or die "Cannot read " . $filename . ": " . $!;
    while (my $line = <$in>)
    {
        next unless $line =~ /^([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+)$/;

        my ($big5, $utf32_1993, $utf32_2000, $utf32_2001)
            = map { hex($_) } ($1, $2, $3, $4);

        isValidBig5($big5)
            or die "Bad Big5 char " . printBig5($big5);
        foreach my $candidate ($utf32_1993, $utf32_2000, $utf32_2001)
        {
            isValidUtf32($candidate)
                or die "Bad UTF32 char " . printUtf32($candidate);
        }

        # Primary mapping: the 2001 column when mappings beyond Plane 0 are
        # allowed, the 2000 column otherwise.
        my $utf32 = $surrogates ? $utf32_2001 : $utf32_2000;

        # Report Unicode values that the underlying Big5/CP950 tables map
        # from a different Big5 code than HKSCS does.
        my $via_big5 = $underlying_big5[$utf32];
        my $via_cp950 = $underlying_cp950[$utf32];
        if (defined($via_big5) || defined($via_cp950))
        {
            # Every underlying table that knows this Unicode value must map
            # it from the same Big5 code as HKSCS.
            my $agrees = (!defined($via_big5) || $via_big5 == $big5)
                         && (!defined($via_cp950) || $via_cp950 == $big5);
            if (!$agrees)
            {
                print "XXX mapping underlying";
                if (defined($via_big5) && defined($via_cp950)
                    && $via_big5 == $via_cp950)
                {
                    print " Big5/CP950 ", printBig5($via_big5);
                }
                else
                {
                    print " Big5 ", printBig5($via_big5)
                        if defined($via_big5);
                    print " CP950 ", printBig5($via_cp950)
                        if defined($via_cp950);
                }
                print " and HKSCS ", printBig5($big5), " to ",
                      printUtf32($utf32), "\n";
            }
            # Otherwise nothing to do.  Depending on the real underlying
            # mapping (cf. ../convertbig5hkscs.tab), it would be possible to
            # save some table space by dropping those HKSCS code points that
            # are already covered by the underlying mapping.
        }

        ++$pua if $utf32 >= 0xE000 && $utf32 <= 0xF8FF;

        # Big5 -> Unicode; a Big5 code may only be defined once.
        my $big5_row = $big5 >> 8;
        my $big5_column = $big5 & 0xFF;
        die "Bad Big5 mapping " . printBig5($big5)
            if defined($big5_map[$big5_row][$big5_column]);
        $big5_map[$big5_row][$big5_column] = $utf32;

        # Unicode -> Big5: the primary mapping first, then each older or
        # newer Unicode value that differs from all values already added, as
        # a compatibility mapping.
        addMapping($utf32, $big5, -1);
        if ($utf32_2001 != $utf32)
        {
            addMapping($utf32_2001, $big5, 2);
        }
        if ($utf32_2000 != $utf32 && $utf32_2000 != $utf32_2001)
        {
            addMapping($utf32_2000, $big5, 1);
        }
        if ($utf32_1993 != $utf32 && $utf32_1993 != $utf32_2000
            && $utf32_1993 != $utf32_2001)
        {
            addMapping($utf32_1993, $big5, 0);
        }
    }
    close $in;
}
401*b1cdbd2cSJim Jagielski
# Report how many mappings go to the PUA and, for each compatibility slot
# with a non-zero count, how many compatibility mappings were recorded.
print $pua, " mappings to PUA\n";
foreach my $edition ([0, "1993"], [1, "2000"], [2, "2001"])
{
    my ($slot, $year) = @$edition;
    next if $compat[$slot] == 0;
    print $compat[$slot], " ", $year, " compatibility mappings\n";
}
406*b1cdbd2cSJim Jagielski
# Sanity check: abort if any code point in U+0000..U+007F has been given a
# Big5 mapping.  The table is indexed with the loop variable itself, so
# every one of the 128 code points is examined.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for ($utf32 = 0; $utf32 <= 0x7F; ++$utf32)
    {
        if (defined($uni_map[0][0][$utf32]))
        {
            $big5 = $uni_map[0][0][$utf32];
            die "Mapping " . printUtf32($utf32) . " to " . printBig5($big5);
        }
    }
}
418*b1cdbd2cSJim Jagielski
# Open the generated table file; the OUT handle stays open for the code
# that follows.
$filename = lc($id) . ".tab";
open(OUT, ">", $filename) or die "Cannot write " . $filename . ": " . $!;

# Copy the leading comment block of the generator script (lc($id) . ".pl")
# into the output as a C comment.  Copying stops at the first line that is
# not a "#" comment.
{
    $filename = lc($id) . ".pl";
    open(my $script, "<", $filename)
        or die "Cannot read " . $filename . ": " . $!;
    my $first = 1;
    while (my $line = <$script>)
    {
        if ($line =~ /^\#!.*$/)
        {
            # The interpreter line is not copied.
        }
        elsif ($line =~ /^\#(\*.*)$/)
        {
            if ($first == 1)
            {
                # First row of stars opens the C comment: "/****...".
                print OUT "/", $1, "\n";
                $first = 0;
            }
            else
            {
                # Later rows of stars close it: " ***.../" (one star is
                # replaced by the slash).
                print OUT " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/)
        {
            # "# text": the single blank after "#" is dropped.
            print OUT " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        else
        {
            last;
        }
    }
    close $script;
}
458*b1cdbd2cSJim Jagielski
# Emit the include guard for sal/types.h, surrounded by blank lines.
print OUT <<'END_OF_INCLUDES';

#ifndef _SAL_TYPES_H_
#include "sal/types.h"
#endif

END_OF_INCLUDES
464*b1cdbd2cSJim Jagielski
465*b1cdbd2cSJim Jagielskiprint OUT "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
466*b1cdbd2cSJim Jagielski$big5_data_index = 0;
467*b1cdbd2cSJim Jagielski$big5_rows = 0;
468*b1cdbd2cSJim Jagielski$big5_chars = 0;
469*b1cdbd2cSJim Jagielskifor ($big5_row = 0; $big5_row <= 255; ++$big5_row)
470*b1cdbd2cSJim Jagielski{
471*b1cdbd2cSJim Jagielski    $big5_row_first = -1;
472*b1cdbd2cSJim Jagielski    for ($big5_column = 0; $big5_column <= 255; ++$big5_column)
473*b1cdbd2cSJim Jagielski    {
474*b1cdbd2cSJim Jagielski        if (defined($big5_map[$big5_row][$big5_column]))
475*b1cdbd2cSJim Jagielski        {
476*b1cdbd2cSJim Jagielski            if ($big5_row_first == -1)
477*b1cdbd2cSJim Jagielski            {
478*b1cdbd2cSJim Jagielski                $big5_row_first = $big5_column;
479*b1cdbd2cSJim Jagielski            }
480*b1cdbd2cSJim Jagielski            $big5_row_last = $big5_column;
481*b1cdbd2cSJim Jagielski        }
482*b1cdbd2cSJim Jagielski    }
483*b1cdbd2cSJim Jagielski    if ($big5_row_first != -1)
484*b1cdbd2cSJim Jagielski    {
485*b1cdbd2cSJim Jagielski        $big5_data_offsets[$big5_row] = $big5_data_index;
486*b1cdbd2cSJim Jagielski        ++$big5_rows;
487*b1cdbd2cSJim Jagielski        print OUT " /* row ", $big5_row, " */\n";
488*b1cdbd2cSJim Jagielski
489*b1cdbd2cSJim Jagielski        $big5_row_surrogates_first = -1;
490*b1cdbd2cSJim Jagielski        $big5_row_chars = 0;
491*b1cdbd2cSJim Jagielski        $big5_row_surrogates = 0;
492*b1cdbd2cSJim Jagielski
493*b1cdbd2cSJim Jagielski        print OUT "  ", $big5_row_first, " | (", $big5_row_last,
494*b1cdbd2cSJim Jagielski                  " << 8), /* first, last */\n";
495*b1cdbd2cSJim Jagielski        ++$big5_data_index;
496*b1cdbd2cSJim Jagielski
497*b1cdbd2cSJim Jagielski        print OUT "  ", printSpaces(7, 10, $big5_row_first);
498*b1cdbd2cSJim Jagielski        $bol = 0;
499*b1cdbd2cSJim Jagielski        for ($big5_column = $big5_row_first;
500*b1cdbd2cSJim Jagielski             $big5_column <= $big5_row_last;
501*b1cdbd2cSJim Jagielski             ++$big5_column)
502*b1cdbd2cSJim Jagielski        {
503*b1cdbd2cSJim Jagielski            if ($bol == 1)
504*b1cdbd2cSJim Jagielski            {
505*b1cdbd2cSJim Jagielski                print OUT "  ";
506*b1cdbd2cSJim Jagielski                $bol = 0;
507*b1cdbd2cSJim Jagielski            }
508*b1cdbd2cSJim Jagielski            if (defined($big5_map[$big5_row][$big5_column]))
509*b1cdbd2cSJim Jagielski            {
510*b1cdbd2cSJim Jagielski                $utf32 = $big5_map[$big5_row][$big5_column];
511*b1cdbd2cSJim Jagielski                ++$big5_row_chars;
512*b1cdbd2cSJim Jagielski                if ($utf32 <= 0xFFFF)
513*b1cdbd2cSJim Jagielski                {
514*b1cdbd2cSJim Jagielski                    printf OUT "0x%04X,", $utf32;
515*b1cdbd2cSJim Jagielski                }
516*b1cdbd2cSJim Jagielski                else
517*b1cdbd2cSJim Jagielski                {
518*b1cdbd2cSJim Jagielski                    ++$big5_row_surrogates;
519*b1cdbd2cSJim Jagielski                    printf OUT "0x%04X,",
520*b1cdbd2cSJim Jagielski                               (0xD800 | (($utf32 - 0x10000) >> 10));
521*b1cdbd2cSJim Jagielski                    if ($big5_row_surrogates_first == -1)
522*b1cdbd2cSJim Jagielski                    {
523*b1cdbd2cSJim Jagielski                        $big5_row_surrogates_first = $big5_column;
524*b1cdbd2cSJim Jagielski                    }
525*b1cdbd2cSJim Jagielski                    $big5_row_surrogates_last = $big5_column;
526*b1cdbd2cSJim Jagielski                }
527*b1cdbd2cSJim Jagielski            }
528*b1cdbd2cSJim Jagielski            else
529*b1cdbd2cSJim Jagielski            {
530*b1cdbd2cSJim Jagielski                printf OUT "0xffff,";
531*b1cdbd2cSJim Jagielski            }
532*b1cdbd2cSJim Jagielski            ++$big5_data_index;
533*b1cdbd2cSJim Jagielski            if ($big5_column % 10 == 9)
534*b1cdbd2cSJim Jagielski            {
535*b1cdbd2cSJim Jagielski                print OUT "\n";
536*b1cdbd2cSJim Jagielski                $bol = 1;
537*b1cdbd2cSJim Jagielski            }
538*b1cdbd2cSJim Jagielski        }
539*b1cdbd2cSJim Jagielski        if ($bol == 0)
540*b1cdbd2cSJim Jagielski        {
541*b1cdbd2cSJim Jagielski            print OUT "\n";
542*b1cdbd2cSJim Jagielski        }
543*b1cdbd2cSJim Jagielski
544*b1cdbd2cSJim Jagielski        if ($big5_row_surrogates_first != -1)
545*b1cdbd2cSJim Jagielski        {
546*b1cdbd2cSJim Jagielski            print OUT "  ", $big5_row_surrogates_first,
547*b1cdbd2cSJim Jagielski                      ", /* first low-surrogate */\n";
548*b1cdbd2cSJim Jagielski            ++$big5_data_index;
549*b1cdbd2cSJim Jagielski
550*b1cdbd2cSJim Jagielski            print OUT "  ", printSpaces(7, 10, $big5_row_surrogates_first);
551*b1cdbd2cSJim Jagielski            $bol = 0;
552*b1cdbd2cSJim Jagielski            for ($big5_column = $big5_row_surrogates_first;
553*b1cdbd2cSJim Jagielski                 $big5_column <= $big5_row_surrogates_last;
554*b1cdbd2cSJim Jagielski                 ++$big5_column)
555*b1cdbd2cSJim Jagielski            {
556*b1cdbd2cSJim Jagielski                if ($bol == 1)
557*b1cdbd2cSJim Jagielski                {
558*b1cdbd2cSJim Jagielski                    print OUT "  ";
559*b1cdbd2cSJim Jagielski                    $bol = 0;
560*b1cdbd2cSJim Jagielski                }
561*b1cdbd2cSJim Jagielski                $utf32 = 0;
562*b1cdbd2cSJim Jagielski                if (defined($big5_map[$big5_row][$big5_column]))
563*b1cdbd2cSJim Jagielski                {
564*b1cdbd2cSJim Jagielski                    $utf32 = $big5_map[$big5_row][$big5_column];
565*b1cdbd2cSJim Jagielski                }
566*b1cdbd2cSJim Jagielski                if ($utf32 <= 0xFFFF)
567*b1cdbd2cSJim Jagielski                {
568*b1cdbd2cSJim Jagielski                    printf OUT "     0,";
569*b1cdbd2cSJim Jagielski                }
570*b1cdbd2cSJim Jagielski                else
571*b1cdbd2cSJim Jagielski                {
572*b1cdbd2cSJim Jagielski                    printf OUT "0x%04X,",
573*b1cdbd2cSJim Jagielski                               (0xDC00 | (($utf32 - 0x10000) & 0x3FF));
574*b1cdbd2cSJim Jagielski                }
575*b1cdbd2cSJim Jagielski                ++$big5_data_index;
576*b1cdbd2cSJim Jagielski                if ($big5_column % 10 == 9)
577*b1cdbd2cSJim Jagielski                {
578*b1cdbd2cSJim Jagielski                    print OUT "\n";
579*b1cdbd2cSJim Jagielski                    $bol = 1;
580*b1cdbd2cSJim Jagielski                }
581*b1cdbd2cSJim Jagielski            }
582*b1cdbd2cSJim Jagielski            if ($bol == 0)
583*b1cdbd2cSJim Jagielski            {
584*b1cdbd2cSJim Jagielski                print OUT "\n";
585*b1cdbd2cSJim Jagielski            }
586*b1cdbd2cSJim Jagielski        }
587*b1cdbd2cSJim Jagielski
588*b1cdbd2cSJim Jagielski        $big5_chars += $big5_row_chars;
589*b1cdbd2cSJim Jagielski        $big5_data_space[$big5_row]
590*b1cdbd2cSJim Jagielski            = ($big5_data_index - $big5_data_offsets[$big5_row]) * 2;
591*b1cdbd2cSJim Jagielski        $big5_data_used[$big5_row]
592*b1cdbd2cSJim Jagielski            = (1 + $big5_row_chars + ($big5_row_surrogates == 0 ?
593*b1cdbd2cSJim Jagielski                                          0 : 1 + $big5_row_surrogates))
594*b1cdbd2cSJim Jagielski                  * 2;
595*b1cdbd2cSJim Jagielski    }
596*b1cdbd2cSJim Jagielski    else
597*b1cdbd2cSJim Jagielski    {
598*b1cdbd2cSJim Jagielski        print OUT " /* row ", $big5_row, ": --- */\n";
599*b1cdbd2cSJim Jagielski        $big5_data_offsets[$big5_row] = -1;
600*b1cdbd2cSJim Jagielski    }
601*b1cdbd2cSJim Jagielski}
# Terminate the data array emitted above and report the totals on stdout.
print OUT "};\n\n";
print "big5 rows = $big5_rows, chars = $big5_chars\n";

# Emit one offset per Big5 row (0..255): -1 for rows without data, otherwise
# the row's start index in the data array plus a used/space statistic.
# $big5_rowoffsets_used grows by 4 for every row that has data.
print OUT "static sal_Int32 const aImpl${id}ToUnicodeRowOffsets[] = {\n";
$big5_rowoffsets_used = 0;
for my $row (0 .. 255)
{
    my $row_offset = $big5_data_offsets[$row];
    if ($row_offset == -1)
    {
        print OUT "  -1, /* row $row */\n";
        next;
    }

    print OUT "  $row_offset, /* row $row; ",
              printStats($big5_data_used[$row], $big5_data_space[$row]),
              " */\n";
    $big5_rowoffsets_used += 4;
}
print OUT "};\n\n";
627*b1cdbd2cSJim Jagielski
# Emit the Unicode -> target encoding data array.  Every used page of every
# used plane contributes one header entry (first | last << 8) followed by the
# entries for indices first..last, ten index columns per output line.
# Recorded per page for the tables that follow:
#   $uni_data_offsets - start index of the page in the array (-1 if unused)
#   $uni_data_used    - bytes carrying information (header + mapped entries)
#   $uni_data_space   - bytes the page occupies in the array
print OUT "static sal_uInt16 const aImplUnicodeTo${id}Data[] = {\n";
$uni_data_index = 0;
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT " /* plane ${plane}: --- */\n";
        next;
    }

    for my $page (0 .. 255)
    {
        if (!defined($uni_page_used[$plane][$page]))
        {
            $uni_data_offsets[$plane][$page] = -1;
            print OUT " /* plane $plane, page ${page}: --- */\n";
            next;
        }

        $uni_data_offsets[$plane][$page] = $uni_data_index;
        print OUT " /* plane $plane, page $page */\n";

        # Lowest and highest mapped index within this page.  When nothing
        # is mapped, first becomes -1 and last keeps its previous value.
        my @mapped = grep { defined($uni_map[$plane][$page][$_]) } 0 .. 255;
        $uni_page_first = @mapped ? $mapped[0] : -1;
        $uni_page_last = $mapped[-1] if @mapped;

        # Header entry: 2 bytes of used data.
        print OUT "  $uni_page_first | ($uni_page_last << 8), ",
                  "/* first, last */\n";
        ++$uni_data_index;
        $uni_data_used[$plane][$page] = 2;

        # Pad the first line so that entries line up by index modulo 10.
        print OUT "  ", printSpaces(7, 10, $uni_page_first);
        my $at_line_start = 0;
        for my $index ($uni_page_first .. $uni_page_last)
        {
            if ($at_line_start)
            {
                print OUT "  ";
                $at_line_start = 0;
            }

            my $code = $uni_map[$plane][$page][$index];
            if (defined($code))
            {
                printf OUT "0x%04X,", $code;
                $uni_data_used[$plane][$page] += 2;
            }
            else
            {
                # Unmapped slot inside the first..last range.
                print OUT "     0,";
            }
            ++$uni_data_index;

            if ($index % 10 == 9)
            {
                print OUT "\n";
                $at_line_start = 1;
            }
        }
        # Finish a partially filled last line.
        print OUT "\n" if !$at_line_start;

        $uni_data_space[$plane][$page]
            = 2 * ($uni_data_index - $uni_data_offsets[$plane][$page]);
    }
}
print OUT "};\n\n";
713*b1cdbd2cSJim Jagielski
# Emit the page offset table: for every used plane, 256 entries holding the
# start index of each page in the data array (-1 for unused pages).  Unused
# planes only leave a comment.  Per plane, accumulate the bytes used by
# page-offset entries (4 per used page) and the data used/space sums.
print OUT "static sal_Int32 const aImplUnicodeTo${id}PageOffsets[] = {\n";
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT "  /* plane ${plane}: --- */\n";
        next;
    }

    $uni_pageoffsets_used[$plane] = 0;
    $uni_data_used_sum[$plane] = 0;
    $uni_data_space_sum[$plane] = 0;
    for my $page (0 .. 255)
    {
        my $page_offset = $uni_data_offsets[$plane][$page];
        if ($page_offset == -1)
        {
            print OUT "  -1, /* plane $plane, page $page */\n";
            next;
        }

        my $used = $uni_data_used[$plane][$page];
        my $space = $uni_data_space[$plane][$page];
        print OUT "  $page_offset, /* plane $plane, page $page; ",
                  printStats($used, $space),
                  " */\n";
        $uni_pageoffsets_used[$plane] += 4;
        $uni_data_used_sum[$plane] += $used;
        $uni_data_space_sum[$plane] += $space;
    }
}
print OUT "};\n\n";
759*b1cdbd2cSJim Jagielski
# Emit the plane offset table: used planes get consecutive slots
# ("<n> * 256") into the page offset table, unused planes get -1.  The
# per-plane statistics are summed up and written as a closing comment
# inside the array initializer.
print OUT "static sal_Int32 const aImplUnicodeTo${id}PlaneOffsets[] = {\n";
$uni_page_offset = 0;
$uni_planeoffsets_used = 0;
$uni_pageoffsets_used_sum = 0;
$uni_pageoffsets_space_sum = 0;
$uni_data_used_sum2 = 0;
$uni_data_space_sum2 = 0;
for my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT "  -1, /* plane $plane */\n";
        next;
    }

    # Claim the next slot before printing, as the statistics calls below
    # run after the counter has been advanced.
    my $slot = $uni_page_offset++;
    print OUT "  $slot * 256, /* plane ${plane}; ",
              printStats($uni_pageoffsets_used[$plane], 256 * 4),
              ", ",
              printStats($uni_data_used_sum[$plane],
                         $uni_data_space_sum[$plane]),
              " */\n";

    $uni_planeoffsets_used += 4;
    $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$plane];
    $uni_pageoffsets_space_sum += 256 * 4;
    $uni_data_used_sum2 += $uni_data_used_sum[$plane];
    $uni_data_space_sum2 += $uni_data_space_sum[$plane];
}

# Grand totals: plane offsets, page offsets, data.
print OUT " /* ",
          printStats($uni_planeoffsets_used, 17 * 4),
          ", ",
          printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum),
          ", ",
          printStats($uni_data_used_sum2, $uni_data_space_sum2),
          " */\n};\n";
801*b1cdbd2cSJim Jagielski
# Closing a write handle flushes any buffered output, so a failed write
# (for example a full disk) is reported here.  Abort instead of leaving a
# silently truncated table file behind.
close(OUT) or die "Cannot close output file: $!\n";
803