#!/usr/bin/perl
#**************************************************************
#
#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing,
#  software distributed under the License is distributed on an
#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#  KIND, either express or implied.  See the License for the
#  specific language governing permissions and limitations
#  under the License.
#
#**************************************************************
22*7e90fac2SAndrew Rist
23*7e90fac2SAndrew Rist
24cdf0e10cSrcweir
# The following files must be available in a ./input subdir:

# <http://www.info.gov.hk/digital21/eng/hkscs/download/big5-iso.txt>

# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT>:
#  "Unicode version: 1.1    Table version: 0.0d3    Date: 11 February 1994"
#  Only used to track Unicode characters that are mapped from both Big5 and
#  HKSCS.

# <http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT>:
#  "Unicode version: 2.0    Table version: 2.01    Date: 1/7/2000"
#  Only used to track Unicode characters that are mapped from both CP950 and
#  HKSCS.
38cdf0e10cSrcweir
# Generator configuration.
#   $surrogates: set to 1 to allow mappings to Unicode beyond Plane 0.
#   $id:         base name used for the generated table identifiers and for
#                the output/input file names derived from it.
($surrogates, $id) = (0, "Big5Hkscs2001");
42cdf0e10cSrcweir
# Returns true (1) if the argument is an acceptable UTF-32 code point and
# false ("") otherwise.  Rejected are: values outside 0..0x10FFFF, the
# surrogate range D800..DFFF, the range FDD0..FDEF, and any code point whose
# low 16 bits are FFFE or FFFF.
sub isValidUtf32
{
    my ($cp) = @_;
    return !($cp < 0
             || $cp > 0x10FFFF
             || ($cp >= 0xD800 && $cp <= 0xDFFF)
             || ($cp >= 0xFDD0 && $cp <= 0xFDEF)
             || ($cp & 0xFFFF) >= 0xFFFE);
}
51cdf0e10cSrcweir
# Formats a code point as "U+" followed by at least four uppercase hex
# digits.
sub printUtf32
{
    my ($cp) = @_;
    return "U+" . sprintf("%04X", $cp);
}
57cdf0e10cSrcweir
# Returns true (1) if the 16-bit value is a well-formed double-byte code as
# this script defines it: lead byte (high 8 bits) in 0x81..0xFE and trail
# byte (low 8 bits) in 0x40..0x7E or 0xA1..0xFE.  Returns false ("")
# otherwise.
sub isValidBig5
{
    my ($code) = @_;
    my ($lead, $trail) = ($code >> 8, $code & 0xFF);
    my $lead_ok = $lead >= 0x81 && $lead <= 0xFE;
    my $trail_ok = ($trail >= 0x40 && $trail <= 0x7E)
                   || ($trail >= 0xA1 && $trail <= 0xFE);
    return $lead_ok && $trail_ok;
}
67cdf0e10cSrcweir
# Formats a Big5 code as at least four uppercase hex digits (no prefix).
sub printBig5
{
    my ($code) = @_;
    my $text = sprintf("%04X", $code);
    return $text;
}
73cdf0e10cSrcweir
# Formats a usage summary "<used>/<space> bytes (<percent>%)", where the
# percentage is used/space shown with one decimal place.
#   $_[0]: number of bytes used
#   $_[1]: number of bytes of space (must be non-zero)
sub printStats
{
    my ($used, $space) = @_;
    my $percent = $used * 100 / $space;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percent);
}
83cdf0e10cSrcweir
# Returns the blank padding needed so that the first cell printed on a
# table line lands in the right column: one $column_width-wide run of
# spaces for every column between the start of the output line containing
# column $end and column $end itself.
#   $_[0]: width of one column in characters
#   $_[1]: number of columns per output line (must be non-zero)
#   $_[2]: index of the first column that will actually be printed
# All arguments are expected to be integers, as at every call site.
# Only lexical variables are used, so no package globals are modified.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;

    # First column index of the output line on which column $end falls.
    my $line_start = int($end / $columns_per_line) * $columns_per_line;
    my $skipped = $end - $line_start;

    return "" if $skipped <= 0 || $column_width <= 0;
    return " " x ($column_width * $skipped);
}
101cdf0e10cSrcweir
# Records a Unicode -> Big5 mapping in @uni_map (indexed by plane, page and
# index within the page) unless that code point already has a mapping.
#   $_[0]: Unicode code point
#   $_[1]: Big5 code it should map to
#   $_[2]: compatibility slot in @compat to count this mapping under, or -1
#          for a regular (non-compatibility) mapping
# On first registration the plane and page are marked as used and, for a
# compatibility mapping, $compat[$comp] is incremented.  If the code point
# is already mapped, the existing mapping is kept and a warning is printed;
# the warning labels the rejected code "compat" only when $comp is not -1.
#
# $uni_plane, $uni_page and $uni_index are assigned as package globals, as
# before, so this change does not alter what other code sees in them.
sub addMapping
{
    my ($utf32, $big5, $comp) = @_;

    $uni_plane = $utf32 >> 16;
    $uni_page = ($utf32 >> 8) & 0xFF;
    $uni_index = $utf32 & 0xFF;

    # The checks are ordered so that nothing is autovivified for an unused
    # plane or page.
    if (defined($uni_plane_used[$uni_plane])
        && defined($uni_page_used[$uni_plane][$uni_page])
        && defined($uni_map[$uni_plane][$uni_page][$uni_index]))
    {
        my $existing = $uni_map[$uni_plane][$uni_page][$uni_index];
        print "WARNING!  Mapping ", printUtf32($utf32), " to ",
              printBig5($existing), ", NOT ",
              ($comp != -1 ? "compat " : ""),
              printBig5($big5), "\n";
        return;
    }

    $uni_map[$uni_plane][$uni_page][$uni_index] = $big5;
    $uni_plane_used[$uni_plane] = 1;
    $uni_page_used[$uni_plane][$uni_page] = 1;
    ++$compat[$comp] if $comp != -1;
    return;
}
132cdf0e10cSrcweir
# Build mappings to track Unicode characters that are mapped from both Big5/
# CP950 and HKSCS.  @underlying_big5 and @underlying_cp950 are indexed by
# Unicode code point and hold the first Big5 code seen for it in BIG5.TXT
# and CP950.TXT respectively.
{
    # Reads input/<name> and fills the array referenced by $table.
    #   $name:             file name below ./input
    #   $table:            reference to the array to fill
    #   $skip_replacement: if true, mappings to U+FFFD are ignored
    # Dies if the file cannot be opened or contains an invalid Big5 or
    # UTF-32 value; prints a warning when two Big5 codes map to the same
    # Unicode code point (the first one is kept).
    my $load_underlying = sub
    {
        my ($name, $table, $skip_replacement) = @_;

        open(my $in, '<', "input/" . $name)
            or die "Cannot read " . $name . ": " . $!;
        while (my $line = <$in>)
        {
            next unless $line =~ /(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/;
            my $big5 = oct($1);
            my $utf32 = oct($2);
            isValidBig5($big5)
                or die "Bad Big5 char " . printBig5($big5);
            isValidUtf32($utf32)
                or die "Bad UTF32 char " . printUtf32($utf32);
            next if $skip_replacement && $utf32 == 0xFFFD;

            if (defined($table->[$utf32]))
            {
                print "WARNING!  In ", $name, ", both ",
                      printBig5($table->[$utf32]), " and ",
                      printBig5($big5), " map to ", printUtf32($utf32),
                      "\n";
            }
            else
            {
                $table->[$utf32] = $big5;
            }
        }
        close $in;
    };

    # Only the BIG5.TXT pass ignores mappings to U+FFFD.
    $load_underlying->("BIG5.TXT", \@underlying_big5, 1);
    $load_underlying->("CP950.TXT", \@underlying_cp950, 0);
}
192cdf0e10cSrcweir
# The following are mapped by the underlying RTL_TEXTENCODING_BIG5 to some
# nonstandard Unicode points, so they are listed explicitly here and mapped
# to the standard Unicode PUA points instead.  (In the other direction, the
# unofficial mappings from Unicode to RTL_TEXTENCODING_BIG5 C6A1--C7FE are
# harmless, since all Unicode characters involved are already covered by the
# official Big5-HKSCS mappings.)
# Each entry is [Big5 code, PUA code point]; both directions are registered.
for my $override ([0xC6CF, 0xF6E0],
                  [0xC6D3, 0xF6E4],
                  [0xC6D5, 0xF6E6],
                  [0xC6D7, 0xF6E8],
                  [0xC6DE, 0xF6EF],
                  [0xC6DF, 0xF6F0])
{
    my ($code, $pua_point) = @$override;
    $big5_map[$code >> 8][$code & 0xFF] = $pua_point;
    addMapping($pua_point, $code, -1);
}
205cdf0e10cSrcweir
# The following implements the mapping of Big5-HKSCS compatibility points
# (GCCS characters unified with other HKSCS characters) to Unicode.  In the
# other direction, characters from Unicode's PUA will map to these Big5-HKSCS
# compatibility points.  (See the first list in <http://www.info.gov.hk/
# digital21/eng/hkscs/download/big5cmp.txt>.)
# The table is a flat list of (Big5 code => Unicode code point) pairs; only
# the Big5 -> Unicode direction (@big5_map) is filled in here.
{
    my @compat_points = (
        0x8E69 => 0x7BB8, 0x8E6F => 0x7C06, 0x8E7E => 0x7CCE,
        0x8EAB => 0x7DD2, 0x8EB4 => 0x7E1D, 0x8ECD => 0x8005,
        0x8ED0 => 0x8028, 0x8F57 => 0x83C1, 0x8F69 => 0x84A8,
        0x8F6E => 0x840F, 0x8FCB => 0x89A6, 0x8FCC => 0x89A9,
        0x8FFE => 0x8D77, 0x906D => 0x90FD, 0x907A => 0x92B9,
        0x90DC => 0x975C, 0x90F1 => 0x97FF, 0x91BF => 0x9F16,
        0x9244 => 0x8503, 0x92AF => 0x5159, 0x92B0 => 0x515B,
        0x92B1 => 0x515D, 0x92B2 => 0x515E, 0x92C8 => 0x936E,
        0x92D1 => 0x7479, 0x9447 => 0x6D67, 0x94CA => 0x799B,
        0x95D9 => 0x9097, 0x9644 => 0x975D, 0x96ED => 0x701E,
        0x96FC => 0x5B28, 0x9B76 => 0x7201, 0x9B78 => 0x77D7,
        0x9B7B => 0x7E87, 0x9BC6 => 0x99D6, 0x9BDE => 0x91D4,
        0x9BEC => 0x60DE, 0x9BF6 => 0x6FB6, 0x9C42 => 0x8F36,
        0x9C53 => 0x4FBB, 0x9C62 => 0x71DF, 0x9C68 => 0x9104,
        0x9C6B => 0x9DF0, 0x9C77 => 0x83CF, 0x9CBC => 0x5C10,
        0x9CBD => 0x79E3, 0x9CD0 => 0x5A67, 0x9D57 => 0x8F0B,
        0x9D5A => 0x7B51, 0x9DC4 => 0x62D0, 0x9EA9 => 0x6062,
        0x9EEF => 0x75F9, 0x9EFD => 0x6C4A, 0x9F60 => 0x9B2E,
        0x9F66 => 0x9F17, 0x9FCB => 0x50ED, 0x9FD8 => 0x5F0C,
        0xA063 => 0x880F, 0xA077 => 0x62CE, 0xA0D5 => 0x7468,
        0xA0DF => 0x7162, 0xA0E4 => 0x7250, 0xFA5F => 0x5029,
        0xFA66 => 0x507D, 0xFABD => 0x5305, 0xFAC5 => 0x5344,
        0xFAD5 => 0x537F, 0xFB48 => 0x5605, 0xFBB8 => 0x5A77,
        0xFBF3 => 0x5E75, 0xFBF9 => 0x5ED0, 0xFC4F => 0x5F58,
        0xFC6C => 0x60A4, 0xFCB9 => 0x6490, 0xFCE2 => 0x6674,
        0xFCF1 => 0x675E, 0xFDB7 => 0x6C9C, 0xFDB8 => 0x6E1D,
        0xFDBB => 0x6E2F, 0xFDF1 => 0x716E, 0xFE52 => 0x732A,
        0xFE6F => 0x745C, 0xFEAA => 0x74E9, 0xFEDD => 0x7809,
    );
    for (my $k = 0; $k < @compat_points; $k += 2)
    {
        my ($code, $ucs) = @compat_points[$k, $k + 1];
        $big5_map[$code >> 8][$code & 0xFF] = $ucs;
    }
}
295cdf0e10cSrcweir
# Counters reported after the input has been read.
$pua = 0;       # mappings whose target lies in the BMP PUA (E000..F8FF)
$compat[0] = 0; # 1993
$compat[1] = 0; # 2000
$compat[2] = 0; # 2001

# Read big5-iso.txt.  Each data line holds four hex fields: the Big5 code
# and its Unicode mapping in the 1993, 2000 and 2001 columns.  For every
# line the Big5 -> Unicode entry is stored in @big5_map and the reverse
# mapping(s) are registered through addMapping.
$filename = "big5-iso.txt";
{
    open(my $in, '<', "input/" . $filename)
        or die "Cannot read " . $filename . ": " . $!;
    while (my $line = <$in>)
    {
        my @hex = ($line =~ /^([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+)$/)
            or next;
        my ($big5, $utf32_1993, $utf32_2000, $utf32_2001)
            = map { oct("0x" . $_) } @hex;

        isValidBig5($big5)
            or die "Bad Big5 char " . printBig5($big5);
        isValidUtf32($utf32_1993)
            or die "Bad UTF32 char " . printUtf32($utf32_1993);
        isValidUtf32($utf32_2000)
            or die "Bad UTF32 char " . printUtf32($utf32_2000);
        isValidUtf32($utf32_2001)
            or die "Bad UTF32 char " . printUtf32($utf32_2001);

        # The primary mapping comes from the 2001 column when mappings
        # beyond Plane 0 are allowed, otherwise from the 2000 column.
        my $utf32 = $surrogates ? $utf32_2001 : $utf32_2000;

        # Report the case where an underlying table (Big5 and/or CP950)
        # maps a *different* Big5 code to the same Unicode character.
        # When every underlying mapping that exists agrees with $big5,
        # nothing is reported.
        #
        # Depending on real underlying mapping (cf.
        # ../convertbig5hkscs.tab), it would be possible to save some
        # table space by dropping those HKSCS code points that are
        # already covered by the underlying mapping.
        my $u_big5 = $underlying_big5[$utf32];
        my $u_cp950 = $underlying_cp950[$utf32];
        if (grep { defined($_) && $_ != $big5 } ($u_big5, $u_cp950))
        {
            print "XXX mapping underlying";
            if (defined($u_big5) && defined($u_cp950)
                && $u_big5 == $u_cp950)
            {
                print " Big5/CP950 ", printBig5($u_big5);
            }
            else
            {
                print " Big5 ", printBig5($u_big5) if defined($u_big5);
                print " CP950 ", printBig5($u_cp950) if defined($u_cp950);
            }
            print " and HKSCS ", printBig5($big5), " to ",
                  printUtf32($utf32), "\n";
        }

        ++$pua if $utf32 >= 0xE000 && $utf32 <= 0xF8FF;

        # A Big5 code may be given a Unicode mapping only once.
        my $big5_row = $big5 >> 8;
        my $big5_column = $big5 & 0xFF;
        if (defined($big5_map[$big5_row][$big5_column]))
        {
            die "Bad Big5 mapping " . printBig5($big5);
        }
        $big5_map[$big5_row][$big5_column] = $utf32;

        addMapping($utf32, $big5, -1);

        # Register the other columns as compatibility mappings, each
        # distinct code point once, in the order 2001, 2000, 1993.
        if ($utf32_2001 != $utf32)
        {
            addMapping($utf32_2001, $big5, 2);
        }
        if ($utf32_2000 != $utf32 && $utf32_2000 != $utf32_2001)
        {
            addMapping($utf32_2000, $big5, 1);
        }
        if ($utf32_1993 != $utf32 && $utf32_1993 != $utf32_2000
            && $utf32_1993 != $utf32_2001)
        {
            addMapping($utf32_1993, $big5, 0);
        }
    }
    close $in;
}
401cdf0e10cSrcweir
# Report the counters gathered while reading the input.
print $pua, " mappings to PUA\n";
print $compat[0], " 1993 compatibility mappings\n" if ($compat[0] != 0);
print $compat[1], " 2000 compatibility mappings\n" if ($compat[1] != 0);
print $compat[2], " 2001 compatibility mappings\n" if ($compat[2] != 0);

# Sanity check: no code point in U+0000..U+007F may have been given a Big5
# mapping.  Each code point is tested by its own index within plane 0,
# page 0.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for my $ascii (0 .. 0x7F)
    {
        if (defined($uni_map[0][0][$ascii]))
        {
            die "Mapping " . printUtf32($ascii) . " to "
                . printBig5($uni_map[0][0][$ascii]);
        }
    }
}
418cdf0e10cSrcweir
# Open the generated table file; OUT stays open for the rest of the script.
$filename = lc($id) . ".tab";
open(OUT, '>', $filename) or die "Cannot write " . $filename . ": " . $!;

# Copy the leading comment header of the generator script (lc($id).".pl")
# into the output as a C comment:
#   - a "#!" line is skipped;
#   - the first "#*..." line becomes the opening "/*...";
#   - any later "#*..." line becomes a closing " *.../" (its last character
#     is replaced by "/");
#   - any other "#" line becomes " *<text>" (one space after "#" is dropped);
#   - the first line that is not a "#" line ends the copy.
{
    $filename = lc($id) . ".pl";
    open(my $script, '<', $filename)
        or die "Cannot read " . $filename . ": " . $!;
    my $first = 1;
    while (my $line = <$script>)
    {
        if ($line =~ /^\#!.*$/)
        {
            next;
        }
        elsif ($line =~ /^\#(\*.*)$/)
        {
            if ($first == 1)
            {
                print OUT "/", $1, "\n";
                $first = 0;
            }
            else
            {
                print OUT " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/)
        {
            print OUT " *", $1, "\n";
        }
        else
        {
            last;
        }
    }
    close $script;
}
458cdf0e10cSrcweir
# Emit the guarded include of sal/types.h, with a blank line before and
# after it.
print OUT join("\n",
               "",
               "#ifndef _SAL_TYPES_H_",
               "#include \"sal/types.h\"",
               "#endif",
               "",
               "");
464cdf0e10cSrcweir
# Emit the Big5 -> Unicode data array.  For every Big5 row (lead byte) that
# has at least one mapping, the array holds:
#   - one word "first | (last << 8)" with the first and last mapped column;
#   - one word per column first..last: the UTF-16 code unit (the high
#     surrogate for code points above U+FFFF), or 0xffff if unmapped;
#   - if the row contains code points above U+FFFF: one word with the first
#     such column, followed by one word per column from the first to the
#     last such column holding the low surrogate (0 where not applicable).
# Along the way this records, per row, the offset into the array
# (@big5_data_offsets, -1 for unused rows), the bytes occupied
# (@big5_data_space) and the bytes actually carrying data (@big5_data_used),
# and counts rows and characters.
{
    # Prints the cells for columns $first..$last to OUT, ten columns per
    # output line, aligned by column index.  $cell_for->($column) supplies
    # the text of each cell.  Every cell advances $big5_data_index.
    my $emit_cells = sub
    {
        my ($first, $last, $cell_for) = @_;
        print OUT "  ", printSpaces(7, 10, $first);
        my $at_line_start = 0;
        for my $column ($first .. $last)
        {
            if ($at_line_start)
            {
                print OUT "  ";
                $at_line_start = 0;
            }
            my $cell = $cell_for->($column);
            print OUT $cell;
            ++$big5_data_index;
            if ($column % 10 == 9)
            {
                print OUT "\n";
                $at_line_start = 1;
            }
        }
        print OUT "\n" unless $at_line_start;
    };

    print OUT "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
    ($big5_data_index, $big5_rows, $big5_chars) = (0, 0, 0);

    for my $row (0 .. 255)
    {
        my @mapped = grep { defined($big5_map[$row][$_]) } 0 .. 255;
        unless (@mapped)
        {
            print OUT " /* row ", $row, ": --- */\n";
            $big5_data_offsets[$row] = -1;
            next;
        }
        my ($first, $last) = @mapped[0, -1];

        $big5_data_offsets[$row] = $big5_data_index;
        ++$big5_rows;
        print OUT " /* row ", $row, " */\n";

        print OUT "  ", $first, " | (", $last, " << 8), /* first, last */\n";
        ++$big5_data_index;

        # Per-row tallies, updated while the main cells are produced.
        my ($row_chars, $astral_count) = (0, 0);
        my ($astral_first, $astral_last) = (-1, -1);

        $emit_cells->($first, $last, sub
        {
            my ($column) = @_;
            my $utf32 = $big5_map[$row][$column];
            return "0xffff," unless defined($utf32);
            ++$row_chars;
            return sprintf("0x%04X,", $utf32) if $utf32 <= 0xFFFF;

            # Beyond the BMP: emit the high surrogate here and remember
            # the column span needing a low surrogate.
            ++$astral_count;
            $astral_first = $column if $astral_first == -1;
            $astral_last = $column;
            return sprintf("0x%04X,",
                           (0xD800 | (($utf32 - 0x10000) >> 10)));
        });

        if ($astral_first != -1)
        {
            print OUT "  ", $astral_first, ", /* first low-surrogate */\n";
            ++$big5_data_index;

            $emit_cells->($astral_first, $astral_last, sub
            {
                my ($column) = @_;
                my $utf32 = $big5_map[$row][$column];
                return "     0,"
                    unless defined($utf32) && $utf32 > 0xFFFF;
                return sprintf("0x%04X,",
                               (0xDC00 | (($utf32 - 0x10000) & 0x3FF)));
            });
        }

        $big5_chars += $row_chars;
        # Two bytes per emitted word.
        $big5_data_space[$row]
            = ($big5_data_index - $big5_data_offsets[$row]) * 2;
        $big5_data_used[$row]
            = (1 + $row_chars + ($astral_count == 0 ? 0 : 1 + $astral_count))
              * 2;
    }

    print OUT "};\n\n";
    print "big5 rows = ", $big5_rows, ", chars = ", $big5_chars, "\n";
}
604cdf0e10cSrcweir
# Emit the per-row offset array: for each of the 256 Big5 rows either -1
# (row unused) or the row's offset into the data array, annotated with the
# row's usage statistics.  $big5_rowoffsets_used accumulates 4 bytes for
# every row that is in use.
print OUT "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
$big5_rowoffsets_used = 0;
for my $row (0 .. 255)
{
    my $offset = $big5_data_offsets[$row];
    if ($offset == -1)
    {
        print OUT "  -1, /* row ", $row, " */\n";
        next;
    }
    my $stats = printStats($big5_data_used[$row], $big5_data_space[$row]);
    print OUT "  ", $offset, ", /* row ", $row, "; ", $stats, " */\n";
    $big5_rowoffsets_used += 4;
}
print OUT "};\n\n";
627cdf0e10cSrcweir
628cdf0e10cSrcweirprint OUT "static sal_uInt16 const aImplUnicodeTo", $id, "Data[] = {\n";
629cdf0e10cSrcweir$uni_data_index = 0;
630cdf0e10cSrcweirfor ($uni_plane = 0; $uni_plane <= 16; ++$uni_plane)
631cdf0e10cSrcweir{
632cdf0e10cSrcweir    if (defined($uni_plane_used[$uni_plane]))
633cdf0e10cSrcweir    {
634cdf0e10cSrcweir        for ($uni_page = 0; $uni_page <= 255; ++$uni_page)
635cdf0e10cSrcweir        {
636cdf0e10cSrcweir            if (defined($uni_page_used[$uni_plane][$uni_page]))
637cdf0e10cSrcweir            {
638cdf0e10cSrcweir                $uni_data_offsets[$uni_plane][$uni_page] = $uni_data_index;
639cdf0e10cSrcweir                print OUT " /* plane ", $uni_plane, ", page ", $uni_page,
640cdf0e10cSrcweir                          " */\n";
641cdf0e10cSrcweir
642cdf0e10cSrcweir                $uni_page_first = -1;
643cdf0e10cSrcweir                for ($uni_index = 0; $uni_index <= 255; ++$uni_index)
644cdf0e10cSrcweir                {
645cdf0e10cSrcweir                    if (defined($uni_map[$uni_plane][$uni_page][$uni_index]))
646cdf0e10cSrcweir                    {
647cdf0e10cSrcweir                        if ($uni_page_first == -1)
648cdf0e10cSrcweir                        {
649cdf0e10cSrcweir                            $uni_page_first = $uni_index;
650cdf0e10cSrcweir                        }
651cdf0e10cSrcweir                        $uni_page_last = $uni_index;
652cdf0e10cSrcweir                    }
653cdf0e10cSrcweir                }
654cdf0e10cSrcweir
655cdf0e10cSrcweir                $uni_data_used[$uni_plane][$uni_page] = 0;
656cdf0e10cSrcweir
657cdf0e10cSrcweir                print OUT "  ", $uni_page_first, " | (", $uni_page_last,
658cdf0e10cSrcweir                          " << 8), /* first, last */\n";
659cdf0e10cSrcweir                ++$uni_data_index;
660cdf0e10cSrcweir                $uni_data_used[$uni_plane][$uni_page] += 2;
661cdf0e10cSrcweir
662cdf0e10cSrcweir                print OUT "  ", printSpaces(7, 10, $uni_page_first);
663cdf0e10cSrcweir                $bol = 0;
664cdf0e10cSrcweir                for ($uni_index = $uni_page_first;
665cdf0e10cSrcweir                     $uni_index <= $uni_page_last;
666cdf0e10cSrcweir                     ++$uni_index)
667cdf0e10cSrcweir                {
668cdf0e10cSrcweir                    if ($bol == 1)
669cdf0e10cSrcweir                    {
670cdf0e10cSrcweir                        print OUT "  ";
671cdf0e10cSrcweir                        $bol = 0;
672cdf0e10cSrcweir                    }
673cdf0e10cSrcweir                    if (defined($uni_map[$uni_plane][$uni_page][$uni_index]))
674cdf0e10cSrcweir                    {
675cdf0e10cSrcweir                        $big5 = $uni_map[$uni_plane][$uni_page][$uni_index];
676cdf0e10cSrcweir                        printf OUT "0x%04X,", $big5;
677cdf0e10cSrcweir                        $uni_data_used[$uni_plane][$uni_page] += 2;
678cdf0e10cSrcweir                    }
679cdf0e10cSrcweir                    else
680cdf0e10cSrcweir                    {
681cdf0e10cSrcweir                        print OUT "     0,";
682cdf0e10cSrcweir                    }
683cdf0e10cSrcweir                    ++$uni_data_index;
684cdf0e10cSrcweir                    if ($uni_index % 10 == 9)
685cdf0e10cSrcweir                    {
686cdf0e10cSrcweir                        print OUT "\n";
687cdf0e10cSrcweir                        $bol = 1;
688cdf0e10cSrcweir                    }
689cdf0e10cSrcweir                }
690cdf0e10cSrcweir                if ($bol == 0)
691cdf0e10cSrcweir                {
692cdf0e10cSrcweir                    print OUT "\n";
693cdf0e10cSrcweir                }
694cdf0e10cSrcweir
695cdf0e10cSrcweir                $uni_data_space[$uni_plane][$uni_page]
696cdf0e10cSrcweir                    = ($uni_data_index
697cdf0e10cSrcweir                       - $uni_data_offsets[$uni_plane][$uni_page]) * 2;
698cdf0e10cSrcweir            }
699cdf0e10cSrcweir            else
700cdf0e10cSrcweir            {
701cdf0e10cSrcweir                $uni_data_offsets[$uni_plane][$uni_page] = -1;
702cdf0e10cSrcweir                print OUT " /* plane ", $uni_plane, ", page ", $uni_page,
703cdf0e10cSrcweir                          ": --- */\n";
704cdf0e10cSrcweir            }
705cdf0e10cSrcweir        }
706cdf0e10cSrcweir    }
707cdf0e10cSrcweir    else
708cdf0e10cSrcweir    {
709cdf0e10cSrcweir        print OUT " /* plane ", $uni_plane, ": --- */\n";
710cdf0e10cSrcweir    }
711cdf0e10cSrcweir}
712cdf0e10cSrcweirprint OUT "};\n\n";
713cdf0e10cSrcweir
# Emit the page-offset table.  For every plane that is in use, one entry is
# written per page (256 per plane): the index at which that page's data
# starts in the data array emitted above, or -1 when the page holds no
# mappings.  Planes that are not in use contribute only a comment line.
# While walking the pages, the per-plane size statistics that the plane-offset
# table reports later are accumulated.
print OUT "static sal_Int32 const aImplUnicodeTo${id}PageOffsets[] = {\n";
foreach my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT "  /* plane ${plane}: --- */\n";
        next;
    }

    # Start this plane's counters from zero before visiting its pages.
    $uni_pageoffsets_used[$plane] = 0;
    $uni_data_used_sum[$plane] = 0;
    $uni_data_space_sum[$plane] = 0;

    foreach my $page (0 .. 255)
    {
        my $start = $uni_data_offsets[$plane][$page];

        # Unused page: placeholder entry, nothing to add to the statistics.
        if ($start == -1)
        {
            print OUT "  -1, /* plane $plane, page $page */\n";
            next;
        }

        # Used page: its start index, annotated with the used/allocated
        # byte figures recorded when the page's data was written.
        print OUT "  $start, /* plane $plane, page $page; ",
                  printStats($uni_data_used[$plane][$page],
                             $uni_data_space[$plane][$page]),
                  " */\n";

        # Each used entry is a sal_Int32 and is counted as 4 bytes.
        $uni_pageoffsets_used[$plane] += 4;
        $uni_data_used_sum[$plane] += $uni_data_used[$plane][$page];
        $uni_data_space_sum[$plane] += $uni_data_space[$plane][$page];
    }
}
print OUT "};\n\n";
759cdf0e10cSrcweir
# Emit the plane-offset table: one entry for each of the 17 planes.  The page
# offset table contains a 256-entry run only for planes that are in use, laid
# out in plane order, so the n-th used plane (counting from 0) is written as
# "n * 256"; planes that are not in use are written as -1.  Each used plane is
# annotated with its page-table and data statistics, and a closing comment
# reports the totals over all three tables.
print OUT "static sal_Int32 const aImplUnicodeTo${id}PlaneOffsets[] = {\n";

my $used_plane_ordinal = 0;     # how many used planes have been emitted so far
my $plane_table_used = 0;       # bytes of plane-offset entries that are in use
my $page_table_used = 0;        # bytes of page-offset entries that are in use
my $page_table_space = 0;       # bytes of page-offset entries emitted
my $data_used_total = 0;        # bytes of data entries that are in use
my $data_space_total = 0;       # bytes of data entries emitted

foreach my $plane (0 .. 16)
{
    if (!defined($uni_plane_used[$plane]))
    {
        print OUT "  -1, /* plane $plane */\n";
        next;
    }

    print OUT "  $used_plane_ordinal * 256, /* plane $plane; ",
              printStats($uni_pageoffsets_used[$plane], 256 * 4),
              ", ",
              printStats($uni_data_used_sum[$plane],
                         $uni_data_space_sum[$plane]),
              " */\n";
    ++$used_plane_ordinal;

    # Fold this plane's figures into the overall totals; a used plane always
    # occupies a full 256-entry run of 4-byte page offsets.
    $plane_table_used += 4;
    $page_table_used += $uni_pageoffsets_used[$plane];
    $page_table_space += 256 * 4;
    $data_used_total += $uni_data_used_sum[$plane];
    $data_space_total += $uni_data_space_sum[$plane];
}

# Summary comment: plane table, page table, data table (in that order).
print OUT " /* ",
          printStats($plane_table_used, 17 * 4),
          ", ",
          printStats($page_table_used, $page_table_space),
          ", ",
          printStats($data_used_total, $data_space_total),
          " */\n};\n";
801cdf0e10cSrcweir
# Output to OUT is buffered, so a failed write (disk full, I/O error) may only
# be reported when the handle is closed.  Check the result so that a truncated
# generated table is never silently accepted.
close(OUT) or die "Error closing output file: $!\n";
803