1*b1cdbd2cSJim Jagielski#!/usr/bin/perl 2*b1cdbd2cSJim Jagielski#************************************************************** 3*b1cdbd2cSJim Jagielski# 4*b1cdbd2cSJim Jagielski# Licensed to the Apache Software Foundation (ASF) under one 5*b1cdbd2cSJim Jagielski# or more contributor license agreements. See the NOTICE file 6*b1cdbd2cSJim Jagielski# distributed with this work for additional information 7*b1cdbd2cSJim Jagielski# regarding copyright ownership. The ASF licenses this file 8*b1cdbd2cSJim Jagielski# to you under the Apache License, Version 2.0 (the 9*b1cdbd2cSJim Jagielski# "License"); you may not use this file except in compliance 10*b1cdbd2cSJim Jagielski# with the License. You may obtain a copy of the License at 11*b1cdbd2cSJim Jagielski# 12*b1cdbd2cSJim Jagielski# http://www.apache.org/licenses/LICENSE-2.0 13*b1cdbd2cSJim Jagielski# 14*b1cdbd2cSJim Jagielski# Unless required by applicable law or agreed to in writing, 15*b1cdbd2cSJim Jagielski# software distributed under the License is distributed on an 16*b1cdbd2cSJim Jagielski# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17*b1cdbd2cSJim Jagielski# KIND, either express or implied. See the License for the 18*b1cdbd2cSJim Jagielski# specific language governing permissions and limitations 19*b1cdbd2cSJim Jagielski# under the License. 20*b1cdbd2cSJim Jagielski# 21*b1cdbd2cSJim Jagielski#************************************************************** 22*b1cdbd2cSJim Jagielski 23*b1cdbd2cSJim Jagielski 24*b1cdbd2cSJim Jagielski 25*b1cdbd2cSJim Jagielski# The following files must be available in a ./input subdir: 26*b1cdbd2cSJim Jagielski 27*b1cdbd2cSJim Jagielski# <http://www.info.gov.hk/digital21/eng/hkscs/download/big5-iso.txt> 28*b1cdbd2cSJim Jagielski 29*b1cdbd2cSJim Jagielski# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT>: 30*b1cdbd2cSJim Jagielski# "Unicode version: 1.1 Table version: 0.0d3 Date: 11 February 1994" 31*b1cdbd2cSJim Jagielski# Only used to track Unicode characters that are mapped from both Big5 and 32*b1cdbd2cSJim Jagielski# HKSCS. 33*b1cdbd2cSJim Jagielski 34*b1cdbd2cSJim Jagielski# <http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT>: 35*b1cdbd2cSJim Jagielski# "Unicode version: 2.0 Table version: 2.01 Date: 1/7/2000" 36*b1cdbd2cSJim Jagielski# Only used to track Unicode characters that are mapped from both CP950 and 37*b1cdbd2cSJim Jagielski# HKSCS. 38*b1cdbd2cSJim Jagielski 39*b1cdbd2cSJim Jagielski$surrogates = 0; # set to 1 to allow mappings to Unicode beyond Plane 0 40*b1cdbd2cSJim Jagielski 41*b1cdbd2cSJim Jagielski$id = "Big5Hkscs2001"; 42*b1cdbd2cSJim Jagielski 43*b1cdbd2cSJim Jagielskisub isValidUtf32 44*b1cdbd2cSJim Jagielski{ 45*b1cdbd2cSJim Jagielski my $utf32 = $_[0]; 46*b1cdbd2cSJim Jagielski return $utf32 >= 0 && $utf32 <= 0x10FFFF 47*b1cdbd2cSJim Jagielski && !($utf32 >= 0xD800 && $utf32 <= 0xDFFF) 48*b1cdbd2cSJim Jagielski && !($utf32 >= 0xFDD0 && $utf32 <= 0xFDEF) 49*b1cdbd2cSJim Jagielski && ($utf32 & 0xFFFF) < 0xFFFE; 50*b1cdbd2cSJim Jagielski} 51*b1cdbd2cSJim Jagielski 52*b1cdbd2cSJim Jagielskisub printUtf32 53*b1cdbd2cSJim Jagielski{ 54*b1cdbd2cSJim Jagielski my $utf32 = $_[0]; 55*b1cdbd2cSJim Jagielski return sprintf("U+%04X", $utf32); 56*b1cdbd2cSJim Jagielski} 57*b1cdbd2cSJim Jagielski 58*b1cdbd2cSJim Jagielskisub isValidBig5 59*b1cdbd2cSJim Jagielski{ 60*b1cdbd2cSJim Jagielski my $big5 = $_[0]; 61*b1cdbd2cSJim Jagielski my $big5_row = $big5 >> 8; 62*b1cdbd2cSJim Jagielski my $big5_column = $big5 & 0xFF; 63*b1cdbd2cSJim Jagielski return $big5_row >= 0x81 && $big5_row <= 0xFE 64*b1cdbd2cSJim Jagielski && ($big5_column >= 0x40 && $big5_column <= 0x7E 65*b1cdbd2cSJim Jagielski || $big5_column >= 0xA1 && $big5_column <= 0xFE); 66*b1cdbd2cSJim Jagielski} 67*b1cdbd2cSJim Jagielski 68*b1cdbd2cSJim Jagielskisub printBig5 69*b1cdbd2cSJim Jagielski{ 70*b1cdbd2cSJim Jagielski my $big5 = $_[0]; 71*b1cdbd2cSJim Jagielski return sprintf("%04X", $big5); 72*b1cdbd2cSJim Jagielski} 73*b1cdbd2cSJim Jagielski 74*b1cdbd2cSJim Jagielskisub printStats 75*b1cdbd2cSJim Jagielski{ 76*b1cdbd2cSJim Jagielski my $used = $_[0]; 77*b1cdbd2cSJim Jagielski my $space = $_[1]; 78*b1cdbd2cSJim Jagielski return sprintf("%d/%d bytes (%.1f%%)", 79*b1cdbd2cSJim Jagielski $used, 80*b1cdbd2cSJim Jagielski $space, 81*b1cdbd2cSJim Jagielski $used * 100 / $space); 82*b1cdbd2cSJim Jagielski} 83*b1cdbd2cSJim Jagielski 84*b1cdbd2cSJim Jagielskisub printSpaces 85*b1cdbd2cSJim Jagielski{ 86*b1cdbd2cSJim Jagielski my $column_width = $_[0]; 87*b1cdbd2cSJim Jagielski my $columns_per_line = $_[1]; 88*b1cdbd2cSJim Jagielski my $end = $_[2]; 89*b1cdbd2cSJim Jagielski $output = ""; 90*b1cdbd2cSJim Jagielski for ($i = int($end / $columns_per_line) * $columns_per_line; 91*b1cdbd2cSJim Jagielski $i < $end; 92*b1cdbd2cSJim Jagielski ++$i) 93*b1cdbd2cSJim Jagielski { 94*b1cdbd2cSJim Jagielski for ($j = 0; $j < $column_width; ++$j) 95*b1cdbd2cSJim Jagielski { 96*b1cdbd2cSJim Jagielski $output = $output . " "; 97*b1cdbd2cSJim Jagielski } 98*b1cdbd2cSJim Jagielski } 99*b1cdbd2cSJim Jagielski return $output; 100*b1cdbd2cSJim Jagielski} 101*b1cdbd2cSJim Jagielski 102*b1cdbd2cSJim Jagielskisub addMapping 103*b1cdbd2cSJim Jagielski{ 104*b1cdbd2cSJim Jagielski my $utf32 = $_[0]; 105*b1cdbd2cSJim Jagielski my $big5 = $_[1]; 106*b1cdbd2cSJim Jagielski my $comp = $_[2]; 107*b1cdbd2cSJim Jagielski 108*b1cdbd2cSJim Jagielski $uni_plane = $utf32 >> 16; 109*b1cdbd2cSJim Jagielski $uni_page = ($utf32 >> 8) & 0xFF; 110*b1cdbd2cSJim Jagielski $uni_index = $utf32 & 0xFF; 111*b1cdbd2cSJim Jagielski 112*b1cdbd2cSJim Jagielski if (!defined($uni_plane_used[$uni_plane]) 113*b1cdbd2cSJim Jagielski || !defined($uni_page_used[$uni_plane][$uni_page]) 114*b1cdbd2cSJim Jagielski || !defined($uni_map[$uni_plane][$uni_page][$uni_index])) 115*b1cdbd2cSJim Jagielski { 116*b1cdbd2cSJim Jagielski $uni_map[$uni_plane][$uni_page][$uni_index] = $big5; 117*b1cdbd2cSJim Jagielski $uni_plane_used[$uni_plane] = 1; 118*b1cdbd2cSJim Jagielski $uni_page_used[$uni_plane][$uni_page] = 1; 119*b1cdbd2cSJim Jagielski if ($comp != -1) 120*b1cdbd2cSJim Jagielski { 121*b1cdbd2cSJim Jagielski ++$compat[$comp]; 122*b1cdbd2cSJim Jagielski } 123*b1cdbd2cSJim Jagielski } 124*b1cdbd2cSJim Jagielski else 125*b1cdbd2cSJim Jagielski { 126*b1cdbd2cSJim Jagielski $big5_1 = $uni_map[$uni_plane][$uni_page][$uni_index]; 127*b1cdbd2cSJim Jagielski print "WARNING! Mapping ", printUtf32($utf32), " to ", 128*b1cdbd2cSJim Jagielski printBig5($big5_1), ", NOT ", ($comp ? "compat " : ""), 129*b1cdbd2cSJim Jagielski printBig5($big5), "\n"; 130*b1cdbd2cSJim Jagielski } 131*b1cdbd2cSJim Jagielski} 132*b1cdbd2cSJim Jagielski 133*b1cdbd2cSJim Jagielski# Build mappings to track Unicode characters that are mapped from both Big5/ 134*b1cdbd2cSJim Jagielski# CP950 and HKSCS: 135*b1cdbd2cSJim Jagielski{ 136*b1cdbd2cSJim Jagielski $filename = "BIG5.TXT"; 137*b1cdbd2cSJim Jagielski open IN, ("input/" . $filename) or die "Cannot read " . $filename; 138*b1cdbd2cSJim Jagielski while (<IN>) 139*b1cdbd2cSJim Jagielski { 140*b1cdbd2cSJim Jagielski if (/(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/) 141*b1cdbd2cSJim Jagielski { 142*b1cdbd2cSJim Jagielski $big5 = oct($1); 143*b1cdbd2cSJim Jagielski $utf32 = oct($2); 144*b1cdbd2cSJim Jagielski isValidBig5($big5) 145*b1cdbd2cSJim Jagielski or die "Bad Big5 char " . printBig5($big5); 146*b1cdbd2cSJim Jagielski isValidUtf32($utf32) 147*b1cdbd2cSJim Jagielski or die "Bad UTF32 char " . printUtf32($utf32); 148*b1cdbd2cSJim Jagielski if ($utf32 != 0xFFFD) 149*b1cdbd2cSJim Jagielski { 150*b1cdbd2cSJim Jagielski if (defined($underlying_big5[$utf32])) 151*b1cdbd2cSJim Jagielski { 152*b1cdbd2cSJim Jagielski print "WARNING! In ", $filename, ", both ", 153*b1cdbd2cSJim Jagielski printBig5($underlying_big5[$utf32]), " and ", 154*b1cdbd2cSJim Jagielski printBig5($big5), " map to ", printUtf32($utf32), 155*b1cdbd2cSJim Jagielski "\n"; 156*b1cdbd2cSJim Jagielski } 157*b1cdbd2cSJim Jagielski else 158*b1cdbd2cSJim Jagielski { 159*b1cdbd2cSJim Jagielski $underlying_big5[$utf32] = $big5; 160*b1cdbd2cSJim Jagielski } 161*b1cdbd2cSJim Jagielski } 162*b1cdbd2cSJim Jagielski } 163*b1cdbd2cSJim Jagielski } 164*b1cdbd2cSJim Jagielski close IN; 165*b1cdbd2cSJim Jagielski 166*b1cdbd2cSJim Jagielski $filename = "CP950.TXT"; 167*b1cdbd2cSJim Jagielski open IN, ("input/" . $filename) or die "Cannot read " . $filename; 168*b1cdbd2cSJim Jagielski while (<IN>) 169*b1cdbd2cSJim Jagielski { 170*b1cdbd2cSJim Jagielski if (/(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/) 171*b1cdbd2cSJim Jagielski { 172*b1cdbd2cSJim Jagielski $big5 = oct($1); 173*b1cdbd2cSJim Jagielski $utf32 = oct($2); 174*b1cdbd2cSJim Jagielski isValidBig5($big5) 175*b1cdbd2cSJim Jagielski or die "Bad Big5 char " . printBig5($big5); 176*b1cdbd2cSJim Jagielski isValidUtf32($utf32) 177*b1cdbd2cSJim Jagielski or die "Bad UTF32 char " . printUtf32($utf32); 178*b1cdbd2cSJim Jagielski if (defined($underlying_cp950[$utf32])) 179*b1cdbd2cSJim Jagielski { 180*b1cdbd2cSJim Jagielski print "WARNING! In ", $filename, ", both ", 181*b1cdbd2cSJim Jagielski printBig5($underlying_cp950[$utf32]), " and ", 182*b1cdbd2cSJim Jagielski printBig5($big5), " map to ", printUtf32($utf32), "\n"; 183*b1cdbd2cSJim Jagielski } 184*b1cdbd2cSJim Jagielski else 185*b1cdbd2cSJim Jagielski { 186*b1cdbd2cSJim Jagielski $underlying_cp950[$utf32] = $big5; 187*b1cdbd2cSJim Jagielski } 188*b1cdbd2cSJim Jagielski } 189*b1cdbd2cSJim Jagielski } 190*b1cdbd2cSJim Jagielski close IN; 191*b1cdbd2cSJim Jagielski} 192*b1cdbd2cSJim Jagielski 193*b1cdbd2cSJim Jagielski# The following are mapped by the underlying RTL_TEXTENCODING_BIG5 to some 194*b1cdbd2cSJim Jagielski# nonstandard Unicode points, so they are explicitly mentioned here to map 195*b1cdbd2cSJim Jagielski# to the standard Unicode PUA points. (In the other direction, the unofficial 196*b1cdbd2cSJim Jagielski# mappings from Unicode to RTL_TEXTENCODING_BIG5 C6A1--C7FE are harmless, 197*b1cdbd2cSJim Jagielski# since all Unicode characters involved are already covered by the official 198*b1cdbd2cSJim Jagielski# Big5-HKSCS mappings.) 199*b1cdbd2cSJim Jagielski$big5_map[0xC6][0xCF] = 0xF6E0; addMapping(0xF6E0, 0xC6CF, -1); 200*b1cdbd2cSJim Jagielski$big5_map[0xC6][0xD3] = 0xF6E4; addMapping(0xF6E4, 0xC6D3, -1); 201*b1cdbd2cSJim Jagielski$big5_map[0xC6][0xD5] = 0xF6E6; addMapping(0xF6E6, 0xC6D5, -1); 202*b1cdbd2cSJim Jagielski$big5_map[0xC6][0xD7] = 0xF6E8; addMapping(0xF6E8, 0xC6D7, -1); 203*b1cdbd2cSJim Jagielski$big5_map[0xC6][0xDE] = 0xF6EF; addMapping(0xF6EF, 0xC6DE, -1); 204*b1cdbd2cSJim Jagielski$big5_map[0xC6][0xDF] = 0xF6F0; addMapping(0xF6F0, 0xC6DF, -1); 205*b1cdbd2cSJim Jagielski 206*b1cdbd2cSJim Jagielski# The following implements the mapping of Big5-HKSCS compatibility points 207*b1cdbd2cSJim Jagielski# (GCCS characters unified with other HKSCS characters) to Unicode. In the 208*b1cdbd2cSJim Jagielski# other direction, characters from Unicode's PUA will map to these Big5-HKSCS 209*b1cdbd2cSJim Jagielski# compatibility points. (See the first list in <http://www.info.gov.hk/ 210*b1cdbd2cSJim Jagielski# digital21/eng/hkscs/download/big5cmp.txt>.) 211*b1cdbd2cSJim Jagielski$big5_map[0x8E][0x69] = 0x7BB8; 212*b1cdbd2cSJim Jagielski$big5_map[0x8E][0x6F] = 0x7C06; 213*b1cdbd2cSJim Jagielski$big5_map[0x8E][0x7E] = 0x7CCE; 214*b1cdbd2cSJim Jagielski$big5_map[0x8E][0xAB] = 0x7DD2; 215*b1cdbd2cSJim Jagielski$big5_map[0x8E][0xB4] = 0x7E1D; 216*b1cdbd2cSJim Jagielski$big5_map[0x8E][0xCD] = 0x8005; 217*b1cdbd2cSJim Jagielski$big5_map[0x8E][0xD0] = 0x8028; 218*b1cdbd2cSJim Jagielski$big5_map[0x8F][0x57] = 0x83C1; 219*b1cdbd2cSJim Jagielski$big5_map[0x8F][0x69] = 0x84A8; 220*b1cdbd2cSJim Jagielski$big5_map[0x8F][0x6E] = 0x840F; 221*b1cdbd2cSJim Jagielski$big5_map[0x8F][0xCB] = 0x89A6; 222*b1cdbd2cSJim Jagielski$big5_map[0x8F][0xCC] = 0x89A9; 223*b1cdbd2cSJim Jagielski$big5_map[0x8F][0xFE] = 0x8D77; 224*b1cdbd2cSJim Jagielski$big5_map[0x90][0x6D] = 0x90FD; 225*b1cdbd2cSJim Jagielski$big5_map[0x90][0x7A] = 0x92B9; 226*b1cdbd2cSJim Jagielski$big5_map[0x90][0xDC] = 0x975C; 227*b1cdbd2cSJim Jagielski$big5_map[0x90][0xF1] = 0x97FF; 228*b1cdbd2cSJim Jagielski$big5_map[0x91][0xBF] = 0x9F16; 229*b1cdbd2cSJim Jagielski$big5_map[0x92][0x44] = 0x8503; 230*b1cdbd2cSJim Jagielski$big5_map[0x92][0xAF] = 0x5159; 231*b1cdbd2cSJim Jagielski$big5_map[0x92][0xB0] = 0x515B; 232*b1cdbd2cSJim Jagielski$big5_map[0x92][0xB1] = 0x515D; 233*b1cdbd2cSJim Jagielski$big5_map[0x92][0xB2] = 0x515E; 234*b1cdbd2cSJim Jagielski$big5_map[0x92][0xC8] = 0x936E; 235*b1cdbd2cSJim Jagielski$big5_map[0x92][0xD1] = 0x7479; 236*b1cdbd2cSJim Jagielski$big5_map[0x94][0x47] = 0x6D67; 237*b1cdbd2cSJim Jagielski$big5_map[0x94][0xCA] = 0x799B; 238*b1cdbd2cSJim Jagielski$big5_map[0x95][0xD9] = 0x9097; 239*b1cdbd2cSJim Jagielski$big5_map[0x96][0x44] = 0x975D; 240*b1cdbd2cSJim Jagielski$big5_map[0x96][0xED] = 0x701E; 241*b1cdbd2cSJim Jagielski$big5_map[0x96][0xFC] = 0x5B28; 242*b1cdbd2cSJim Jagielski$big5_map[0x9B][0x76] = 0x7201; 243*b1cdbd2cSJim Jagielski$big5_map[0x9B][0x78] = 0x77D7; 244*b1cdbd2cSJim Jagielski$big5_map[0x9B][0x7B] = 0x7E87; 245*b1cdbd2cSJim Jagielski$big5_map[0x9B][0xC6] = 0x99D6; 246*b1cdbd2cSJim Jagielski$big5_map[0x9B][0xDE] = 0x91D4; 247*b1cdbd2cSJim Jagielski$big5_map[0x9B][0xEC] = 0x60DE; 248*b1cdbd2cSJim Jagielski$big5_map[0x9B][0xF6] = 0x6FB6; 249*b1cdbd2cSJim Jagielski$big5_map[0x9C][0x42] = 0x8F36; 250*b1cdbd2cSJim Jagielski$big5_map[0x9C][0x53] = 0x4FBB; 251*b1cdbd2cSJim Jagielski$big5_map[0x9C][0x62] = 0x71DF; 252*b1cdbd2cSJim Jagielski$big5_map[0x9C][0x68] = 0x9104; 253*b1cdbd2cSJim Jagielski$big5_map[0x9C][0x6B] = 0x9DF0; 254*b1cdbd2cSJim Jagielski$big5_map[0x9C][0x77] = 0x83CF; 255*b1cdbd2cSJim Jagielski$big5_map[0x9C][0xBC] = 0x5C10; 256*b1cdbd2cSJim Jagielski$big5_map[0x9C][0xBD] = 0x79E3; 257*b1cdbd2cSJim Jagielski$big5_map[0x9C][0xD0] = 0x5A67; 258*b1cdbd2cSJim Jagielski$big5_map[0x9D][0x57] = 0x8F0B; 259*b1cdbd2cSJim Jagielski$big5_map[0x9D][0x5A] = 0x7B51; 260*b1cdbd2cSJim Jagielski$big5_map[0x9D][0xC4] = 0x62D0; 261*b1cdbd2cSJim Jagielski$big5_map[0x9E][0xA9] = 0x6062; 262*b1cdbd2cSJim Jagielski$big5_map[0x9E][0xEF] = 0x75F9; 263*b1cdbd2cSJim Jagielski$big5_map[0x9E][0xFD] = 0x6C4A; 264*b1cdbd2cSJim Jagielski$big5_map[0x9F][0x60] = 0x9B2E; 265*b1cdbd2cSJim Jagielski$big5_map[0x9F][0x66] = 0x9F17; 266*b1cdbd2cSJim Jagielski$big5_map[0x9F][0xCB] = 0x50ED; 267*b1cdbd2cSJim Jagielski$big5_map[0x9F][0xD8] = 0x5F0C; 268*b1cdbd2cSJim Jagielski$big5_map[0xA0][0x63] = 0x880F; 269*b1cdbd2cSJim Jagielski$big5_map[0xA0][0x77] = 0x62CE; 270*b1cdbd2cSJim Jagielski$big5_map[0xA0][0xD5] = 0x7468; 271*b1cdbd2cSJim Jagielski$big5_map[0xA0][0xDF] = 0x7162; 272*b1cdbd2cSJim Jagielski$big5_map[0xA0][0xE4] = 0x7250; 273*b1cdbd2cSJim Jagielski$big5_map[0xFA][0x5F] = 0x5029; 274*b1cdbd2cSJim Jagielski$big5_map[0xFA][0x66] = 0x507D; 275*b1cdbd2cSJim Jagielski$big5_map[0xFA][0xBD] = 0x5305; 276*b1cdbd2cSJim Jagielski$big5_map[0xFA][0xC5] = 0x5344; 277*b1cdbd2cSJim Jagielski$big5_map[0xFA][0xD5] = 0x537F; 278*b1cdbd2cSJim Jagielski$big5_map[0xFB][0x48] = 0x5605; 279*b1cdbd2cSJim Jagielski$big5_map[0xFB][0xB8] = 0x5A77; 280*b1cdbd2cSJim Jagielski$big5_map[0xFB][0xF3] = 0x5E75; 281*b1cdbd2cSJim Jagielski$big5_map[0xFB][0xF9] = 0x5ED0; 282*b1cdbd2cSJim Jagielski$big5_map[0xFC][0x4F] = 0x5F58; 283*b1cdbd2cSJim Jagielski$big5_map[0xFC][0x6C] = 0x60A4; 284*b1cdbd2cSJim Jagielski$big5_map[0xFC][0xB9] = 0x6490; 285*b1cdbd2cSJim Jagielski$big5_map[0xFC][0xE2] = 0x6674; 286*b1cdbd2cSJim Jagielski$big5_map[0xFC][0xF1] = 0x675E; 287*b1cdbd2cSJim Jagielski$big5_map[0xFD][0xB7] = 0x6C9C; 288*b1cdbd2cSJim Jagielski$big5_map[0xFD][0xB8] = 0x6E1D; 289*b1cdbd2cSJim Jagielski$big5_map[0xFD][0xBB] = 0x6E2F; 290*b1cdbd2cSJim Jagielski$big5_map[0xFD][0xF1] = 0x716E; 291*b1cdbd2cSJim Jagielski$big5_map[0xFE][0x52] = 0x732A; 292*b1cdbd2cSJim Jagielski$big5_map[0xFE][0x6F] = 0x745C; 293*b1cdbd2cSJim Jagielski$big5_map[0xFE][0xAA] = 0x74E9; 294*b1cdbd2cSJim Jagielski$big5_map[0xFE][0xDD] = 0x7809; 295*b1cdbd2cSJim Jagielski 296*b1cdbd2cSJim Jagielski$pua = 0; 297*b1cdbd2cSJim Jagielski$compat[0] = 0; # 1993 298*b1cdbd2cSJim Jagielski$compat[1] = 0; # 2000 299*b1cdbd2cSJim Jagielski$compat[2] = 0; # 2001 300*b1cdbd2cSJim Jagielski 301*b1cdbd2cSJim Jagielski$filename = "big5-iso.txt"; 302*b1cdbd2cSJim Jagielskiopen IN, ("input/" . $filename) or die "Cannot read " . $filename; 303*b1cdbd2cSJim Jagielskiwhile (<IN>) 304*b1cdbd2cSJim Jagielski{ 305*b1cdbd2cSJim Jagielski if (/^([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+)$/) 306*b1cdbd2cSJim Jagielski { 307*b1cdbd2cSJim Jagielski $big5 = oct("0x" . $1); 308*b1cdbd2cSJim Jagielski $utf32_1993 = oct("0x" . $2); 309*b1cdbd2cSJim Jagielski $utf32_2000 = oct("0x" . $3); 310*b1cdbd2cSJim Jagielski $utf32_2001 = oct("0x" . $4); 311*b1cdbd2cSJim Jagielski isValidBig5($big5) 312*b1cdbd2cSJim Jagielski or die "Bad Big5 char " . printBig5($big5); 313*b1cdbd2cSJim Jagielski isValidUtf32($utf32_1993) 314*b1cdbd2cSJim Jagielski or die "Bad UTF32 char " . printUtf32($utf32_1993); 315*b1cdbd2cSJim Jagielski isValidUtf32($utf32_2000) 316*b1cdbd2cSJim Jagielski or die "Bad UTF32 char " . printUtf32($utf32_2000); 317*b1cdbd2cSJim Jagielski isValidUtf32($utf32_2001) 318*b1cdbd2cSJim Jagielski or die "Bad UTF32 char " . printUtf32($utf32_2001); 319*b1cdbd2cSJim Jagielski 320*b1cdbd2cSJim Jagielski $utf32 = $surrogates ? $utf32_2001 : $utf32_2000; 321*b1cdbd2cSJim Jagielski 322*b1cdbd2cSJim Jagielski if (defined($underlying_big5[$utf32]) 323*b1cdbd2cSJim Jagielski || defined($underlying_cp950[$utf32])) 324*b1cdbd2cSJim Jagielski { 325*b1cdbd2cSJim Jagielski if (defined($underlying_big5[$utf32]) 326*b1cdbd2cSJim Jagielski && defined($underlying_cp950[$utf32]) 327*b1cdbd2cSJim Jagielski && $underlying_big5[$utf32] == $underlying_cp950[$utf32] 328*b1cdbd2cSJim Jagielski && $underlying_big5[$utf32] == $big5 329*b1cdbd2cSJim Jagielski || 330*b1cdbd2cSJim Jagielski defined($underlying_big5[$utf32]) 331*b1cdbd2cSJim Jagielski && !defined($underlying_cp950[$utf32]) 332*b1cdbd2cSJim Jagielski && $underlying_big5[$utf32] == $big5 333*b1cdbd2cSJim Jagielski || 334*b1cdbd2cSJim Jagielski !defined($underlying_big5[$utf32]) 335*b1cdbd2cSJim Jagielski && defined($underlying_cp950[$utf32]) 336*b1cdbd2cSJim Jagielski && $underlying_cp950[$utf32] == $big5) 337*b1cdbd2cSJim Jagielski { 338*b1cdbd2cSJim Jagielski # ignore 339*b1cdbd2cSJim Jagielski 340*b1cdbd2cSJim Jagielski # Depending on real underlying mapping (cf. 341*b1cdbd2cSJim Jagielski # ../convertbig5hkscs.tab), it would be possible to save some 342*b1cdbd2cSJim Jagielski # table space by dropping those HKSCS code points that are 343*b1cdbd2cSJim Jagielski # already covered by the underlying mapping. 344*b1cdbd2cSJim Jagielski } 345*b1cdbd2cSJim Jagielski else 346*b1cdbd2cSJim Jagielski { 347*b1cdbd2cSJim Jagielski print "XXX mapping underlying"; 348*b1cdbd2cSJim Jagielski if (defined($underlying_big5[$utf32]) 349*b1cdbd2cSJim Jagielski && defined($underlying_cp950[$utf32]) 350*b1cdbd2cSJim Jagielski && $underlying_big5[$utf32] == $underlying_cp950[$utf32]) 351*b1cdbd2cSJim Jagielski { 352*b1cdbd2cSJim Jagielski print " Big5/CP950 ", printBig5($underlying_big5[$utf32]); 353*b1cdbd2cSJim Jagielski } 354*b1cdbd2cSJim Jagielski else 355*b1cdbd2cSJim Jagielski { 356*b1cdbd2cSJim Jagielski if (defined($underlying_big5[$utf32])) 357*b1cdbd2cSJim Jagielski { 358*b1cdbd2cSJim Jagielski print " Big5 ", printBig5($underlying_big5[$utf32]); 359*b1cdbd2cSJim Jagielski } 360*b1cdbd2cSJim Jagielski if (defined($underlying_cp950[$utf32])) 361*b1cdbd2cSJim Jagielski { 362*b1cdbd2cSJim Jagielski print " CP950 ", printBig5($underlying_cp950[$utf32]); 363*b1cdbd2cSJim Jagielski } 364*b1cdbd2cSJim Jagielski } 365*b1cdbd2cSJim Jagielski print " and HKSCS ", printBig5($big5), " to ", 366*b1cdbd2cSJim Jagielski printUtf32($utf32), "\n"; 367*b1cdbd2cSJim Jagielski } 368*b1cdbd2cSJim Jagielski } 369*b1cdbd2cSJim Jagielski 370*b1cdbd2cSJim Jagielski if ($utf32 >= 0xE000 && $utf32 <= 0xF8FF) 371*b1cdbd2cSJim Jagielski { 372*b1cdbd2cSJim Jagielski ++$pua; 373*b1cdbd2cSJim Jagielski } 374*b1cdbd2cSJim Jagielski 375*b1cdbd2cSJim Jagielski $big5_row = $big5 >> 8; 376*b1cdbd2cSJim Jagielski $big5_column = $big5 & 0xFF; 377*b1cdbd2cSJim Jagielski if (defined($big5_map[$big5_row][$big5_column])) 378*b1cdbd2cSJim Jagielski { 379*b1cdbd2cSJim Jagielski die "Bad Big5 mapping " . printBig5($big5); 380*b1cdbd2cSJim Jagielski } 381*b1cdbd2cSJim Jagielski $big5_map[$big5_row][$big5_column] = $utf32; 382*b1cdbd2cSJim Jagielski 383*b1cdbd2cSJim Jagielski addMapping($utf32, $big5, -1); 384*b1cdbd2cSJim Jagielski 385*b1cdbd2cSJim Jagielski if ($utf32_2001 != $utf32) 386*b1cdbd2cSJim Jagielski { 387*b1cdbd2cSJim Jagielski addMapping($utf32_2001, $big5, 2); 388*b1cdbd2cSJim Jagielski } 389*b1cdbd2cSJim Jagielski if ($utf32_2000 != $utf32 && $utf32_2000 != $utf32_2001) 390*b1cdbd2cSJim Jagielski { 391*b1cdbd2cSJim Jagielski addMapping($utf32_2000, $big5, 1); 392*b1cdbd2cSJim Jagielski } 393*b1cdbd2cSJim Jagielski if ($utf32_1993 != $utf32 && $utf32_1993 != $utf32_2000 394*b1cdbd2cSJim Jagielski && $utf32_1993 != $utf32_2001) 395*b1cdbd2cSJim Jagielski { 396*b1cdbd2cSJim Jagielski addMapping($utf32_1993, $big5, 0); 397*b1cdbd2cSJim Jagielski } 398*b1cdbd2cSJim Jagielski } 399*b1cdbd2cSJim Jagielski} 400*b1cdbd2cSJim Jagielskiclose IN; 401*b1cdbd2cSJim Jagielski 402*b1cdbd2cSJim Jagielskiprint $pua, " mappings to PUA\n"; 403*b1cdbd2cSJim Jagielskiprint $compat[0], " 1993 compatibility mappings\n" if ($compat[0] != 0); 404*b1cdbd2cSJim Jagielskiprint $compat[1], " 2000 compatibility mappings\n" if ($compat[1] != 0); 405*b1cdbd2cSJim Jagielskiprint $compat[2], " 2001 compatibility mappings\n" if ($compat[2] != 0); 406*b1cdbd2cSJim Jagielski 407*b1cdbd2cSJim Jagielskiif (defined($uni_plane_used[0]) && defined($uni_page_used[0][0])) 408*b1cdbd2cSJim Jagielski{ 409*b1cdbd2cSJim Jagielski for ($utf32 = 0; $utf32 <= 0x7F; ++$utf32) 410*b1cdbd2cSJim Jagielski { 411*b1cdbd2cSJim Jagielski if (defined($uni_map[0][0][$uni_index])) 412*b1cdbd2cSJim Jagielski { 413*b1cdbd2cSJim Jagielski $big5 = $uni_map[0][0][$utf32]; 414*b1cdbd2cSJim Jagielski die "Mapping " . printUtf32($utf32) . " to " . printBig5($big5); 415*b1cdbd2cSJim Jagielski } 416*b1cdbd2cSJim Jagielski } 417*b1cdbd2cSJim Jagielski} 418*b1cdbd2cSJim Jagielski 419*b1cdbd2cSJim Jagielski$filename = lc($id) . ".tab"; 420*b1cdbd2cSJim Jagielskiopen OUT, ("> " . $filename) or die "Cannot write " . $filename; 421*b1cdbd2cSJim Jagielski 422*b1cdbd2cSJim Jagielski{ 423*b1cdbd2cSJim Jagielski $filename = lc($id). ".pl"; 424*b1cdbd2cSJim Jagielski open IN, $filename or die "Cannot read ". $filename; 425*b1cdbd2cSJim Jagielski $first = 1; 426*b1cdbd2cSJim Jagielski while (<IN>) 427*b1cdbd2cSJim Jagielski { 428*b1cdbd2cSJim Jagielski if (/^\#!.*$/) 429*b1cdbd2cSJim Jagielski { 430*b1cdbd2cSJim Jagielski } 431*b1cdbd2cSJim Jagielski elsif (/^\#(\*.*)$/) 432*b1cdbd2cSJim Jagielski { 433*b1cdbd2cSJim Jagielski if ($first == 1) 434*b1cdbd2cSJim Jagielski { 435*b1cdbd2cSJim Jagielski print OUT "/", $1, "\n"; 436*b1cdbd2cSJim Jagielski $first = 0; 437*b1cdbd2cSJim Jagielski } 438*b1cdbd2cSJim Jagielski else 439*b1cdbd2cSJim Jagielski { 440*b1cdbd2cSJim Jagielski print OUT " ", substr($1, 0, length($1) - 1), "/\n"; 441*b1cdbd2cSJim Jagielski } 442*b1cdbd2cSJim Jagielski } 443*b1cdbd2cSJim Jagielski elsif (/^\# (.*)$/) 444*b1cdbd2cSJim Jagielski { 445*b1cdbd2cSJim Jagielski print OUT " *", $1, "\n"; 446*b1cdbd2cSJim Jagielski } 447*b1cdbd2cSJim Jagielski elsif (/^\#(.*)$/) 448*b1cdbd2cSJim Jagielski { 449*b1cdbd2cSJim Jagielski print OUT " *", $1, "\n"; 450*b1cdbd2cSJim Jagielski } 451*b1cdbd2cSJim Jagielski else 452*b1cdbd2cSJim Jagielski { 453*b1cdbd2cSJim Jagielski goto done; 454*b1cdbd2cSJim Jagielski } 455*b1cdbd2cSJim Jagielski } 456*b1cdbd2cSJim Jagielski done: 457*b1cdbd2cSJim Jagielski} 458*b1cdbd2cSJim Jagielski 459*b1cdbd2cSJim Jagielskiprint OUT "\n", 460*b1cdbd2cSJim Jagielski "#ifndef _SAL_TYPES_H_\n", 461*b1cdbd2cSJim Jagielski "#include \"sal/types.h\"\n", 462*b1cdbd2cSJim Jagielski "#endif\n", 463*b1cdbd2cSJim Jagielski "\n"; 464*b1cdbd2cSJim Jagielski 465*b1cdbd2cSJim Jagielskiprint OUT "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n"; 466*b1cdbd2cSJim Jagielski$big5_data_index = 0; 467*b1cdbd2cSJim Jagielski$big5_rows = 0; 468*b1cdbd2cSJim Jagielski$big5_chars = 0; 469*b1cdbd2cSJim Jagielskifor ($big5_row = 0; $big5_row <= 255; ++$big5_row) 470*b1cdbd2cSJim Jagielski{ 471*b1cdbd2cSJim Jagielski $big5_row_first = -1; 472*b1cdbd2cSJim Jagielski for ($big5_column = 0; $big5_column <= 255; ++$big5_column) 473*b1cdbd2cSJim Jagielski { 474*b1cdbd2cSJim Jagielski if (defined($big5_map[$big5_row][$big5_column])) 475*b1cdbd2cSJim Jagielski { 476*b1cdbd2cSJim Jagielski if ($big5_row_first == -1) 477*b1cdbd2cSJim Jagielski { 478*b1cdbd2cSJim Jagielski $big5_row_first = $big5_column; 479*b1cdbd2cSJim Jagielski } 480*b1cdbd2cSJim Jagielski $big5_row_last = $big5_column; 481*b1cdbd2cSJim Jagielski } 482*b1cdbd2cSJim Jagielski } 483*b1cdbd2cSJim Jagielski if ($big5_row_first != -1) 484*b1cdbd2cSJim Jagielski { 485*b1cdbd2cSJim Jagielski $big5_data_offsets[$big5_row] = $big5_data_index; 486*b1cdbd2cSJim Jagielski ++$big5_rows; 487*b1cdbd2cSJim Jagielski print OUT " /* row ", $big5_row, " */\n"; 488*b1cdbd2cSJim Jagielski 489*b1cdbd2cSJim Jagielski $big5_row_surrogates_first = -1; 490*b1cdbd2cSJim Jagielski $big5_row_chars = 0; 491*b1cdbd2cSJim Jagielski $big5_row_surrogates = 0; 492*b1cdbd2cSJim Jagielski 493*b1cdbd2cSJim Jagielski print OUT " ", $big5_row_first, " | (", $big5_row_last, 494*b1cdbd2cSJim Jagielski " << 8), /* first, last */\n"; 495*b1cdbd2cSJim Jagielski ++$big5_data_index; 496*b1cdbd2cSJim Jagielski 497*b1cdbd2cSJim Jagielski print OUT " ", printSpaces(7, 10, $big5_row_first); 498*b1cdbd2cSJim Jagielski $bol = 0; 499*b1cdbd2cSJim Jagielski for ($big5_column = $big5_row_first; 500*b1cdbd2cSJim Jagielski $big5_column <= $big5_row_last; 501*b1cdbd2cSJim Jagielski ++$big5_column) 502*b1cdbd2cSJim Jagielski { 503*b1cdbd2cSJim Jagielski if ($bol == 1) 504*b1cdbd2cSJim Jagielski { 505*b1cdbd2cSJim Jagielski print OUT " "; 506*b1cdbd2cSJim Jagielski $bol = 0; 507*b1cdbd2cSJim Jagielski } 508*b1cdbd2cSJim Jagielski if (defined($big5_map[$big5_row][$big5_column])) 509*b1cdbd2cSJim Jagielski { 510*b1cdbd2cSJim Jagielski $utf32 = $big5_map[$big5_row][$big5_column]; 511*b1cdbd2cSJim Jagielski ++$big5_row_chars; 512*b1cdbd2cSJim Jagielski if ($utf32 <= 0xFFFF) 513*b1cdbd2cSJim Jagielski { 514*b1cdbd2cSJim Jagielski printf OUT "0x%04X,", $utf32; 515*b1cdbd2cSJim Jagielski } 516*b1cdbd2cSJim Jagielski else 517*b1cdbd2cSJim Jagielski { 518*b1cdbd2cSJim Jagielski ++$big5_row_surrogates; 519*b1cdbd2cSJim Jagielski printf OUT "0x%04X,", 520*b1cdbd2cSJim Jagielski (0xD800 | (($utf32 - 0x10000) >> 10)); 521*b1cdbd2cSJim Jagielski if ($big5_row_surrogates_first == -1) 522*b1cdbd2cSJim Jagielski { 523*b1cdbd2cSJim Jagielski $big5_row_surrogates_first = $big5_column; 524*b1cdbd2cSJim Jagielski } 525*b1cdbd2cSJim Jagielski $big5_row_surrogates_last = $big5_column; 526*b1cdbd2cSJim Jagielski } 527*b1cdbd2cSJim Jagielski } 528*b1cdbd2cSJim Jagielski else 529*b1cdbd2cSJim Jagielski { 530*b1cdbd2cSJim Jagielski printf OUT "0xffff,"; 531*b1cdbd2cSJim Jagielski } 532*b1cdbd2cSJim Jagielski ++$big5_data_index; 533*b1cdbd2cSJim Jagielski if ($big5_column % 10 == 9) 534*b1cdbd2cSJim Jagielski { 535*b1cdbd2cSJim Jagielski print OUT "\n"; 536*b1cdbd2cSJim Jagielski $bol = 1; 537*b1cdbd2cSJim Jagielski } 538*b1cdbd2cSJim Jagielski } 539*b1cdbd2cSJim Jagielski if ($bol == 0) 540*b1cdbd2cSJim Jagielski { 541*b1cdbd2cSJim Jagielski print OUT "\n"; 542*b1cdbd2cSJim Jagielski } 543*b1cdbd2cSJim Jagielski 544*b1cdbd2cSJim Jagielski if ($big5_row_surrogates_first != -1) 545*b1cdbd2cSJim Jagielski { 546*b1cdbd2cSJim Jagielski print OUT " ", $big5_row_surrogates_first, 547*b1cdbd2cSJim Jagielski ", /* first low-surrogate */\n"; 548*b1cdbd2cSJim Jagielski ++$big5_data_index; 549*b1cdbd2cSJim Jagielski 550*b1cdbd2cSJim Jagielski print OUT " ", printSpaces(7, 10, $big5_row_surrogates_first); 551*b1cdbd2cSJim Jagielski $bol = 0; 552*b1cdbd2cSJim Jagielski for ($big5_column = $big5_row_surrogates_first; 553*b1cdbd2cSJim Jagielski $big5_column <= $big5_row_surrogates_last; 554*b1cdbd2cSJim Jagielski ++$big5_column) 555*b1cdbd2cSJim Jagielski { 556*b1cdbd2cSJim Jagielski if ($bol == 1) 557*b1cdbd2cSJim Jagielski { 558*b1cdbd2cSJim Jagielski print OUT " "; 559*b1cdbd2cSJim Jagielski $bol = 0; 560*b1cdbd2cSJim Jagielski } 561*b1cdbd2cSJim Jagielski $utf32 = 0; 562*b1cdbd2cSJim Jagielski if (defined($big5_map[$big5_row][$big5_column])) 563*b1cdbd2cSJim Jagielski { 564*b1cdbd2cSJim Jagielski $utf32 = $big5_map[$big5_row][$big5_column]; 565*b1cdbd2cSJim Jagielski } 566*b1cdbd2cSJim Jagielski if ($utf32 <= 0xFFFF) 567*b1cdbd2cSJim Jagielski { 568*b1cdbd2cSJim Jagielski printf OUT " 0,"; 569*b1cdbd2cSJim Jagielski } 570*b1cdbd2cSJim Jagielski else 571*b1cdbd2cSJim Jagielski { 572*b1cdbd2cSJim Jagielski printf OUT "0x%04X,", 573*b1cdbd2cSJim Jagielski (0xDC00 | (($utf32 - 0x10000) & 0x3FF)); 574*b1cdbd2cSJim Jagielski } 575*b1cdbd2cSJim Jagielski ++$big5_data_index; 576*b1cdbd2cSJim Jagielski if ($big5_column % 10 == 9) 577*b1cdbd2cSJim Jagielski { 578*b1cdbd2cSJim Jagielski print OUT "\n"; 579*b1cdbd2cSJim Jagielski $bol = 1; 580*b1cdbd2cSJim Jagielski } 581*b1cdbd2cSJim Jagielski } 582*b1cdbd2cSJim Jagielski if ($bol == 0) 583*b1cdbd2cSJim Jagielski { 584*b1cdbd2cSJim Jagielski print OUT "\n"; 585*b1cdbd2cSJim Jagielski } 586*b1cdbd2cSJim Jagielski } 587*b1cdbd2cSJim Jagielski 588*b1cdbd2cSJim Jagielski $big5_chars += $big5_row_chars; 589*b1cdbd2cSJim Jagielski $big5_data_space[$big5_row] 590*b1cdbd2cSJim Jagielski = ($big5_data_index - $big5_data_offsets[$big5_row]) * 2; 591*b1cdbd2cSJim Jagielski $big5_data_used[$big5_row] 592*b1cdbd2cSJim Jagielski = (1 + $big5_row_chars + ($big5_row_surrogates == 0 ? 593*b1cdbd2cSJim Jagielski 0 : 1 + $big5_row_surrogates)) 594*b1cdbd2cSJim Jagielski * 2; 595*b1cdbd2cSJim Jagielski } 596*b1cdbd2cSJim Jagielski else 597*b1cdbd2cSJim Jagielski { 598*b1cdbd2cSJim Jagielski print OUT " /* row ", $big5_row, ": --- */\n"; 599*b1cdbd2cSJim Jagielski $big5_data_offsets[$big5_row] = -1; 600*b1cdbd2cSJim Jagielski } 601*b1cdbd2cSJim Jagielski} 602*b1cdbd2cSJim Jagielskiprint OUT "};\n\n"; 603*b1cdbd2cSJim Jagielskiprint "big5 rows = ", $big5_rows, ", chars = ", $big5_chars, "\n"; 604*b1cdbd2cSJim Jagielski 605*b1cdbd2cSJim Jagielskiprint OUT "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n"; 606*b1cdbd2cSJim Jagielski$big5_rowoffsets_used = 0; 607*b1cdbd2cSJim Jagielskifor ($big5_row = 0; $big5_row <= 255; ++$big5_row) 608*b1cdbd2cSJim Jagielski{ 609*b1cdbd2cSJim Jagielski if ($big5_data_offsets[$big5_row] == -1) 610*b1cdbd2cSJim Jagielski { 611*b1cdbd2cSJim Jagielski print OUT " -1, /* row ", $big5_row, " */\n"; 612*b1cdbd2cSJim Jagielski } 613*b1cdbd2cSJim Jagielski else 614*b1cdbd2cSJim Jagielski { 615*b1cdbd2cSJim Jagielski print OUT " ", 616*b1cdbd2cSJim Jagielski $big5_data_offsets[$big5_row], 617*b1cdbd2cSJim Jagielski ", /* row ", 618*b1cdbd2cSJim Jagielski $big5_row, 619*b1cdbd2cSJim Jagielski "; ", 620*b1cdbd2cSJim Jagielski printStats($big5_data_used[$big5_row], 621*b1cdbd2cSJim Jagielski $big5_data_space[$big5_row]), 622*b1cdbd2cSJim Jagielski " */\n"; 623*b1cdbd2cSJim Jagielski $big5_rowoffsets_used += 4; 624*b1cdbd2cSJim Jagielski } 625*b1cdbd2cSJim Jagielski} 626*b1cdbd2cSJim Jagielskiprint OUT "};\n\n"; 627*b1cdbd2cSJim Jagielski 628*b1cdbd2cSJim Jagielskiprint OUT "static sal_uInt16 const aImplUnicodeTo", $id, "Data[] = {\n"; 629*b1cdbd2cSJim Jagielski$uni_data_index = 0; 630*b1cdbd2cSJim Jagielskifor ($uni_plane = 0; $uni_plane <= 16; ++$uni_plane) 631*b1cdbd2cSJim Jagielski{ 632*b1cdbd2cSJim Jagielski if (defined($uni_plane_used[$uni_plane])) 633*b1cdbd2cSJim Jagielski { 634*b1cdbd2cSJim Jagielski for ($uni_page = 0; $uni_page <= 255; ++$uni_page) 635*b1cdbd2cSJim Jagielski { 636*b1cdbd2cSJim Jagielski if (defined($uni_page_used[$uni_plane][$uni_page])) 637*b1cdbd2cSJim Jagielski { 638*b1cdbd2cSJim Jagielski $uni_data_offsets[$uni_plane][$uni_page] = $uni_data_index; 639*b1cdbd2cSJim Jagielski print OUT " /* plane ", $uni_plane, ", page ", $uni_page, 640*b1cdbd2cSJim Jagielski " */\n"; 641*b1cdbd2cSJim Jagielski 642*b1cdbd2cSJim Jagielski $uni_page_first = -1; 643*b1cdbd2cSJim Jagielski for ($uni_index = 0; $uni_index <= 255; ++$uni_index) 644*b1cdbd2cSJim Jagielski { 645*b1cdbd2cSJim Jagielski if (defined($uni_map[$uni_plane][$uni_page][$uni_index])) 646*b1cdbd2cSJim Jagielski { 647*b1cdbd2cSJim Jagielski if ($uni_page_first == -1) 648*b1cdbd2cSJim Jagielski { 649*b1cdbd2cSJim Jagielski $uni_page_first = $uni_index; 650*b1cdbd2cSJim Jagielski } 651*b1cdbd2cSJim Jagielski $uni_page_last = $uni_index; 652*b1cdbd2cSJim Jagielski } 653*b1cdbd2cSJim Jagielski } 654*b1cdbd2cSJim Jagielski 655*b1cdbd2cSJim Jagielski $uni_data_used[$uni_plane][$uni_page] = 0; 656*b1cdbd2cSJim Jagielski 657*b1cdbd2cSJim Jagielski print OUT " ", $uni_page_first, " | (", $uni_page_last, 658*b1cdbd2cSJim Jagielski " << 8), /* first, last */\n"; 659*b1cdbd2cSJim Jagielski ++$uni_data_index; 660*b1cdbd2cSJim Jagielski $uni_data_used[$uni_plane][$uni_page] += 2; 661*b1cdbd2cSJim Jagielski 662*b1cdbd2cSJim Jagielski print OUT " ", printSpaces(7, 10, $uni_page_first); 663*b1cdbd2cSJim Jagielski $bol = 0; 664*b1cdbd2cSJim Jagielski for ($uni_index = $uni_page_first; 665*b1cdbd2cSJim Jagielski $uni_index <= $uni_page_last; 666*b1cdbd2cSJim Jagielski ++$uni_index) 667*b1cdbd2cSJim Jagielski { 668*b1cdbd2cSJim Jagielski if ($bol == 1) 669*b1cdbd2cSJim Jagielski { 670*b1cdbd2cSJim Jagielski print OUT " "; 671*b1cdbd2cSJim Jagielski $bol = 0; 672*b1cdbd2cSJim Jagielski } 673*b1cdbd2cSJim Jagielski if (defined($uni_map[$uni_plane][$uni_page][$uni_index])) 674*b1cdbd2cSJim Jagielski { 675*b1cdbd2cSJim Jagielski $big5 = $uni_map[$uni_plane][$uni_page][$uni_index]; 676*b1cdbd2cSJim Jagielski printf OUT "0x%04X,", $big5; 677*b1cdbd2cSJim Jagielski $uni_data_used[$uni_plane][$uni_page] += 2; 678*b1cdbd2cSJim Jagielski } 679*b1cdbd2cSJim Jagielski else 680*b1cdbd2cSJim Jagielski { 681*b1cdbd2cSJim Jagielski print OUT " 0,"; 682*b1cdbd2cSJim Jagielski } 683*b1cdbd2cSJim Jagielski ++$uni_data_index; 684*b1cdbd2cSJim Jagielski if ($uni_index % 10 == 9) 685*b1cdbd2cSJim Jagielski { 686*b1cdbd2cSJim Jagielski print OUT "\n"; 687*b1cdbd2cSJim Jagielski $bol = 1; 688*b1cdbd2cSJim Jagielski } 689*b1cdbd2cSJim Jagielski } 690*b1cdbd2cSJim Jagielski if ($bol == 0) 691*b1cdbd2cSJim Jagielski { 692*b1cdbd2cSJim Jagielski print OUT "\n"; 693*b1cdbd2cSJim Jagielski } 694*b1cdbd2cSJim Jagielski 695*b1cdbd2cSJim Jagielski $uni_data_space[$uni_plane][$uni_page] 696*b1cdbd2cSJim Jagielski = ($uni_data_index 697*b1cdbd2cSJim Jagielski - $uni_data_offsets[$uni_plane][$uni_page]) * 2; 698*b1cdbd2cSJim Jagielski } 699*b1cdbd2cSJim Jagielski else 700*b1cdbd2cSJim Jagielski { 701*b1cdbd2cSJim Jagielski $uni_data_offsets[$uni_plane][$uni_page] = -1; 702*b1cdbd2cSJim Jagielski print OUT " /* plane ", $uni_plane, ", page ", $uni_page, 703*b1cdbd2cSJim Jagielski ": --- */\n"; 704*b1cdbd2cSJim Jagielski } 705*b1cdbd2cSJim Jagielski } 706*b1cdbd2cSJim Jagielski } 707*b1cdbd2cSJim Jagielski else 708*b1cdbd2cSJim Jagielski { 709*b1cdbd2cSJim Jagielski print OUT " /* plane ", $uni_plane, ": --- */\n"; 710*b1cdbd2cSJim Jagielski } 711*b1cdbd2cSJim Jagielski} 712*b1cdbd2cSJim Jagielskiprint OUT "};\n\n"; 713*b1cdbd2cSJim Jagielski 714*b1cdbd2cSJim Jagielskiprint OUT "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n"; 715*b1cdbd2cSJim Jagielskifor ($uni_plane = 0; $uni_plane <= 16; ++$uni_plane) 716*b1cdbd2cSJim Jagielski{ 717*b1cdbd2cSJim Jagielski if (defined($uni_plane_used[$uni_plane])) 718*b1cdbd2cSJim Jagielski { 719*b1cdbd2cSJim Jagielski $uni_pageoffsets_used[$uni_plane] = 0; 720*b1cdbd2cSJim Jagielski $uni_data_used_sum[$uni_plane] = 0; 721*b1cdbd2cSJim Jagielski $uni_data_space_sum[$uni_plane] = 0; 722*b1cdbd2cSJim Jagielski for ($uni_page = 0; $uni_page <= 255; ++$uni_page) 723*b1cdbd2cSJim Jagielski { 724*b1cdbd2cSJim Jagielski $offset = $uni_data_offsets[$uni_plane][$uni_page]; 725*b1cdbd2cSJim Jagielski if ($offset == -1) 726*b1cdbd2cSJim Jagielski { 727*b1cdbd2cSJim Jagielski print OUT " -1, /* plane ", 728*b1cdbd2cSJim Jagielski $uni_plane, 729*b1cdbd2cSJim Jagielski ", page ", 730*b1cdbd2cSJim Jagielski $uni_page, 731*b1cdbd2cSJim Jagielski " */\n"; 732*b1cdbd2cSJim Jagielski } 733*b1cdbd2cSJim Jagielski else 734*b1cdbd2cSJim Jagielski { 735*b1cdbd2cSJim Jagielski print OUT " ", 736*b1cdbd2cSJim Jagielski $offset, 737*b1cdbd2cSJim Jagielski ", /* plane ", 738*b1cdbd2cSJim Jagielski $uni_plane, 739*b1cdbd2cSJim Jagielski ", page ", 740*b1cdbd2cSJim Jagielski $uni_page, 741*b1cdbd2cSJim Jagielski "; ", 742*b1cdbd2cSJim Jagielski printStats($uni_data_used[$uni_plane][$uni_page], 743*b1cdbd2cSJim Jagielski $uni_data_space[$uni_plane][$uni_page]), 744*b1cdbd2cSJim Jagielski " */\n"; 745*b1cdbd2cSJim Jagielski $uni_pageoffsets_used[$uni_plane] += 4; 746*b1cdbd2cSJim Jagielski $uni_data_used_sum[$uni_plane] 747*b1cdbd2cSJim Jagielski += $uni_data_used[$uni_plane][$uni_page]; 748*b1cdbd2cSJim Jagielski $uni_data_space_sum[$uni_plane] 749*b1cdbd2cSJim Jagielski += $uni_data_space[$uni_plane][$uni_page]; 750*b1cdbd2cSJim Jagielski } 751*b1cdbd2cSJim Jagielski } 752*b1cdbd2cSJim Jagielski } 753*b1cdbd2cSJim Jagielski else 754*b1cdbd2cSJim Jagielski { 755*b1cdbd2cSJim Jagielski print OUT " /* plane ", $uni_plane, ": --- */\n"; 756*b1cdbd2cSJim Jagielski } 757*b1cdbd2cSJim Jagielski} 758*b1cdbd2cSJim Jagielskiprint OUT "};\n\n"; 759*b1cdbd2cSJim Jagielski 760*b1cdbd2cSJim Jagielskiprint OUT "static sal_Int32 const aImplUnicodeTo", 761*b1cdbd2cSJim Jagielski $id, 762*b1cdbd2cSJim Jagielski "PlaneOffsets[] = {\n"; 763*b1cdbd2cSJim Jagielski$uni_page_offset = 0; 764*b1cdbd2cSJim Jagielski$uni_planeoffsets_used = 0; 765*b1cdbd2cSJim Jagielski$uni_pageoffsets_used_sum = 0; 766*b1cdbd2cSJim Jagielski$uni_pageoffsets_space_sum = 0; 767*b1cdbd2cSJim Jagielski$uni_data_used_sum2 = 0; 768*b1cdbd2cSJim Jagielski$uni_data_space_sum2 = 0; 769*b1cdbd2cSJim Jagielskifor ($uni_plane = 0; $uni_plane <= 16; ++$uni_plane) 770*b1cdbd2cSJim Jagielski{ 771*b1cdbd2cSJim Jagielski if (defined ($uni_plane_used[$uni_plane])) 772*b1cdbd2cSJim Jagielski { 773*b1cdbd2cSJim Jagielski print OUT " ", 774*b1cdbd2cSJim Jagielski $uni_page_offset++, 775*b1cdbd2cSJim Jagielski " * 256, /* plane ", 776*b1cdbd2cSJim Jagielski $uni_plane, 777*b1cdbd2cSJim Jagielski "; ", 778*b1cdbd2cSJim Jagielski printStats($uni_pageoffsets_used[$uni_plane], 256 * 4), 779*b1cdbd2cSJim Jagielski ", ", 780*b1cdbd2cSJim Jagielski printStats($uni_data_used_sum[$uni_plane], 781*b1cdbd2cSJim Jagielski $uni_data_space_sum[$uni_plane]), 782*b1cdbd2cSJim Jagielski " */\n"; 783*b1cdbd2cSJim Jagielski $uni_planeoffsets_used += 4; 784*b1cdbd2cSJim Jagielski $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$uni_plane]; 785*b1cdbd2cSJim Jagielski $uni_pageoffsets_space_sum += 256 * 4; 786*b1cdbd2cSJim Jagielski $uni_data_used_sum2 += $uni_data_used_sum[$uni_plane]; 787*b1cdbd2cSJim Jagielski $uni_data_space_sum2 += $uni_data_space_sum[$uni_plane]; 788*b1cdbd2cSJim Jagielski } 789*b1cdbd2cSJim Jagielski else 790*b1cdbd2cSJim Jagielski { 791*b1cdbd2cSJim Jagielski print OUT " -1, /* plane ", $uni_plane, " */\n"; 792*b1cdbd2cSJim Jagielski } 793*b1cdbd2cSJim Jagielski} 794*b1cdbd2cSJim Jagielskiprint OUT " /* ", 795*b1cdbd2cSJim Jagielski printStats($uni_planeoffsets_used, 17 * 4), 796*b1cdbd2cSJim Jagielski ", ", 797*b1cdbd2cSJim Jagielski printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum), 798*b1cdbd2cSJim Jagielski ", ", 799*b1cdbd2cSJim Jagielski printStats($uni_data_used_sum2, $uni_data_space_sum2), 800*b1cdbd2cSJim Jagielski " */\n};\n"; 801*b1cdbd2cSJim Jagielski 802*b1cdbd2cSJim Jagielskiclose OUT; 803