#!/usr/bin/perl
#*************************************************************************
#
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# Copyright 2000, 2010 Oracle and/or its affiliates.
#
# OpenOffice.org - a multi-platform office productivity suite
#
# This file is part of OpenOffice.org.
#
# OpenOffice.org is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License version 3
# only, as published by the Free Software Foundation.
#
# OpenOffice.org is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License version 3 for more details
# (a copy is included in the LICENSE file that accompanied this code).
#
# You should have received a copy of the GNU Lesser General Public License
# version 3 along with OpenOffice.org. If not, see
# <http://www.openoffice.org/license.html>
# for a copy of the LGPLv3 License.
#
#*************************************************************************

# Generates <lc($id)>.tab, a C source fragment with the Big5-HKSCS <-> Unicode
# conversion tables (aImpl<id>ToUnicodeData / ...RowOffsets and
# aImplUnicodeTo<id>Data / ...PageOffsets / ...PlaneOffsets).
#
# The script must be started from the directory that contains this script
# itself (it re-reads <lc($id)>.pl to copy the license header above into the
# generated file) and the ./input subdir described below.  Diagnostics and
# statistics go to STDOUT.
#
# NOTE: the header copy stops at the first line that does not start with '#',
# so the blank line directly after the license header must be kept.

# The following files must be available in a ./input subdir:

# <http://www.info.gov.hk/digital21/eng/hkscs/download/big5-iso.txt>

# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT>:
# "Unicode version: 1.1 Table version: 0.0d3 Date: 11 February 1994"
# Only used to track Unicode characters that are mapped from both Big5 and
# HKSCS.

# <http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT>:
# "Unicode version: 2.0 Table version: 2.01 Date: 1/7/2000"
# Only used to track Unicode characters that are mapped from both CP950 and
# HKSCS.

use strict;
use warnings;

my $surrogates = 0; # set to 1 to allow mappings to Unicode beyond Plane 0

my $id = "Big5Hkscs2001";

# Unicode -> Big5 state, filled by addMapping and read by the table writers.
my @uni_plane_used;         # [$plane]                 -> 1 if any mapping
my @uni_page_used;          # [$plane][$page]          -> 1 if any mapping
my @uni_map;                # [$plane][$page][$index]  -> Big5 code
my @compat = (0, 0, 0);     # number of 1993, 2000, 2001 compatibility mappings

# Big5 -> Unicode state: [$row][$column] -> UTF-32 code point.
my @big5_map;

# Returns true if the argument is a Unicode scalar value that is not a
# noncharacter (no surrogates, not U+FDD0..U+FDEF, not U+xxFFFE/U+xxFFFF).
sub isValidUtf32
{
    my ($utf32) = @_;
    return $utf32 >= 0 && $utf32 <= 0x10FFFF
        && !($utf32 >= 0xD800 && $utf32 <= 0xDFFF)
        && !($utf32 >= 0xFDD0 && $utf32 <= 0xFDEF)
        && ($utf32 & 0xFFFF) < 0xFFFE;
}

# Formats a code point as "U+XXXX" (at least four hex digits).
sub printUtf32
{
    my ($utf32) = @_;
    return sprintf("U+%04X", $utf32);
}

# Returns true if the argument is a two-byte code with lead byte 0x81..0xFE
# and trail byte 0x40..0x7E or 0xA1..0xFE.
sub isValidBig5
{
    my ($big5) = @_;
    my $big5_row = $big5 >> 8;
    my $big5_column = $big5 & 0xFF;
    return $big5_row >= 0x81 && $big5_row <= 0xFE
        && ($big5_column >= 0x40 && $big5_column <= 0x7E
            || $big5_column >= 0xA1 && $big5_column <= 0xFE);
}

# Formats a Big5 code as four hex digits.
sub printBig5
{
    my ($big5) = @_;
    return sprintf("%04X", $big5);
}

# Formats a "used/available bytes (percentage)" statistic.
# $space must not be zero.
sub printStats
{
    my ($used, $space) = @_;
    return sprintf("%d/%d bytes (%.1f%%)",
                   $used,
                   $space,
                   $used * 100 / $space);
}

# Returns the blanks needed to indent the first cell of a wrapped table row:
# $column_width blanks for every column between the start of the output line
# (the largest multiple of $columns_per_line that is <= $end) and $end.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;
    my $line_start = int($end / $columns_per_line) * $columns_per_line;
    return " " x ($column_width * ($end - $line_start));
}

# Lays out a run of pre-formatted table cells, ten columns per output line.
#   $first  column number of the first cell (determines the initial indent and
#           where the line breaks fall: after every column with number % 10 == 9)
#   @cells  the cell texts, in column order
# Returns the complete text, always terminated by a newline.
#
# NOTE(review): skipped columns are padded with 7 blanks each (the width of a
# "0x%04X," cell), while the " 0," filler cells used by the callers are only
# 3 characters wide, so rows containing fillers do not line up -- confirm
# against a known-good generated .tab whether that is intended.
sub formatCells
{
    my ($first, @cells) = @_;
    my $text = " " . printSpaces(7, 10, $first);
    my $at_line_start = 0;
    my $column = $first;
    for my $cell (@cells)
    {
        if ($at_line_start)
        {
            $text .= " ";
            $at_line_start = 0;
        }
        $text .= $cell;
        if ($column % 10 == 9)
        {
            $text .= "\n";
            $at_line_start = 1;
        }
        ++$column;
    }
    $text .= "\n" if !$at_line_start;
    return $text;
}

# Records the mapping of Unicode code point $utf32 to Big5 code $big5.
#   $comp  -1 for a regular mapping, otherwise the index into @compat
#          (0 = 1993, 1 = 2000, 2 = 2001) of the compatibility mapping counter
#          to increment.
# If $utf32 is already mapped, the existing mapping is kept and a warning is
# printed to STDOUT instead.
sub addMapping
{
    my ($utf32, $big5, $comp) = @_;

    my $uni_plane = $utf32 >> 16;
    my $uni_page = ($utf32 >> 8) & 0xFF;
    my $uni_index = $utf32 & 0xFF;

    if (!defined($uni_plane_used[$uni_plane])
        || !defined($uni_page_used[$uni_plane][$uni_page])
        || !defined($uni_map[$uni_plane][$uni_page][$uni_index]))
    {
        $uni_map[$uni_plane][$uni_page][$uni_index] = $big5;
        $uni_plane_used[$uni_plane] = 1;
        $uni_page_used[$uni_plane][$uni_page] = 1;
        if ($comp != -1)
        {
            ++$compat[$comp];
        }
    }
    else
    {
        # $comp is -1 (true in boolean context) for regular mappings and 0 for
        # 1993 compatibility mappings, so it must be compared against -1.
        print "WARNING! Mapping ", printUtf32($utf32), " to ",
            printBig5($uni_map[$uni_plane][$uni_page][$uni_index]),
            ", NOT ", ($comp != -1 ? "compat " : ""),
            printBig5($big5), "\n";
    }
    return;
}

# Reads input/$filename (lines of the form "0xBBBB 0xUUUU # comment") and
# returns a reference to an array that maps each UTF-32 code point to the
# first Big5 code seen for it.  Later duplicates are reported on STDOUT.
#   $skip_replacement  if true, mappings to U+FFFD are ignored
# Dies on an unreadable file or an invalid Big5/UTF-32 value.
sub readUnderlyingMap
{
    my ($filename, $skip_replacement) = @_;
    my @underlying;
    open my $in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$in>)
    {
        next unless $line =~ /(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/;
        my $big5 = oct($1);
        my $utf32 = oct($2);
        isValidBig5($big5)
            or die "Bad Big5 char " . printBig5($big5);
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        next if $skip_replacement && $utf32 == 0xFFFD;
        if (defined($underlying[$utf32]))
        {
            print "WARNING! In ", $filename, ", both ",
                printBig5($underlying[$utf32]), " and ",
                printBig5($big5), " map to ", printUtf32($utf32), "\n";
        }
        else
        {
            $underlying[$utf32] = $big5;
        }
    }
    close $in;
    return \@underlying;
}

# Build mappings to track Unicode characters that are mapped from both Big5/
# CP950 and HKSCS:
my $underlying_big5 = readUnderlyingMap("BIG5.TXT", 1);
my $underlying_cp950 = readUnderlyingMap("CP950.TXT", 0);

# The following are mapped by the underlying RTL_TEXTENCODING_BIG5 to some
# nonstandard Unicode points, so they are explicitly mentioned here to map
# to the standard Unicode PUA points. (In the other direction, the unofficial
# mappings from Unicode to RTL_TEXTENCODING_BIG5 C6A1--C7FE are harmless,
# since all Unicode characters involved are already covered by the official
# Big5-HKSCS mappings.)
$big5_map[0xC6][0xCF] = 0xF6E0; addMapping(0xF6E0, 0xC6CF, -1);
$big5_map[0xC6][0xD3] = 0xF6E4; addMapping(0xF6E4, 0xC6D3, -1);
$big5_map[0xC6][0xD5] = 0xF6E6; addMapping(0xF6E6, 0xC6D5, -1);
$big5_map[0xC6][0xD7] = 0xF6E8; addMapping(0xF6E8, 0xC6D7, -1);
$big5_map[0xC6][0xDE] = 0xF6EF; addMapping(0xF6EF, 0xC6DE, -1);
$big5_map[0xC6][0xDF] = 0xF6F0; addMapping(0xF6F0, 0xC6DF, -1);

# The following implements the mapping of Big5-HKSCS compatibility points
# (GCCS characters unified with other HKSCS characters) to Unicode. In the
# other direction, characters from Unicode's PUA will map to these Big5-HKSCS
# compatibility points. (See the first list in <http://www.info.gov.hk/
# digital21/eng/hkscs/download/big5cmp.txt>.)
$big5_map[0x8E][0x69] = 0x7BB8;
$big5_map[0x8E][0x6F] = 0x7C06;
$big5_map[0x8E][0x7E] = 0x7CCE;
$big5_map[0x8E][0xAB] = 0x7DD2;
$big5_map[0x8E][0xB4] = 0x7E1D;
$big5_map[0x8E][0xCD] = 0x8005;
$big5_map[0x8E][0xD0] = 0x8028;
$big5_map[0x8F][0x57] = 0x83C1;
$big5_map[0x8F][0x69] = 0x84A8;
$big5_map[0x8F][0x6E] = 0x840F;
$big5_map[0x8F][0xCB] = 0x89A6;
$big5_map[0x8F][0xCC] = 0x89A9;
$big5_map[0x8F][0xFE] = 0x8D77;
$big5_map[0x90][0x6D] = 0x90FD;
$big5_map[0x90][0x7A] = 0x92B9;
$big5_map[0x90][0xDC] = 0x975C;
$big5_map[0x90][0xF1] = 0x97FF;
$big5_map[0x91][0xBF] = 0x9F16;
$big5_map[0x92][0x44] = 0x8503;
$big5_map[0x92][0xAF] = 0x5159;
$big5_map[0x92][0xB0] = 0x515B;
$big5_map[0x92][0xB1] = 0x515D;
$big5_map[0x92][0xB2] = 0x515E;
$big5_map[0x92][0xC8] = 0x936E;
$big5_map[0x92][0xD1] = 0x7479;
$big5_map[0x94][0x47] = 0x6D67;
$big5_map[0x94][0xCA] = 0x799B;
$big5_map[0x95][0xD9] = 0x9097;
$big5_map[0x96][0x44] = 0x975D;
$big5_map[0x96][0xED] = 0x701E;
$big5_map[0x96][0xFC] = 0x5B28;
$big5_map[0x9B][0x76] = 0x7201;
$big5_map[0x9B][0x78] = 0x77D7;
$big5_map[0x9B][0x7B] = 0x7E87;
$big5_map[0x9B][0xC6] = 0x99D6;
$big5_map[0x9B][0xDE] = 0x91D4;
$big5_map[0x9B][0xEC] = 0x60DE;
$big5_map[0x9B][0xF6] = 0x6FB6;
$big5_map[0x9C][0x42] = 0x8F36;
$big5_map[0x9C][0x53] = 0x4FBB;
$big5_map[0x9C][0x62] = 0x71DF;
$big5_map[0x9C][0x68] = 0x9104;
$big5_map[0x9C][0x6B] = 0x9DF0;
$big5_map[0x9C][0x77] = 0x83CF;
$big5_map[0x9C][0xBC] = 0x5C10;
$big5_map[0x9C][0xBD] = 0x79E3;
$big5_map[0x9C][0xD0] = 0x5A67;
$big5_map[0x9D][0x57] = 0x8F0B;
$big5_map[0x9D][0x5A] = 0x7B51;
$big5_map[0x9D][0xC4] = 0x62D0;
$big5_map[0x9E][0xA9] = 0x6062;
$big5_map[0x9E][0xEF] = 0x75F9;
$big5_map[0x9E][0xFD] = 0x6C4A;
$big5_map[0x9F][0x60] = 0x9B2E;
$big5_map[0x9F][0x66] = 0x9F17;
$big5_map[0x9F][0xCB] = 0x50ED;
$big5_map[0x9F][0xD8] = 0x5F0C;
$big5_map[0xA0][0x63] = 0x880F;
$big5_map[0xA0][0x77] = 0x62CE;
$big5_map[0xA0][0xD5] = 0x7468;
$big5_map[0xA0][0xDF] = 0x7162;
$big5_map[0xA0][0xE4] = 0x7250;
$big5_map[0xFA][0x5F] = 0x5029;
$big5_map[0xFA][0x66] = 0x507D;
$big5_map[0xFA][0xBD] = 0x5305;
$big5_map[0xFA][0xC5] = 0x5344;
$big5_map[0xFA][0xD5] = 0x537F;
$big5_map[0xFB][0x48] = 0x5605;
$big5_map[0xFB][0xB8] = 0x5A77;
$big5_map[0xFB][0xF3] = 0x5E75;
$big5_map[0xFB][0xF9] = 0x5ED0;
$big5_map[0xFC][0x4F] = 0x5F58;
$big5_map[0xFC][0x6C] = 0x60A4;
$big5_map[0xFC][0xB9] = 0x6490;
$big5_map[0xFC][0xE2] = 0x6674;
$big5_map[0xFC][0xF1] = 0x675E;
$big5_map[0xFD][0xB7] = 0x6C9C;
$big5_map[0xFD][0xB8] = 0x6E1D;
$big5_map[0xFD][0xBB] = 0x6E2F;
$big5_map[0xFD][0xF1] = 0x716E;
$big5_map[0xFE][0x52] = 0x732A;
$big5_map[0xFE][0x6F] = 0x745C;
$big5_map[0xFE][0xAA] = 0x74E9;
$big5_map[0xFE][0xDD] = 0x7809;

# Read the official HKSCS table.  Each data line holds the Big5 code followed
# by the code points assigned in the 1993, 2000 and 2001 columns.
my $pua = 0;
{
    my $filename = "big5-iso.txt";
    open my $in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$in>)
    {
        next unless $line =~ /^([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+)$/;
        my $big5 = oct("0x" . $1);
        my $utf32_1993 = oct("0x" . $2);
        my $utf32_2000 = oct("0x" . $3);
        my $utf32_2001 = oct("0x" . $4);
        isValidBig5($big5)
            or die "Bad Big5 char " . printBig5($big5);
        isValidUtf32($utf32_1993)
            or die "Bad UTF32 char " . printUtf32($utf32_1993);
        isValidUtf32($utf32_2000)
            or die "Bad UTF32 char " . printUtf32($utf32_2000);
        isValidUtf32($utf32_2001)
            or die "Bad UTF32 char " . printUtf32($utf32_2001);

        my $utf32 = $surrogates ? $utf32_2001 : $utf32_2000;

        # Report code points that the underlying Big5/CP950 tables map from a
        # different Big5 code than HKSCS does.
        my $from_big5 = $underlying_big5->[$utf32];
        my $from_cp950 = $underlying_cp950->[$utf32];
        my $has_big5 = defined($from_big5);
        my $has_cp950 = defined($from_cp950);
        if ($has_big5 || $has_cp950)
        {
            my $underlying_agree
                = $has_big5 && $has_cp950 && $from_big5 == $from_cp950;
            my $same_as_hkscs
                = $underlying_agree && $from_big5 == $big5
                  || $has_big5 && !$has_cp950 && $from_big5 == $big5
                  || !$has_big5 && $has_cp950 && $from_cp950 == $big5;
            if (!$same_as_hkscs)
            {
                print "XXX mapping underlying";
                if ($underlying_agree)
                {
                    print " Big5/CP950 ", printBig5($from_big5);
                }
                else
                {
                    if ($has_big5)
                    {
                        print " Big5 ", printBig5($from_big5);
                    }
                    if ($has_cp950)
                    {
                        print " CP950 ", printBig5($from_cp950);
                    }
                }
                print " and HKSCS ", printBig5($big5), " to ",
                    printUtf32($utf32), "\n";
            }
            # Otherwise: ignore.
            #
            # Depending on real underlying mapping (cf.
            # ../convertbig5hkscs.tab), it would be possible to save some
            # table space by dropping those HKSCS code points that are
            # already covered by the underlying mapping.
        }

        if ($utf32 >= 0xE000 && $utf32 <= 0xF8FF)
        {
            ++$pua;
        }

        my $big5_row = $big5 >> 8;
        my $big5_column = $big5 & 0xFF;
        if (defined($big5_map[$big5_row][$big5_column]))
        {
            die "Bad Big5 mapping " . printBig5($big5);
        }
        $big5_map[$big5_row][$big5_column] = $utf32;

        addMapping($utf32, $big5, -1);

        # Code points of the other columns that differ from the chosen one
        # become one-way (Unicode -> Big5) compatibility mappings.
        if ($utf32_2001 != $utf32)
        {
            addMapping($utf32_2001, $big5, 2);
        }
        if ($utf32_2000 != $utf32 && $utf32_2000 != $utf32_2001)
        {
            addMapping($utf32_2000, $big5, 1);
        }
        if ($utf32_1993 != $utf32 && $utf32_1993 != $utf32_2000
            && $utf32_1993 != $utf32_2001)
        {
            addMapping($utf32_1993, $big5, 0);
        }
    }
    close $in;
}

print $pua, " mappings to PUA\n";
print $compat[0], " 1993 compatibility mappings\n" if ($compat[0] != 0);
print $compat[1], " 2000 compatibility mappings\n" if ($compat[1] != 0);
print $compat[2], " 2001 compatibility mappings\n" if ($compat[2] != 0);

# Sanity check: no ASCII character may be mapped to a Big5 double-byte code.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for my $utf32 (0 .. 0x7F)
    {
        if (defined($uni_map[0][0][$utf32]))
        {
            my $big5 = $uni_map[0][0][$utf32];
            die "Mapping " . printUtf32($utf32) . " to " . printBig5($big5);
        }
    }
}

my $tab_filename = lc($id) . ".tab";
open my $out, '>', $tab_filename
    or die "Cannot write " . $tab_filename . ": $!";

# Copy the leading comment header of this script into the output as a C
# comment: the first "#***" line opens the comment, the second closes it.
{
    my $script_filename = lc($id) . ".pl";
    open my $in, '<', $script_filename
        or die "Cannot read " . $script_filename . ": $!";
    my $first = 1;
    while (my $line = <$in>)
    {
        if ($line =~ /^\#!.*$/)
        {
            # the shebang line is not part of the header
        }
        elsif ($line =~ /^\#(\*.*)$/)
        {
            if ($first == 1)
            {
                print $out "/", $1, "\n";
                $first = 0;
            }
            else
            {
                print $out " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/)
        {
            print $out " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/)
        {
            print $out " *", $1, "\n";
        }
        else
        {
            last; # first non-comment line ends the header
        }
    }
    close $in;
}

print $out "\n",
    "#ifndef _SAL_TYPES_H_\n",
    "#include \"sal/types.h\"\n",
    "#endif\n",
    "\n";

# Big5 -> Unicode data.  Per used row: one "first | (last << 8)" entry, then
# one UTF-16 code unit per column first..last (0xffff for unmapped columns; the
# high surrogate for code points beyond Plane 0), then, if the row contains
# surrogates, the first surrogate column followed by the low surrogates (0 for
# columns that need none).
print $out "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
my @big5_data_offsets;  # [$row] -> index of the row's first entry, or -1
my @big5_data_space;    # [$row] -> bytes occupied by the row
my @big5_data_used;     # [$row] -> bytes carrying real information
my $big5_data_index = 0;
my $big5_rows = 0;
my $big5_chars = 0;
for my $big5_row (0 .. 255)
{
    my $big5_row_first = -1;
    my $big5_row_last;
    for my $big5_column (0 .. 255)
    {
        if (defined($big5_map[$big5_row][$big5_column]))
        {
            $big5_row_first = $big5_column if $big5_row_first == -1;
            $big5_row_last = $big5_column;
        }
    }
    if ($big5_row_first == -1)
    {
        print $out " /* row ", $big5_row, ": --- */\n";
        $big5_data_offsets[$big5_row] = -1;
        next;
    }

    $big5_data_offsets[$big5_row] = $big5_data_index;
    ++$big5_rows;
    print $out " /* row ", $big5_row, " */\n";

    my $big5_row_surrogates_first = -1;
    my $big5_row_surrogates_last;
    my $big5_row_chars = 0;
    my $big5_row_surrogates = 0;

    print $out " ", $big5_row_first, " | (", $big5_row_last,
        " << 8), /* first, last */\n";
    ++$big5_data_index;

    my @cells;
    for my $big5_column ($big5_row_first .. $big5_row_last)
    {
        my $utf32 = $big5_map[$big5_row][$big5_column];
        if (!defined($utf32))
        {
            push @cells, "0xffff,";
        }
        elsif ($utf32 <= 0xFFFF)
        {
            ++$big5_row_chars;
            push @cells, sprintf("0x%04X,", $utf32);
        }
        else
        {
            ++$big5_row_chars;
            ++$big5_row_surrogates;
            push @cells,
                sprintf("0x%04X,", (0xD800 | (($utf32 - 0x10000) >> 10)));
            if ($big5_row_surrogates_first == -1)
            {
                $big5_row_surrogates_first = $big5_column;
            }
            $big5_row_surrogates_last = $big5_column;
        }
    }
    print $out formatCells($big5_row_first, @cells);
    $big5_data_index += scalar(@cells);

    if ($big5_row_surrogates_first != -1)
    {
        print $out " ", $big5_row_surrogates_first,
            ", /* first low-surrogate */\n";
        ++$big5_data_index;

        my @low_cells;
        for my $big5_column ($big5_row_surrogates_first
                             .. $big5_row_surrogates_last)
        {
            my $utf32 = 0;
            if (defined($big5_map[$big5_row][$big5_column]))
            {
                $utf32 = $big5_map[$big5_row][$big5_column];
            }
            push @low_cells,
                $utf32 <= 0xFFFF
                    ? " 0,"
                    : sprintf("0x%04X,",
                              (0xDC00 | (($utf32 - 0x10000) & 0x3FF)));
        }
        print $out formatCells($big5_row_surrogates_first, @low_cells);
        $big5_data_index += scalar(@low_cells);
    }

    $big5_chars += $big5_row_chars;
    $big5_data_space[$big5_row]
        = ($big5_data_index - $big5_data_offsets[$big5_row]) * 2;
    $big5_data_used[$big5_row]
        = (1 + $big5_row_chars + ($big5_row_surrogates == 0 ?
                                  0 : 1 + $big5_row_surrogates))
          * 2;
}
print $out "};\n\n";
print "big5 rows = ", $big5_rows, ", chars = ", $big5_chars, "\n";

# Big5 row -> offset into the data array above (-1 for unused rows).
print $out "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
for my $big5_row (0 .. 255)
{
    if ($big5_data_offsets[$big5_row] == -1)
    {
        print $out " -1, /* row ", $big5_row, " */\n";
    }
    else
    {
        print $out " ",
            $big5_data_offsets[$big5_row],
            ", /* row ",
            $big5_row,
            "; ",
            printStats($big5_data_used[$big5_row],
                       $big5_data_space[$big5_row]),
            " */\n";
    }
}
print $out "};\n\n";

# Unicode -> Big5 data.  Per used page: one "first | (last << 8)" entry, then
# one Big5 code per index first..last (0 for unmapped indices).
print $out "static sal_uInt16 const aImplUnicodeTo", $id, "Data[] = {\n";
my @uni_data_offsets;   # [$plane][$page] -> index of first entry, or -1
my @uni_data_used;      # [$plane][$page] -> bytes carrying real information
my @uni_data_space;     # [$plane][$page] -> bytes occupied by the page
my $uni_data_index = 0;
for my $uni_plane (0 .. 16)
{
    if (!defined($uni_plane_used[$uni_plane]))
    {
        print $out " /* plane ", $uni_plane, ": --- */\n";
        next;
    }
    for my $uni_page (0 .. 255)
    {
        if (!defined($uni_page_used[$uni_plane][$uni_page]))
        {
            $uni_data_offsets[$uni_plane][$uni_page] = -1;
            print $out " /* plane ", $uni_plane, ", page ", $uni_page,
                ": --- */\n";
            next;
        }

        $uni_data_offsets[$uni_plane][$uni_page] = $uni_data_index;
        print $out " /* plane ", $uni_plane, ", page ", $uni_page,
            " */\n";

        my $uni_page_first = -1;
        my $uni_page_last;
        for my $uni_index (0 .. 255)
        {
            if (defined($uni_map[$uni_plane][$uni_page][$uni_index]))
            {
                $uni_page_first = $uni_index if $uni_page_first == -1;
                $uni_page_last = $uni_index;
            }
        }

        print $out " ", $uni_page_first, " | (", $uni_page_last,
            " << 8), /* first, last */\n";
        ++$uni_data_index;
        $uni_data_used[$uni_plane][$uni_page] = 2;

        my @cells;
        for my $uni_index ($uni_page_first .. $uni_page_last)
        {
            my $big5 = $uni_map[$uni_plane][$uni_page][$uni_index];
            if (defined($big5))
            {
                push @cells, sprintf("0x%04X,", $big5);
                $uni_data_used[$uni_plane][$uni_page] += 2;
            }
            else
            {
                push @cells, " 0,";
            }
        }
        print $out formatCells($uni_page_first, @cells);
        $uni_data_index += scalar(@cells);

        $uni_data_space[$uni_plane][$uni_page]
            = ($uni_data_index
               - $uni_data_offsets[$uni_plane][$uni_page]) * 2;
    }
}
print $out "};\n\n";

# Unicode page -> offset into the data array above (-1 for unused pages);
# 256 entries for every used plane, nothing for unused planes.
print $out "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
my @uni_pageoffsets_used;   # [$plane] -> bytes of non -1 page offsets
my @uni_data_used_sum;      # [$plane] -> sum of @uni_data_used over pages
my @uni_data_space_sum;     # [$plane] -> sum of @uni_data_space over pages
for my $uni_plane (0 .. 16)
{
    if (!defined($uni_plane_used[$uni_plane]))
    {
        print $out " /* plane ", $uni_plane, ": --- */\n";
        next;
    }
    $uni_pageoffsets_used[$uni_plane] = 0;
    $uni_data_used_sum[$uni_plane] = 0;
    $uni_data_space_sum[$uni_plane] = 0;
    for my $uni_page (0 .. 255)
    {
        my $offset = $uni_data_offsets[$uni_plane][$uni_page];
        if ($offset == -1)
        {
            print $out " -1, /* plane ",
                $uni_plane,
                ", page ",
                $uni_page,
                " */\n";
        }
        else
        {
            print $out " ",
                $offset,
                ", /* plane ",
                $uni_plane,
                ", page ",
                $uni_page,
                "; ",
                printStats($uni_data_used[$uni_plane][$uni_page],
                           $uni_data_space[$uni_plane][$uni_page]),
                " */\n";
            $uni_pageoffsets_used[$uni_plane] += 4;
            $uni_data_used_sum[$uni_plane]
                += $uni_data_used[$uni_plane][$uni_page];
            $uni_data_space_sum[$uni_plane]
                += $uni_data_space[$uni_plane][$uni_page];
        }
    }
}
print $out "};\n\n";

# Unicode plane -> offset into the page offsets array above (-1 for unused
# planes), followed by a comment with the overall statistics.
print $out "static sal_Int32 const aImplUnicodeTo",
    $id,
    "PlaneOffsets[] = {\n";
my $uni_page_offset = 0;
my $uni_planeoffsets_used = 0;
my $uni_pageoffsets_used_sum = 0;
my $uni_pageoffsets_space_sum = 0;
my $uni_data_used_sum2 = 0;
my $uni_data_space_sum2 = 0;
for my $uni_plane (0 .. 16)
{
    if (defined($uni_plane_used[$uni_plane]))
    {
        print $out " ",
            $uni_page_offset++,
            " * 256, /* plane ",
            $uni_plane,
            "; ",
            printStats($uni_pageoffsets_used[$uni_plane], 256 * 4),
            ", ",
            printStats($uni_data_used_sum[$uni_plane],
                       $uni_data_space_sum[$uni_plane]),
            " */\n";
        $uni_planeoffsets_used += 4;
        $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$uni_plane];
        $uni_pageoffsets_space_sum += 256 * 4;
        $uni_data_used_sum2 += $uni_data_used_sum[$uni_plane];
        $uni_data_space_sum2 += $uni_data_space_sum[$uni_plane];
    }
    else
    {
        print $out " -1, /* plane ", $uni_plane, " */\n";
    }
}
print $out " /* ",
    printStats($uni_planeoffsets_used, 17 * 4),
    ", ",
    printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum),
    ", ",
    printStats($uni_data_used_sum2, $uni_data_space_sum2),
    " */\n};\n";

# Buffered write errors only surface when the handle is closed.
close $out or die "Cannot write " . $tab_filename . ": $!";