#!/usr/bin/perl
#*************************************************************************
#
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# Copyright 2000, 2010 Oracle and/or its affiliates.
#
# OpenOffice.org - a multi-platform office productivity suite
#
# This file is part of OpenOffice.org.
#
# OpenOffice.org is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License version 3
# only, as published by the Free Software Foundation.
#
# OpenOffice.org is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License version 3 for more details
# (a copy is included in the LICENSE file that accompanied this code).
#
# You should have received a copy of the GNU Lesser General Public License
# version 3 along with OpenOffice.org. If not, see
# <http://www.openoffice.org/license.html>
# for a copy of the LGPLv3 License.
#
#*************************************************************************

# The following files must be available in a ./input subdir:

# <http://www.info.gov.hk/digital21/eng/hkscs/download/big5-iso.txt>

# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/BIG5.TXT>:
# "Unicode version: 1.1 Table version: 0.0d3 Date: 11 February 1994"
# Only used to track Unicode characters that are mapped from both Big5 and
# HKSCS.

# <http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT>:
# "Unicode version: 2.0 Table version: 2.01 Date: 1/7/2000"
# Only used to track Unicode characters that are mapped from both CP950 and
# HKSCS.
$surrogates = 0; # set to 1 to allow mappings to Unicode beyond Plane 0

$id = "Big5Hkscs2001";

# isValidUtf32($utf32)
# Returns true if $utf32 lies in 0..0x10FFFF and is neither a surrogate
# (0xD800..0xDFFF) nor a noncharacter (0xFDD0..0xFDEF, or a value whose low
# 16 bits are 0xFFFE or 0xFFFF).
sub isValidUtf32 {
    my ($utf32) = @_;
    return $utf32 >= 0 && $utf32 <= 0x10FFFF
        && !($utf32 >= 0xD800 && $utf32 <= 0xDFFF)
        && !($utf32 >= 0xFDD0 && $utf32 <= 0xFDEF)
        && ($utf32 & 0xFFFF) < 0xFFFE;
}

# printUtf32($utf32)
# Returns $utf32 formatted as "U+" followed by at least four uppercase hex
# digits.
sub printUtf32 {
    my ($utf32) = @_;
    return sprintf("U+%04X", $utf32);
}

# isValidBig5($big5)
# Returns true if the high byte of $big5 is in 0x81..0xFE and the low byte is
# in 0x40..0x7E or 0xA1..0xFE.
sub isValidBig5 {
    my ($big5) = @_;
    my $row    = $big5 >> 8;
    my $column = $big5 & 0xFF;
    return $row >= 0x81 && $row <= 0xFE
        && ($column >= 0x40 && $column <= 0x7E
            || $column >= 0xA1 && $column <= 0xFE);
}

# printBig5($big5)
# Returns $big5 formatted as at least four uppercase hex digits.
sub printBig5 {
    my ($big5) = @_;
    return sprintf("%04X", $big5);
}

# printStats($used, $space)
# Returns a "used/space bytes (percent%)" summary string.  $space must be
# non-zero.
sub printStats {
    my ($used, $space) = @_;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space,
                   $used * 100 / $space);
}

# printSpaces($column_width, $columns_per_line, $end)
# Returns the blank padding that stands in for the table cells preceding cell
# number $end on its output line: $column_width blanks for each of the
# ($end % $columns_per_line) skipped cells.  Arguments are expected to be
# non-negative integers.
sub printSpaces {
    my ($column_width, $columns_per_line, $end) = @_;
    # Built with the repetition operator instead of nested counting loops, so
    # no loop counters leak into package globals.
    return " " x ($column_width * ($end % $columns_per_line));
}

# addMapping($utf32, $big5, $comp)
# Records the Unicode -> Big5 mapping $utf32 -> $big5 in @uni_map and marks
# the plane and page of $utf32 as used in @uni_plane_used / @uni_page_used.
# $comp is -1 for an ordinary mapping; any other value is an index into
# @compat, whose counter is incremented when the mapping is recorded.
# If $utf32 already has a mapping, the existing one is kept and a warning is
# printed instead.
sub addMapping {
    my ($utf32, $big5, $comp) = @_;

    my $plane = $utf32 >> 16;
    my $page  = ($utf32 >> 8) & 0xFF;
    my $index = $utf32 & 0xFF;

    if (defined($uni_plane_used[$plane])
        && defined($uni_page_used[$plane][$page])
        && defined($uni_map[$plane][$page][$index]))
    {
        # -1 is the "not a compatibility mapping" marker, and 0 is a valid
        # @compat index, so the tag must be chosen by comparing against -1
        # rather than by the truth value of $comp.
        print "WARNING! Mapping ", printUtf32($utf32), " to ",
            printBig5($uni_map[$plane][$page][$index]), ", NOT ",
            ($comp != -1 ? "compat " : ""),
            printBig5($big5), "\n";
        return;
    }

    $uni_map[$plane][$page][$index] = $big5;
    $uni_plane_used[$plane] = 1;
    $uni_page_used[$plane][$page] = 1;
    if ($comp != -1) {
        ++$compat[$comp];
    }
    return;
}

# readUnderlyingMapping($filename, $table, $skip_replacement)
# Reads input/$filename and, for every line of the form
# "0xXXXX <blanks> 0xYYYY <blanks> # ...", stores the first number (a Big5
# code) in @$table at the index given by the second number (a Unicode code
# point).  Dies on an invalid Big5 code or Unicode code point.  If a code
# point is listed more than once, the first Big5 code is kept and a warning
# is printed.  When $skip_replacement is true, entries for U+FFFD are not
# recorded.
sub readUnderlyingMapping {
    my ($filename, $table, $skip_replacement) = @_;

    open(my $in, "<", "input/" . $filename)
        or die "Cannot read " . $filename;
    while (my $line = <$in>) {
        next unless $line =~ /(0x[0-9A-F][0-9A-F][0-9A-F][0-9A-F])[ \t]+(0x[0-9A-F]+)[ \t]+\#.*$/;
        my $big5  = oct($1);
        my $utf32 = oct($2);
        isValidBig5($big5)
            or die "Bad Big5 char " . printBig5($big5);
        isValidUtf32($utf32)
            or die "Bad UTF32 char " . printUtf32($utf32);
        next if $skip_replacement && $utf32 == 0xFFFD;

        if (defined($table->[$utf32])) {
            print "WARNING! In ", $filename, ", both ",
                printBig5($table->[$utf32]), " and ",
                printBig5($big5), " map to ", printUtf32($utf32), "\n";
        }
        else {
            $table->[$utf32] = $big5;
        }
    }
    close $in;
    return;
}

# Build mappings to track Unicode characters that are mapped from both Big5/
# CP950 and HKSCS:
readUnderlyingMapping("BIG5.TXT", \@underlying_big5, 1);
readUnderlyingMapping("CP950.TXT", \@underlying_cp950, 0);

# The following are mapped by the underlying RTL_TEXTENCODING_BIG5 to some
# nonstandard Unicode points, so they are explicitly mentioned here to map
# to the standard Unicode PUA points.
# (In the other direction, the unofficial
# mappings from Unicode to RTL_TEXTENCODING_BIG5 C6A1--C7FE are harmless,
# since all Unicode characters involved are already covered by the official
# Big5-HKSCS mappings.)
# Each pair below is (Big5 code, Unicode code point); the pair is entered
# into @big5_map and registered in the reverse direction via addMapping.
for my $pua_fix (
    [0xC6CF, 0xF6E0],
    [0xC6D3, 0xF6E4],
    [0xC6D5, 0xF6E6],
    [0xC6D7, 0xF6E8],
    [0xC6DE, 0xF6EF],
    [0xC6DF, 0xF6F0],
) {
    my ($code, $ucs) = @$pua_fix;
    $big5_map[$code >> 8][$code & 0xFF] = $ucs;
    addMapping($ucs, $code, -1);
}

# The following implements the mapping of Big5-HKSCS compatibility points
# (GCCS characters unified with other HKSCS characters) to Unicode.  In the
# other direction, characters from Unicode's PUA will map to these Big5-HKSCS
# compatibility points.  (See the first list in <http://www.info.gov.hk/
# digital21/eng/hkscs/download/big5cmp.txt>.)
# Flat list of "Big5 code => Unicode code point" pairs; these are entered
# into @big5_map only (no reverse mapping is registered for them here).
my @compat_points = (
    0x8E69 => 0x7BB8,  0x8E6F => 0x7C06,  0x8E7E => 0x7CCE,
    0x8EAB => 0x7DD2,  0x8EB4 => 0x7E1D,  0x8ECD => 0x8005,
    0x8ED0 => 0x8028,  0x8F57 => 0x83C1,  0x8F69 => 0x84A8,
    0x8F6E => 0x840F,  0x8FCB => 0x89A6,  0x8FCC => 0x89A9,
    0x8FFE => 0x8D77,  0x906D => 0x90FD,  0x907A => 0x92B9,
    0x90DC => 0x975C,  0x90F1 => 0x97FF,  0x91BF => 0x9F16,
    0x9244 => 0x8503,  0x92AF => 0x5159,  0x92B0 => 0x515B,
    0x92B1 => 0x515D,  0x92B2 => 0x515E,  0x92C8 => 0x936E,
    0x92D1 => 0x7479,  0x9447 => 0x6D67,  0x94CA => 0x799B,
    0x95D9 => 0x9097,  0x9644 => 0x975D,  0x96ED => 0x701E,
    0x96FC => 0x5B28,  0x9B76 => 0x7201,  0x9B78 => 0x77D7,
    0x9B7B => 0x7E87,  0x9BC6 => 0x99D6,  0x9BDE => 0x91D4,
    0x9BEC => 0x60DE,  0x9BF6 => 0x6FB6,  0x9C42 => 0x8F36,
    0x9C53 => 0x4FBB,  0x9C62 => 0x71DF,  0x9C68 => 0x9104,
    0x9C6B => 0x9DF0,  0x9C77 => 0x83CF,  0x9CBC => 0x5C10,
    0x9CBD => 0x79E3,  0x9CD0 => 0x5A67,  0x9D57 => 0x8F0B,
    0x9D5A => 0x7B51,  0x9DC4 => 0x62D0,  0x9EA9 => 0x6062,
    0x9EEF => 0x75F9,  0x9EFD => 0x6C4A,  0x9F60 => 0x9B2E,
    0x9F66 => 0x9F17,  0x9FCB => 0x50ED,  0x9FD8 => 0x5F0C,
    0xA063 => 0x880F,  0xA077 => 0x62CE,  0xA0D5 => 0x7468,
    0xA0DF => 0x7162,  0xA0E4 => 0x7250,  0xFA5F => 0x5029,
    0xFA66 => 0x507D,  0xFABD => 0x5305,  0xFAC5 => 0x5344,
    0xFAD5 => 0x537F,  0xFB48 => 0x5605,  0xFBB8 => 0x5A77,
    0xFBF3 => 0x5E75,  0xFBF9 => 0x5ED0,  0xFC4F => 0x5F58,
    0xFC6C => 0x60A4,  0xFCB9 => 0x6490,  0xFCE2 => 0x6674,
    0xFCF1 => 0x675E,  0xFDB7 => 0x6C9C,  0xFDB8 => 0x6E1D,
    0xFDBB => 0x6E2F,  0xFDF1 => 0x716E,  0xFE52 => 0x732A,
    0xFE6F => 0x745C,  0xFEAA => 0x74E9,  0xFEDD => 0x7809,
);
for (my $k = 0; $k < @compat_points; $k += 2) {
    my ($code, $ucs) = @compat_points[$k, $k + 1];
    $big5_map[$code >> 8][$code & 0xFF] = $ucs;
}

$pua = 0;
$compat[0] = 0; # 1993
$compat[1] = 0; # 2000
$compat[2] = 0; # 2001

# NOTE(review): runs of blanks inside the string literals of this section
# (e.g. " 0," next to the seven-character "0x%04X," cells) may have been
# collapsed before this source was reviewed; confirm the literals against the
# upstream copy if exact column alignment of the generated file matters.

# Read the HKSCS table.  A data line holds four hexadecimal fields: the
# Big5-HKSCS code followed by three Unicode code points, kept below as
# $utf32_1993, $utf32_2000 and $utf32_2001.  Lines of any other shape are
# ignored.
my $iso_name = "big5-iso.txt";
open(my $iso_in, "<", "input/" . $iso_name)
    or die "Cannot read " . $iso_name;
while (my $line = <$iso_in>) {
    next unless $line =~ /^([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+) +([0-9A-F]+)$/;

    my $big5       = oct("0x" . $1);
    my $utf32_1993 = oct("0x" . $2);
    my $utf32_2000 = oct("0x" . $3);
    my $utf32_2001 = oct("0x" . $4);
    isValidBig5($big5)
        or die "Bad Big5 char " . printBig5($big5);
    for my $candidate ($utf32_1993, $utf32_2000, $utf32_2001) {
        isValidUtf32($candidate)
            or die "Bad UTF32 char " . printUtf32($candidate);
    }

    # The primary mapping comes from the 2001 column when mappings beyond
    # Plane 0 are allowed, otherwise from the 2000 column.
    my $utf32 = $surrogates ? $utf32_2001 : $utf32_2000;

    my $under_big5  = $underlying_big5[$utf32];
    my $under_cp950 = $underlying_cp950[$utf32];
    if (defined($under_big5) || defined($under_cp950)) {
        # Nothing to report when every underlying table that knows $utf32
        # maps it from this very Big5 code.
        #
        # Depending on real underlying mapping (cf.
        # ../convertbig5hkscs.tab), it would be possible to save some
        # table space by dropping those HKSCS code points that are
        # already covered by the underlying mapping.
        my $conflict = (defined($under_big5) && $under_big5 != $big5)
            || (defined($under_cp950) && $under_cp950 != $big5);
        if ($conflict) {
            print "XXX mapping underlying";
            if (defined($under_big5) && defined($under_cp950)
                && $under_big5 == $under_cp950)
            {
                print " Big5/CP950 ", printBig5($under_big5);
            }
            else {
                if (defined($under_big5)) {
                    print " Big5 ", printBig5($under_big5);
                }
                if (defined($under_cp950)) {
                    print " CP950 ", printBig5($under_cp950);
                }
            }
            print " and HKSCS ", printBig5($big5), " to ",
                printUtf32($utf32), "\n";
        }
    }

    # Count primary mappings that land in 0xE000..0xF8FF.
    if ($utf32 >= 0xE000 && $utf32 <= 0xF8FF) {
        ++$pua;
    }

    my $big5_row    = $big5 >> 8;
    my $big5_column = $big5 & 0xFF;
    if (defined($big5_map[$big5_row][$big5_column])) {
        die "Bad Big5 mapping " . printBig5($big5);
    }
    $big5_map[$big5_row][$big5_column] = $utf32;

    addMapping($utf32, $big5, -1);

    # Register each remaining column value that differs from the ones
    # already handled as a compatibility mapping (2001, then 2000, then 1993).
    if ($utf32_2001 != $utf32) {
        addMapping($utf32_2001, $big5, 2);
    }
    if ($utf32_2000 != $utf32 && $utf32_2000 != $utf32_2001) {
        addMapping($utf32_2000, $big5, 1);
    }
    if ($utf32_1993 != $utf32 && $utf32_1993 != $utf32_2000
        && $utf32_1993 != $utf32_2001)
    {
        addMapping($utf32_1993, $big5, 0);
    }
}
close $iso_in;

print $pua, " mappings to PUA\n";
print $compat[0], " 1993 compatibility mappings\n" if ($compat[0] != 0);
print $compat[1], " 2000 compatibility mappings\n" if ($compat[1] != 0);
print $compat[2], " 2001 compatibility mappings\n" if ($compat[2] != 0);

# Abort if any code point in 0x00..0x7F has acquired a mapping.  The lookup
# is indexed by the loop variable itself, so every code point of the range is
# actually inspected.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0])) {
    for my $utf32 (0 .. 0x7F) {
        if (defined($uni_map[0][0][$utf32])) {
            die "Mapping " . printUtf32($utf32) . " to "
                . printBig5($uni_map[0][0][$utf32]);
        }
    }
}

my $tab_name = lc($id) . ".tab";
open(my $out, ">", $tab_name) or die "Cannot write " . $tab_name;

# Copy the leading '#' comment block of this script's own source file
# (lc($id) . ".pl" in the current directory) into the output as a C comment.
# The "#!" line is skipped, the first "#*..." line opens the comment, a later
# "#*..." line closes it, and copying stops at the first line that does not
# start with '#'.
{
    my $script_name = lc($id) . ".pl";
    open(my $self_in, "<", $script_name)
        or die "Cannot read " . $script_name;
    my $first = 1;
    while (my $line = <$self_in>) {
        if ($line =~ /^\#!.*$/) {
            # interpreter line: not copied
        }
        elsif ($line =~ /^\#(\*.*)$/) {
            if ($first == 1) {
                print $out "/", $1, "\n";
                $first = 0;
            }
            else {
                print $out " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/) {
            print $out " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/) {
            print $out " *", $1, "\n";
        }
        else {
            last;
        }
    }
    close $self_in;
}

print $out "\n",
    "#ifndef _SAL_TYPES_H_\n",
    "#include \"sal/types.h\"\n",
    "#endif\n",
    "\n";

# Big5 -> Unicode data.  For every row (high byte) that has at least one
# mapping the table holds one word "first | (last << 8)" followed by one word
# per column from first to last (0xffff where nothing is mapped).  If the row
# has mappings above 0xFFFF, those cells hold the high surrogate and a second
# run follows: one word with the first such column, then one word per column
# up to the last such column holding the low surrogate (0 elsewhere).
print $out "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
my $big5_data_index = 0;
my $big5_rows = 0;
my $big5_chars = 0;
my (@big5_data_offsets, @big5_data_space, @big5_data_used);
for my $big5_row (0 .. 255) {
    my $big5_row_first = -1;
    my $big5_row_last;
    for my $big5_column (0 .. 255) {
        if (defined($big5_map[$big5_row][$big5_column])) {
            if ($big5_row_first == -1) {
                $big5_row_first = $big5_column;
            }
            $big5_row_last = $big5_column;
        }
    }

    if ($big5_row_first == -1) {
        print $out " /* row ", $big5_row, ": --- */\n";
        $big5_data_offsets[$big5_row] = -1;
        next;
    }

    $big5_data_offsets[$big5_row] = $big5_data_index;
    ++$big5_rows;
    print $out " /* row ", $big5_row, " */\n";

    my $big5_row_surrogates_first = -1;
    my $big5_row_surrogates_last;
    my $big5_row_chars = 0;
    my $big5_row_surrogates = 0;

    print $out " ", $big5_row_first, " | (", $big5_row_last,
        " << 8), /* first, last */\n";
    ++$big5_data_index;

    print $out " ", printSpaces(7, 10, $big5_row_first);
    my $bol = 0;   # 1 right after a line break has been written
    for my $big5_column ($big5_row_first .. $big5_row_last) {
        if ($bol == 1) {
            print $out " ";
            $bol = 0;
        }
        if (defined($big5_map[$big5_row][$big5_column])) {
            my $utf32 = $big5_map[$big5_row][$big5_column];
            ++$big5_row_chars;
            if ($utf32 <= 0xFFFF) {
                printf $out "0x%04X,", $utf32;
            }
            else {
                ++$big5_row_surrogates;
                printf $out "0x%04X,",
                    (0xD800 | (($utf32 - 0x10000) >> 10));
                if ($big5_row_surrogates_first == -1) {
                    $big5_row_surrogates_first = $big5_column;
                }
                $big5_row_surrogates_last = $big5_column;
            }
        }
        else {
            print $out "0xffff,";
        }
        ++$big5_data_index;
        if ($big5_column % 10 == 9) {
            print $out "\n";
            $bol = 1;
        }
    }
    if ($bol == 0) {
        print $out "\n";
    }

    if ($big5_row_surrogates_first != -1) {
        print $out " ", $big5_row_surrogates_first,
            ", /* first low-surrogate */\n";
        ++$big5_data_index;

        print $out " ", printSpaces(7, 10, $big5_row_surrogates_first);
        $bol = 0;
        for my $big5_column ($big5_row_surrogates_first
                             .. $big5_row_surrogates_last)
        {
            if ($bol == 1) {
                print $out " ";
                $bol = 0;
            }
            my $utf32 = defined($big5_map[$big5_row][$big5_column])
                ? $big5_map[$big5_row][$big5_column]
                : 0;
            if ($utf32 <= 0xFFFF) {
                print $out " 0,";
            }
            else {
                printf $out "0x%04X,",
                    (0xDC00 | (($utf32 - 0x10000) & 0x3FF));
            }
            ++$big5_data_index;
            if ($big5_column % 10 == 9) {
                print $out "\n";
                $bol = 1;
            }
        }
        if ($bol == 0) {
            print $out "\n";
        }
    }

    # Statistics in bytes (two per table word): space actually emitted for
    # the row versus the words that carry a mapping or a header.
    $big5_chars += $big5_row_chars;
    $big5_data_space[$big5_row]
        = ($big5_data_index - $big5_data_offsets[$big5_row]) * 2;
    $big5_data_used[$big5_row]
        = (1 + $big5_row_chars
           + ($big5_row_surrogates == 0 ? 0 : 1 + $big5_row_surrogates))
          * 2;
}
print $out "};\n\n";
print "big5 rows = ", $big5_rows, ", chars = ", $big5_chars, "\n";

# Per-row offsets into the data table above; -1 marks a row with no mapping.
print $out "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
my $big5_rowoffsets_used = 0;   # accumulated, but not reported by this script
for my $big5_row (0 .. 255) {
    if ($big5_data_offsets[$big5_row] == -1) {
        print $out " -1, /* row ", $big5_row, " */\n";
    }
    else {
        print $out " ",
            $big5_data_offsets[$big5_row],
            ", /* row ",
            $big5_row,
            "; ",
            printStats($big5_data_used[$big5_row],
                       $big5_data_space[$big5_row]),
            " */\n";
        $big5_rowoffsets_used += 4;
    }
}
print $out "};\n\n";

# Unicode -> Big5 data.  For every used page of every used plane the table
# holds one word "first | (last << 8)" followed by one word per index from
# first to last (0 where nothing is mapped).
print $out "static sal_uInt16 const aImplUnicodeTo", $id, "Data[] = {\n";
my $uni_data_index = 0;
my (@uni_data_offsets, @uni_data_used, @uni_data_space);
for my $uni_plane (0 .. 16) {
    if (!defined($uni_plane_used[$uni_plane])) {
        print $out " /* plane ", $uni_plane, ": --- */\n";
        next;
    }
    for my $uni_page (0 .. 255) {
        if (!defined($uni_page_used[$uni_plane][$uni_page])) {
            $uni_data_offsets[$uni_plane][$uni_page] = -1;
            print $out " /* plane ", $uni_plane, ", page ", $uni_page,
                ": --- */\n";
            next;
        }

        $uni_data_offsets[$uni_plane][$uni_page] = $uni_data_index;
        print $out " /* plane ", $uni_plane, ", page ", $uni_page,
            " */\n";

        my $uni_page_first = -1;
        my $uni_page_last;
        for my $uni_index (0 .. 255) {
            if (defined($uni_map[$uni_plane][$uni_page][$uni_index])) {
                if ($uni_page_first == -1) {
                    $uni_page_first = $uni_index;
                }
                $uni_page_last = $uni_index;
            }
        }

        $uni_data_used[$uni_plane][$uni_page] = 0;

        print $out " ", $uni_page_first, " | (", $uni_page_last,
            " << 8), /* first, last */\n";
        ++$uni_data_index;
        $uni_data_used[$uni_plane][$uni_page] += 2;

        print $out " ", printSpaces(7, 10, $uni_page_first);
        my $bol = 0;   # 1 right after a line break has been written
        for my $uni_index ($uni_page_first .. $uni_page_last) {
            if ($bol == 1) {
                print $out " ";
                $bol = 0;
            }
            if (defined($uni_map[$uni_plane][$uni_page][$uni_index])) {
                printf $out "0x%04X,",
                    $uni_map[$uni_plane][$uni_page][$uni_index];
                $uni_data_used[$uni_plane][$uni_page] += 2;
            }
            else {
                print $out " 0,";
            }
            ++$uni_data_index;
            if ($uni_index % 10 == 9) {
                print $out "\n";
                $bol = 1;
            }
        }
        if ($bol == 0) {
            print $out "\n";
        }

        $uni_data_space[$uni_plane][$uni_page]
            = ($uni_data_index
               - $uni_data_offsets[$uni_plane][$uni_page]) * 2;
    }
}
print $out "};\n\n";

# Per-page offsets into the data table above, 256 entries for each used
# plane; -1 marks a page with no mapping.  Unused planes contribute only a
# comment.
print $out "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
my (@uni_pageoffsets_used, @uni_data_used_sum, @uni_data_space_sum);
for my $uni_plane (0 .. 16) {
    if (!defined($uni_plane_used[$uni_plane])) {
        print $out " /* plane ", $uni_plane, ": --- */\n";
        next;
    }
    $uni_pageoffsets_used[$uni_plane] = 0;
    $uni_data_used_sum[$uni_plane] = 0;
    $uni_data_space_sum[$uni_plane] = 0;
    for my $uni_page (0 .. 255) {
        my $offset = $uni_data_offsets[$uni_plane][$uni_page];
        if ($offset == -1) {
            print $out " -1, /* plane ",
                $uni_plane,
                ", page ",
                $uni_page,
                " */\n";
        }
        else {
            print $out " ",
                $offset,
                ", /* plane ",
                $uni_plane,
                ", page ",
                $uni_page,
                "; ",
                printStats($uni_data_used[$uni_plane][$uni_page],
                           $uni_data_space[$uni_plane][$uni_page]),
                " */\n";
            $uni_pageoffsets_used[$uni_plane] += 4;
            $uni_data_used_sum[$uni_plane]
                += $uni_data_used[$uni_plane][$uni_page];
            $uni_data_space_sum[$uni_plane]
                += $uni_data_space[$uni_plane][$uni_page];
        }
    }
}
print $out "};\n\n";

# Per-plane offsets into the page-offset table above: the n-th used plane
# gets "n * 256", an unused plane gets -1.  A closing comment summarises the
# space statistics of all three Unicode -> Big5 tables.
print $out "static sal_Int32 const aImplUnicodeTo",
    $id,
    "PlaneOffsets[] = {\n";
my $uni_page_offset = 0;
my $uni_planeoffsets_used = 0;
my $uni_pageoffsets_used_sum = 0;
my $uni_pageoffsets_space_sum = 0;
my $uni_data_used_sum2 = 0;
my $uni_data_space_sum2 = 0;
for my $uni_plane (0 .. 16) {
    if (defined($uni_plane_used[$uni_plane])) {
        print $out " ",
            $uni_page_offset++,
            " * 256, /* plane ",
            $uni_plane,
            "; ",
            printStats($uni_pageoffsets_used[$uni_plane], 256 * 4),
            ", ",
            printStats($uni_data_used_sum[$uni_plane],
                       $uni_data_space_sum[$uni_plane]),
            " */\n";
        $uni_planeoffsets_used += 4;
        $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$uni_plane];
        $uni_pageoffsets_space_sum += 256 * 4;
        $uni_data_used_sum2 += $uni_data_used_sum[$uni_plane];
        $uni_data_space_sum2 += $uni_data_space_sum[$uni_plane];
    }
    else {
        print $out " -1, /* plane ", $uni_plane, " */\n";
    }
}
print $out " /* ",
    printStats($uni_planeoffsets_used, 17 * 4),
    ", ",
    printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum),
    ", ",
    printStats($uni_data_used_sum2, $uni_data_space_sum2),
    " */\n};\n";

# Buffered write errors only surface when the handle is closed.
close($out) or die "Cannot write " . $tab_name;