#!/usr/bin/perl
#*************************************************************************
#
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# Copyright 2000, 2010 Oracle and/or its affiliates.
#
# OpenOffice.org - a multi-platform office productivity suite
#
# This file is part of OpenOffice.org.
#
# OpenOffice.org is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License version 3
# only, as published by the Free Software Foundation.
#
# OpenOffice.org is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License version 3 for more details
# (a copy is included in the LICENSE file that accompanied this code).
#
# You should have received a copy of the GNU Lesser General Public License
# version 3 along with OpenOffice.org. If not, see
# <http://www.openoffice.org/license.html>
# for a copy of the LGPLv3 License.
#
#*************************************************************************

# The following files must be available in a ./input subdir:

# <http://www.unicode.org/Public/UNIDATA/Unihan.txt>:
# "Unicode version: 3.1.1 Table version: 1.1 Date: 28 June 2001"
# contains descriptions for:
# U+3400..4DFF CJK Unified Ideographs Extension A
# U+4E00..9FFF CJK Unified Ideographs
# U+F900..FAFF CJK Compatibility Ideographs
# U+20000..2F7FF CJK Unified Ideographs Extension B
# U+2F800..2FFFF CJK Compatibility Ideographs Supplement

# <http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/CNS11643.TXT>:
# "Unicode version: 1.1 Table version: 0.0d1 Date: 21 October 1994"
# contains mappings for CNS 11643-1986

# <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/ftp/CJKtable/Uni2CNS.Z>:
# "Unicode version: 1.1 Table version: 0.49 Date: 26 March 1998"
# contains mappings for CNS 11643-1992 that are incompatible with
# CNS11643.TXT

# This script reads the mapping files listed above, builds a
# CNS 11643-1992 -> Unicode table and the reverse Unicode -> CNS table, and
# writes both as C arrays to "<lowercased id>.tab".  Progress, statistics and
# mapping conflicts are reported on standard output.
#
# The leading comment block of this script (everything up to the first
# non-comment line) is copied into the generated file as a C comment, so the
# blank line after the closing row of asterisks above must stay in place.

use strict;
use warnings;

# Identifier embedded in the generated C array names; its lowercased form
# also names the generated ".tab" file and the ".pl" file whose header is
# copied.
my $id = "Cns116431992";

# $cns_map[plane][row][column] is the UTF-32 code point recorded for a CNS
# character; $cns_plane_used[plane] is set once a plane holds any mapping.
my @cns_map;
my @cns_plane_used;

# Number of mappings contributed by each input file.
my $count_Unihan_txt = 0;
my $count_CNS11643_TXT = 0;
my $count_Uni2CNS = 0;

# Reading Uni2CNS is switched off; set to 1 to enable that input file.
my $read_Uni2CNS = 0;

# Returns true if the argument is a Unicode scalar value that is not a
# surrogate, not in U+FDD0..U+FDEF and not one of the U+xxFFFE/U+xxFFFF code
# points.
sub isValidUtf32
{
    my ($utf32) = @_;
    return $utf32 >= 0 && $utf32 <= 0x10FFFF
        && !($utf32 >= 0xD800 && $utf32 <= 0xDFFF)
        && !($utf32 >= 0xFDD0 && $utf32 <= 0xFDEF)
        && ($utf32 & 0xFFFF) < 0xFFFE;
}

# Formats a code point as "U+XXXX" (at least four hexadecimal digits).
sub printUtf32
{
    my ($utf32) = @_;
    return sprintf("U+%04X", $utf32);
}

# Returns true if plane is within 1..16 and row and column are within 1..94.
sub isValidCns116431992
{
    my ($plane, $row, $column) = @_;
    return $plane >= 1 && $plane <= 16
        && $row >= 1 && $row <= 94
        && $column >= 1 && $column <= 94;
}

# Formats a CNS character as "plane-row/column" in decimal.
sub printCns116431992
{
    my ($plane, $row, $column) = @_;
    return sprintf("%d-%02d/%02d", $plane, $row, $column);
}

# Formats "used/space bytes (percent%)".  A zero $space is reported as 0.0%
# instead of dividing by zero.
sub printStats
{
    my ($used, $space) = @_;
    my $percent = $space ? $used * 100 / $space : 0;
    return sprintf("%d/%d bytes (%.1f%%)", $used, $space, $percent);
}

# Returns the blank padding that moves the first cell of a table line to its
# slot: ($end modulo $columns_per_line) cells of $column_width spaces each.
sub printSpaces
{
    my ($column_width, $columns_per_line, $end) = @_;
    return " " x ($column_width * ($end % $columns_per_line));
}

# Converts the hexadecimal plane, row and column fields of a CNS code into
# plane, row and column numbers; the row and column bytes carry an offset of
# 0x20.
sub decodeCns
{
    my ($plane_hex, $row_hex, $column_hex) = @_;
    return (hex($plane_hex), hex($row_hex) - 0x20, hex($column_hex) - 0x20);
}

# Dies unless both the code point and the CNS position are valid.
sub checkMapping
{
    my ($utf32, $plane, $row, $column) = @_;
    isValidUtf32($utf32)
        or die "Bad UTF32 char " . printUtf32($utf32);
    isValidCns116431992($plane, $row, $column)
        or die "Bad CNS11643-1992 char "
               . printCns116431992($plane, $row, $column);
    return;
}

# Records CNS position -> code point unless the position is already taken.
# Returns undef if the mapping was newly recorded, otherwise the code point
# that was recorded earlier (which may equal $utf32).
sub addMapping
{
    my ($utf32, $plane, $row, $column) = @_;
    my $previous = $cns_map[$plane][$row][$column];
    return $previous if defined($previous);
    $cns_map[$plane][$row][$column] = $utf32;
    $cns_plane_used[$plane] = 1;
    return;
}

# Describes a CNS position that is already mapped to $previous while an input
# file asks for $utf32.
sub conflictText
{
    my ($plane, $row, $column, $previous, $utf32) = @_;
    return "Mapping "
           . printCns116431992($plane, $row, $column)
           . " to "
           . printUtf32($previous)
           . ", NOT "
           . printUtf32($utf32);
}

# Packs plane, row and column into one integer (plane << 16 | row << 8 |
# column), the form stored in the Unicode -> CNS map.
sub packCns
{
    my ($plane, $row, $column) = @_;
    return ($plane << 16) | ($row << 8) | $column;
}

# Inverse of packCns: returns (plane, row, column).
sub unpackCns
{
    my ($cns) = @_;
    return ($cns >> 16, ($cns >> 8) & 0xFF, $cns & 0xFF);
}

# True for plane 3 positions from row 66, column 39 onwards: the Fictitious
# Character Set Extensions (Lunde, p. 131).
sub isFictitiousExtension
{
    my ($plane, $row, $column) = @_;
    return $plane == 3 && ($row == 66 && $column > 38 || $row > 66);
}

# Lays out table cells as text.  The first line starts with $indent followed
# by padding for the slots before $first; a line ends after every cell whose
# index modulo $columns_per_line is $columns_per_line - 1; the text always
# ends with a newline.
sub formatCells
{
    my ($indent, $column_width, $columns_per_line, $first, @cells) = @_;
    my $text = $indent . printSpaces($column_width, $columns_per_line, $first);
    my $at_line_start = 0;
    my $index = $first;
    for my $cell (@cells)
    {
        if ($at_line_start)
        {
            $text .= $indent;
            $at_line_start = 0;
        }
        $text .= $cell;
        if ($index % $columns_per_line == $columns_per_line - 1)
        {
            $text .= "\n";
            $at_line_start = 1;
        }
        ++$index;
    }
    $text .= "\n" if !$at_line_start;
    return $text;
}

# --- Unihan.txt: kCNS1992 conflicts are fatal, kIRG_TSource conflicts are
# --- only reported.
{
    my $filename = "Unihan.txt";
    open my $in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$in>)
    {
        if ($line =~ /^U\+([0-9A-F]+)\tkCNS1992\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/)
        {
            my $utf32 = hex($1);
            my @cns = decodeCns($2, $3, $4);
            checkMapping($utf32, @cns);
            my $previous = addMapping($utf32, @cns);
            if (!defined($previous))
            {
                ++$count_Unihan_txt;
            }
            elsif ($previous != $utf32)
            {
                die conflictText(@cns, $previous, $utf32);
            }
        }
        elsif ($line =~ /^U\+([0-9A-F]+)\tkIRG_TSource\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])$/)
        {
            my $utf32 = hex($1);
            my @cns = decodeCns($2, $3, $4);
            checkMapping($utf32, @cns);
            my $previous = addMapping($utf32, @cns);
            if (!defined($previous))
            {
                ++$count_Unihan_txt;
            }
            elsif ($previous != $utf32)
            {
                print "WARNING! ", conflictText(@cns, $previous, $utf32), "\n";
            }
        }
        elsif ($line =~ /^U\+([0-9A-F]+)\tkCNS1992\t.*$/)
        {
            # A kCNS1992 line that the strict pattern above did not accept.
            die "Bad format";
        }
    }
    close $in;
}

# --- CNS11643.TXT: every line is validated, but only planes 1 and 2 are
# --- recorded; conflicts are fatal.
{
    my $filename = "CNS11643.TXT";
    open my $in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$in>)
    {
        if ($line =~ /0x([0-9A-F])([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t0x([0-9A-F]+)\t\#.*$/)
        {
            my $utf32 = hex($4);
            my @cns = decodeCns($1, $2, $3);
            checkMapping($utf32, @cns);
            if ($cns[0] <= 2)
            {
                my $previous = addMapping($utf32, @cns);
                if (!defined($previous))
                {
                    ++$count_CNS11643_TXT;
                }
                elsif ($previous != $utf32)
                {
                    die conflictText(@cns, $previous, $utf32);
                }
            }
        }
    }
    close $in;
}

# --- Uni2CNS (only when enabled): conflicts with mappings already recorded
# --- are silently ignored, and every plane 1 position seen is listed.
if ($read_Uni2CNS)
{
    my $filename = "Uni2CNS";
    open my $in, '<', "input/" . $filename
        or die "Cannot read " . $filename . ": $!";
    while (my $line = <$in>)
    {
        if ($line =~ /([0-9A-F]+)\t([0-9A-F])-([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])\t.*$/)
        {
            my $utf32 = hex($1);
            my @cns = decodeCns($2, $3, $4);
            checkMapping($utf32, @cns);
            if (!defined(addMapping($utf32, @cns)))
            {
                ++$count_Uni2CNS;
            }
            if ($cns[0] == 1)
            {
                print printCns116431992(@cns), "\n";
            }
        }
    }
    close $in;
}

# --- Build the reverse map.  $uni_map[plane][page][index] holds a packed CNS
# --- position; plane, page and index are bits 16 and up, bits 8..15 and bits
# --- 0..7 of the code point.
my @uni_map;
my @uni_plane_used;
my @uni_page_used;
for my $cns_plane (1 .. 16)
{
    next if !defined($cns_plane_used[$cns_plane]);
    for my $cns_row (1 .. 94)
    {
        for my $cns_column (1 .. 94)
        {
            my $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
            next if !defined($utf32);

            my $uni_plane = $utf32 >> 16;
            my $uni_page = ($utf32 >> 8) & 0xFF;
            my $uni_index = $utf32 & 0xFF;
            my $cns1 = $uni_map[$uni_plane][$uni_page][$uni_index];
            if (!defined($cns1))
            {
                $uni_map[$uni_plane][$uni_page][$uni_index]
                    = packCns($cns_plane, $cns_row, $cns_column);
                $uni_plane_used[$uni_plane] = 1;
                $uni_page_used[$uni_plane][$uni_page] = 1;
                next;
            }

            # The code point already maps to another CNS position.
            # Do not map from Unicode to Fictitious Character Set
            # Extensions (Lunde, p. 131), if possible:
            my @cns_new = ($cns_plane, $cns_row, $cns_column);
            my @cns_old = unpackCns($cns1);
            if (isFictitiousExtension(@cns_new))
            {
                print " (",
                      printUtf32($utf32),
                      " to fictious ",
                      printCns116431992(@cns_new),
                      " ignored, favouring ",
                      printCns116431992(@cns_old),
                      ")\n";
            }
            elsif (isFictitiousExtension(@cns_old))
            {
                $uni_map[$uni_plane][$uni_page][$uni_index]
                    = packCns(@cns_new);
                print " (",
                      printUtf32($utf32),
                      " to fictious ",
                      printCns116431992(@cns_old),
                      " ignored, favouring ",
                      printCns116431992(@cns_new),
                      ")\n";
            }
            else
            {
                print "WARNING! Mapping ",
                      printUtf32($utf32),
                      " to ",
                      printCns116431992(@cns_old),
                      ", NOT ",
                      printCns116431992(@cns_new),
                      "\n";
            }
        }
    }
}

# No CNS character may map to U+0000..U+007F; stop if one does.  Each code
# point in that range is looked up by its own value.
if (defined($uni_plane_used[0]) && defined($uni_page_used[0][0]))
{
    for my $utf32 (0 .. 0x7F)
    {
        my $cns = $uni_map[0][0][$utf32];
        next if !defined($cns);
        die "Mapping "
            . printUtf32($utf32)
            . " to "
            . printCns116431992(unpackCns($cns));
    }
}

# --- Write the generated C source.
# NOTE(review): the leading blanks inside the emitted literals only affect
# the layout of the generated file; compare with a reference .tab file if
# exact alignment matters.
my $filename = lc($id) . ".tab";
open my $out, '>', $filename
    or die "Cannot write " . $filename . ": $!";

# Copy the leading comment block of the ".pl" file as a C comment: the "#!"
# line is skipped, the first "#***" line opens the comment, a later one
# closes it, and copying stops at the first line that is not a comment.
{
    my $script = lc($id) . ".pl";
    open my $in, '<', $script
        or die "Cannot read " . $script . ": $!";
    my $first = 1;
    while (my $line = <$in>)
    {
        if ($line =~ /^\#!.*$/)
        {
            next;
        }
        elsif ($line =~ /^\#(\*.*)$/)
        {
            if ($first)
            {
                print $out "/", $1, "\n";
                $first = 0;
            }
            else
            {
                print $out " ", substr($1, 0, length($1) - 1), "/\n";
            }
        }
        elsif ($line =~ /^\# (.*)$/)
        {
            print $out " *", $1, "\n";
        }
        elsif ($line =~ /^\#(.*)$/)
        {
            print $out " *", $1, "\n";
        }
        else
        {
            last;
        }
    }
    close $in;
}

print $out "\n",
           "#ifndef _SAL_TYPES_H_\n",
           "#include \"sal/types.h\"\n",
           "#endif\n",
           "\n";

# CNS -> Unicode data.  Each populated row is written as one 16-bit unit
# holding first | (last << 8), then one unit per column from first to last
# (0xffff for unmapped columns, the high surrogate for code points above
# U+FFFF).  If the row has such code points, one unit with the first
# surrogate column follows, then one low-surrogate unit per column up to the
# last surrogate column.
print $out "static sal_uInt16 const aImpl", $id, "ToUnicodeData[] = {\n";
my @cns_data_offsets;
my @cns_data_space;
my @cns_data_used;
my $cns_data_index = 0;
for my $cns_plane (1 .. 16)
{
    next if !defined($cns_plane_used[$cns_plane]);

    my $cns_rows = 0;
    my $cns_chars = 0;
    for my $cns_row (1 .. 94)
    {
        my @used_columns
            = grep { defined($cns_map[$cns_plane][$cns_row][$_]) } 1 .. 94;
        if (!@used_columns)
        {
            print $out " /* plane ", $cns_plane, ", row ", $cns_row,
                       ": --- */\n";
            $cns_data_offsets[$cns_plane][$cns_row] = -1;
            next;
        }

        my ($row_first, $row_last) = @used_columns[0, -1];
        my $row_chars = scalar(@used_columns);
        $cns_data_offsets[$cns_plane][$cns_row] = $cns_data_index;
        ++$cns_rows;
        print $out " /* plane ", $cns_plane, ", row ", $cns_row, " */\n";

        print $out " ", $row_first, " | (", $row_last,
                   " << 8), /* first, last */\n";
        ++$cns_data_index;

        my @cells;
        my @surrogate_columns;
        for my $cns_column ($row_first .. $row_last)
        {
            my $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
            if (!defined($utf32))
            {
                push @cells, "0xffff,";
            }
            elsif ($utf32 <= 0xFFFF)
            {
                push @cells, sprintf("0x%04X,", $utf32);
            }
            else
            {
                push @cells,
                     sprintf("0x%04X,",
                             0xD800 | (($utf32 - 0x10000) >> 10));
                push @surrogate_columns, $cns_column;
            }
        }
        print $out formatCells(" ", 7, 10, $row_first, @cells);
        $cns_data_index += scalar(@cells);

        my $row_surrogates = scalar(@surrogate_columns);
        if ($row_surrogates)
        {
            my ($low_first, $low_last) = @surrogate_columns[0, -1];
            print $out " ", $low_first, ", /* first low-surrogate */\n";
            ++$cns_data_index;

            my @low_cells;
            for my $cns_column ($low_first .. $low_last)
            {
                my $utf32 = $cns_map[$cns_plane][$cns_row][$cns_column];
                if (defined($utf32) && $utf32 > 0xFFFF)
                {
                    push @low_cells,
                         sprintf("0x%04X,",
                                 0xDC00 | (($utf32 - 0x10000) & 0x3FF));
                }
                else
                {
                    push @low_cells, " 0,";
                }
            }
            print $out formatCells(" ", 7, 10, $low_first, @low_cells);
            $cns_data_index += scalar(@low_cells);
        }

        # Statistics in bytes; every unit of this table is two bytes wide.
        $cns_chars += $row_chars;
        $cns_data_space[$cns_plane][$cns_row]
            = ($cns_data_index
               - $cns_data_offsets[$cns_plane][$cns_row]) * 2;
        $cns_data_used[$cns_plane][$cns_row]
            = (1 + $row_chars
               + ($row_surrogates == 0 ? 0 : 1 + $row_surrogates)) * 2;
    }
    print "cns plane ",
          $cns_plane,
          ": rows = ",
          $cns_rows,
          ", chars = ",
          $cns_chars,
          "\n";
}
print $out "};\n\n";

# CNS -> Unicode row offsets: for every populated plane, one entry per row
# giving the row's start index in the data table, or -1 for an empty row.
print $out "static sal_Int32 const aImpl", $id, "ToUnicodeRowOffsets[] = {\n";
my @cns_rowoffsets_used;
for my $cns_plane (1 .. 16)
{
    if (!defined($cns_plane_used[$cns_plane]))
    {
        print $out " /* plane ", $cns_plane, ": --- */\n";
        next;
    }
    $cns_rowoffsets_used[$cns_plane] = 0;
    for my $cns_row (1 .. 94)
    {
        my $offset = $cns_data_offsets[$cns_plane][$cns_row];
        if ($offset == -1)
        {
            print $out " -1, /* plane ",
                       $cns_plane,
                       ", row ",
                       $cns_row,
                       " */\n";
        }
        else
        {
            print $out " ",
                       $offset,
                       ", /* plane ",
                       $cns_plane,
                       ", row ",
                       $cns_row,
                       "; ",
                       printStats($cns_data_used[$cns_plane][$cns_row],
                                  $cns_data_space[$cns_plane][$cns_row]),
                       " */\n";
            $cns_rowoffsets_used[$cns_plane] += 4;
        }
    }
}
print $out "};\n\n";

# CNS -> Unicode plane offsets: populated planes are numbered consecutively
# and scaled by the 94 row entries of a plane; other planes get -1.
print $out "static sal_Int32 const aImpl",
           $id,
           "ToUnicodePlaneOffsets[] = {\n";
my $cns_row_offset = 0;
for my $cns_plane (1 .. 16)
{
    if (defined($cns_plane_used[$cns_plane]))
    {
        print $out " ",
                   $cns_row_offset++,
                   " * 94, /* plane ",
                   $cns_plane,
                   "; ",
                   printStats($cns_rowoffsets_used[$cns_plane], 94 * 4),
                   " */\n";
    }
    else
    {
        print $out " -1, /* plane ", $cns_plane, " */\n";
    }
}
print $out "};\n\n";

# Unicode -> CNS data.  Each populated page is written as two bytes (first
# and last populated index) followed by three bytes (plane, row, column) per
# index from first to last, with 0, 0, 0 for unmapped indices.
print $out "static sal_uInt8 const aImplUnicodeTo", $id, "Data[] = {\n";
my @uni_data_offsets;
my @uni_data_used;
my @uni_data_space;
my $uni_data_index = 0;
for my $uni_plane (0 .. 16)
{
    if (!defined($uni_plane_used[$uni_plane]))
    {
        print $out " /* plane ", $uni_plane, ": --- */\n";
        next;
    }
    for my $uni_page (0 .. 255)
    {
        if (!defined($uni_page_used[$uni_plane][$uni_page]))
        {
            $uni_data_offsets[$uni_plane][$uni_page] = -1;
            print $out " /* plane ", $uni_plane, ", page ", $uni_page,
                       ": --- */\n";
            next;
        }

        $uni_data_offsets[$uni_plane][$uni_page] = $uni_data_index;
        print $out " /* plane ", $uni_plane, ", page ", $uni_page,
                   " */\n";

        my @used_indices
            = grep { defined($uni_map[$uni_plane][$uni_page][$_]) } 0 .. 255;
        my ($page_first, $page_last) = @used_indices[0, -1];

        print $out " ", $page_first, ", ", $page_last,
                   ", /* first, last */\n";
        $uni_data_index += 2;

        my @cells;
        for my $uni_index ($page_first .. $page_last)
        {
            my $cns = $uni_map[$uni_plane][$uni_page][$uni_index];
            if (defined($cns))
            {
                push @cells, sprintf("%2d,%2d,%2d,", unpackCns($cns));
            }
            else
            {
                push @cells, " 0, 0, 0,";
            }
        }
        print $out formatCells(" ", 9, 8, $page_first, @cells);
        $uni_data_index += 3 * scalar(@cells);

        # Statistics in bytes: the first/last pair plus three bytes per
        # mapped index count as used.
        $uni_data_used[$uni_plane][$uni_page]
            = 2 + 3 * scalar(@used_indices);
        $uni_data_space[$uni_plane][$uni_page]
            = $uni_data_index
              - $uni_data_offsets[$uni_plane][$uni_page];
    }
}
print $out "};\n\n";

# Unicode -> CNS page offsets: for every populated plane, one entry per page
# giving the page's start index in the data table, or -1 for an empty page.
print $out "static sal_Int32 const aImplUnicodeTo", $id, "PageOffsets[] = {\n";
my @uni_pageoffsets_used;
my @uni_data_used_sum;
my @uni_data_space_sum;
for my $uni_plane (0 .. 16)
{
    if (!defined($uni_plane_used[$uni_plane]))
    {
        print $out " /* plane ", $uni_plane, ": --- */\n";
        next;
    }
    $uni_pageoffsets_used[$uni_plane] = 0;
    $uni_data_used_sum[$uni_plane] = 0;
    $uni_data_space_sum[$uni_plane] = 0;
    for my $uni_page (0 .. 255)
    {
        my $offset = $uni_data_offsets[$uni_plane][$uni_page];
        if ($offset == -1)
        {
            print $out " -1, /* plane ",
                       $uni_plane,
                       ", page ",
                       $uni_page,
                       " */\n";
        }
        else
        {
            print $out " ",
                       $offset,
                       ", /* plane ",
                       $uni_plane,
                       ", page ",
                       $uni_page,
                       "; ",
                       printStats($uni_data_used[$uni_plane][$uni_page],
                                  $uni_data_space[$uni_plane][$uni_page]),
                       " */\n";
            $uni_pageoffsets_used[$uni_plane] += 4;
            $uni_data_used_sum[$uni_plane]
                += $uni_data_used[$uni_plane][$uni_page];
            $uni_data_space_sum[$uni_plane]
                += $uni_data_space[$uni_plane][$uni_page];
        }
    }
}
print $out "};\n\n";

# Unicode -> CNS plane offsets: populated planes are numbered consecutively
# and scaled by the 256 page entries of a plane; other planes get -1.  The
# closing comment carries the overall statistics.
print $out "static sal_Int32 const aImplUnicodeTo",
           $id,
           "PlaneOffsets[] = {\n";
my $uni_page_offset = 0;
my $uni_planeoffsets_used = 0;
my $uni_pageoffsets_used_sum = 0;
my $uni_pageoffsets_space_sum = 0;
my $uni_data_used_sum2 = 0;
my $uni_data_space_sum2 = 0;
for my $uni_plane (0 .. 16)
{
    if (defined($uni_plane_used[$uni_plane]))
    {
        print $out " ",
                   $uni_page_offset++,
                   " * 256, /* plane ",
                   $uni_plane,
                   "; ",
                   printStats($uni_pageoffsets_used[$uni_plane], 256 * 4),
                   ", ",
                   printStats($uni_data_used_sum[$uni_plane],
                              $uni_data_space_sum[$uni_plane]),
                   " */\n";
        $uni_planeoffsets_used += 4;
        $uni_pageoffsets_used_sum += $uni_pageoffsets_used[$uni_plane];
        $uni_pageoffsets_space_sum += 256 * 4;
        $uni_data_used_sum2 += $uni_data_used_sum[$uni_plane];
        $uni_data_space_sum2 += $uni_data_space_sum[$uni_plane];
    }
    else
    {
        print $out " -1, /* plane ", $uni_plane, " */\n";
    }
}
print $out " /* ",
           printStats($uni_planeoffsets_used, 17 * 4),
           ", ",
           printStats($uni_pageoffsets_used_sum, $uni_pageoffsets_space_sum),
           ", ",
           printStats($uni_data_used_sum2, $uni_data_space_sum2),
           " */\n};\n";

# Buffered write errors only surface when the handle is closed.
close $out
    or die "Cannot write " . $filename . ": $!";

print "Unihan.txt = ", $count_Unihan_txt,
      ", CNS11643.TXT = ", $count_CNS11643_TXT,
      ", Uni2CNS = ", $count_Uni2CNS,
      ", total = ",
      ($count_Unihan_txt + $count_CNS11643_TXT + $count_Uni2CNS),
      "\n";