1: # -*- perl -*- vim: ft=perl 2eval 'exec perl -w -S $0 ${1+"$@"}' 3if 0; 4#************************************************************************* 5# 6# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 7# 8# Copyright 2000, 2010 Oracle and/or its affiliates. 9# 10# OpenOffice.org - a multi-platform office productivity suite 11# 12# This file is part of OpenOffice.org. 13# 14# OpenOffice.org is free software: you can redistribute it and/or modify 15# it under the terms of the GNU Lesser General Public License version 3 16# only, as published by the Free Software Foundation. 17# 18# OpenOffice.org is distributed in the hope that it will be useful, 19# but WITHOUT ANY WARRANTY; without even the implied warranty of 20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21# GNU Lesser General Public License version 3 for more details 22# (a copy is included in the LICENSE file that accompanied this code). 23# 24# You should have received a copy of the GNU Lesser General Public License 25# version 3 along with OpenOffice.org. If not, see 26# <http://www.openoffice.org/license.html> 27# for a copy of the LGPLv3 License. 28# 29#************************************************************************* 30 31# See Usage() below or invoke without arguments for short instructions. 32# For long instructions use the source, Luke ;-) 33 34use strict; 35 36sub Usage() 37{ 38 print STDERR 39 "\n", 40 "langid - a hackish utility to lookup lang.h language defines and LangIDs,\n", 41 "isolang.cxx ISO639/ISO3166 mapping, locale data files, langtab.src language\n", 42 "listbox entries, postset.mk, file_ooo.scp registry name, globals.pm and\n", 43 "msi-encodinglist.txt\n\n", 44 45 "Usage: $0 [--single] {language string} | {LangID} | {primarylanguage sublanguage} | {language-country}\n\n", 46 47 "A language string will be used as a generic string match in all searched files.\n", 48 "You may enclose the language string in word delimiters,\n", 49 "e.g. \\blanguage_german\\b for a specific match.\n", 50 "If the language string expression matches more than one define,\n", 51 "e.g. as in 'german', all matching defines will be processed.\n", 52 "If the language string does not match a define or an identifier in\n", 53 "langtab.src, a generic string match of the listbox entries will be tried.\n\n", 54 55 "Numeric values of LangID,primarylanguage,sublanguage can be given\n", 56 "decimal, hexagesimal (leading 0x), octal (leading 0) or binary (leading 0b).\n", 57 "The exact language_define of an exact match will be used in remaining lookups.\n\n", 58 59 "A language-country pair will lookup a xx-YY mapping from isolang.cxx,\n", 60 "for example: 'en-US' or 'de-' or '-CH',\n", 61 "xx and YY can be given case insensitive, will be lowered-uppered internally,\n", 62 "and xx and YY themselfs may be regular expressions.\n", 63 "Also here a list of matches will be processed.\n\n", 64 65 "If option --single is given, only the first match will be processed.\n\n"; 66} 67 68my $SOLARVERSION = $ENV{"SOLARVERSION"}; 69my $INPATH = $ENV{"INPATH"}; 70my $SRC_ROOT = $ENV{"SRC_ROOT"}; 71my $UPDMINOREXT = $ENV{"UPDMINOREXT"}; 72if (!defined($SOLARVERSION) || !defined($INPATH) || !defined($SRC_ROOT)) 73{ 74 print "\nNeed \$SOLARVERSION, \$INPATH and \$SRC_ROOT, please set your OOo environment!\n"; 75 Usage(); 76 exit 1; 77} 78if (!defined($UPDMINOREXT)) { 79 $UPDMINOREXT = ''; 80} 81my $SOLENVINC = "$SOLARVERSION/$INPATH/inc$UPDMINOREXT"; 82 83my $LANGUAGE_MASK_PRIMARY = 0x03ff; 84 85sub getPrimaryLanguage($) 86{ 87 my($lcid) = @_; 88 return $lcid & $LANGUAGE_MASK_PRIMARY; 89} 90 91sub getSubLanguage($) 92{ 93 my($lcid) = @_; 94 return $lcid >> 10; 95} 96 97sub makeLangID($$) 98{ 99 my( $sub, $pri) = @_; 100 return ($sub << 10) | $pri; 101} 102 103 104sub grepFile($$$$@) 105{ 106 my( $regex, $path, $module, $name, @addregex) = @_; 107 my @result; 108 my $found = 0; 109 my $areopen = 0; 110 my $arecloser = ''; 111 my $file; 112 # Try module under current working directory first to catch local 113 # modifications. A Not yet delivered lang.h is a special case. 114 if ("$path/$module/$name" eq "$SOLENVINC/i18npool/lang.h") { 115 $file = "./$module/inc/i18npool/lang.h"; } 116 else { 117 $file = "./$module/$name"; } 118 if (!($found = open( IN, $file))) 119 { 120 # Then with the given path. 121 $file = "$path/$module/$name"; 122 if (!($found = open( IN, $file))) 123 { 124 print "No $file\n"; 125 $file = "$path/$module.lnk/$name"; 126 if (!($found = open( IN, $file))) { 127 print "No $file.\n"; 128 $file = "$path/$module.link/$name"; 129 if (!($found = open( IN, $file))) { 130 print "No $file either.\n"; } 131 } 132 } 133 } 134 if ($found) 135 { 136 $found = 0; 137 while (my $line = <IN>) 138 { 139 if ($line =~ /$regex/) 140 { 141 if (!$found) 142 { 143 $found = 1; 144 print "$file:\n"; 145 } 146 chomp( $line); 147 print "$line\n"; 148 push( @result, $line); 149 } 150 elsif (@addregex) 151 { 152 # By convention first element is opener, second element is closer. 153 if (!$areopen) 154 { 155 if ($line =~ /$addregex[0]/) 156 { 157 $areopen = 1; 158 $arecloser = $addregex[1]; 159 } 160 } 161 if ($areopen) 162 { 163 for (my $i = 2; $i < @addregex; ++$i) 164 { 165 if ($line =~ /$addregex[$i]/) 166 { 167 if (!$found) 168 { 169 $found = 1; 170 print "$file:\n"; 171 } 172 chomp( $line); 173 print "$line\n"; 174 push( @result, $line); 175 } 176 } 177 if ($line =~ /$arecloser/) 178 { 179 $areopen = 0; 180 } 181 } 182 } 183 } 184 close( IN); 185 } 186 if (!$found) { 187 print "Not found in $file\n"; 188 #print "Not found in $file for $regex @addregex\n"; 189 } 190 return @result; 191} 192 193 194sub main() 195{ 196 my( $lcid, @parts, $grepdef, $options, $single); 197 $grepdef = 0; 198 $single = 0; 199 for ($options = 0; $options < @ARGV && $ARGV[$options] =~ /^--/; ++$options) 200 { 201 if ($ARGV[$options] eq '--single') { $single = 1; } 202 else { print "Unknown option: $ARGV[$options]\n"; } 203 } 204 if (@ARGV == 1 + $options) 205 { 206 # 0x hex, 0b bin, 0 oct 207 if ($ARGV[$options] =~ /^0/) { 208 $lcid = oct( $ARGV[0]); } 209 elsif ($ARGV[$options] =~ /^[0-9]/) { 210 $lcid = $ARGV[$options]; } 211 else 212 { 213 $grepdef = $ARGV[$options]; 214 $lcid = 0; 215 } 216 $parts[0] = getPrimaryLanguage( $lcid); 217 $parts[1] = getSubLanguage( $lcid); 218 } 219 elsif (@ARGV == 2 + $options) 220 { 221 for (my $i = $options; $i < 2 + $options; ++$i) 222 { 223 if ($ARGV[$i] =~ /^0/) { 224 $parts[$i] = oct( $ARGV[$i]); } 225 else { 226 $parts[$i] = $ARGV[$i]; } 227 } 228 $lcid = makeLangID( $parts[1], $parts[0]); 229 } 230 else 231 { 232 Usage(); 233 return 1; 234 } 235 my $modifier = "(?i)"; 236 my (@resultlist, @greplist, $result); 237 # If no string was given on the command line, but value(s) were, lookup the 238 # LangID value to obtain the define identifier. 239 if ($grepdef) 240 { 241 # #define LANGUAGE_AFRIKAANS 0x0436 242 @resultlist = grepFile( 243 $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef, 244 $SOLENVINC, "i18npool", "lang.h", ()); 245 } 246 else 247 { 248 printf( "LangID: 0x%04X (dec %d), primary: 0x%03x, sub 0x%02x\n", $lcid, 249 $lcid, $parts[0], $parts[1]); 250 my $buf = sprintf( "0x%04X", $lcid); 251 @resultlist = grepFile( 252 '^\s*#\s*define\s+\w+\s+' . $buf, 253 $SOLENVINC, "i18npool", "lang.h", ()); 254 } 255 for $result (@resultlist) 256 { 257 # #define LANGUAGE_AFRIKAANS 0x0436 258 if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/) 259 { 260 push( @greplist, '\b' . $1 . '\b'); 261 $modifier = ""; # complete identifier now case sensitive 262 if ($single) { 263 last; } 264 } 265 } 266 # If the string given is of the form xx-yy lookup a language,country pair 267 # to obtain the define identifier. xx and yy themselfs may be regexps. 268 # xx- is a short form for 'xx-.*' and -yy a short form for '.*-yy' 269 if ($grepdef =~ /^(.*)-$/) { 270 $grepdef = $1 . "-.*"; } 271 if ($grepdef =~ /^-(.*)$/) { 272 $grepdef = ".*-" . $1; } 273 if ($grepdef =~ /^(.*)-(.*)$/) 274 { 275 my $lang = $1; 276 my $coun = $2; 277 $lang = lc($lang); 278 $coun = uc($coun); 279 # { LANGUAGE_AFRIKAANS, "af", "ZA" }, 280 @resultlist = grepFile( 281 '^\s*\{\s*\w+\s*,\s*\"' . $lang . '\"\s*,\s*\"' . $coun . '\"\s*\}\s*,', 282 "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ()); 283 for $result (@resultlist) 284 { 285 if ($result =~ /^\s*\{\s*(\w+)\s*,\s*\"\w+\"\s*,\s*\"(\w+)?\"\s*\}\s*,/) 286 { 287 push( @greplist, '\b' . $1 . '\b'); 288 $modifier = ""; # complete identifier now case sensitive 289 if ($single) { 290 last; } 291 } 292 } 293 $grepdef = 0; 294 } 295 if (!@greplist && $grepdef) { 296 push( @greplist, $grepdef); } 297 for $grepdef (@greplist) 298 { 299 print "\nUsing: " . $grepdef . "\n"; 300 301 # Decimal LCID, was needed for Langpack.ulf but isn't used anymore, 302 # keep just in case we'd need it again. 303 # #define LANGUAGE_AFRIKAANS 0x0436 304 @resultlist = grepFile( 305 $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef, 306 $SOLENVINC, "i18npool", "lang.h", ()); 307 my @lcidlist; 308 for $result (@resultlist) 309 { 310 # #define LANGUAGE_AFRIKAANS 0x0436 311 if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/) 312 { 313 push( @lcidlist, oct( $2)); 314 } 315 } 316 317 # { LANGUAGE_AFRIKAANS, "af", "ZA" }, 318 @resultlist = grepFile( 319 $modifier . '^\s*\{\s*.*' . $grepdef . '.*\s*,\s*\".*\"\s*,\s*\".*\"\s*\}\s*,', 320 "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ()); 321 322 my @langcoungreplist; 323 for $result (@resultlist) 324 { 325 if ($result =~ /^\s*\{\s*\w+\s*,\s*\"(\w+)\"\s*,\s*\"(\w+)?\"\s*\}\s*,/) 326 { 327 my $lang = $1; 328 my $coun = $2; 329 my $loca; 330 if ($coun) 331 { 332 $loca = $lang . "_" . $coun; 333 push( @langcoungreplist, '\b' . $lang . '\b(-' . $coun . ')?'); 334 } 335 else 336 { 337 $loca = $lang; 338 $coun = ""; 339 push( @langcoungreplist, '\b' . $lang . '\b'); 340 } 341 my $file = "$SRC_ROOT/i18npool/source/localedata/data/$loca.xml"; 342 my $found; 343 if (!($found = open( LD, $file))) 344 { 345 $file = "$SRC_ROOT/i18npool.lnk/source/localedata/data/$loca.xml"; 346 if (!($found = open( LD, $file))) 347 { 348 $file = "$SRC_ROOT/i18npool.link/source/localedata/data/$loca.xml"; 349 $found = open( LD, $file); 350 } 351 } 352 if ($found) 353 { 354 print "Found $file:\n"; 355 my $on = 0; 356 while (my $line = <LD>) 357 { 358 if ($line =~ /<(Language|Country)>/) { 359 $on = 1; } 360 if ($on) { 361 print $line; } 362 if ($line =~ /<\/(Language|Country)>/) { 363 $on = 0; } 364 } 365 close( LD); 366 } 367 else { 368 print "No $SRC_ROOT/i18npool/source/localedata/data/$loca.xml\n"; } 369 } 370 } 371 372 # case LANGUAGE_ARABIC: 373 grepFile( 374 $modifier . '^\s*case\s*.*' . $grepdef . '.*\s*:', 375 "$SRC_ROOT", "i18npool", "source/isolang/mslangid.cxx", ()); 376 377 # With CWS 'langstatusbar' the language listbox resource file gets a new location. 378 my $module = "svx"; 379 my $name = "source/dialog/langtab.src"; 380 if (!(-e "$SRC_ROOT/$module/$name")) { 381 $module = "svtools"; 382 $name = "source/misc/langtab.src"; 383 } 384 # < "Afrikaans" ; LANGUAGE_AFRIKAANS ; > ; 385 # lookup define 386 @resultlist = grepFile( 387 $modifier . '^\s*<\s*\".*\"\s*;\s*.*' . $grepdef . '.*\s*;\s*>\s*;', 388 "$SRC_ROOT", $module, $name, ()); 389 # lookup string 390 if (!@resultlist) { 391 grepFile( 392 $modifier . '^\s*<\s*\".*' . $grepdef . '.*\"\s*;\s*.*\s*;\s*>\s*;', 393 "$SRC_ROOT", $module, $name, ()); } 394 395 for my $langcoun (@langcoungreplist) 396 { 397 # Name (xxx) = "/registry/spool/org/openoffice/Office/Common-ctl.xcu"; 398 grepFile( 399 '^\s*Name\s*\(' . $langcoun . '\)\s*=', 400 "$SRC_ROOT", "scp2", "source/ooo/file_ooo.scp", ()); 401 402 # completelangiso=af ar as-IN ... zu 403 grepFile( 404 '^\s*completelangiso\s*=\s*(\s*([a-z]{2,3})(-[A-Z][A-Z])?)*' . $langcoun . '', 405 "$SRC_ROOT", "solenv", "inc/postset.mk", 406 # needs a duplicated pair of backslashes to produce a literal \\ 407 ('^\s*completelangiso\s*=', '^\s*$', '^\s*' . $langcoun . '\s*\\\\*$')); 408 409 # @noMSLocaleLangs = ( "br", "bs", ... ) 410 grepFile( 411 '^\s*@noMSLocaleLangs\s*=\s*\(\s*(\s*"([a-z]{2,3})(-[A-Z][A-Z])?"\s*,?)*' . $langcoun . '', 412 "$SRC_ROOT", "solenv", "bin/modules/installer/globals.pm", 413 ('^\s*@noMSLocaleLangs\s*=', '\)\s*$', '"' . $langcoun . '"')); 414 415 # af 1252 1078 # Afrikaans 416 grepFile( 417 '^\s*' . $langcoun . '', 418 "$SRC_ROOT", "setup_native", "source/win32/msi-encodinglist.txt", ()); 419 } 420 } 421 return 0; 422} 423 424main(); 425