xref: /trunk/main/i18npool/source/isolang/langid.pl (revision 7e90fac2)
1: # -*- perl -*-  vim: ft=perl
2eval 'exec perl -w -S $0 ${1+"$@"}'
3if 0;
4#**************************************************************
5#
6#  Licensed to the Apache Software Foundation (ASF) under one
7#  or more contributor license agreements.  See the NOTICE file
8#  distributed with this work for additional information
9#  regarding copyright ownership.  The ASF licenses this file
10#  to you under the Apache License, Version 2.0 (the
11#  "License"); you may not use this file except in compliance
12#  with the License.  You may obtain a copy of the License at
13#
14#    http://www.apache.org/licenses/LICENSE-2.0
15#
16#  Unless required by applicable law or agreed to in writing,
17#  software distributed under the License is distributed on an
18#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
19#  KIND, either express or implied.  See the License for the
20#  specific language governing permissions and limitations
21#  under the License.
22#
23#**************************************************************
24
25
26
27# See Usage() below or invoke without arguments for short instructions.
28# For long instructions use the source, Luke ;-)
29
30use strict;
31
32sub Usage()
33{
34    print STDERR
35        "\n",
36        "langid - a hackish utility to lookup lang.h language defines and LangIDs,\n",
37        "isolang.cxx ISO639/ISO3166 mapping, locale data files, langtab.src language\n",
38        "listbox entries, postset.mk, file_ooo.scp registry name, globals.pm and\n",
39        "msi-encodinglist.txt\n\n",
40
41        "Usage: $0 [--single] {language string} | {LangID} | {primarylanguage sublanguage} | {language-country}\n\n",
42
43        "A language string will be used as a generic string match in all searched files.\n",
44        "You may enclose the language string in word delimiters,\n",
45        "e.g. \\blanguage_german\\b for a specific match.\n",
46        "If the language string expression matches more than one define,\n",
47        "e.g. as in 'german', all matching defines will be processed.\n",
48        "If the language string does not match a define or an identifier in\n",
49        "langtab.src, a generic string match of the listbox entries will be tried.\n\n",
50
51        "Numeric values of LangID,primarylanguage,sublanguage can be given\n",
52        "decimal, hexagesimal (leading 0x), octal (leading 0) or binary (leading 0b).\n",
53        "The exact language_define of an exact match will be used in remaining lookups.\n\n",
54
55        "A language-country pair will lookup a xx-YY mapping from isolang.cxx,\n",
56        "for example: 'en-US' or 'de-' or '-CH',\n",
57        "xx and YY can be given case insensitive, will be lowered-uppered internally,\n",
58        "and xx and YY themselfs may be regular expressions.\n",
59        "Also here a list of matches will be processed.\n\n",
60
61        "If option --single is given, only the first match will be processed.\n\n";
62}
63
64my $SOLARVERSION = $ENV{"SOLARVERSION"};
65my $INPATH       = $ENV{"INPATH"};
66my $SRC_ROOT     = $ENV{"SRC_ROOT"};
67my $UPDMINOREXT  = $ENV{"UPDMINOREXT"};
68if (!defined($SOLARVERSION) || !defined($INPATH) || !defined($SRC_ROOT))
69{
70    print "\nNeed \$SOLARVERSION, \$INPATH and \$SRC_ROOT, please set your OOo environment!\n";
71    Usage();
72    exit 1;
73}
74if (!defined($UPDMINOREXT)) {
75    $UPDMINOREXT  = '';
76}
77my $SOLENVINC = "$SOLARVERSION/$INPATH/inc$UPDMINOREXT";
78
79my $LANGUAGE_MASK_PRIMARY = 0x03ff;
80
81sub getPrimaryLanguage($)
82{
83    my($lcid) = @_;
84    return $lcid & $LANGUAGE_MASK_PRIMARY;
85}
86
87sub getSubLanguage($)
88{
89    my($lcid) = @_;
90    return $lcid >> 10;
91}
92
93sub makeLangID($$)
94{
95    my( $sub, $pri) = @_;
96    return ($sub << 10) | $pri;
97}
98
99
100sub grepFile($$$$@)
101{
102    my( $regex, $path, $module, $name, @addregex) = @_;
103    my @result;
104    my $found = 0;
105    my $areopen = 0;
106    my $arecloser = '';
107    my $file;
108    # Try module under current working directory first to catch local
109    # modifications. A Not yet delivered lang.h is a special case.
110    if ("$path/$module/$name" eq "$SOLENVINC/i18npool/lang.h") {
111        $file = "./$module/inc/i18npool/lang.h"; }
112    else {
113        $file = "./$module/$name"; }
114    if (!($found = open( IN, $file)))
115    {
116        # Then with the given path.
117        $file = "$path/$module/$name";
118        if (!($found = open( IN, $file)))
119        {
120            print "No $file\n";
121            $file = "$path/$module.lnk/$name";
122            if (!($found = open( IN, $file))) {
123                print "No $file.\n";
124                $file = "$path/$module.link/$name";
125                if (!($found = open( IN, $file))) {
126                    print "No $file either.\n"; }
127            }
128        }
129    }
130    if ($found)
131    {
132        $found = 0;
133        while (my $line = <IN>)
134        {
135            if ($line =~ /$regex/)
136            {
137                if (!$found)
138                {
139                    $found = 1;
140                    print "$file:\n";
141                }
142                chomp( $line);
143                print "$line\n";
144                push( @result, $line);
145            }
146            elsif (@addregex)
147            {
148                # By convention first element is opener, second element is closer.
149                if (!$areopen)
150                {
151                    if ($line =~ /$addregex[0]/)
152                    {
153                        $areopen = 1;
154                        $arecloser = $addregex[1];
155                    }
156                }
157                if ($areopen)
158                {
159                    for (my $i = 2; $i < @addregex; ++$i)
160                    {
161                        if ($line =~ /$addregex[$i]/)
162                        {
163                            if (!$found)
164                            {
165                                $found = 1;
166                                print "$file:\n";
167                            }
168                            chomp( $line);
169                            print "$line\n";
170                            push( @result, $line);
171                        }
172                    }
173                    if ($line =~ /$arecloser/)
174                    {
175                        $areopen = 0;
176                    }
177                }
178            }
179        }
180        close( IN);
181    }
182    if (!$found) {
183        print "Not found in $file\n";
184        #print "Not found in $file for $regex @addregex\n";
185    }
186    return @result;
187}
188
189
190sub main()
191{
192    my( $lcid, @parts, $grepdef, $options, $single);
193    $grepdef = 0;
194    $single = 0;
195    for ($options = 0; $options < @ARGV && $ARGV[$options] =~ /^--/; ++$options)
196    {
197        if ($ARGV[$options] eq '--single') { $single = 1; }
198        else { print "Unknown option: $ARGV[$options]\n"; }
199    }
200    if (@ARGV == 1 + $options)
201    {
202        # 0x hex, 0b bin, 0 oct
203        if ($ARGV[$options] =~ /^0/) {
204            $lcid = oct( $ARGV[0]); }
205        elsif ($ARGV[$options] =~ /^[0-9]/) {
206            $lcid = $ARGV[$options]; }
207        else
208        {
209            $grepdef = $ARGV[$options];
210            $lcid = 0;
211        }
212        $parts[0] = getPrimaryLanguage( $lcid);
213        $parts[1] = getSubLanguage( $lcid);
214    }
215    elsif (@ARGV == 2 + $options)
216    {
217        for (my $i = $options; $i < 2 + $options; ++$i)
218        {
219            if ($ARGV[$i] =~ /^0/) {
220                $parts[$i] = oct( $ARGV[$i]); }
221            else {
222                $parts[$i] = $ARGV[$i]; }
223        }
224        $lcid = makeLangID( $parts[1], $parts[0]);
225    }
226    else
227    {
228        Usage();
229        return 1;
230    }
231    my $modifier = "(?i)";
232    my (@resultlist, @greplist, $result);
233    # If no string was given on the command line, but value(s) were, lookup the
234    # LangID value to obtain the define identifier.
235    if ($grepdef)
236    {
237        # #define LANGUAGE_AFRIKAANS                  0x0436
238        @resultlist = grepFile(
239            $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef,
240            $SOLENVINC, "i18npool", "lang.h", ());
241    }
242    else
243    {
244        printf( "LangID: 0x%04X (dec %d), primary: 0x%03x, sub 0x%02x\n", $lcid,
245                $lcid, $parts[0], $parts[1]);
246        my $buf = sprintf( "0x%04X", $lcid);
247        @resultlist = grepFile(
248            '^\s*#\s*define\s+\w+\s+' . $buf,
249            $SOLENVINC, "i18npool", "lang.h", ());
250    }
251    for $result (@resultlist)
252    {
253        # #define LANGUAGE_AFRIKAANS                  0x0436
254        if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/)
255        {
256            push( @greplist, '\b' . $1 . '\b');
257            $modifier = "";     # complete identifier now case sensitive
258            if ($single) {
259                last; }
260        }
261    }
262    # If the string given is of the form xx-yy lookup a language,country pair
263    # to obtain the define identifier. xx and yy themselfs may be regexps.
264    # xx- is a short form for 'xx-.*' and -yy a short form for '.*-yy'
265    if ($grepdef =~ /^(.*)-$/) {
266        $grepdef = $1 . "-.*"; }
267    if ($grepdef =~ /^-(.*)$/) {
268        $grepdef = ".*-" . $1; }
269    if ($grepdef =~ /^(.*)-(.*)$/)
270    {
271        my $lang = $1;
272        my $coun = $2;
273        $lang = lc($lang);
274        $coun = uc($coun);
275        #     { LANGUAGE_AFRIKAANS,                   "af", "ZA" },
276        @resultlist = grepFile(
277            '^\s*\{\s*\w+\s*,\s*\"' . $lang . '\"\s*,\s*\"'  . $coun . '\"\s*\}\s*,',
278            "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ());
279        for $result (@resultlist)
280        {
281            if ($result =~ /^\s*\{\s*(\w+)\s*,\s*\"\w+\"\s*,\s*\"(\w+)?\"\s*\}\s*,/)
282            {
283                push( @greplist, '\b' . $1 . '\b');
284                $modifier = "";     # complete identifier now case sensitive
285                if ($single) {
286                    last; }
287            }
288        }
289        $grepdef = 0;
290    }
291    if (!@greplist && $grepdef) {
292        push( @greplist, $grepdef); }
293    for $grepdef (@greplist)
294    {
295        print "\nUsing: " . $grepdef . "\n";
296
297        # Decimal LCID, was needed for Langpack.ulf but isn't used anymore,
298        # keep just in case we'd need it again.
299        # #define LANGUAGE_AFRIKAANS                  0x0436
300        @resultlist = grepFile(
301            $modifier . '^\s*#\s*define\s+[A-Z_]*' . $grepdef,
302            $SOLENVINC, "i18npool", "lang.h", ());
303        my @lcidlist;
304        for $result (@resultlist)
305        {
306            # #define LANGUAGE_AFRIKAANS                  0x0436
307            if ($result =~ /^\s*#\s*define\s+(\w+)\s+(0x[0-9a-fA-F]+)/)
308            {
309                push( @lcidlist, oct( $2));
310            }
311        }
312
313        #     { LANGUAGE_AFRIKAANS,                   "af", "ZA" },
314        @resultlist = grepFile(
315            $modifier . '^\s*\{\s*.*' . $grepdef . '.*\s*,\s*\".*\"\s*,\s*\".*\"\s*\}\s*,',
316            "$SRC_ROOT", "i18npool", "source/isolang/isolang.cxx", ());
317
318        my @langcoungreplist;
319        for $result (@resultlist)
320        {
321            if ($result =~ /^\s*\{\s*\w+\s*,\s*\"(\w+)\"\s*,\s*\"(\w+)?\"\s*\}\s*,/)
322            {
323                my $lang = $1;
324                my $coun = $2;
325                my $loca;
326                if ($coun)
327                {
328                    $loca = $lang . "_" . $coun;
329                    push( @langcoungreplist, '\b' . $lang . '\b(-' . $coun . ')?');
330                }
331                else
332                {
333                    $loca = $lang;
334                    $coun = "";
335                    push( @langcoungreplist, '\b' . $lang . '\b');
336                }
337                my $file = "$SRC_ROOT/i18npool/source/localedata/data/$loca.xml";
338                my $found;
339                if (!($found = open( LD, $file)))
340                {
341                    $file = "$SRC_ROOT/i18npool.lnk/source/localedata/data/$loca.xml";
342                    if (!($found = open( LD, $file)))
343                    {
344                        $file = "$SRC_ROOT/i18npool.link/source/localedata/data/$loca.xml";
345                        $found = open( LD, $file);
346                    }
347                }
348                if ($found)
349                {
350                    print "Found $file:\n";
351                    my $on = 0;
352                    while (my $line = <LD>)
353                    {
354                        if ($line =~ /<(Language|Country)>/) {
355                            $on = 1; }
356                        if ($on) {
357                            print $line; }
358                        if ($line =~ /<\/(Language|Country)>/) {
359                            $on = 0; }
360                    }
361                    close( LD);
362                }
363                else {
364                    print "No $SRC_ROOT/i18npool/source/localedata/data/$loca.xml\n"; }
365            }
366        }
367
368        #         case LANGUAGE_ARABIC:
369        grepFile(
370            $modifier . '^\s*case\s*.*' . $grepdef . '.*\s*:',
371            "$SRC_ROOT", "i18npool", "source/isolang/mslangid.cxx", ());
372
373        # With CWS 'langstatusbar' the language listbox resource file gets a new location.
374        my $module = "svx";
375        my $name = "source/dialog/langtab.src";
376        if (!(-e "$SRC_ROOT/$module/$name")) {
377            $module = "svtools";
378            $name = "source/misc/langtab.src";
379        }
380        #         < "Afrikaans" ; LANGUAGE_AFRIKAANS ; > ;
381        # lookup define
382        @resultlist = grepFile(
383            $modifier . '^\s*<\s*\".*\"\s*;\s*.*' . $grepdef . '.*\s*;\s*>\s*;',
384            "$SRC_ROOT", $module, $name, ());
385        # lookup string
386        if (!@resultlist) {
387            grepFile(
388                $modifier . '^\s*<\s*\".*' . $grepdef . '.*\"\s*;\s*.*\s*;\s*>\s*;',
389                "$SRC_ROOT", $module, $name, ()); }
390
391        for my $langcoun (@langcoungreplist)
392        {
393            # Name (xxx) = "/registry/spool/org/openoffice/Office/Common-ctl.xcu";
394            grepFile(
395                '^\s*Name\s*\(' . $langcoun . '\)\s*=',
396                "$SRC_ROOT", "scp2", "source/ooo/file_ooo.scp", ());
397
398            # completelangiso=af ar as-IN ... zu
399            grepFile(
400                '^\s*completelangiso\s*=\s*(\s*([a-z]{2,3})(-[A-Z][A-Z])?)*' . $langcoun . '',
401                "$SRC_ROOT", "solenv", "inc/postset.mk",
402                # needs a duplicated pair of backslashes to produce a literal \\
403                ('^\s*completelangiso\s*=', '^\s*$', '^\s*' . $langcoun . '\s*\\\\*$'));
404
405            # @noMSLocaleLangs = ( "br", "bs", ... )
406            grepFile(
407                '^\s*@noMSLocaleLangs\s*=\s*\(\s*(\s*"([a-z]{2,3})(-[A-Z][A-Z])?"\s*,?)*' . $langcoun . '',
408                "$SRC_ROOT", "solenv", "bin/modules/installer/globals.pm",
409                ('^\s*@noMSLocaleLangs\s*=', '\)\s*$', '"' . $langcoun . '"'));
410
411            # af    1252  1078   # Afrikaans
412            grepFile(
413                '^\s*' . $langcoun . '',
414                "$SRC_ROOT", "setup_native", "source/win32/msi-encodinglist.txt", ());
415        }
416    }
417    return 0;
418}
419
420main();
421