1#**************************************************************
2#
3#  Licensed to the Apache Software Foundation (ASF) under one
4#  or more contributor license agreements.  See the NOTICE file
5#  distributed with this work for additional information
6#  regarding copyright ownership.  The ASF licenses this file
7#  to you under the Apache License, Version 2.0 (the
8#  "License"); you may not use this file except in compliance
9#  with the License.  You may obtain a copy of the License at
10#
11#    http://www.apache.org/licenses/LICENSE-2.0
12#
13#  Unless required by applicable law or agreed to in writing,
14#  software distributed under the License is distributed on an
15#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16#  KIND, either express or implied.  See the License for the
17#  specific language governing permissions and limitations
18#  under the License.
19#
20#**************************************************************
21
22package ExtensionsLst;
23
24use File::Spec;
25use LWP::UserAgent;
26use Digest::MD5;
27
28use base 'Exporter';
29our @EXPORT = qw(DownloadExtensions GetExtensionList);
30
31
32=head1 NAME
33
34    ExtensionLst.pm - Functionality for the interpretation of the main/extensions.lst file.
35
36=head1 SYNOPSIS
37
38    For downloading extensions during build setup:
39
40    use ExtensionsLst;
41    ExtensionsLst::DownloadExtensions();
42
43    For including extensions into the pack set:
44
45    use ExtensionsLst;
46    ExtensionsLst::GetExtensionList(@language_list);
47
48=head1 DESCRIPTION
49
50    The contents of the extensions.lst file are used at two times in
51    the process of building pack sets.
52
53    Once at the beginning right after configure is run the
54    DownloadExtensions() function determines the list of extensions
55    that are not present locally and downloads them.
56
57    The second time is after all modules are built (and the locally
58    built extensions are present) and the pack sets are created.  For
59    every language (or sets of lanugages) a set of extensions is
60    collected and included into the pack set.
61
62    The content of the extensions.lst file is ignored when the --with-extensions option is given to configure.
63
64=cut
65
66
67# Number of the line in extensions.lst that is currently being processed.
68my $LineNo = 0;
69
70# Set to 1 to get a more verbose output, the default is 0.
71my $Debug = 0;
72
73
74=head3 Prepare
75    Check that some environment variables are properly set and then return the file name
76    of the 'extensions.lst' file, typically located in main/ beside 'ooo.lst'.
77=cut
78sub Prepare ()
79{
80    die "can not access environment varianle SRC_ROOT" if ! defined $ENV{'SRC_ROOT'};
81    die "can not determine the platform: INPATH is not set" if ! defined $ENV{'INPATH'};
82    die "can not determine solver directory: OUTDIR is not set" if ! defined $ENV{'OUTDIR'};
83    die "can not determine download directory: TARFILE_LOCATION is not set" if ! defined $ENV{'TARFILE_LOCATION'};
84
85    my $candidate = File::Spec->catfile($ENV{SRC_ROOT}, "extensions.lst");
86    die "can not read file $candidate" if ! -r $candidate;
87    return $candidate;
88}
89
90
91
92=head 3 EvaluateOperator
93    Evaluate a single test statement like 'language = en.*'.
94    Special handling for operators '=', '==', and 'eq' which are all mapped to '=~'.
95    Therefore the right hand side may be a perl regexp.  It is prefixed with '^'.
96
97    Other operators are at the moment only supported in the way that they are evaluated via eval().
98=cut
99sub EvaluateOperator ($$$)
100{
101    my ($left,$operator,$right) = @_;
102
103    my $result;
104
105    if ($operator =~ /^(=|==|eq)$/)
106    {
107        if ($left =~ /^$right$/)
108        {
109            $result = 1;
110        }
111        else
112        {
113            $result = 0;
114        }
115    }
116    elsif (eval($left.$operator.$right))
117    {
118        $result = 1;
119    }
120    else
121    {
122        $result = 0;
123    }
124
125    return $result;
126}
127
128
129
130
131=head EvaluateTerm
132    Evaluate a string that contains a simple test term of the form
133    left operator right
134    with arbitrary spacing allowed around and between the three parts.
135
136    The left hand side is specially handled:
137
138    - When the left hand side is 'language' then it is replaced by
139    any of the given languages in turn.  When the term evaluates to true for any of the languages then
140    true is returned.  False is returned only when none of the given languages matches.
141
142    - When the left hand side consists only of upper case letters, digits, and '_' then it is
143    interpreted as the name of a environment variable.  It is replaced by its value before the term
144    is evaluated.
145
146    - Any other left hand side is an error (at the moment.)
147=cut
148sub EvaluateTerm ($$)
149{
150    my $term = shift;
151    my $languages = shift;
152
153    my $result;
154
155    if ($term =~ /^\s*(\w+)\s*(\W+)\s*(.*?)\s*$/)
156    {
157        my ($left,$operator,$right) = ($1,$2,$3);
158
159        if ($operator !~ /^=|==|eq$/)
160        {
161            die "unsupported operator $operator on line $LineNo";
162        }
163
164        die "no right side in condition on line $LineNo ($term)" if ! defined $right;
165
166        if ($left =~ /^[A-Z_0-9]+$/)
167        {
168            # Uppercase words are interpreted as environment variables.
169            my $left_value = $ENV{$left};
170            $left_value = "" if ! defined $left_value;
171
172            # We can check whether the condition is fulfilled right now.
173            $result = EvaluateOperator($left_value, $operator, $right);
174        }
175        elsif ($left eq "language")
176        {
177            if ($right eq "all")
178            {
179                $result = 1;
180            }
181            elsif ($#$languages>=0)
182            {
183                $result = 0;
184                for my $language (@$languages)
185                {
186                    # Unify naming schemes.
187                    $language =~ s/_/-/g;
188                    $right =~ s/_/-/g;
189
190                    # Evaluate language regexp.
191                    $result = EvaluateOperator($language, $operator, $right) ? 1 : 0;
192                    last if $result;
193                }
194            }
195            else
196            {
197                # The set of languages is not yet known.  Return true
198                # to include the following entries.
199                $result = 1;
200            }
201        }
202        elsif ($left eq "platform")
203        {
204            if ($right eq "all")
205            {
206                $result = 1;
207            }
208            else
209            {
210                # Evaluate platform regexp.
211                $result = EvaluateOperator($ENV{'INPATH'}, $operator, $right) ? 1 : 0;
212            }
213        }
214        else
215        {
216            die "can not handle left hand side $left on line $LineNo";
217        }
218    }
219    else
220    {
221        die "syntax error in expression on line $LineNo";
222    }
223
224    return $result;
225}
226
227
228
229
230=head3 EvaluateSelector
231    Evaluate the given expression that is expected to be list of terms of the form
232        left-hand-side operator right-hand-side
233    that are separated by logical operators
234        && ||
235    The expression is lazy evaluated left to right.
236=cut
237sub EvaluateSelector($$);
238sub EvaluateSelector($$)
239{
240    my $expression = shift;
241    my $languages = shift;
242
243    my $result = "";
244
245    if ($expression =~ /^\s*$/)
246    {
247        # Empty selector is always true.
248        return 1;
249    }
250    elsif ($expression =~ /^\s*(.*?)(&&|\|\|)\s*(.*)$/)
251    {
252        my ($term, $operator) = ($1,$2);
253        $expression = $3;
254
255        my $left_result = EvaluateTerm($term, $languages);
256        # Lazy evaluation of &&
257        return 0 if ($operator eq "&&" && !$left_result);
258        # Lazy evaluation of ||
259        return 1 if ($operator eq "||" && $left_result);
260        my $right_result = EvaluateSelector($expression, $languages);
261
262        if ($operator eq "&&")
263        {
264            return $left_result && $right_result;
265        }
266        else
267        {
268            return $left_result || $right_result;
269        }
270    }
271    elsif ($expression =~ /^\s*(.+?)\s*$/)
272    {
273        return EvaluateTerm($1, $languages);
274    }
275    else
276    {
277        die "invalid expression syntax on line $LineNo ($expression)";
278    }
279}
280
281
282
283
284=head3 ProcessURL
285    Check that the given line contains an optional MD5 sum followed by
286    a URL for one of the protocols file, http, https,
287    followed by an optional file name (which is necessary when it is not the last part of the URL.)
288    Return an array that contains the protocol, the name, the original
289    URL, and the MD5 sum from the beginning of the line.
290    The name of the URL depends on its protocol:
291    - for http(s) the part of the URL after the last '/'.
292    - for file URLS it is everything after the protocol://
293=cut
294sub ProcessURL ($)
295{
296    my $line = shift;
297
298    # Check that we are looking at a valid URL.
299    if ($line =~ /^\s*((\w{32})\s+)?([a-zA-Z]+)(:\/\/.*?\/)([^\/ \t]+)(\s+\"[^\"]+\")?\s*$/)
300    {
301        my ($md5, $protocol, $url_name, $optional_name) = ($2,$3,$5,$6);
302        my $URL = $3.$4.$5;
303
304        die "invalid URL protocol on line $LineNo:\n$line\n" if $protocol !~ /(file|http|https)/;
305
306        # Determine the name.  If an optional name is given then use that.
307        if (defined $optional_name)
308        {
309            die if $optional_name !~ /^\s+\"([^\"]+)\"$/;
310            $name = $1;
311        }
312        else
313        {
314            if ($protocol eq "file")
315            {
316                # For file URLs we use everything after :// as name, or the .
317                $URL =~ /:\/\/(.*)$/;
318                $name = $1;
319            }
320            else
321            {
322                # For http and https use the last part of the URL.
323                $name = $url_name;
324            }
325        }
326
327        return [$protocol, $name, $URL, $md5];
328    }
329    else
330    {
331        die "invalid URL at line $LineNo:\n$line\n";
332    }
333}
334
335
336
337
338=head3 ParseExtensionsLst
339    Parse the extensions.lst file.
340
341    Lines that contain only spaces or comments or are empty are
342    ignored.
343
344    Lines that contain a selector, ie a test enclosed in brackets, are
345    evaluated.  The following lines, until the next selector, are
346    ignored when the selector evaluates to false.  When an empty list
347    of languages is given then any 'language=...' test is evaluated as
348    true.
349
350    All other lines are expected to contain a URL optionally preceded
351    by an MD5 sum.
352=cut
353sub ParseExtensionsLst ($$)
354{
355    my $file_name = shift;
356    my $languages = shift;
357
358    open my $in, "$file_name";
359
360    my $current_selector_value = 1;
361    my @URLs = ();
362
363    while (<$in>)
364    {
365        my $line = $_;
366        $line =~ s/[\r\n]+//g;
367        ++$LineNo;
368
369        # Strip away comments.
370        next if $line =~ /^\s*#/;
371
372        # Ignore empty lines.
373        next if $line =~ /^\s*$/;
374
375        # Process selectors
376        if ($line =~ /^\s*\[\s*(.*)\s*\]\s*$/)
377        {
378            $current_selector_value = EvaluateSelector($1, $languages);
379        }
380        else
381        {
382            if ($current_selector_value)
383            {
384                push @URLs, ProcessURL($line);
385            }
386        }
387    }
388
389    close $in;
390
391    return @URLs;
392}
393
394
395
396
397=head3 Download
398    Download a set of files that are specified via URLs.
399
400    File URLs are ignored here because they point to extensions that have not yet been built.
401
402    For http URLs there may be an optional MD5 checksum.  If it is present then downloaded
403    files that do not match that checksum are an error and lead to abortion of the current process.
404    Files that have already been downloaded are not downloaded again.
405=cut
406sub Download (@)
407{
408    my @urls = @_;
409
410    my @missing = ();
411    my $download_path = $ENV{'TARFILE_LOCATION'};
412
413    # First check which (if any) extensions have already been downloaded.
414    for my $entry (@urls)
415    {
416        my ($protocol, $name, $URL, $md5sum) = @{$entry};
417
418        # We can not check the existence of file URLs because they point to extensions that
419        # have yet to be built.
420
421        next if $protocol ne "http";
422        my $candidate = File::Spec->catfile($download_path, $name);
423        if ( ! -f $candidate)
424        {
425            push @missing, $entry;
426        }
427        elsif (defined $md5sum)
428        {
429            # Check that the MD5 sum is still correct.
430            # The datafile may have been updated with a new version of the extension that
431            # still has the same name but a different MD5 sum.
432            my $cur_oxt;
433            if ( ! open($cur_oxt, $candidate))
434            {
435                # Can not read the extension.  Download extension again.
436                push @missing, $entry;
437                unlink($candidate);
438            }
439            binmode($cur_oxt);
440            my $file_md5 = Digest::MD5->new->addfile(*$cur_oxt)->hexdigest;
441            close($cur_oxt);
442            if ($md5sum ne $file_md5)
443            {
444                # MD5 does not match.  Download extension again.
445                print "extension $name has wrong MD5 and will be updated\n";
446                push @missing, $entry;
447                unlink($candidate);
448            }
449        }
450    }
451    if ($#missing >= 0)
452    {
453        printf "downloading/updating %d extension%s\n", $#missing+1, $#missing>0 ? "s" : "";
454        if ( ! -d $download_path)
455        {
456            mkdir File::Spec->catpath($download_path, "tmp")
457                || die "can not create tmp subdirectory of $download_path";
458        }
459    }
460    else
461    {
462        print "all downloadable extensions present\n";
463        return;
464    }
465
466    # Download the missing files.
467    for my $entry (@missing)
468    {
469        my ($protocol, $name, $URL, $md5sum) = @{$entry};
470
471        # Open a .part file for writing.
472        my $filename = File::Spec->catfile($download_path, $name);
473        my $temporary_filename = $filename . ".part";
474        print "downloading to $temporary_filename\n";
475        open my $out, ">$temporary_filename";
476        binmode($out);
477
478        # Prepare md5
479        my $md5 = Digest::MD5->new();
480
481        # Download the extension.
482        my $agent = LWP::UserAgent->new();
483        $agent->timeout(120);
484        $agent->show_progress(1);
485        my $last_was_redirect = 0;
486        $agent->add_handler('response_redirect'
487                            => sub{
488                                $last_was_redirect = 1;
489                                return;
490                            });
491        $agent->add_handler('response_data'
492                            => sub{
493                                if ($last_was_redirect)
494                                {
495                                    $last_was_redirect = 0;
496                                    # Throw away the data we got so far.
497                                    $md5->reset();
498                                    close $out;
499                                    open $out, ">$temporary_filename";
500                                    binmode($out);
501                                }
502                                my($response,$agent,$h,$data)=@_;
503                                print $out $data;
504                                $md5->add($data);
505                            });
506        my $response = $agent->get($URL);
507        close $out;
508
509        # When download was successful then check the md5 checksum and rename the .part file
510        # into the actual extension name.
511        if ($response->is_success())
512        {
513            if (defined $md5sum && length($md5sum)==32)
514            {
515                my $file_md5 = $md5->hexdigest();
516                if ($md5sum eq $file_md5)
517                {
518                    print "md5 is OK\n";
519                }
520                else
521                {
522                    unlink($temporary_filename) if ! $Debug;
523                    die "downloaded file has the wrong md5 checksum: $file_md5 instead of $md5sum";
524                }
525            }
526            else
527            {
528                print "md5 is not present\n";
529                printf "   is %s, length is %d\n", $md5sum, length(md5sum);
530            }
531
532            rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename";
533        }
534        else
535        {
536            die "failed to download $URL";
537        }
538    }
539}
540
541
542
543
544=head3 DownloadExtensions
545    This function is intended to be called during bootstrapping.  It extracts the set of extensions
546    that will be used later, when the installation sets are built.
547    The set of languages is taken from the WITH_LANG environment variable.
548=cut
549sub DownloadExtensions ()
550{
551    if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'}
552         && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES")
553    {
554        my $full_file_name = Prepare();
555        my $languages = [ "en_US" ];
556        if (defined $ENV{'WITH_LANG'})
557        {
558            @$languages = split(/\s+/, $ENV{'WITH_LANG'});
559            foreach my $l (@$languages)
560            {
561                print "$l\n";
562            }
563        }
564        my @urls = ParseExtensionsLst($full_file_name, $languages);
565        Download(@urls);
566    }
567    else
568    {
569        print "bundling of dictionaries is disabled.\n";
570    }
571}
572
573
574
575
576=head3 GetExtensionList
577    This function is intended to be called when installation sets are built.
578    It expects two arguments:
579        - A protocol selector.  Http URLs reference remotely located
580          extensions that will be bundled as-is into the installation
581          sets due to legal reasons. They are installed on first start
582          of the office.
583          File URLs reference extensions whose source code is part of
584          the repository.  They are pre-registered when installation
585          sets are created.  Their installation is finished when the
586          office is first started.
587        - A set of languages.  This set determines which extensions
588          are returned and then included in an installation set.
589=cut
590sub GetExtensionList ($@)
591{
592    my $protocol_selector = shift;
593    my @language_list = @_;
594
595    if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'}
596         && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES")
597    {
598        my $full_file_name = Prepare();
599        my @urls = ParseExtensionsLst($full_file_name, \@language_list);
600
601        my @result = ();
602        for my $entry (@urls)
603        {
604            my ($protocol, $name, $URL, $md5sum) = @{$entry};
605            if ($protocol =~ /^$protocol_selector$/)
606            {
607                push @result, $name;
608            }
609        }
610
611        return @result;
612    }
613    else
614    {
615        # Bundling of dictionaires is disabled.
616    }
617
618    return ();
619}
620
621
6221;
623