1#**************************************************************
2#
3#  Licensed to the Apache Software Foundation (ASF) under one
4#  or more contributor license agreements.  See the NOTICE file
5#  distributed with this work for additional information
6#  regarding copyright ownership.  The ASF licenses this file
7#  to you under the Apache License, Version 2.0 (the
8#  "License"); you may not use this file except in compliance
9#  with the License.  You may obtain a copy of the License at
10#
11#    http://www.apache.org/licenses/LICENSE-2.0
12#
13#  Unless required by applicable law or agreed to in writing,
14#  software distributed under the License is distributed on an
15#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16#  KIND, either express or implied.  See the License for the
17#  specific language governing permissions and limitations
18#  under the License.
19#
20#**************************************************************
21
22package ExtensionsLst;
23
24use File::Spec;
25use LWP::UserAgent;
26use Digest::MD5;
27
28use base 'Exporter';
29our @EXPORT = qw(DownloadExtensions GetExtensionList);
30
31
32=head1 NAME
33
34    ExtensionLst.pm - Functionality for the interpretation of the main/extensions.lst file.
35
36=head1 SYNOPSIS
37
38    For downloading extensions during build setup:
39
40    use ExtensionsLst;
41    ExtensionsLst::DownloadExtensions();
42
43    For including extensions into the pack set:
44
45    use ExtensionsLst;
46    ExtensionsLst::GetExtensionList(@language_list);
47
48=head1 DESCRIPTION
49
50    The contents of the extensions.lst file are used at two times in
51    the process of building pack sets.
52
53    Once at the beginning right after configure is run the
54    DownloadExtensions() function determines the list of extensions
55    that are not present locally and downloads them.
56
57    The second time is after all modules are built (and the locally
58    built extensions are present) and the pack sets are created.  For
59    every language (or sets of lanugages) a set of extensions is
60    collected and included into the pack set.
61
62    The content of the extensions.lst file is ignored when the --with-extensions option is given to configure.
63
64=cut
65
66
67# Number of the line in extensions.lst that is currently being processed.
68my $LineNo = 0;
69
70# Set to 1 to get a more verbose output, the default is 0.
71my $Debug = 0;
72
73
74=head3 Prepare
75    Check that some environment variables are properly set and then return the file name
76    of the 'extensions.lst' file, typically located in main/ beside 'ooo.lst'.
77=cut
78sub Prepare ()
79{
80    die "can not access environment varianle SRC_ROOT" if ! defined $ENV{'SRC_ROOT'};
81    die "can not determine the platform: INPATH is not set" if ! defined $ENV{'INPATH'};
82    die "can not determine solver directory: OUTDIR is not set" if ! defined $ENV{'OUTDIR'};
83    die "can not determine download directory: TARFILE_LOCATION is not set" if ! defined $ENV{'TARFILE_LOCATION'};
84
85    my $candidate = File::Spec->catfile($ENV{SRC_ROOT}, "extensions.lst");
86    die "can not read file $candidate" if ! -r $candidate;
87    return $candidate;
88}
89
90
91
92=head 3 EvaluateOperator
93    Evaluate a single test statement like 'language = en.*'.
94    Special handling for operators '=', '==', and 'eq' which are all mapped to '=~'.
95    Therefore the right hand side may be a perl regexp.  It is prefixed with '^'.
96
97    Other operators are at the moment only supported in the way that they are evaluated via eval().
98=cut
99sub EvaluateOperator ($$$)
100{
101    my ($left,$operator,$right) = @_;
102
103    my $result;
104
105    if ($operator =~ /^(=|==|eq)$/)
106    {
107        if ($left =~ /^$right$/)
108        {
109            $result = 1;
110        }
111        else
112        {
113            $result = 0;
114        }
115    }
116    elsif (eval($left.$operator.$right))
117    {
118        $result = 1;
119    }
120    else
121    {
122        $result = 0;
123    }
124
125    return $result;
126}
127
128
129
130
131=head EvaluateTerm
132    Evaluate a string that contains a simple test term of the form
133    left operator right
134    with arbitrary spacing allowed around and between the three parts.
135
136    The left hand side is specially handled:
137
138    - When the left hand side is 'language' then it is replaced by
139    any of the given languages in turn.  When the term evaluates to true for any of the languages then
140    true is returned.  False is returned only when none of the given languages matches.
141
142    - When the left hand side consists only of upper case letters, digits, and '_' then it is
143    interpreted as the name of a environment variable.  It is replaced by its value before the term
144    is evaluated.
145
146    - Any other left hand side is an error (at the moment.)
147=cut
148sub EvaluateTerm ($$)
149{
150    my $term = shift;
151    my $languages = shift;
152
153    my $result;
154
155    if ($term =~ /^\s*(\w+)\s*(\W+)\s*(.*?)\s*$/)
156    {
157        my ($left,$operator,$right) = ($1,$2,$3);
158
159        if ($operator !~ /^=|==|eq$/)
160        {
161            die "unsupported operator $operator on line $LineNo";
162        }
163
164        die "no right side in condition on line $LineNo ($term)" if ! defined $right;
165
166        if ($left =~ /^[A-Z_0-9]+$/)
167        {
168            # Uppercase words are interpreted as environment variables.
169            my $left_value = $ENV{$left};
170            $left_value = "" if ! defined $left_value;
171
172            # We can check whether the condition is fullfilled right now.
173            $result = EvaluateOperator($left_value, $operator, $right);
174        }
175        elsif ($left eq "language")
176        {
177            if ($right eq "all")
178            {
179                $result = 1;
180            }
181            elsif ($#$languages>=0)
182            {
183                $result = 0;
184                for my $language (@$languages)
185                {
186                    # Unify naming schemes.
187                    $language =~ s/_/-/g;
188                    $right =~ s/_/-/g;
189
190                    # Evaluate language regexp.
191                    $result = EvaluateOperator($language, $operator, $right) ? 1 : 0;
192                    last if $result;
193                }
194            }
195            else
196            {
197                # The set of languages is not yet known.  Return true
198                # to include the following entries.
199                $result = 1;
200            }
201        }
202        elsif ($left eq "platform")
203        {
204            if ($right eq "all")
205            {
206                $result = 1;
207            }
208            else
209            {
210                # Evaluate platform regexp.
211                $result = EvaluateOperator($ENV{'INPATH'}, $operator, $right) ? 1 : 0;
212            }
213        }
214        else
215        {
216            die "can not handle left hand side $left on line $LineNo";
217        }
218    }
219    else
220    {
221        die "syntax error in expression on line $LineNo";
222    }
223
224    return $result;
225}
226
227
228
229
230=head3 EvaluateSelector
231    Evaluate the given expression that is expected to be list of terms of the form
232        left-hand-side operator right-hand-side
233    that are separated by logical operators
234        && ||
235    The expression is lazy evaluated left to right.
236=cut
237sub EvaluateSelector($$);
238sub EvaluateSelector($$)
239{
240    my $expression = shift;
241    my $languages = shift;
242
243    my $result = "";
244
245    if ($expression =~ /^\s*$/)
246    {
247        # Empty selector is always true.
248        return 1;
249    }
250    elsif ($expression =~ /^\s*(.*?)(&&|\|\|)\s*(.*)$/)
251    {
252        my ($term, $operator) = ($1,$2);
253        $expression = $3;
254
255        my $left_result = EvaluateTerm($term, $languages);
256        # Lazy evaluation of &&
257        return 0 if ($operator eq "&&" && !$left_result);
258        # Lazy evaluation of ||
259        return 1 if ($operator eq "||" && $left_result);
260        my $right_result = EvaluateSelector($expression, $languages);
261
262        if ($operator eq "&&")
263        {
264            return $left_result && $right_result;
265        }
266        else
267        {
268            return $left_result || $right_result;
269        }
270    }
271    elsif ($expression =~ /^\s*(.+?)\s$/)
272    {
273        return EvaluateTerm($1, $languages);
274    }
275    else
276    {
277        die "invalid expression syntax on line $LineNo ($expression)";
278    }
279}
280
281
282
283
284=head3 ProcessURL
285    Check that the given line contains an optional MD5 sum followed by
286    a URL for one of the protocols file, http, https,
287    followed by an optional file name (which is necessary when it is not the last part of the URL.)
288    Return an array that contains the protocol, the name, the original
289    URL, and the MD5 sum from the beginning of the line.
290    The name of the URL depends on its protocol:
291    - for http(s) the part of the URL after the last '/'.
292    - for file URLS it is everything after the protocol://
293=cut
294sub ProcessURL ($)
295{
296    my $line = shift;
297
298    # Check that we are looking at a valid URL.
299    if ($line =~ /^\s*((\w{32})\s+)?([a-zA-Z]+)(:\/\/.*?\/)([^\/ \t]+)(\s+\"[^\"]+\")?\s*$/)
300    {
301        my ($md5, $protocol, $url_name, $optional_name) = ($2,$3,$5,$6);
302        my $URL = $3.$4.$5;
303
304        die "invalid URL protocol on line $LineNo:\n$line\n" if $protocol !~ /(file|http|https)/;
305
306        # Determine the name.  If an optional name is given then use that.
307        if (defined $optional_name)
308        {
309            die if $optional_name !~ /^\s+\"([^\"]+)\"$/;
310            $name = $1;
311        }
312        else
313        {
314            if ($protocol eq "file")
315            {
316                # For file URLs we use everything after :// as name, or the .
317                $URL =~ /:\/\/(.*)$/;
318                $name = $1;
319            }
320            else
321            {
322                # For http and https use the last part of the URL.
323                $name = $url_name;
324            }
325        }
326
327        return [$protocol, $name, $URL, $md5];
328    }
329    else
330    {
331        die "invalid URL at line $LineNo:\n$line\n";
332    }
333}
334
335
336
337
338=head3 ParseExtensionsLst
339    Parse the extensions.lst file.
340
341    Lines that contain only spaces or comments or are empty are
342    ignored.
343
344    Lines that contain a selector, ie a test enclosed in brackets, are
345    evaluated.  The following lines, until the next selector, are
346    ignored when the selector evaluates to false.  When an empty list
347    of languages is given then any 'language=...' test is evaluated as
348    true.
349
350    All other lines are expected to contain a URL optionally preceded
351    by an MD5 sum.
352=cut
353sub ParseExtensionsLst ($$)
354{
355    my $file_name = shift;
356    my $languages = shift;
357
358    open my $in, "$file_name";
359
360    my $current_selector_value = 1;
361    my @URLs = ();
362
363    while (<$in>)
364    {
365        my $line = $_;
366        $line =~ s/[\r\n]+//g;
367        ++$LineNo;
368
369        # Strip away comments.
370        next if $line =~ /^\s*#/;
371
372        # Ignore empty lines.
373        next if $line =~ /^\s*$/;
374
375        # Process selectors
376        if ($line =~ /^\s*\[\s*(.*)\s*\]\s*$/)
377        {
378            $current_selector_value = EvaluateSelector($1, $languages);
379        }
380        else
381        {
382            if ($current_selector_value)
383            {
384                push @URLs, ProcessURL($line);
385            }
386        }
387    }
388
389    close $in;
390
391    return @URLs;
392}
393
394
395
396
397=head3 Download
398    Download a set of files that are specified via URLs.
399
400    File URLs are ignored here because they point to extensions that have not yet been built.
401
402    For http URLs there may be an optional MD5 checksum.  If it is present then downloaded
403    files that do not match that checksum are an error and lead to abortion of the current process.
404    Files that have already been downloaded are not downloaded again.
405=cut
406sub Download (@)
407{
408    my @urls = @_;
409
410    my @missing = ();
411    my $download_path = $ENV{'TARFILE_LOCATION'};
412
413    # First check which (if any) extensions have already been downloaded.
414    for my $entry (@urls)
415    {
416        my ($protocol, $name, $URL, $md5sum) = @{$entry};
417
418        # We can not check the existence of file URLs because they point to extensions that
419        # have yet to be built.
420
421        next if $protocol ne "http";
422        my $candidate = File::Spec->catfile($download_path, $name);
423        if ( ! -f $candidate)
424        {
425            push @missing, $entry;
426        }
427    }
428    if ($#missing >= 0)
429    {
430        printf "downloading %d missing extension%s\n", $#missing+1, $#missing>0 ? "s" : "";
431        if ( ! -d $download_path)
432        {
433            mkdir File::Spec->catpath($download_path, "tmp")
434                || die "can not create tmp subdirectory of $download_path";
435        }
436    }
437    else
438    {
439        print "all downloadable extensions present\n";
440        return;
441    }
442
443    # Download the missing files.
444    for my $entry (@missing)
445    {
446        my ($protocol, $name, $URL, $md5sum) = @{$entry};
447
448        # Open a .part file for writing.
449        my $filename = File::Spec->catfile($download_path, $name);
450        my $temporary_filename = $filename . ".part";
451        print "downloading to $temporary_filename\n";
452        open my $out, ">$temporary_filename";
453        binmode($out);
454
455        # Prepare md5
456        my $md5 = Digest::MD5->new();
457
458        # Download the extension.
459        my $agent = LWP::UserAgent->new();
460        $agent->timeout(120);
461        $agent->show_progress(1);
462        my $last_was_redirect = 0;
463        $agent->add_handler('response_redirect'
464                            => sub{
465                                $last_was_redirect = 1;
466                                return;
467                            });
468        $agent->add_handler('response_data'
469                            => sub{
470                                if ($last_was_redirect)
471                                {
472                                    $last_was_redirect = 0;
473                                    # Throw away the data we got so far.
474                                    $md5->reset();
475                                    close $out;
476                                    open $out, ">$temporary_filename";
477                                    binmode($out);
478                                }
479                                my($response,$agent,$h,$data)=@_;
480                                print $out $data;
481                                $md5->add($data);
482                            });
483        my $response = $agent->get($URL);
484        close $out;
485
486        # When download was successfull then check the md5 checksum and rename the .part file
487        # into the actual extension name.
488        if ($response->is_success())
489        {
490            if (defined $md5sum && length($md5sum)==32)
491            {
492                my $file_md5 = $md5->hexdigest();
493                if ($md5sum eq $file_md5)
494                {
495                    print "md5 is OK\n";
496                }
497                else
498                {
499                    unlink($temporary_filename) if ! $Debug;
500                    die "downloaded file has the wrong md5 checksum: $file_md5 instead of $md5sum";
501                }
502            }
503            else
504            {
505                print "md5 is not present\n";
506                printf "   is %s, length is %d\n", $md5sum, length(md5sum);
507            }
508
509            rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename";
510        }
511        else
512        {
513            die "failed to download $URL";
514        }
515    }
516}
517
518
519
520
521=head3 DownloadExtensions
522    This function is intended to be called during bootstrapping.  It extracts the set of extensions
523    that will be used later, when the installation sets are built.
524=cut
525sub DownloadExtensions ()
526{
527    if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'}
528         && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES")
529    {
530        my $full_file_name = Prepare();
531        my @urls = ParseExtensionsLst($full_file_name, []);
532        Download(@urls);
533    }
534    else
535    {
536        print "bundling of dictionaries is disabled.\n";
537    }
538}
539
540
541
542
543=head3 GetExtensionList
544    This function is intended to be called when installation sets are built.
545    It expects two arguments:
546        - A protocol selector.  Http URLs reference remotely located
547          extensions that will be bundled as-is into the installation
548          sets due to legal reasons. They are installed on first start
549          of the office.
550          File URLs reference extensions whose source code is part of
551          the repository.  They are pre-registered when installation
552          sets are created.  Their installation is finished when the
553          office is first started.
554        - A set of languages.  This set determines which extensions
555          are returned and then included in an installation set.
556=cut
557sub GetExtensionList ($@)
558{
559    my $protocol_selector = shift;
560    my @language_list = @_;
561
562    if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'}
563         && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES")
564    {
565        my $full_file_name = Prepare();
566        my @urls = ParseExtensionsLst($full_file_name, \@language_list);
567
568        my @result = ();
569        for my $entry (@urls)
570        {
571            my ($protocol, $name, $URL, $md5sum) = @{$entry};
572            if ($protocol =~ /^$protocol_selector$/)
573            {
574                push @result, $name;
575            }
576        }
577
578        return @result;
579    }
580    else
581    {
582        # Bundling of dictionaires is disabled.
583    }
584}
585
586
5871;
588