1#**************************************************************
2#
3#  Licensed to the Apache Software Foundation (ASF) under one
4#  or more contributor license agreements.  See the NOTICE file
5#  distributed with this work for additional information
6#  regarding copyright ownership.  The ASF licenses this file
7#  to you under the Apache License, Version 2.0 (the
8#  "License"); you may not use this file except in compliance
9#  with the License.  You may obtain a copy of the License at
10#
11#    http://www.apache.org/licenses/LICENSE-2.0
12#
13#  Unless required by applicable law or agreed to in writing,
14#  software distributed under the License is distributed on an
15#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16#  KIND, either express or implied.  See the License for the
17#  specific language governing permissions and limitations
18#  under the License.
19#
20#**************************************************************
21
22package ExtensionsLst;
23
24use File::Spec;
25use LWP::UserAgent;
26use Digest::MD5;
27
28use base 'Exporter';
29our @EXPORT = qw(DownloadExtensions GetExtensionList);
30
31
32=head1 NAME
33
34    ExtensionLst.pm - Functionality for the interpretation of the main/extensions.lst file.
35
36=head1 SYNOPSIS
37
38    For downloading extensions during build setup:
39
40    use ExtensionsLst;
41    ExtensionsLst::DownloadExtensions();
42
43    For including extensions into the pack set:
44
45    use ExtensionsLst;
46    ExtensionsLst::GetExtensionList(@language_list);
47
48=head1 DESCRIPTION
49
50    The contents of the extensions.lst file are used at two times in
51    the process of building pack sets.
52
53    Once at the beginning right after configure is run the
54    DownloadExtensions() function determines the list of extensions
55    that are not present locally and downloads them.
56
57    The second time is after all modules are built (and the locally
58    built extensions are present) and the pack sets are created.  For
59    every language (or sets of lanugages) a set of extensions is
60    collected and included into the pack set.
61
62    The content of the extensions.lst file is ignored when the --with-extensions option is given to configure.
63
64=cut
65
66
67# Number of the line in extensions.lst that is currently being processed.
68my $LineNo = 0;
69
70
71=head3 Prepare
72    Check that some environment variables are properly set and then return the file name
73    of the 'extensions.lst' file, typically located in main/ beside 'ooo.lst'.
74=cut
75sub Prepare ()
76{
77    die "can not access environment varianle SRC_ROOT" if ! defined $ENV{'SRC_ROOT'};
78    die "can not determine the platform: INPATH is not set" if ! defined $ENV{'INPATH'};
79    die "can not determine solver directory: OUTDIR is not set" if ! defined $ENV{'OUTDIR'};
80    die "can not determine download directory: TARFILE_LOCATION is not set" if ! defined $ENV{'TARFILE_LOCATION'};
81
82    my $candidate = File::Spec->catfile($ENV{SRC_ROOT}, "extensions.lst");
83    die "can not read file $candidate" if ! -r $candidate;
84    return $candidate;
85}
86
87
88
89=head 3 EvaluateOperator
90    Evaluate a single test statement like 'language = en.*'.
91    Special handling for operators '=', '==', and 'eq' which are all mapped to '=~'.
92    Therefore the right hand side may be a perl regexp.  It is prefixed with '^'.
93
94    Other operators are at the moment only supported in the way that they are evaluated via eval().
95=cut
96sub EvaluateOperator ($$$)
97{
98    my ($left,$operator,$right) = @_;
99
100    my $result;
101
102    if ($operator =~ /^(=|==|eq)$/)
103    {
104        if ($left =~ /^$right$/)
105        {
106            $result = 1;
107        }
108        else
109        {
110            $result = 0;
111        }
112    }
113    elsif (eval($left.$operator.$right))
114    {
115        $result = 1;
116    }
117    else
118    {
119        $result = 0;
120    }
121
122    return $result;
123}
124
125
126
127
128=head EvaluateTerm
129    Evaluate a string that contains a simple test term of the form
130    left operator right
131    with arbitrary spacing allowed around and between the three parts.
132
133    The left hand side is specially handled:
134
135    - When the left hand side is 'language' then it is replaced by
136    any of the given languages in turn.  When the term evaluates to true for any of the languages then
137    true is returned.  False is returned only when none of the given languages matches.
138
139    - When the left hand side consists only of upper case letters, digits, and '_' then it is
140    interpreted as the name of a environment variable.  It is replaced by its value before the term
141    is evaluated.
142
143    - Any other left hand side is an error (at the moment.)
144=cut
145sub EvaluateTerm ($$)
146{
147    my $term = shift;
148    my $languages = shift;
149
150    my $result;
151
152    if ($term =~ /^\s*(\w+)\s*(\W+)\s*(.*?)\s*$/)
153    {
154        my ($left,$operator,$right) = ($1,$2,$3);
155
156        if ($operator !~ /^=|==|eq$/)
157        {
158            die "unsupported operator $operator on line $LineNo";
159        }
160
161        die "no right side in condition on line $LineNo ($term)" if ! defined $right;
162
163        if ($left =~ /^[A-Z_0-9]+$/)
164        {
165            # Uppercase words are interpreted as environment variables.
166            my $left_value = $ENV{$left};
167            $left_value = "" if ! defined $left_value;
168
169            # We can check whether the condition is fullfilled right now.
170            $result = EvaluateOperator($left_value, $operator, $right);
171        }
172        elsif ($left eq "language")
173        {
174            if ($right eq "all")
175            {
176                $result = 1;
177            }
178            elsif ($#$languages>=0)
179            {
180                $result = 0;
181                for my $language (@$languages)
182                {
183                    # Unify naming schemes.
184                    $language =~ s/_/-/g;
185                    $right =~ s/_/-/g;
186
187                    # Evaluate language regexp.
188                    $result = EvaluateOperator($language, $operator, $right) ? 1 : 0;
189                    last if $result;
190                }
191            }
192            else
193            {
194                # The set of languages is not yet known.  Return true
195                # to include the following entries.
196                $result = 1;
197            }
198        }
199        elsif ($left eq "platform")
200        {
201            if ($right eq "all")
202            {
203                $result = 1;
204            }
205            else
206            {
207                # Evaluate platform regexp.
208                $result = EvaluateOperator($ENV{'INPATH'}, $operator, $right) ? 1 : 0;
209            }
210        }
211        else
212        {
213            die "can not handle left hand side $left on line $LineNo";
214        }
215    }
216    else
217    {
218        die "syntax error in expression on line $LineNo";
219    }
220
221    return $result;
222}
223
224
225
226
227=head3 EvaluateSelector
228    Evaluate the given expression that is expected to be list of terms of the form
229        left-hand-side operator right-hand-side
230    that are separated by logical operators
231        && ||
232    The expression is lazy evaluated left to right.
233=cut
234sub EvaluateSelector($$);
235sub EvaluateSelector($$)
236{
237    my $expression = shift;
238    my $languages = shift;
239
240    my $result = "";
241
242    if ($expression =~ /^\s*$/)
243    {
244        # Empty selector is always true.
245        return 1;
246    }
247    elsif ($expression =~ /^\s*(.*?)(&&|\|\|)\s*(.*)$/)
248    {
249        my ($term, $operator) = ($1,$2);
250        $expression = $3;
251
252        my $left_result = EvaluateTerm($term, $languages);
253        # Lazy evaluation of &&
254        return 0 if ($operator eq "&&" && !$left_result);
255        # Lazy evaluation of ||
256        return 1 if ($operator eq "||" && $left_result);
257        my $right_result = EvaluateSelector($expression, $languages);
258
259        if ($operator eq "&&")
260        {
261            return $left_result && $right_result;
262        }
263        else
264        {
265            return $left_result || $right_result;
266        }
267    }
268    elsif ($expression =~ /^\s*(.+?)\s$/)
269    {
270        return EvaluateTerm($1, $languages);
271    }
272    else
273    {
274        die "invalid expression syntax on line $LineNo ($expression)";
275    }
276}
277
278
279
280
281=head3 ProcessURL
282    Check that the given line contains an optional MD5 sum followed by
283    a URL for one of the protocols file, http, https
284    Return an array that contains the protocol, the name, the original
285    URL, and the MD5 sum from the beginning of the line.
286    The name of the URL depends on its protocol:
287    - for http(s) the part of the URL after the last '/'.
288    - for file URLS it is everything after the protocol://
289=cut
290sub ProcessURL ($)
291{
292    my $line = shift;
293
294    # Check that we are looking at a valid URL.
295    if ($line =~ /^\s*(\w{32}\s+)?([a-zA-Z]+)(:\/\/.*?\/)([^\/ \t]+)\s*$/)
296    {
297        my ($md5, $protocol, $name) = ($1,$2,$4);
298        my $URL = $2.$3.$4;
299
300        die "invalid URL protocol on line $LineNo:\n$line\n" if $protocol !~ /(file|http|https)/;
301
302        # For file URLs we use everything after :// as name.
303        if ($protocol eq "file")
304        {
305            $URL =~ /:\/\/(.*)$/;
306            $name = $1;
307        }
308
309        return [$protocol, $name, $URL, $md5];
310    }
311    else
312    {
313        die "invalid URL at line $LineNo:\n$line\n";
314    }
315}
316
317
318
319
320=head3 ParseExtensionsLst
321    Parse the extensions.lst file.
322
323    Lines that contain only spaces or comments or are empty are
324    ignored.
325
326    Lines that contain a selector, ie a test enclosed in brackets, are
327    evaluated.  The following lines, until the next selector, are
328    ignored when the selector evaluates to false.  When an empty list
329    of languages is given then any 'language=...' test is evaluated as
330    true.
331
332    All other lines are expected to contain a URL optionally preceded
333    by an MD5 sum.
334=cut
335sub ParseExtensionsLst ($$)
336{
337    my $file_name = shift;
338    my $languages = shift;
339
340    open my $in, "$file_name";
341
342    my $current_selector_value = 1;
343    my @URLs = ();
344
345    while (<$in>)
346    {
347        my $line = $_;
348        $line =~ s/[\r\n]+//g;
349        ++$LineNo;
350
351        # Strip away comments.
352        next if $line =~ /^\s*#/;
353
354        # Ignore empty lines.
355        next if $line =~ /^\s*$/;
356
357        # Process selectors
358        if ($line =~ /^\s*\[\s*(.*)\s*\]\s*$/)
359        {
360            $current_selector_value = EvaluateSelector($1, $languages);
361        }
362        else
363        {
364            if ($current_selector_value)
365            {
366                push @URLs, ProcessURL($line);
367            }
368        }
369    }
370
371    close $in;
372
373    return @URLs;
374}
375
376
377
378
379=head3 Download
380    Download a set of files that are specified via URLs.
381
382    File URLs are ignored here because they point to extensions that have not yet been built.
383
384    For http URLs there may be an optional MD5 checksum.  If it is present then downloaded
385    files that do not match that checksum are an error and lead to abortion of the current process.
386    Files that have already been downloaded are not downloaded again.
387=cut
388sub Download (@)
389{
390    my @urls = @_;
391
392    my @missing = ();
393    my $download_path = $ENV{'TARFILE_LOCATION'};
394
395    # First check which (if any) extensions have already been downloaded.
396    for my $entry (@urls)
397    {
398        my ($protocol, $name, $URL, $md5sum) = @{$entry};
399
400        # We can not check the existence of file URLs because they point to extensions that
401        # have yet to be built.
402
403        next if $protocol ne "http";
404        my $candidate = File::Spec->catfile($download_path, $name);
405        if ( ! -f $candidate)
406        {
407            push @missing, $entry;
408        }
409    }
410    if ($#missing >= 0)
411    {
412        printf "downloading %d missing extension%s\n", $#missing+1, $#missing>0 ? "s" : "";
413        if ( ! -d $download_path)
414        {
415            mkdir File::Spec->catpath($download_path, "tmp")
416                || die "can not create tmp subdirectory of $download_path";
417        }
418    }
419    else
420    {
421        print "all downloadable extensions present\n";
422        return;
423    }
424
425    # Download the missing files.
426    for my $entry (@missing)
427    {
428        my ($protocol, $name, $URL, $md5sum) = @{$entry};
429
430        # Open a .part file for writing.
431        my $filename = File::Spec->catfile($download_path, $name);
432        my $temporary_filename = $filename . ".part";
433        open my $out, ">$temporary_filename";
434        binmode($out);
435
436        # Prepare md5
437        my $md5 = Digest::MD5->new();
438
439        # Download the extension.
440        my $agent = LWP::UserAgent->new();
441        $agent->timeout(10);
442        $agent->show_progress(1);
443        $agent->add_handler('response_data'
444                            => sub{
445                                my($response,$agent,$h,$data)=@_;
446                                print $out $data;
447                                $md5->add($data);
448                            });
449        my $response = $agent->get($URL);
450        close $out;
451
452        # When download was successfull then check the md5 checksum and rename the .part file
453        # into the actual extension name.
454        if ($response->is_success())
455        {
456            if (defined $md5sum && length($md5sum)==32)
457            {
458                if ($md5sum eq $md5->digest())
459                {
460                    print "md5 is OK\n";
461                }
462                else
463                {
464                    unlink($temporary_filename);
465                    die "downloaded file has the wrong md5 checksum";
466                }
467            }
468
469            rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename";
470        }
471        else
472        {
473            die "failed to download $URL";
474        }
475    }
476}
477
478
479
480
481=head3 DownloadExtensions
482    This function is intended to be called during bootstrapping.  It extracts the set of extensions
483    that will be used later, when the installation sets are built.
484=cut
485sub DownloadExtensions ()
486{
487    my $full_file_name = Prepare();
488    my @urls = ParseExtensionsLst($full_file_name, []);
489    Download(@urls);
490}
491
492
493
494
495=head3 GetExtensionList
496    This function is intended to be called when installation sets are built.
497    It expects two arguments:
498        - A protocol selector.  Http URLs reference remotely located
499          extensions that will be bundled as-is into the installation
500          sets due to legal reasons. They are installed on first start
501          of the office.
502          File URLs reference extensions whose source code is part of
503          the repository.  They are pre-registered when installation
504          sets are created.  Their installation is finished when the
505          office is first started.
506        - A set of languages.  This set determines which extensions
507          are returned and then included in an installation set.
508=cut
509sub GetExtensionList ($@)
510{
511    my $protocol_selector = shift;
512    my @language_list = @_;
513
514    my $full_file_name = Prepare();
515    my @urls = ParseExtensionsLst($full_file_name, \@language_list);
516
517    my @result = ();
518    for my $entry (@urls)
519    {
520        my ($protocol, $name, $URL, $md5sum) = @{$entry};
521        if ($protocol =~ /^$protocol_selector$/)
522        {
523            push @result, $name;
524        }
525    }
526
527    return @result;
528}
529
530
5311;
532