1c6dedb65SAndre Fischer#**************************************************************
2c6dedb65SAndre Fischer#
3c6dedb65SAndre Fischer#  Licensed to the Apache Software Foundation (ASF) under one
4c6dedb65SAndre Fischer#  or more contributor license agreements.  See the NOTICE file
5c6dedb65SAndre Fischer#  distributed with this work for additional information
6c6dedb65SAndre Fischer#  regarding copyright ownership.  The ASF licenses this file
7c6dedb65SAndre Fischer#  to you under the Apache License, Version 2.0 (the
8c6dedb65SAndre Fischer#  "License"); you may not use this file except in compliance
9c6dedb65SAndre Fischer#  with the License.  You may obtain a copy of the License at
10c6dedb65SAndre Fischer#
11c6dedb65SAndre Fischer#    http://www.apache.org/licenses/LICENSE-2.0
12c6dedb65SAndre Fischer#
13c6dedb65SAndre Fischer#  Unless required by applicable law or agreed to in writing,
14c6dedb65SAndre Fischer#  software distributed under the License is distributed on an
15c6dedb65SAndre Fischer#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16c6dedb65SAndre Fischer#  KIND, either express or implied.  See the License for the
17c6dedb65SAndre Fischer#  specific language governing permissions and limitations
18c6dedb65SAndre Fischer#  under the License.
19c6dedb65SAndre Fischer#
20c6dedb65SAndre Fischer#**************************************************************
21c6dedb65SAndre Fischer
22c6dedb65SAndre Fischerpackage ExtensionsLst;
23c6dedb65SAndre Fischer
24c6dedb65SAndre Fischeruse File::Spec;
25c6dedb65SAndre Fischeruse LWP::UserAgent;
26c6dedb65SAndre Fischeruse Digest::MD5;
27c6dedb65SAndre Fischer
28c6dedb65SAndre Fischeruse base 'Exporter';
29c6dedb65SAndre Fischerour @EXPORT = qw(DownloadExtensions GetExtensionList);
30c6dedb65SAndre Fischer
31c6dedb65SAndre Fischer
32c6dedb65SAndre Fischer=head1 NAME
33c6dedb65SAndre Fischer
34c6dedb65SAndre Fischer    ExtensionLst.pm - Functionality for the interpretation of the main/extensions.lst file.
35c6dedb65SAndre Fischer
36c6dedb65SAndre Fischer=head1 SYNOPSIS
37c6dedb65SAndre Fischer
38c6dedb65SAndre Fischer    For downloading extensions during build setup:
39c6dedb65SAndre Fischer
40c6dedb65SAndre Fischer    use ExtensionsLst;
41c6dedb65SAndre Fischer    ExtensionsLst::DownloadExtensions();
42c6dedb65SAndre Fischer
43c6dedb65SAndre Fischer    For including extensions into the pack set:
44c6dedb65SAndre Fischer
45c6dedb65SAndre Fischer    use ExtensionsLst;
46c6dedb65SAndre Fischer    ExtensionsLst::GetExtensionList(@language_list);
47c6dedb65SAndre Fischer
48c6dedb65SAndre Fischer=head1 DESCRIPTION
49c6dedb65SAndre Fischer
50c6dedb65SAndre Fischer    The contents of the extensions.lst file are used at two times in
51c6dedb65SAndre Fischer    the process of building pack sets.
52c6dedb65SAndre Fischer
53c6dedb65SAndre Fischer    Once at the beginning right after configure is run the
54c6dedb65SAndre Fischer    DownloadExtensions() function determines the list of extensions
55c6dedb65SAndre Fischer    that are not present locally and downloads them.
56c6dedb65SAndre Fischer
57c6dedb65SAndre Fischer    The second time is after all modules are built (and the locally
58c6dedb65SAndre Fischer    built extensions are present) and the pack sets are created.  For
59c6dedb65SAndre Fischer    every language (or sets of lanugages) a set of extensions is
60c6dedb65SAndre Fischer    collected and included into the pack set.
61c6dedb65SAndre Fischer
62c6dedb65SAndre Fischer    The content of the extensions.lst file is ignored when the --with-extensions option is given to configure.
63c6dedb65SAndre Fischer
64c6dedb65SAndre Fischer=cut
65c6dedb65SAndre Fischer
66c6dedb65SAndre Fischer
67c6dedb65SAndre Fischer# Number of the line in extensions.lst that is currently being processed.
68c6dedb65SAndre Fischermy $LineNo = 0;
69c6dedb65SAndre Fischer
70764319a5SAndre Fischer# Set to 1 to get a more verbose output, the default is 0.
71764319a5SAndre Fischermy $Debug = 0;
72764319a5SAndre Fischer
73c6dedb65SAndre Fischer
74c6dedb65SAndre Fischer=head3 Prepare
75c6dedb65SAndre Fischer    Check that some environment variables are properly set and then return the file name
76c6dedb65SAndre Fischer    of the 'extensions.lst' file, typically located in main/ beside 'ooo.lst'.
77c6dedb65SAndre Fischer=cut
78c6dedb65SAndre Fischersub Prepare ()
79c6dedb65SAndre Fischer{
80c6dedb65SAndre Fischer    die "can not access environment varianle SRC_ROOT" if ! defined $ENV{'SRC_ROOT'};
81c6dedb65SAndre Fischer    die "can not determine the platform: INPATH is not set" if ! defined $ENV{'INPATH'};
82c6dedb65SAndre Fischer    die "can not determine solver directory: OUTDIR is not set" if ! defined $ENV{'OUTDIR'};
83c6dedb65SAndre Fischer    die "can not determine download directory: TARFILE_LOCATION is not set" if ! defined $ENV{'TARFILE_LOCATION'};
84c6dedb65SAndre Fischer
85c6dedb65SAndre Fischer    my $candidate = File::Spec->catfile($ENV{SRC_ROOT}, "extensions.lst");
86c6dedb65SAndre Fischer    die "can not read file $candidate" if ! -r $candidate;
87c6dedb65SAndre Fischer    return $candidate;
88c6dedb65SAndre Fischer}
89c6dedb65SAndre Fischer
90c6dedb65SAndre Fischer
91c6dedb65SAndre Fischer
92c6dedb65SAndre Fischer=head 3 EvaluateOperator
93c6dedb65SAndre Fischer    Evaluate a single test statement like 'language = en.*'.
94c6dedb65SAndre Fischer    Special handling for operators '=', '==', and 'eq' which are all mapped to '=~'.
95c6dedb65SAndre Fischer    Therefore the right hand side may be a perl regexp.  It is prefixed with '^'.
96c6dedb65SAndre Fischer
97c6dedb65SAndre Fischer    Other operators are at the moment only supported in the way that they are evaluated via eval().
98c6dedb65SAndre Fischer=cut
99c6dedb65SAndre Fischersub EvaluateOperator ($$$)
100c6dedb65SAndre Fischer{
101c6dedb65SAndre Fischer    my ($left,$operator,$right) = @_;
102c6dedb65SAndre Fischer
103c6dedb65SAndre Fischer    my $result;
104c6dedb65SAndre Fischer
105c6dedb65SAndre Fischer    if ($operator =~ /^(=|==|eq)$/)
106c6dedb65SAndre Fischer    {
107c6dedb65SAndre Fischer        if ($left =~ /^$right$/)
108c6dedb65SAndre Fischer        {
109c6dedb65SAndre Fischer            $result = 1;
110c6dedb65SAndre Fischer        }
111c6dedb65SAndre Fischer        else
112c6dedb65SAndre Fischer        {
113c6dedb65SAndre Fischer            $result = 0;
114c6dedb65SAndre Fischer        }
115c6dedb65SAndre Fischer    }
116c6dedb65SAndre Fischer    elsif (eval($left.$operator.$right))
117c6dedb65SAndre Fischer    {
118c6dedb65SAndre Fischer        $result = 1;
119c6dedb65SAndre Fischer    }
120c6dedb65SAndre Fischer    else
121c6dedb65SAndre Fischer    {
122c6dedb65SAndre Fischer        $result = 0;
123c6dedb65SAndre Fischer    }
124c6dedb65SAndre Fischer
125c6dedb65SAndre Fischer    return $result;
126c6dedb65SAndre Fischer}
127c6dedb65SAndre Fischer
128c6dedb65SAndre Fischer
129c6dedb65SAndre Fischer
130c6dedb65SAndre Fischer
131c6dedb65SAndre Fischer=head EvaluateTerm
132c6dedb65SAndre Fischer    Evaluate a string that contains a simple test term of the form
133c6dedb65SAndre Fischer    left operator right
134c6dedb65SAndre Fischer    with arbitrary spacing allowed around and between the three parts.
135c6dedb65SAndre Fischer
136c6dedb65SAndre Fischer    The left hand side is specially handled:
137c6dedb65SAndre Fischer
138c6dedb65SAndre Fischer    - When the left hand side is 'language' then it is replaced by
139c6dedb65SAndre Fischer    any of the given languages in turn.  When the term evaluates to true for any of the languages then
140c6dedb65SAndre Fischer    true is returned.  False is returned only when none of the given languages matches.
141c6dedb65SAndre Fischer
142c6dedb65SAndre Fischer    - When the left hand side consists only of upper case letters, digits, and '_' then it is
143c6dedb65SAndre Fischer    interpreted as the name of a environment variable.  It is replaced by its value before the term
144c6dedb65SAndre Fischer    is evaluated.
145c6dedb65SAndre Fischer
146c6dedb65SAndre Fischer    - Any other left hand side is an error (at the moment.)
147c6dedb65SAndre Fischer=cut
148c6dedb65SAndre Fischersub EvaluateTerm ($$)
149c6dedb65SAndre Fischer{
150c6dedb65SAndre Fischer    my $term = shift;
151c6dedb65SAndre Fischer    my $languages = shift;
152c6dedb65SAndre Fischer
153c6dedb65SAndre Fischer    my $result;
154c6dedb65SAndre Fischer
155c6dedb65SAndre Fischer    if ($term =~ /^\s*(\w+)\s*(\W+)\s*(.*?)\s*$/)
156c6dedb65SAndre Fischer    {
157c6dedb65SAndre Fischer        my ($left,$operator,$right) = ($1,$2,$3);
158c6dedb65SAndre Fischer
159c6dedb65SAndre Fischer        if ($operator !~ /^=|==|eq$/)
160c6dedb65SAndre Fischer        {
161c6dedb65SAndre Fischer            die "unsupported operator $operator on line $LineNo";
162c6dedb65SAndre Fischer        }
163c6dedb65SAndre Fischer
164c6dedb65SAndre Fischer        die "no right side in condition on line $LineNo ($term)" if ! defined $right;
165c6dedb65SAndre Fischer
166c6dedb65SAndre Fischer        if ($left =~ /^[A-Z_0-9]+$/)
167c6dedb65SAndre Fischer        {
168c6dedb65SAndre Fischer            # Uppercase words are interpreted as environment variables.
169c6dedb65SAndre Fischer            my $left_value = $ENV{$left};
170c6dedb65SAndre Fischer            $left_value = "" if ! defined $left_value;
171c6dedb65SAndre Fischer
17286e1cf34SPedro Giffuni            # We can check whether the condition is fulfilled right now.
173c6dedb65SAndre Fischer            $result = EvaluateOperator($left_value, $operator, $right);
174c6dedb65SAndre Fischer        }
175c6dedb65SAndre Fischer        elsif ($left eq "language")
176c6dedb65SAndre Fischer        {
177c6dedb65SAndre Fischer            if ($right eq "all")
178c6dedb65SAndre Fischer            {
179c6dedb65SAndre Fischer                $result = 1;
180c6dedb65SAndre Fischer            }
181c6dedb65SAndre Fischer            elsif ($#$languages>=0)
182c6dedb65SAndre Fischer            {
183c6dedb65SAndre Fischer                $result = 0;
184c6dedb65SAndre Fischer                for my $language (@$languages)
185c6dedb65SAndre Fischer                {
186c6dedb65SAndre Fischer                    # Unify naming schemes.
187c6dedb65SAndre Fischer                    $language =~ s/_/-/g;
188c6dedb65SAndre Fischer                    $right =~ s/_/-/g;
189c6dedb65SAndre Fischer
190c6dedb65SAndre Fischer                    # Evaluate language regexp.
191c6dedb65SAndre Fischer                    $result = EvaluateOperator($language, $operator, $right) ? 1 : 0;
192c6dedb65SAndre Fischer                    last if $result;
193c6dedb65SAndre Fischer                }
194c6dedb65SAndre Fischer            }
195c6dedb65SAndre Fischer            else
196c6dedb65SAndre Fischer            {
197c6dedb65SAndre Fischer                # The set of languages is not yet known.  Return true
198c6dedb65SAndre Fischer                # to include the following entries.
199c6dedb65SAndre Fischer                $result = 1;
200c6dedb65SAndre Fischer            }
201c6dedb65SAndre Fischer        }
202c6dedb65SAndre Fischer        elsif ($left eq "platform")
203c6dedb65SAndre Fischer        {
204c6dedb65SAndre Fischer            if ($right eq "all")
205c6dedb65SAndre Fischer            {
206c6dedb65SAndre Fischer                $result = 1;
207c6dedb65SAndre Fischer            }
208c6dedb65SAndre Fischer            else
209c6dedb65SAndre Fischer            {
210c6dedb65SAndre Fischer                # Evaluate platform regexp.
211c6dedb65SAndre Fischer                $result = EvaluateOperator($ENV{'INPATH'}, $operator, $right) ? 1 : 0;
212c6dedb65SAndre Fischer            }
213c6dedb65SAndre Fischer        }
214c6dedb65SAndre Fischer        else
215c6dedb65SAndre Fischer        {
216c6dedb65SAndre Fischer            die "can not handle left hand side $left on line $LineNo";
217c6dedb65SAndre Fischer        }
218c6dedb65SAndre Fischer    }
219c6dedb65SAndre Fischer    else
220c6dedb65SAndre Fischer    {
221c6dedb65SAndre Fischer        die "syntax error in expression on line $LineNo";
222c6dedb65SAndre Fischer    }
223c6dedb65SAndre Fischer
224c6dedb65SAndre Fischer    return $result;
225c6dedb65SAndre Fischer}
226c6dedb65SAndre Fischer
227c6dedb65SAndre Fischer
228c6dedb65SAndre Fischer
229c6dedb65SAndre Fischer
230c6dedb65SAndre Fischer=head3 EvaluateSelector
231c6dedb65SAndre Fischer    Evaluate the given expression that is expected to be list of terms of the form
232c6dedb65SAndre Fischer        left-hand-side operator right-hand-side
233c6dedb65SAndre Fischer    that are separated by logical operators
234c6dedb65SAndre Fischer        && ||
235c6dedb65SAndre Fischer    The expression is lazy evaluated left to right.
236c6dedb65SAndre Fischer=cut
237c6dedb65SAndre Fischersub EvaluateSelector($$);
238c6dedb65SAndre Fischersub EvaluateSelector($$)
239c6dedb65SAndre Fischer{
240c6dedb65SAndre Fischer    my $expression = shift;
241c6dedb65SAndre Fischer    my $languages = shift;
242c6dedb65SAndre Fischer
243c6dedb65SAndre Fischer    my $result = "";
244c6dedb65SAndre Fischer
245c6dedb65SAndre Fischer    if ($expression =~ /^\s*$/)
246c6dedb65SAndre Fischer    {
247c6dedb65SAndre Fischer        # Empty selector is always true.
248c6dedb65SAndre Fischer        return 1;
249c6dedb65SAndre Fischer    }
250c6dedb65SAndre Fischer    elsif ($expression =~ /^\s*(.*?)(&&|\|\|)\s*(.*)$/)
251c6dedb65SAndre Fischer    {
252c6dedb65SAndre Fischer        my ($term, $operator) = ($1,$2);
253c6dedb65SAndre Fischer        $expression = $3;
254c6dedb65SAndre Fischer
255c6dedb65SAndre Fischer        my $left_result = EvaluateTerm($term, $languages);
256c6dedb65SAndre Fischer        # Lazy evaluation of &&
257c6dedb65SAndre Fischer        return 0 if ($operator eq "&&" && !$left_result);
258c6dedb65SAndre Fischer        # Lazy evaluation of ||
259c6dedb65SAndre Fischer        return 1 if ($operator eq "||" && $left_result);
260c6dedb65SAndre Fischer        my $right_result = EvaluateSelector($expression, $languages);
261c6dedb65SAndre Fischer
262c6dedb65SAndre Fischer        if ($operator eq "&&")
263c6dedb65SAndre Fischer        {
264c6dedb65SAndre Fischer            return $left_result && $right_result;
265c6dedb65SAndre Fischer        }
266c6dedb65SAndre Fischer        else
267c6dedb65SAndre Fischer        {
268c6dedb65SAndre Fischer            return $left_result || $right_result;
269c6dedb65SAndre Fischer        }
270c6dedb65SAndre Fischer    }
27137e6b05aSAndre Fischer    elsif ($expression =~ /^\s*(.+?)\s*$/)
272c6dedb65SAndre Fischer    {
273c6dedb65SAndre Fischer        return EvaluateTerm($1, $languages);
274c6dedb65SAndre Fischer    }
275c6dedb65SAndre Fischer    else
276c6dedb65SAndre Fischer    {
277c6dedb65SAndre Fischer        die "invalid expression syntax on line $LineNo ($expression)";
278c6dedb65SAndre Fischer    }
279c6dedb65SAndre Fischer}
280c6dedb65SAndre Fischer
281c6dedb65SAndre Fischer
282c6dedb65SAndre Fischer
283c6dedb65SAndre Fischer
284c6dedb65SAndre Fischer=head3 ProcessURL
285c6dedb65SAndre Fischer    Check that the given line contains an optional MD5 sum followed by
286201b78c7SAndre Fischer    a URL for one of the protocols file, http, https,
287201b78c7SAndre Fischer    followed by an optional file name (which is necessary when it is not the last part of the URL.)
288c6dedb65SAndre Fischer    Return an array that contains the protocol, the name, the original
289c6dedb65SAndre Fischer    URL, and the MD5 sum from the beginning of the line.
290c6dedb65SAndre Fischer    The name of the URL depends on its protocol:
291c6dedb65SAndre Fischer    - for http(s) the part of the URL after the last '/'.
292c6dedb65SAndre Fischer    - for file URLS it is everything after the protocol://
293c6dedb65SAndre Fischer=cut
294c6dedb65SAndre Fischersub ProcessURL ($)
295c6dedb65SAndre Fischer{
296c6dedb65SAndre Fischer    my $line = shift;
297c6dedb65SAndre Fischer
298c6dedb65SAndre Fischer    # Check that we are looking at a valid URL.
299201b78c7SAndre Fischer    if ($line =~ /^\s*((\w{32})\s+)?([a-zA-Z]+)(:\/\/.*?\/)([^\/ \t]+)(\s+\"[^\"]+\")?\s*$/)
300c6dedb65SAndre Fischer    {
301201b78c7SAndre Fischer        my ($md5, $protocol, $url_name, $optional_name) = ($2,$3,$5,$6);
302201b78c7SAndre Fischer        my $URL = $3.$4.$5;
303c6dedb65SAndre Fischer
304c6dedb65SAndre Fischer        die "invalid URL protocol on line $LineNo:\n$line\n" if $protocol !~ /(file|http|https)/;
305c6dedb65SAndre Fischer
306201b78c7SAndre Fischer        # Determine the name.  If an optional name is given then use that.
307201b78c7SAndre Fischer        if (defined $optional_name)
308c6dedb65SAndre Fischer        {
309201b78c7SAndre Fischer            die if $optional_name !~ /^\s+\"([^\"]+)\"$/;
310c6dedb65SAndre Fischer            $name = $1;
311c6dedb65SAndre Fischer        }
312201b78c7SAndre Fischer        else
313201b78c7SAndre Fischer        {
314201b78c7SAndre Fischer            if ($protocol eq "file")
315201b78c7SAndre Fischer            {
316201b78c7SAndre Fischer                # For file URLs we use everything after :// as name, or the .
317201b78c7SAndre Fischer                $URL =~ /:\/\/(.*)$/;
318201b78c7SAndre Fischer                $name = $1;
319201b78c7SAndre Fischer            }
320201b78c7SAndre Fischer            else
321201b78c7SAndre Fischer            {
322201b78c7SAndre Fischer                # For http and https use the last part of the URL.
323201b78c7SAndre Fischer                $name = $url_name;
324201b78c7SAndre Fischer            }
325201b78c7SAndre Fischer        }
326c6dedb65SAndre Fischer
327c6dedb65SAndre Fischer        return [$protocol, $name, $URL, $md5];
328c6dedb65SAndre Fischer    }
329c6dedb65SAndre Fischer    else
330c6dedb65SAndre Fischer    {
331c6dedb65SAndre Fischer        die "invalid URL at line $LineNo:\n$line\n";
332c6dedb65SAndre Fischer    }
333c6dedb65SAndre Fischer}
334c6dedb65SAndre Fischer
335c6dedb65SAndre Fischer
336c6dedb65SAndre Fischer
337c6dedb65SAndre Fischer
338c6dedb65SAndre Fischer=head3 ParseExtensionsLst
339c6dedb65SAndre Fischer    Parse the extensions.lst file.
340c6dedb65SAndre Fischer
341c6dedb65SAndre Fischer    Lines that contain only spaces or comments or are empty are
342c6dedb65SAndre Fischer    ignored.
343c6dedb65SAndre Fischer
344c6dedb65SAndre Fischer    Lines that contain a selector, ie a test enclosed in brackets, are
345c6dedb65SAndre Fischer    evaluated.  The following lines, until the next selector, are
346c6dedb65SAndre Fischer    ignored when the selector evaluates to false.  When an empty list
347c6dedb65SAndre Fischer    of languages is given then any 'language=...' test is evaluated as
348c6dedb65SAndre Fischer    true.
349c6dedb65SAndre Fischer
350c6dedb65SAndre Fischer    All other lines are expected to contain a URL optionally preceded
351c6dedb65SAndre Fischer    by an MD5 sum.
352c6dedb65SAndre Fischer=cut
353c6dedb65SAndre Fischersub ParseExtensionsLst ($$)
354c6dedb65SAndre Fischer{
355c6dedb65SAndre Fischer    my $file_name = shift;
356c6dedb65SAndre Fischer    my $languages = shift;
357c6dedb65SAndre Fischer
358c6dedb65SAndre Fischer    open my $in, "$file_name";
359c6dedb65SAndre Fischer
360c6dedb65SAndre Fischer    my $current_selector_value = 1;
361c6dedb65SAndre Fischer    my @URLs = ();
362c6dedb65SAndre Fischer
363c6dedb65SAndre Fischer    while (<$in>)
364c6dedb65SAndre Fischer    {
365c6dedb65SAndre Fischer        my $line = $_;
366c6dedb65SAndre Fischer        $line =~ s/[\r\n]+//g;
367c6dedb65SAndre Fischer        ++$LineNo;
368c6dedb65SAndre Fischer
369c6dedb65SAndre Fischer        # Strip away comments.
370c6dedb65SAndre Fischer        next if $line =~ /^\s*#/;
371c6dedb65SAndre Fischer
372c6dedb65SAndre Fischer        # Ignore empty lines.
373c6dedb65SAndre Fischer        next if $line =~ /^\s*$/;
374c6dedb65SAndre Fischer
375c6dedb65SAndre Fischer        # Process selectors
376c6dedb65SAndre Fischer        if ($line =~ /^\s*\[\s*(.*)\s*\]\s*$/)
377c6dedb65SAndre Fischer        {
378c6dedb65SAndre Fischer            $current_selector_value = EvaluateSelector($1, $languages);
379c6dedb65SAndre Fischer        }
380c6dedb65SAndre Fischer        else
381c6dedb65SAndre Fischer        {
382c6dedb65SAndre Fischer            if ($current_selector_value)
383c6dedb65SAndre Fischer            {
384c6dedb65SAndre Fischer                push @URLs, ProcessURL($line);
385c6dedb65SAndre Fischer            }
386c6dedb65SAndre Fischer        }
387c6dedb65SAndre Fischer    }
388c6dedb65SAndre Fischer
389c6dedb65SAndre Fischer    close $in;
390c6dedb65SAndre Fischer
391c6dedb65SAndre Fischer    return @URLs;
392c6dedb65SAndre Fischer}
393c6dedb65SAndre Fischer
394c6dedb65SAndre Fischer
395c6dedb65SAndre Fischer
396c6dedb65SAndre Fischer
397c6dedb65SAndre Fischer=head3 Download
398c6dedb65SAndre Fischer    Download a set of files that are specified via URLs.
399c6dedb65SAndre Fischer
400c6dedb65SAndre Fischer    File URLs are ignored here because they point to extensions that have not yet been built.
401c6dedb65SAndre Fischer
402c6dedb65SAndre Fischer    For http URLs there may be an optional MD5 checksum.  If it is present then downloaded
403c6dedb65SAndre Fischer    files that do not match that checksum are an error and lead to abortion of the current process.
404c6dedb65SAndre Fischer    Files that have already been downloaded are not downloaded again.
405c6dedb65SAndre Fischer=cut
406c6dedb65SAndre Fischersub Download (@)
407c6dedb65SAndre Fischer{
408c6dedb65SAndre Fischer    my @urls = @_;
409c6dedb65SAndre Fischer
410c6dedb65SAndre Fischer    my @missing = ();
411c6dedb65SAndre Fischer    my $download_path = $ENV{'TARFILE_LOCATION'};
412c6dedb65SAndre Fischer
413c6dedb65SAndre Fischer    # First check which (if any) extensions have already been downloaded.
414c6dedb65SAndre Fischer    for my $entry (@urls)
415c6dedb65SAndre Fischer    {
416c6dedb65SAndre Fischer        my ($protocol, $name, $URL, $md5sum) = @{$entry};
417c6dedb65SAndre Fischer
418c6dedb65SAndre Fischer        # We can not check the existence of file URLs because they point to extensions that
419c6dedb65SAndre Fischer        # have yet to be built.
420c6dedb65SAndre Fischer
421274b8a62Spescetti        next if $protocol !~ /(http|https)/;
422c6dedb65SAndre Fischer        my $candidate = File::Spec->catfile($download_path, $name);
423c6dedb65SAndre Fischer        if ( ! -f $candidate)
424c6dedb65SAndre Fischer        {
425c6dedb65SAndre Fischer            push @missing, $entry;
426c6dedb65SAndre Fischer        }
4278fbb374bSAndre Fischer        elsif (defined $md5sum)
4288fbb374bSAndre Fischer        {
4298fbb374bSAndre Fischer            # Check that the MD5 sum is still correct.
4308fbb374bSAndre Fischer            # The datafile may have been updated with a new version of the extension that
4318fbb374bSAndre Fischer            # still has the same name but a different MD5 sum.
4328fbb374bSAndre Fischer            my $cur_oxt;
4338fbb374bSAndre Fischer            if ( ! open($cur_oxt, $candidate))
4348fbb374bSAndre Fischer            {
4358fbb374bSAndre Fischer                # Can not read the extension.  Download extension again.
4368fbb374bSAndre Fischer                push @missing, $entry;
4378fbb374bSAndre Fischer                unlink($candidate);
4388fbb374bSAndre Fischer            }
4398fbb374bSAndre Fischer            binmode($cur_oxt);
4408fbb374bSAndre Fischer            my $file_md5 = Digest::MD5->new->addfile(*$cur_oxt)->hexdigest;
4418fbb374bSAndre Fischer            close($cur_oxt);
4428fbb374bSAndre Fischer            if ($md5sum ne $file_md5)
4438fbb374bSAndre Fischer            {
4448fbb374bSAndre Fischer                # MD5 does not match.  Download extension again.
4458fbb374bSAndre Fischer                print "extension $name has wrong MD5 and will be updated\n";
4468fbb374bSAndre Fischer                push @missing, $entry;
4478fbb374bSAndre Fischer                unlink($candidate);
4488fbb374bSAndre Fischer            }
4498fbb374bSAndre Fischer        }
450c6dedb65SAndre Fischer    }
451c6dedb65SAndre Fischer    if ($#missing >= 0)
452c6dedb65SAndre Fischer    {
4538fbb374bSAndre Fischer        printf "downloading/updating %d extension%s\n", $#missing+1, $#missing>0 ? "s" : "";
454c6dedb65SAndre Fischer        if ( ! -d $download_path)
455c6dedb65SAndre Fischer        {
456c6dedb65SAndre Fischer            mkdir File::Spec->catpath($download_path, "tmp")
457c6dedb65SAndre Fischer                || die "can not create tmp subdirectory of $download_path";
458c6dedb65SAndre Fischer        }
459c6dedb65SAndre Fischer    }
460c6dedb65SAndre Fischer    else
461c6dedb65SAndre Fischer    {
462c6dedb65SAndre Fischer        print "all downloadable extensions present\n";
463c6dedb65SAndre Fischer        return;
464c6dedb65SAndre Fischer    }
465c6dedb65SAndre Fischer
466c6dedb65SAndre Fischer    # Download the missing files.
467c6dedb65SAndre Fischer    for my $entry (@missing)
468c6dedb65SAndre Fischer    {
469c6dedb65SAndre Fischer        my ($protocol, $name, $URL, $md5sum) = @{$entry};
470c6dedb65SAndre Fischer
471c0f6b924Sarielch        # Open a .part file for writing.
472c0f6b924Sarielch        my $filename = File::Spec->catfile($download_path, $name);
473c0f6b924Sarielch        my $temporary_filename = $filename . ".part";
474c0f6b924Sarielch        print "downloading to $temporary_filename\n";
475c0f6b924Sarielch
476c0f6b924Sarielch        # Prepare md5
477c0f6b924Sarielch        my $md5 = Digest::MD5->new();
478c0f6b924Sarielch
479c0f6b924Sarielch        # Download the extension.
480c0f6b924Sarielch        my $agent = LWP::UserAgent->new();
481c0f6b924Sarielch        $agent->timeout(120);
482c0f6b924Sarielch        $agent->env_proxy;
483c0f6b924Sarielch        my $last_was_redirect = 0;
484c0f6b924Sarielch        my $response = $agent->get($URL);
485c0f6b924Sarielch
486*983f6de1SMatthias Seidel        # When download was successful then check the md5 checksum and rename the .part file
487c0f6b924Sarielch        # into the actual extension name.
488c0f6b924Sarielch        if ($response->is_success())
489c0f6b924Sarielch        {
490c0f6b924Sarielch            my $content = $response->content;
491c0f6b924Sarielch            open $out, ">$temporary_filename";
492c0f6b924Sarielch            binmode($out);
493c0f6b924Sarielch            print $out $content;
494c0f6b924Sarielch            $md5->add($content);
495c0f6b924Sarielch            close $out;
496c0f6b924Sarielch            if (defined $md5sum && length($md5sum)==32)
497c0f6b924Sarielch            {
498c0f6b924Sarielch                my $file_md5 = $md5->hexdigest();
499c0f6b924Sarielch                if ($md5sum eq $file_md5)
500c0f6b924Sarielch                {
501c0f6b924Sarielch                    print "md5 is OK\n";
502c0f6b924Sarielch                }
503c0f6b924Sarielch                else
504c0f6b924Sarielch                {
505c0f6b924Sarielch                    unlink($temporary_filename) if ! $Debug;
506c0f6b924Sarielch                    die "downloaded file has the wrong md5 checksum: $file_md5 instead of $md5sum";
507c0f6b924Sarielch                }
508c0f6b924Sarielch            }
509c0f6b924Sarielch            else
510c0f6b924Sarielch            {
511c0f6b924Sarielch                print "md5 is not present\n";
512c0f6b924Sarielch                printf "   is %s, length is %d\n", $md5sum, length(md5sum);
513c0f6b924Sarielch            }
514c0f6b924Sarielch
515c0f6b924Sarielch            rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename";
516c0f6b924Sarielch        }
517c0f6b924Sarielch        else
518c6dedb65SAndre Fischer        {
519c6dedb65SAndre Fischer            die "failed to download $URL";
520c6dedb65SAndre Fischer        }
521c6dedb65SAndre Fischer    }
522c6dedb65SAndre Fischer}
523c6dedb65SAndre Fischer
524c6dedb65SAndre Fischer
525c6dedb65SAndre Fischer
526c6dedb65SAndre Fischer
527c6dedb65SAndre Fischer=head3 DownloadExtensions
528c6dedb65SAndre Fischer    This function is intended to be called during bootstrapping.  It extracts the set of extensions
529c6dedb65SAndre Fischer    that will be used later, when the installation sets are built.
5301e4dc01fSAndre Fischer    The set of languages is taken from the WITH_LANG environment variable.
531c6dedb65SAndre Fischer=cut
532c6dedb65SAndre Fischersub DownloadExtensions ()
533c6dedb65SAndre Fischer{
534764319a5SAndre Fischer    if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'}
535764319a5SAndre Fischer         && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES")
536764319a5SAndre Fischer    {
537764319a5SAndre Fischer        my $full_file_name = Prepare();
5381e4dc01fSAndre Fischer        my $languages = [ "en_US" ];
5391e4dc01fSAndre Fischer        if (defined $ENV{'WITH_LANG'})
5401e4dc01fSAndre Fischer        {
5411e4dc01fSAndre Fischer            @$languages = split(/\s+/, $ENV{'WITH_LANG'});
5421e4dc01fSAndre Fischer            foreach my $l (@$languages)
5431e4dc01fSAndre Fischer            {
5441e4dc01fSAndre Fischer                print "$l\n";
5451e4dc01fSAndre Fischer            }
5461e4dc01fSAndre Fischer        }
5471e4dc01fSAndre Fischer        my @urls = ParseExtensionsLst($full_file_name, $languages);
548764319a5SAndre Fischer        Download(@urls);
549764319a5SAndre Fischer    }
550764319a5SAndre Fischer    else
551764319a5SAndre Fischer    {
552764319a5SAndre Fischer        print "bundling of dictionaries is disabled.\n";
553764319a5SAndre Fischer    }
554c6dedb65SAndre Fischer}
555c6dedb65SAndre Fischer
556c6dedb65SAndre Fischer
557c6dedb65SAndre Fischer
558c6dedb65SAndre Fischer
559c6dedb65SAndre Fischer=head3 GetExtensionList
560c6dedb65SAndre Fischer    This function is intended to be called when installation sets are built.
561c6dedb65SAndre Fischer    It expects two arguments:
562c6dedb65SAndre Fischer        - A protocol selector.  Http URLs reference remotely located
563c6dedb65SAndre Fischer          extensions that will be bundled as-is into the installation
564c6dedb65SAndre Fischer          sets due to legal reasons. They are installed on first start
565c6dedb65SAndre Fischer          of the office.
566c6dedb65SAndre Fischer          File URLs reference extensions whose source code is part of
567c6dedb65SAndre Fischer          the repository.  They are pre-registered when installation
568c6dedb65SAndre Fischer          sets are created.  Their installation is finished when the
569c6dedb65SAndre Fischer          office is first started.
570c6dedb65SAndre Fischer        - A set of languages.  This set determines which extensions
571c6dedb65SAndre Fischer          are returned and then included in an installation set.
572c6dedb65SAndre Fischer=cut
573c6dedb65SAndre Fischersub GetExtensionList ($@)
574c6dedb65SAndre Fischer{
575c6dedb65SAndre Fischer    my $protocol_selector = shift;
576c6dedb65SAndre Fischer    my @language_list = @_;
577c6dedb65SAndre Fischer
578764319a5SAndre Fischer    if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'}
579764319a5SAndre Fischer         && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES")
580c6dedb65SAndre Fischer    {
581764319a5SAndre Fischer        my $full_file_name = Prepare();
582764319a5SAndre Fischer        my @urls = ParseExtensionsLst($full_file_name, \@language_list);
583764319a5SAndre Fischer
584764319a5SAndre Fischer        my @result = ();
585764319a5SAndre Fischer        for my $entry (@urls)
586c6dedb65SAndre Fischer        {
587764319a5SAndre Fischer            my ($protocol, $name, $URL, $md5sum) = @{$entry};
588764319a5SAndre Fischer            if ($protocol =~ /^$protocol_selector$/)
589764319a5SAndre Fischer            {
590764319a5SAndre Fischer                push @result, $name;
591764319a5SAndre Fischer            }
592c6dedb65SAndre Fischer        }
593c6dedb65SAndre Fischer
594764319a5SAndre Fischer        return @result;
595764319a5SAndre Fischer    }
596764319a5SAndre Fischer    else
597764319a5SAndre Fischer    {
598764319a5SAndre Fischer        # Bundling of dictionaires is disabled.
599764319a5SAndre Fischer    }
6009568fc44SAndre Fischer
6019568fc44SAndre Fischer    return ();
602c6dedb65SAndre Fischer}
603c6dedb65SAndre Fischer
604c6dedb65SAndre Fischer
605c6dedb65SAndre Fischer1;
606