1#************************************************************** 2# 3# Licensed to the Apache Software Foundation (ASF) under one 4# or more contributor license agreements. See the NOTICE file 5# distributed with this work for additional information 6# regarding copyright ownership. The ASF licenses this file 7# to you under the Apache License, Version 2.0 (the 8# "License"); you may not use this file except in compliance 9# with the License. You may obtain a copy of the License at 10# 11# http://www.apache.org/licenses/LICENSE-2.0 12# 13# Unless required by applicable law or agreed to in writing, 14# software distributed under the License is distributed on an 15# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16# KIND, either express or implied. See the License for the 17# specific language governing permissions and limitations 18# under the License. 19# 20#************************************************************** 21 22package ExtensionsLst; 23 24use File::Spec; 25use LWP::UserAgent; 26use Digest::MD5; 27 28use base 'Exporter'; 29our @EXPORT = qw(DownloadExtensions GetExtensionList); 30 31 32=head1 NAME 33 34 ExtensionLst.pm - Functionality for the interpretation of the main/extensions.lst file. 35 36=head1 SYNOPSIS 37 38 For downloading extensions during build setup: 39 40 use ExtensionsLst; 41 ExtensionsLst::DownloadExtensions(); 42 43 For including extensions into the pack set: 44 45 use ExtensionsLst; 46 ExtensionsLst::GetExtensionList(@language_list); 47 48=head1 DESCRIPTION 49 50 The contents of the extensions.lst file are used at two times in 51 the process of building pack sets. 52 53 Once at the beginning right after configure is run the 54 DownloadExtensions() function determines the list of extensions 55 that are not present locally and downloads them. 56 57 The second time is after all modules are built (and the locally 58 built extensions are present) and the pack sets are created. For 59 every language (or sets of lanugages) a set of extensions is 60 collected and included into the pack set. 61 62 The content of the extensions.lst file is ignored when the --with-extensions option is given to configure. 63 64=cut 65 66 67# Number of the line in extensions.lst that is currently being processed. 68my $LineNo = 0; 69 70 71=head3 Prepare 72 Check that some environment variables are properly set and then return the file name 73 of the 'extensions.lst' file, typically located in main/ beside 'ooo.lst'. 74=cut 75sub Prepare () 76{ 77 die "can not access environment varianle SRC_ROOT" if ! defined $ENV{'SRC_ROOT'}; 78 die "can not determine the platform: INPATH is not set" if ! defined $ENV{'INPATH'}; 79 die "can not determine solver directory: OUTDIR is not set" if ! defined $ENV{'OUTDIR'}; 80 die "can not determine download directory: TARFILE_LOCATION is not set" if ! defined $ENV{'TARFILE_LOCATION'}; 81 82 my $candidate = File::Spec->catfile($ENV{SRC_ROOT}, "extensions.lst"); 83 die "can not read file $candidate" if ! -r $candidate; 84 return $candidate; 85} 86 87 88 89=head 3 EvaluateOperator 90 Evaluate a single test statement like 'language = en.*'. 91 Special handling for operators '=', '==', and 'eq' which are all mapped to '=~'. 92 Therefore the right hand side may be a perl regexp. It is prefixed with '^'. 93 94 Other operators are at the moment only supported in the way that they are evaluated via eval(). 95=cut 96sub EvaluateOperator ($$$) 97{ 98 my ($left,$operator,$right) = @_; 99 100 my $result; 101 102 if ($operator =~ /^(=|==|eq)$/) 103 { 104 if ($left =~ /^$right$/) 105 { 106 $result = 1; 107 } 108 else 109 { 110 $result = 0; 111 } 112 } 113 elsif (eval($left.$operator.$right)) 114 { 115 $result = 1; 116 } 117 else 118 { 119 $result = 0; 120 } 121 122 return $result; 123} 124 125 126 127 128=head EvaluateTerm 129 Evaluate a string that contains a simple test term of the form 130 left operator right 131 with arbitrary spacing allowed around and between the three parts. 132 133 The left hand side is specially handled: 134 135 - When the left hand side is 'language' then it is replaced by 136 any of the given languages in turn. When the term evaluates to true for any of the languages then 137 true is returned. False is returned only when none of the given languages matches. 138 139 - When the left hand side consists only of upper case letters, digits, and '_' then it is 140 interpreted as the name of a environment variable. It is replaced by its value before the term 141 is evaluated. 142 143 - Any other left hand side is an error (at the moment.) 144=cut 145sub EvaluateTerm ($$) 146{ 147 my $term = shift; 148 my $languages = shift; 149 150 my $result; 151 152 if ($term =~ /^\s*(\w+)\s*(\W+)\s*(.*?)\s*$/) 153 { 154 my ($left,$operator,$right) = ($1,$2,$3); 155 156 if ($operator !~ /^=|==|eq$/) 157 { 158 die "unsupported operator $operator on line $LineNo"; 159 } 160 161 die "no right side in condition on line $LineNo ($term)" if ! defined $right; 162 163 if ($left =~ /^[A-Z_0-9]+$/) 164 { 165 # Uppercase words are interpreted as environment variables. 166 my $left_value = $ENV{$left}; 167 $left_value = "" if ! defined $left_value; 168 169 # We can check whether the condition is fullfilled right now. 170 $result = EvaluateOperator($left_value, $operator, $right); 171 } 172 elsif ($left eq "language") 173 { 174 if ($right eq "all") 175 { 176 $result = 1; 177 } 178 elsif ($#$languages>=0) 179 { 180 $result = 0; 181 for my $language (@$languages) 182 { 183 # Unify naming schemes. 184 $language =~ s/_/-/g; 185 $right =~ s/_/-/g; 186 187 # Evaluate language regexp. 188 $result = EvaluateOperator($language, $operator, $right) ? 1 : 0; 189 last if $result; 190 } 191 } 192 else 193 { 194 # The set of languages is not yet known. Return true 195 # to include the following entries. 196 $result = 1; 197 } 198 } 199 elsif ($left eq "platform") 200 { 201 if ($right eq "all") 202 { 203 $result = 1; 204 } 205 else 206 { 207 # Evaluate platform regexp. 208 $result = EvaluateOperator($ENV{'INPATH'}, $operator, $right) ? 1 : 0; 209 } 210 } 211 else 212 { 213 die "can not handle left hand side $left on line $LineNo"; 214 } 215 } 216 else 217 { 218 die "syntax error in expression on line $LineNo"; 219 } 220 221 return $result; 222} 223 224 225 226 227=head3 EvaluateSelector 228 Evaluate the given expression that is expected to be list of terms of the form 229 left-hand-side operator right-hand-side 230 that are separated by logical operators 231 && || 232 The expression is lazy evaluated left to right. 233=cut 234sub EvaluateSelector($$); 235sub EvaluateSelector($$) 236{ 237 my $expression = shift; 238 my $languages = shift; 239 240 my $result = ""; 241 242 if ($expression =~ /^\s*$/) 243 { 244 # Empty selector is always true. 245 return 1; 246 } 247 elsif ($expression =~ /^\s*(.*?)(&&|\|\|)\s*(.*)$/) 248 { 249 my ($term, $operator) = ($1,$2); 250 $expression = $3; 251 252 my $left_result = EvaluateTerm($term, $languages); 253 # Lazy evaluation of && 254 return 0 if ($operator eq "&&" && !$left_result); 255 # Lazy evaluation of || 256 return 1 if ($operator eq "||" && $left_result); 257 my $right_result = EvaluateSelector($expression, $languages); 258 259 if ($operator eq "&&") 260 { 261 return $left_result && $right_result; 262 } 263 else 264 { 265 return $left_result || $right_result; 266 } 267 } 268 elsif ($expression =~ /^\s*(.+?)\s$/) 269 { 270 return EvaluateTerm($1, $languages); 271 } 272 else 273 { 274 die "invalid expression syntax on line $LineNo ($expression)"; 275 } 276} 277 278 279 280 281=head3 ProcessURL 282 Check that the given line contains an optional MD5 sum followed by 283 a URL for one of the protocols file, http, https 284 Return an array that contains the protocol, the name, the original 285 URL, and the MD5 sum from the beginning of the line. 286 The name of the URL depends on its protocol: 287 - for http(s) the part of the URL after the last '/'. 288 - for file URLS it is everything after the protocol:// 289=cut 290sub ProcessURL ($) 291{ 292 my $line = shift; 293 294 # Check that we are looking at a valid URL. 295 if ($line =~ /^\s*(\w{32}\s+)?([a-zA-Z]+)(:\/\/.*?\/)([^\/ \t]+)\s*$/) 296 { 297 my ($md5, $protocol, $name) = ($1,$2,$4); 298 my $URL = $2.$3.$4; 299 300 die "invalid URL protocol on line $LineNo:\n$line\n" if $protocol !~ /(file|http|https)/; 301 302 # For file URLs we use everything after :// as name. 303 if ($protocol eq "file") 304 { 305 $URL =~ /:\/\/(.*)$/; 306 $name = $1; 307 } 308 309 return [$protocol, $name, $URL, $md5]; 310 } 311 else 312 { 313 die "invalid URL at line $LineNo:\n$line\n"; 314 } 315} 316 317 318 319 320=head3 ParseExtensionsLst 321 Parse the extensions.lst file. 322 323 Lines that contain only spaces or comments or are empty are 324 ignored. 325 326 Lines that contain a selector, ie a test enclosed in brackets, are 327 evaluated. The following lines, until the next selector, are 328 ignored when the selector evaluates to false. When an empty list 329 of languages is given then any 'language=...' test is evaluated as 330 true. 331 332 All other lines are expected to contain a URL optionally preceded 333 by an MD5 sum. 334=cut 335sub ParseExtensionsLst ($$) 336{ 337 my $file_name = shift; 338 my $languages = shift; 339 340 open my $in, "$file_name"; 341 342 my $current_selector_value = 1; 343 my @URLs = (); 344 345 while (<$in>) 346 { 347 my $line = $_; 348 $line =~ s/[\r\n]+//g; 349 ++$LineNo; 350 351 # Strip away comments. 352 next if $line =~ /^\s*#/; 353 354 # Ignore empty lines. 355 next if $line =~ /^\s*$/; 356 357 # Process selectors 358 if ($line =~ /^\s*\[\s*(.*)\s*\]\s*$/) 359 { 360 $current_selector_value = EvaluateSelector($1, $languages); 361 } 362 else 363 { 364 if ($current_selector_value) 365 { 366 push @URLs, ProcessURL($line); 367 } 368 } 369 } 370 371 close $in; 372 373 return @URLs; 374} 375 376 377 378 379=head3 Download 380 Download a set of files that are specified via URLs. 381 382 File URLs are ignored here because they point to extensions that have not yet been built. 383 384 For http URLs there may be an optional MD5 checksum. If it is present then downloaded 385 files that do not match that checksum are an error and lead to abortion of the current process. 386 Files that have already been downloaded are not downloaded again. 387=cut 388sub Download (@) 389{ 390 my @urls = @_; 391 392 my @missing = (); 393 my $download_path = $ENV{'TARFILE_LOCATION'}; 394 395 # First check which (if any) extensions have already been downloaded. 396 for my $entry (@urls) 397 { 398 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 399 400 # We can not check the existence of file URLs because they point to extensions that 401 # have yet to be built. 402 403 next if $protocol ne "http"; 404 my $candidate = File::Spec->catfile($download_path, $name); 405 if ( ! -f $candidate) 406 { 407 push @missing, $entry; 408 } 409 } 410 if ($#missing >= 0) 411 { 412 printf "downloading %d missing extension%s\n", $#missing+1, $#missing>0 ? "s" : ""; 413 if ( ! -d $download_path) 414 { 415 mkdir File::Spec->catpath($download_path, "tmp") 416 || die "can not create tmp subdirectory of $download_path"; 417 } 418 } 419 else 420 { 421 print "all downloadable extensions present\n"; 422 return; 423 } 424 425 # Download the missing files. 426 for my $entry (@missing) 427 { 428 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 429 430 # Open a .part file for writing. 431 my $filename = File::Spec->catfile($download_path, $name); 432 my $temporary_filename = $filename . ".part"; 433 open my $out, ">$temporary_filename"; 434 binmode($out); 435 436 # Prepare md5 437 my $md5 = Digest::MD5->new(); 438 439 # Download the extension. 440 my $agent = LWP::UserAgent->new(); 441 $agent->timeout(10); 442 $agent->show_progress(1); 443 $agent->add_handler('response_data' 444 => sub{ 445 my($response,$agent,$h,$data)=@_; 446 print $out $data; 447 $md5->add($data); 448 }); 449 my $response = $agent->get($URL); 450 close $out; 451 452 # When download was successfull then check the md5 checksum and rename the .part file 453 # into the actual extension name. 454 if ($response->is_success()) 455 { 456 if (defined $md5sum && length($md5sum)==32) 457 { 458 if ($md5sum eq $md5->digest()) 459 { 460 print "md5 is OK\n"; 461 } 462 else 463 { 464 unlink($temporary_filename); 465 die "downloaded file has the wrong md5 checksum"; 466 } 467 } 468 469 rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename"; 470 } 471 else 472 { 473 die "failed to download $URL"; 474 } 475 } 476} 477 478 479 480 481=head3 DownloadExtensions 482 This function is intended to be called during bootstrapping. It extracts the set of extensions 483 that will be used later, when the installation sets are built. 484=cut 485sub DownloadExtensions () 486{ 487 my $full_file_name = Prepare(); 488 my @urls = ParseExtensionsLst($full_file_name, []); 489 Download(@urls); 490} 491 492 493 494 495=head3 GetExtensionList 496 This function is intended to be called when installation sets are built. 497 It expects two arguments: 498 - A protocol selector. Http URLs reference remotely located 499 extensions that will be bundled as-is into the installation 500 sets due to legal reasons. They are installed on first start 501 of the office. 502 File URLs reference extensions whose source code is part of 503 the repository. They are pre-registered when installation 504 sets are created. Their installation is finished when the 505 office is first started. 506 - A set of languages. This set determines which extensions 507 are returned and then included in an installation set. 508=cut 509sub GetExtensionList ($@) 510{ 511 my $protocol_selector = shift; 512 my @language_list = @_; 513 514 my $full_file_name = Prepare(); 515 my @urls = ParseExtensionsLst($full_file_name, \@language_list); 516 517 my @result = (); 518 for my $entry (@urls) 519 { 520 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 521 if ($protocol =~ /^$protocol_selector$/) 522 { 523 push @result, $name; 524 } 525 } 526 527 return @result; 528} 529 530 5311; 532