1#************************************************************** 2# 3# Licensed to the Apache Software Foundation (ASF) under one 4# or more contributor license agreements. See the NOTICE file 5# distributed with this work for additional information 6# regarding copyright ownership. The ASF licenses this file 7# to you under the Apache License, Version 2.0 (the 8# "License"); you may not use this file except in compliance 9# with the License. You may obtain a copy of the License at 10# 11# http://www.apache.org/licenses/LICENSE-2.0 12# 13# Unless required by applicable law or agreed to in writing, 14# software distributed under the License is distributed on an 15# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16# KIND, either express or implied. See the License for the 17# specific language governing permissions and limitations 18# under the License. 19# 20#************************************************************** 21 22package ExtensionsLst; 23 24use File::Spec; 25use LWP::UserAgent; 26use Digest::MD5; 27 28use base 'Exporter'; 29our @EXPORT = qw(DownloadExtensions GetExtensionList); 30 31 32=head1 NAME 33 34 ExtensionLst.pm - Functionality for the interpretation of the main/extensions.lst file. 35 36=head1 SYNOPSIS 37 38 For downloading extensions during build setup: 39 40 use ExtensionsLst; 41 ExtensionsLst::DownloadExtensions(); 42 43 For including extensions into the pack set: 44 45 use ExtensionsLst; 46 ExtensionsLst::GetExtensionList(@language_list); 47 48=head1 DESCRIPTION 49 50 The contents of the extensions.lst file are used at two times in 51 the process of building pack sets. 52 53 Once at the beginning right after configure is run the 54 DownloadExtensions() function determines the list of extensions 55 that are not present locally and downloads them. 56 57 The second time is after all modules are built (and the locally 58 built extensions are present) and the pack sets are created. For 59 every language (or sets of lanugages) a set of extensions is 60 collected and included into the pack set. 61 62 The content of the extensions.lst file is ignored when the --with-extensions option is given to configure. 63 64=cut 65 66 67# Number of the line in extensions.lst that is currently being processed. 68my $LineNo = 0; 69 70# Set to 1 to get a more verbose output, the default is 0. 71my $Debug = 0; 72 73 74=head3 Prepare 75 Check that some environment variables are properly set and then return the file name 76 of the 'extensions.lst' file, typically located in main/ beside 'ooo.lst'. 77=cut 78sub Prepare () 79{ 80 die "can not access environment varianle SRC_ROOT" if ! defined $ENV{'SRC_ROOT'}; 81 die "can not determine the platform: INPATH is not set" if ! defined $ENV{'INPATH'}; 82 die "can not determine solver directory: OUTDIR is not set" if ! defined $ENV{'OUTDIR'}; 83 die "can not determine download directory: TARFILE_LOCATION is not set" if ! defined $ENV{'TARFILE_LOCATION'}; 84 85 my $candidate = File::Spec->catfile($ENV{SRC_ROOT}, "extensions.lst"); 86 die "can not read file $candidate" if ! -r $candidate; 87 return $candidate; 88} 89 90 91 92=head 3 EvaluateOperator 93 Evaluate a single test statement like 'language = en.*'. 94 Special handling for operators '=', '==', and 'eq' which are all mapped to '=~'. 95 Therefore the right hand side may be a perl regexp. It is prefixed with '^'. 96 97 Other operators are at the moment only supported in the way that they are evaluated via eval(). 98=cut 99sub EvaluateOperator ($$$) 100{ 101 my ($left,$operator,$right) = @_; 102 103 my $result; 104 105 if ($operator =~ /^(=|==|eq)$/) 106 { 107 if ($left =~ /^$right$/) 108 { 109 $result = 1; 110 } 111 else 112 { 113 $result = 0; 114 } 115 } 116 elsif (eval($left.$operator.$right)) 117 { 118 $result = 1; 119 } 120 else 121 { 122 $result = 0; 123 } 124 125 return $result; 126} 127 128 129 130 131=head EvaluateTerm 132 Evaluate a string that contains a simple test term of the form 133 left operator right 134 with arbitrary spacing allowed around and between the three parts. 135 136 The left hand side is specially handled: 137 138 - When the left hand side is 'language' then it is replaced by 139 any of the given languages in turn. When the term evaluates to true for any of the languages then 140 true is returned. False is returned only when none of the given languages matches. 141 142 - When the left hand side consists only of upper case letters, digits, and '_' then it is 143 interpreted as the name of a environment variable. It is replaced by its value before the term 144 is evaluated. 145 146 - Any other left hand side is an error (at the moment.) 147=cut 148sub EvaluateTerm ($$) 149{ 150 my $term = shift; 151 my $languages = shift; 152 153 my $result; 154 155 if ($term =~ /^\s*(\w+)\s*(\W+)\s*(.*?)\s*$/) 156 { 157 my ($left,$operator,$right) = ($1,$2,$3); 158 159 if ($operator !~ /^=|==|eq$/) 160 { 161 die "unsupported operator $operator on line $LineNo"; 162 } 163 164 die "no right side in condition on line $LineNo ($term)" if ! defined $right; 165 166 if ($left =~ /^[A-Z_0-9]+$/) 167 { 168 # Uppercase words are interpreted as environment variables. 169 my $left_value = $ENV{$left}; 170 $left_value = "" if ! defined $left_value; 171 172 # We can check whether the condition is fullfilled right now. 173 $result = EvaluateOperator($left_value, $operator, $right); 174 } 175 elsif ($left eq "language") 176 { 177 if ($right eq "all") 178 { 179 $result = 1; 180 } 181 elsif ($#$languages>=0) 182 { 183 $result = 0; 184 for my $language (@$languages) 185 { 186 # Unify naming schemes. 187 $language =~ s/_/-/g; 188 $right =~ s/_/-/g; 189 190 # Evaluate language regexp. 191 $result = EvaluateOperator($language, $operator, $right) ? 1 : 0; 192 last if $result; 193 } 194 } 195 else 196 { 197 # The set of languages is not yet known. Return true 198 # to include the following entries. 199 $result = 1; 200 } 201 } 202 elsif ($left eq "platform") 203 { 204 if ($right eq "all") 205 { 206 $result = 1; 207 } 208 else 209 { 210 # Evaluate platform regexp. 211 $result = EvaluateOperator($ENV{'INPATH'}, $operator, $right) ? 1 : 0; 212 } 213 } 214 else 215 { 216 die "can not handle left hand side $left on line $LineNo"; 217 } 218 } 219 else 220 { 221 die "syntax error in expression on line $LineNo"; 222 } 223 224 return $result; 225} 226 227 228 229 230=head3 EvaluateSelector 231 Evaluate the given expression that is expected to be list of terms of the form 232 left-hand-side operator right-hand-side 233 that are separated by logical operators 234 && || 235 The expression is lazy evaluated left to right. 236=cut 237sub EvaluateSelector($$); 238sub EvaluateSelector($$) 239{ 240 my $expression = shift; 241 my $languages = shift; 242 243 my $result = ""; 244 245 if ($expression =~ /^\s*$/) 246 { 247 # Empty selector is always true. 248 return 1; 249 } 250 elsif ($expression =~ /^\s*(.*?)(&&|\|\|)\s*(.*)$/) 251 { 252 my ($term, $operator) = ($1,$2); 253 $expression = $3; 254 255 my $left_result = EvaluateTerm($term, $languages); 256 # Lazy evaluation of && 257 return 0 if ($operator eq "&&" && !$left_result); 258 # Lazy evaluation of || 259 return 1 if ($operator eq "||" && $left_result); 260 my $right_result = EvaluateSelector($expression, $languages); 261 262 if ($operator eq "&&") 263 { 264 return $left_result && $right_result; 265 } 266 else 267 { 268 return $left_result || $right_result; 269 } 270 } 271 elsif ($expression =~ /^\s*(.+?)\s*$/) 272 { 273 return EvaluateTerm($1, $languages); 274 } 275 else 276 { 277 die "invalid expression syntax on line $LineNo ($expression)"; 278 } 279} 280 281 282 283 284=head3 ProcessURL 285 Check that the given line contains an optional MD5 sum followed by 286 a URL for one of the protocols file, http, https, 287 followed by an optional file name (which is necessary when it is not the last part of the URL.) 288 Return an array that contains the protocol, the name, the original 289 URL, and the MD5 sum from the beginning of the line. 290 The name of the URL depends on its protocol: 291 - for http(s) the part of the URL after the last '/'. 292 - for file URLS it is everything after the protocol:// 293=cut 294sub ProcessURL ($) 295{ 296 my $line = shift; 297 298 # Check that we are looking at a valid URL. 299 if ($line =~ /^\s*((\w{32})\s+)?([a-zA-Z]+)(:\/\/.*?\/)([^\/ \t]+)(\s+\"[^\"]+\")?\s*$/) 300 { 301 my ($md5, $protocol, $url_name, $optional_name) = ($2,$3,$5,$6); 302 my $URL = $3.$4.$5; 303 304 die "invalid URL protocol on line $LineNo:\n$line\n" if $protocol !~ /(file|http|https)/; 305 306 # Determine the name. If an optional name is given then use that. 307 if (defined $optional_name) 308 { 309 die if $optional_name !~ /^\s+\"([^\"]+)\"$/; 310 $name = $1; 311 } 312 else 313 { 314 if ($protocol eq "file") 315 { 316 # For file URLs we use everything after :// as name, or the . 317 $URL =~ /:\/\/(.*)$/; 318 $name = $1; 319 } 320 else 321 { 322 # For http and https use the last part of the URL. 323 $name = $url_name; 324 } 325 } 326 327 return [$protocol, $name, $URL, $md5]; 328 } 329 else 330 { 331 die "invalid URL at line $LineNo:\n$line\n"; 332 } 333} 334 335 336 337 338=head3 ParseExtensionsLst 339 Parse the extensions.lst file. 340 341 Lines that contain only spaces or comments or are empty are 342 ignored. 343 344 Lines that contain a selector, ie a test enclosed in brackets, are 345 evaluated. The following lines, until the next selector, are 346 ignored when the selector evaluates to false. When an empty list 347 of languages is given then any 'language=...' test is evaluated as 348 true. 349 350 All other lines are expected to contain a URL optionally preceded 351 by an MD5 sum. 352=cut 353sub ParseExtensionsLst ($$) 354{ 355 my $file_name = shift; 356 my $languages = shift; 357 358 open my $in, "$file_name"; 359 360 my $current_selector_value = 1; 361 my @URLs = (); 362 363 while (<$in>) 364 { 365 my $line = $_; 366 $line =~ s/[\r\n]+//g; 367 ++$LineNo; 368 369 # Strip away comments. 370 next if $line =~ /^\s*#/; 371 372 # Ignore empty lines. 373 next if $line =~ /^\s*$/; 374 375 # Process selectors 376 if ($line =~ /^\s*\[\s*(.*)\s*\]\s*$/) 377 { 378 $current_selector_value = EvaluateSelector($1, $languages); 379 } 380 else 381 { 382 if ($current_selector_value) 383 { 384 push @URLs, ProcessURL($line); 385 } 386 } 387 } 388 389 close $in; 390 391 return @URLs; 392} 393 394 395 396 397=head3 Download 398 Download a set of files that are specified via URLs. 399 400 File URLs are ignored here because they point to extensions that have not yet been built. 401 402 For http URLs there may be an optional MD5 checksum. If it is present then downloaded 403 files that do not match that checksum are an error and lead to abortion of the current process. 404 Files that have already been downloaded are not downloaded again. 405=cut 406sub Download (@) 407{ 408 my @urls = @_; 409 410 my @missing = (); 411 my $download_path = $ENV{'TARFILE_LOCATION'}; 412 413 # First check which (if any) extensions have already been downloaded. 414 for my $entry (@urls) 415 { 416 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 417 418 # We can not check the existence of file URLs because they point to extensions that 419 # have yet to be built. 420 421 next if $protocol ne "http"; 422 my $candidate = File::Spec->catfile($download_path, $name); 423 if ( ! -f $candidate) 424 { 425 push @missing, $entry; 426 } 427 elsif (defined $md5sum) 428 { 429 # Check that the MD5 sum is still correct. 430 # The datafile may have been updated with a new version of the extension that 431 # still has the same name but a different MD5 sum. 432 my $cur_oxt; 433 if ( ! open($cur_oxt, $candidate)) 434 { 435 # Can not read the extension. Download extension again. 436 push @missing, $entry; 437 unlink($candidate); 438 } 439 binmode($cur_oxt); 440 my $file_md5 = Digest::MD5->new->addfile(*$cur_oxt)->hexdigest; 441 close($cur_oxt); 442 if ($md5sum ne $file_md5) 443 { 444 # MD5 does not match. Download extension again. 445 print "extension $name has wrong MD5 and will be updated\n"; 446 push @missing, $entry; 447 unlink($candidate); 448 } 449 } 450 } 451 if ($#missing >= 0) 452 { 453 printf "downloading/updating %d extension%s\n", $#missing+1, $#missing>0 ? "s" : ""; 454 if ( ! -d $download_path) 455 { 456 mkdir File::Spec->catpath($download_path, "tmp") 457 || die "can not create tmp subdirectory of $download_path"; 458 } 459 } 460 else 461 { 462 print "all downloadable extensions present\n"; 463 return; 464 } 465 466 # Download the missing files. 467 for my $entry (@missing) 468 { 469 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 470 471 # Open a .part file for writing. 472 my $filename = File::Spec->catfile($download_path, $name); 473 my $temporary_filename = $filename . ".part"; 474 print "downloading to $temporary_filename\n"; 475 open my $out, ">$temporary_filename"; 476 binmode($out); 477 478 # Prepare md5 479 my $md5 = Digest::MD5->new(); 480 481 # Download the extension. 482 my $agent = LWP::UserAgent->new(); 483 $agent->timeout(120); 484 $agent->show_progress(1); 485 my $last_was_redirect = 0; 486 $agent->add_handler('response_redirect' 487 => sub{ 488 $last_was_redirect = 1; 489 return; 490 }); 491 $agent->add_handler('response_data' 492 => sub{ 493 if ($last_was_redirect) 494 { 495 $last_was_redirect = 0; 496 # Throw away the data we got so far. 497 $md5->reset(); 498 close $out; 499 open $out, ">$temporary_filename"; 500 binmode($out); 501 } 502 my($response,$agent,$h,$data)=@_; 503 print $out $data; 504 $md5->add($data); 505 }); 506 my $response = $agent->get($URL); 507 close $out; 508 509 # When download was successfull then check the md5 checksum and rename the .part file 510 # into the actual extension name. 511 if ($response->is_success()) 512 { 513 if (defined $md5sum && length($md5sum)==32) 514 { 515 my $file_md5 = $md5->hexdigest(); 516 if ($md5sum eq $file_md5) 517 { 518 print "md5 is OK\n"; 519 } 520 else 521 { 522 unlink($temporary_filename) if ! $Debug; 523 die "downloaded file has the wrong md5 checksum: $file_md5 instead of $md5sum"; 524 } 525 } 526 else 527 { 528 print "md5 is not present\n"; 529 printf " is %s, length is %d\n", $md5sum, length(md5sum); 530 } 531 532 rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename"; 533 } 534 else 535 { 536 die "failed to download $URL"; 537 } 538 } 539} 540 541 542 543 544=head3 DownloadExtensions 545 This function is intended to be called during bootstrapping. It extracts the set of extensions 546 that will be used later, when the installation sets are built. 547 The set of languages is taken from the WITH_LANG environment variable. 548=cut 549sub DownloadExtensions () 550{ 551 if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'} 552 && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES") 553 { 554 my $full_file_name = Prepare(); 555 my $languages = [ "en_US" ]; 556 if (defined $ENV{'WITH_LANG'}) 557 { 558 @$languages = split(/\s+/, $ENV{'WITH_LANG'}); 559 foreach my $l (@$languages) 560 { 561 print "$l\n"; 562 } 563 } 564 my @urls = ParseExtensionsLst($full_file_name, $languages); 565 Download(@urls); 566 } 567 else 568 { 569 print "bundling of dictionaries is disabled.\n"; 570 } 571} 572 573 574 575 576=head3 GetExtensionList 577 This function is intended to be called when installation sets are built. 578 It expects two arguments: 579 - A protocol selector. Http URLs reference remotely located 580 extensions that will be bundled as-is into the installation 581 sets due to legal reasons. They are installed on first start 582 of the office. 583 File URLs reference extensions whose source code is part of 584 the repository. They are pre-registered when installation 585 sets are created. Their installation is finished when the 586 office is first started. 587 - A set of languages. This set determines which extensions 588 are returned and then included in an installation set. 589=cut 590sub GetExtensionList ($@) 591{ 592 my $protocol_selector = shift; 593 my @language_list = @_; 594 595 if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'} 596 && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES") 597 { 598 my $full_file_name = Prepare(); 599 my @urls = ParseExtensionsLst($full_file_name, \@language_list); 600 601 my @result = (); 602 for my $entry (@urls) 603 { 604 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 605 if ($protocol =~ /^$protocol_selector$/) 606 { 607 push @result, $name; 608 } 609 } 610 611 return @result; 612 } 613 else 614 { 615 # Bundling of dictionaires is disabled. 616 } 617 618 return (); 619} 620 621 6221; 623