1#************************************************************** 2# 3# Licensed to the Apache Software Foundation (ASF) under one 4# or more contributor license agreements. See the NOTICE file 5# distributed with this work for additional information 6# regarding copyright ownership. The ASF licenses this file 7# to you under the Apache License, Version 2.0 (the 8# "License"); you may not use this file except in compliance 9# with the License. You may obtain a copy of the License at 10# 11# http://www.apache.org/licenses/LICENSE-2.0 12# 13# Unless required by applicable law or agreed to in writing, 14# software distributed under the License is distributed on an 15# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16# KIND, either express or implied. See the License for the 17# specific language governing permissions and limitations 18# under the License. 19# 20#************************************************************** 21 22package ExtensionsLst; 23 24use File::Spec; 25use LWP::UserAgent; 26use Digest::MD5; 27 28use base 'Exporter'; 29our @EXPORT = qw(DownloadExtensions GetExtensionList); 30 31 32=head1 NAME 33 34 ExtensionLst.pm - Functionality for the interpretation of the main/extensions.lst file. 35 36=head1 SYNOPSIS 37 38 For downloading extensions during build setup: 39 40 use ExtensionsLst; 41 ExtensionsLst::DownloadExtensions(); 42 43 For including extensions into the pack set: 44 45 use ExtensionsLst; 46 ExtensionsLst::GetExtensionList(@language_list); 47 48=head1 DESCRIPTION 49 50 The contents of the extensions.lst file are used at two times in 51 the process of building pack sets. 52 53 Once at the beginning right after configure is run the 54 DownloadExtensions() function determines the list of extensions 55 that are not present locally and downloads them. 56 57 The second time is after all modules are built (and the locally 58 built extensions are present) and the pack sets are created. For 59 every language (or sets of lanugages) a set of extensions is 60 collected and included into the pack set. 61 62 The content of the extensions.lst file is ignored when the --with-extensions option is given to configure. 63 64=cut 65 66 67# Number of the line in extensions.lst that is currently being processed. 68my $LineNo = 0; 69 70# Set to 1 to get a more verbose output, the default is 0. 71my $Debug = 0; 72 73 74=head3 Prepare 75 Check that some environment variables are properly set and then return the file name 76 of the 'extensions.lst' file, typically located in main/ beside 'ooo.lst'. 77=cut 78sub Prepare () 79{ 80 die "can not access environment varianle SRC_ROOT" if ! defined $ENV{'SRC_ROOT'}; 81 die "can not determine the platform: INPATH is not set" if ! defined $ENV{'INPATH'}; 82 die "can not determine solver directory: OUTDIR is not set" if ! defined $ENV{'OUTDIR'}; 83 die "can not determine download directory: TARFILE_LOCATION is not set" if ! defined $ENV{'TARFILE_LOCATION'}; 84 85 my $candidate = File::Spec->catfile($ENV{SRC_ROOT}, "extensions.lst"); 86 die "can not read file $candidate" if ! -r $candidate; 87 return $candidate; 88} 89 90 91 92=head 3 EvaluateOperator 93 Evaluate a single test statement like 'language = en.*'. 94 Special handling for operators '=', '==', and 'eq' which are all mapped to '=~'. 95 Therefore the right hand side may be a perl regexp. It is prefixed with '^'. 96 97 Other operators are at the moment only supported in the way that they are evaluated via eval(). 98=cut 99sub EvaluateOperator ($$$) 100{ 101 my ($left,$operator,$right) = @_; 102 103 my $result; 104 105 if ($operator =~ /^(=|==|eq)$/) 106 { 107 if ($left =~ /^$right$/) 108 { 109 $result = 1; 110 } 111 else 112 { 113 $result = 0; 114 } 115 } 116 elsif (eval($left.$operator.$right)) 117 { 118 $result = 1; 119 } 120 else 121 { 122 $result = 0; 123 } 124 125 return $result; 126} 127 128 129 130 131=head EvaluateTerm 132 Evaluate a string that contains a simple test term of the form 133 left operator right 134 with arbitrary spacing allowed around and between the three parts. 135 136 The left hand side is specially handled: 137 138 - When the left hand side is 'language' then it is replaced by 139 any of the given languages in turn. When the term evaluates to true for any of the languages then 140 true is returned. False is returned only when none of the given languages matches. 141 142 - When the left hand side consists only of upper case letters, digits, and '_' then it is 143 interpreted as the name of a environment variable. It is replaced by its value before the term 144 is evaluated. 145 146 - Any other left hand side is an error (at the moment.) 147=cut 148sub EvaluateTerm ($$) 149{ 150 my $term = shift; 151 my $languages = shift; 152 153 my $result; 154 155 if ($term =~ /^\s*(\w+)\s*(\W+)\s*(.*?)\s*$/) 156 { 157 my ($left,$operator,$right) = ($1,$2,$3); 158 159 if ($operator !~ /^=|==|eq$/) 160 { 161 die "unsupported operator $operator on line $LineNo"; 162 } 163 164 die "no right side in condition on line $LineNo ($term)" if ! defined $right; 165 166 if ($left =~ /^[A-Z_0-9]+$/) 167 { 168 # Uppercase words are interpreted as environment variables. 169 my $left_value = $ENV{$left}; 170 $left_value = "" if ! defined $left_value; 171 172 # We can check whether the condition is fulfilled right now. 173 $result = EvaluateOperator($left_value, $operator, $right); 174 } 175 elsif ($left eq "language") 176 { 177 if ($right eq "all") 178 { 179 $result = 1; 180 } 181 elsif ($#$languages>=0) 182 { 183 $result = 0; 184 for my $language (@$languages) 185 { 186 # Unify naming schemes. 187 $language =~ s/_/-/g; 188 $right =~ s/_/-/g; 189 190 # Evaluate language regexp. 191 $result = EvaluateOperator($language, $operator, $right) ? 1 : 0; 192 last if $result; 193 } 194 } 195 else 196 { 197 # The set of languages is not yet known. Return true 198 # to include the following entries. 199 $result = 1; 200 } 201 } 202 elsif ($left eq "platform") 203 { 204 if ($right eq "all") 205 { 206 $result = 1; 207 } 208 else 209 { 210 # Evaluate platform regexp. 211 $result = EvaluateOperator($ENV{'INPATH'}, $operator, $right) ? 1 : 0; 212 } 213 } 214 else 215 { 216 die "can not handle left hand side $left on line $LineNo"; 217 } 218 } 219 else 220 { 221 die "syntax error in expression on line $LineNo"; 222 } 223 224 return $result; 225} 226 227 228 229 230=head3 EvaluateSelector 231 Evaluate the given expression that is expected to be list of terms of the form 232 left-hand-side operator right-hand-side 233 that are separated by logical operators 234 && || 235 The expression is lazy evaluated left to right. 236=cut 237sub EvaluateSelector($$); 238sub EvaluateSelector($$) 239{ 240 my $expression = shift; 241 my $languages = shift; 242 243 my $result = ""; 244 245 if ($expression =~ /^\s*$/) 246 { 247 # Empty selector is always true. 248 return 1; 249 } 250 elsif ($expression =~ /^\s*(.*?)(&&|\|\|)\s*(.*)$/) 251 { 252 my ($term, $operator) = ($1,$2); 253 $expression = $3; 254 255 my $left_result = EvaluateTerm($term, $languages); 256 # Lazy evaluation of && 257 return 0 if ($operator eq "&&" && !$left_result); 258 # Lazy evaluation of || 259 return 1 if ($operator eq "||" && $left_result); 260 my $right_result = EvaluateSelector($expression, $languages); 261 262 if ($operator eq "&&") 263 { 264 return $left_result && $right_result; 265 } 266 else 267 { 268 return $left_result || $right_result; 269 } 270 } 271 elsif ($expression =~ /^\s*(.+?)\s*$/) 272 { 273 return EvaluateTerm($1, $languages); 274 } 275 else 276 { 277 die "invalid expression syntax on line $LineNo ($expression)"; 278 } 279} 280 281 282 283 284=head3 ProcessURL 285 Check that the given line contains an optional MD5 sum followed by 286 a URL for one of the protocols file, http, https, 287 followed by an optional file name (which is necessary when it is not the last part of the URL.) 288 Return an array that contains the protocol, the name, the original 289 URL, and the MD5 sum from the beginning of the line. 290 The name of the URL depends on its protocol: 291 - for http(s) the part of the URL after the last '/'. 292 - for file URLS it is everything after the protocol:// 293=cut 294sub ProcessURL ($) 295{ 296 my $line = shift; 297 298 # Check that we are looking at a valid URL. 299 if ($line =~ /^\s*((\w{32})\s+)?([a-zA-Z]+)(:\/\/.*?\/)([^\/ \t]+)(\s+\"[^\"]+\")?\s*$/) 300 { 301 my ($md5, $protocol, $url_name, $optional_name) = ($2,$3,$5,$6); 302 my $URL = $3.$4.$5; 303 304 die "invalid URL protocol on line $LineNo:\n$line\n" if $protocol !~ /(file|http|https)/; 305 306 # Determine the name. If an optional name is given then use that. 307 if (defined $optional_name) 308 { 309 die if $optional_name !~ /^\s+\"([^\"]+)\"$/; 310 $name = $1; 311 } 312 else 313 { 314 if ($protocol eq "file") 315 { 316 # For file URLs we use everything after :// as name, or the . 317 $URL =~ /:\/\/(.*)$/; 318 $name = $1; 319 } 320 else 321 { 322 # For http and https use the last part of the URL. 323 $name = $url_name; 324 } 325 } 326 327 return [$protocol, $name, $URL, $md5]; 328 } 329 else 330 { 331 die "invalid URL at line $LineNo:\n$line\n"; 332 } 333} 334 335 336 337 338=head3 ParseExtensionsLst 339 Parse the extensions.lst file. 340 341 Lines that contain only spaces or comments or are empty are 342 ignored. 343 344 Lines that contain a selector, ie a test enclosed in brackets, are 345 evaluated. The following lines, until the next selector, are 346 ignored when the selector evaluates to false. When an empty list 347 of languages is given then any 'language=...' test is evaluated as 348 true. 349 350 All other lines are expected to contain a URL optionally preceded 351 by an MD5 sum. 352=cut 353sub ParseExtensionsLst ($$) 354{ 355 my $file_name = shift; 356 my $languages = shift; 357 358 open my $in, "$file_name"; 359 360 my $current_selector_value = 1; 361 my @URLs = (); 362 363 while (<$in>) 364 { 365 my $line = $_; 366 $line =~ s/[\r\n]+//g; 367 ++$LineNo; 368 369 # Strip away comments. 370 next if $line =~ /^\s*#/; 371 372 # Ignore empty lines. 373 next if $line =~ /^\s*$/; 374 375 # Process selectors 376 if ($line =~ /^\s*\[\s*(.*)\s*\]\s*$/) 377 { 378 $current_selector_value = EvaluateSelector($1, $languages); 379 } 380 else 381 { 382 if ($current_selector_value) 383 { 384 push @URLs, ProcessURL($line); 385 } 386 } 387 } 388 389 close $in; 390 391 return @URLs; 392} 393 394 395 396 397=head3 Download 398 Download a set of files that are specified via URLs. 399 400 File URLs are ignored here because they point to extensions that have not yet been built. 401 402 For http URLs there may be an optional MD5 checksum. If it is present then downloaded 403 files that do not match that checksum are an error and lead to abortion of the current process. 404 Files that have already been downloaded are not downloaded again. 405=cut 406sub Download (@) 407{ 408 my @urls = @_; 409 410 my @missing = (); 411 my $download_path = $ENV{'TARFILE_LOCATION'}; 412 413 # First check which (if any) extensions have already been downloaded. 414 for my $entry (@urls) 415 { 416 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 417 418 # We can not check the existence of file URLs because they point to extensions that 419 # have yet to be built. 420 421 next if $protocol !~ /(http|https)/; 422 my $candidate = File::Spec->catfile($download_path, $name); 423 if ( ! -f $candidate) 424 { 425 push @missing, $entry; 426 } 427 elsif (defined $md5sum) 428 { 429 # Check that the MD5 sum is still correct. 430 # The datafile may have been updated with a new version of the extension that 431 # still has the same name but a different MD5 sum. 432 my $cur_oxt; 433 if ( ! open($cur_oxt, $candidate)) 434 { 435 # Can not read the extension. Download extension again. 436 push @missing, $entry; 437 unlink($candidate); 438 } 439 binmode($cur_oxt); 440 my $file_md5 = Digest::MD5->new->addfile(*$cur_oxt)->hexdigest; 441 close($cur_oxt); 442 if ($md5sum ne $file_md5) 443 { 444 # MD5 does not match. Download extension again. 445 print "extension $name has wrong MD5 and will be updated\n"; 446 push @missing, $entry; 447 unlink($candidate); 448 } 449 } 450 } 451 if ($#missing >= 0) 452 { 453 printf "downloading/updating %d extension%s\n", $#missing+1, $#missing>0 ? "s" : ""; 454 if ( ! -d $download_path) 455 { 456 mkdir File::Spec->catpath($download_path, "tmp") 457 || die "can not create tmp subdirectory of $download_path"; 458 } 459 } 460 else 461 { 462 print "all downloadable extensions present\n"; 463 return; 464 } 465 466 # Download the missing files. 467 for my $entry (@missing) 468 { 469 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 470 471 # Open a .part file for writing. 472 my $filename = File::Spec->catfile($download_path, $name); 473 my $temporary_filename = $filename . ".part"; 474 print "downloading to $temporary_filename\n"; 475 476 # Prepare md5 477 my $md5 = Digest::MD5->new(); 478 479 # Download the extension. 480 my $agent = LWP::UserAgent->new(); 481 $agent->timeout(120); 482 $agent->env_proxy; 483 my $last_was_redirect = 0; 484 my $response = $agent->get($URL); 485 486 # When download was successful then check the md5 checksum and rename the .part file 487 # into the actual extension name. 488 if ($response->is_success()) 489 { 490 my $content = $response->content; 491 open $out, ">$temporary_filename"; 492 binmode($out); 493 print $out $content; 494 $md5->add($content); 495 close $out; 496 if (defined $md5sum && length($md5sum)==32) 497 { 498 my $file_md5 = $md5->hexdigest(); 499 if ($md5sum eq $file_md5) 500 { 501 print "md5 is OK\n"; 502 } 503 else 504 { 505 unlink($temporary_filename) if ! $Debug; 506 die "downloaded file has the wrong md5 checksum: $file_md5 instead of $md5sum"; 507 } 508 } 509 else 510 { 511 print "md5 is not present\n"; 512 printf " is %s, length is %d\n", $md5sum, length(md5sum); 513 } 514 515 rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename"; 516 } 517 else 518 { 519 die "failed to download $URL"; 520 } 521 } 522} 523 524 525 526 527=head3 DownloadExtensions 528 This function is intended to be called during bootstrapping. It extracts the set of extensions 529 that will be used later, when the installation sets are built. 530 The set of languages is taken from the WITH_LANG environment variable. 531=cut 532sub DownloadExtensions () 533{ 534 if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'} 535 && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES") 536 { 537 my $full_file_name = Prepare(); 538 my $languages = [ "en_US" ]; 539 if (defined $ENV{'WITH_LANG'}) 540 { 541 @$languages = split(/\s+/, $ENV{'WITH_LANG'}); 542 foreach my $l (@$languages) 543 { 544 print "$l\n"; 545 } 546 } 547 my @urls = ParseExtensionsLst($full_file_name, $languages); 548 Download(@urls); 549 } 550 else 551 { 552 print "bundling of dictionaries is disabled.\n"; 553 } 554} 555 556 557 558 559=head3 GetExtensionList 560 This function is intended to be called when installation sets are built. 561 It expects two arguments: 562 - A protocol selector. Http URLs reference remotely located 563 extensions that will be bundled as-is into the installation 564 sets due to legal reasons. They are installed on first start 565 of the office. 566 File URLs reference extensions whose source code is part of 567 the repository. They are pre-registered when installation 568 sets are created. Their installation is finished when the 569 office is first started. 570 - A set of languages. This set determines which extensions 571 are returned and then included in an installation set. 572=cut 573sub GetExtensionList ($@) 574{ 575 my $protocol_selector = shift; 576 my @language_list = @_; 577 578 if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'} 579 && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES") 580 { 581 my $full_file_name = Prepare(); 582 my @urls = ParseExtensionsLst($full_file_name, \@language_list); 583 584 my @result = (); 585 for my $entry (@urls) 586 { 587 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 588 if ($protocol =~ /^$protocol_selector$/) 589 { 590 push @result, $name; 591 } 592 } 593 594 return @result; 595 } 596 else 597 { 598 # Bundling of dictionaires is disabled. 599 } 600 601 return (); 602} 603 604 6051; 606