1#************************************************************** 2# 3# Licensed to the Apache Software Foundation (ASF) under one 4# or more contributor license agreements. See the NOTICE file 5# distributed with this work for additional information 6# regarding copyright ownership. The ASF licenses this file 7# to you under the Apache License, Version 2.0 (the 8# "License"); you may not use this file except in compliance 9# with the License. You may obtain a copy of the License at 10# 11# http://www.apache.org/licenses/LICENSE-2.0 12# 13# Unless required by applicable law or agreed to in writing, 14# software distributed under the License is distributed on an 15# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16# KIND, either express or implied. See the License for the 17# specific language governing permissions and limitations 18# under the License. 19# 20#************************************************************** 21 22package ExtensionsLst; 23 24use File::Spec; 25use LWP::UserAgent; 26use Digest::MD5; 27 28use base 'Exporter'; 29our @EXPORT = qw(DownloadExtensions GetExtensionList); 30 31 32=head1 NAME 33 34 ExtensionLst.pm - Functionality for the interpretation of the main/extensions.lst file. 35 36=head1 SYNOPSIS 37 38 For downloading extensions during build setup: 39 40 use ExtensionsLst; 41 ExtensionsLst::DownloadExtensions(); 42 43 For including extensions into the pack set: 44 45 use ExtensionsLst; 46 ExtensionsLst::GetExtensionList(@language_list); 47 48=head1 DESCRIPTION 49 50 The contents of the extensions.lst file are used at two times in 51 the process of building pack sets. 52 53 Once at the beginning right after configure is run the 54 DownloadExtensions() function determines the list of extensions 55 that are not present locally and downloads them. 56 57 The second time is after all modules are built (and the locally 58 built extensions are present) and the pack sets are created. For 59 every language (or sets of lanugages) a set of extensions is 60 collected and included into the pack set. 61 62 The content of the extensions.lst file is ignored when the --with-extensions option is given to configure. 63 64=cut 65 66 67# Number of the line in extensions.lst that is currently being processed. 68my $LineNo = 0; 69 70# Set to 1 to get a more verbose output, the default is 0. 71my $Debug = 0; 72 73 74=head3 Prepare 75 Check that some environment variables are properly set and then return the file name 76 of the 'extensions.lst' file, typically located in main/ beside 'ooo.lst'. 77=cut 78sub Prepare () 79{ 80 die "can not access environment varianle SRC_ROOT" if ! defined $ENV{'SRC_ROOT'}; 81 die "can not determine the platform: INPATH is not set" if ! defined $ENV{'INPATH'}; 82 die "can not determine solver directory: OUTDIR is not set" if ! defined $ENV{'OUTDIR'}; 83 die "can not determine download directory: TARFILE_LOCATION is not set" if ! defined $ENV{'TARFILE_LOCATION'}; 84 85 my $candidate = File::Spec->catfile($ENV{SRC_ROOT}, "extensions.lst"); 86 die "can not read file $candidate" if ! -r $candidate; 87 return $candidate; 88} 89 90 91 92=head 3 EvaluateOperator 93 Evaluate a single test statement like 'language = en.*'. 94 Special handling for operators '=', '==', and 'eq' which are all mapped to '=~'. 95 Therefore the right hand side may be a perl regexp. It is prefixed with '^'. 96 97 Other operators are at the moment only supported in the way that they are evaluated via eval(). 98=cut 99sub EvaluateOperator ($$$) 100{ 101 my ($left,$operator,$right) = @_; 102 103 my $result; 104 105 if ($operator =~ /^(=|==|eq)$/) 106 { 107 if ($left =~ /^$right$/) 108 { 109 $result = 1; 110 } 111 else 112 { 113 $result = 0; 114 } 115 } 116 elsif (eval($left.$operator.$right)) 117 { 118 $result = 1; 119 } 120 else 121 { 122 $result = 0; 123 } 124 125 return $result; 126} 127 128 129 130 131=head EvaluateTerm 132 Evaluate a string that contains a simple test term of the form 133 left operator right 134 with arbitrary spacing allowed around and between the three parts. 135 136 The left hand side is specially handled: 137 138 - When the left hand side is 'language' then it is replaced by 139 any of the given languages in turn. When the term evaluates to true for any of the languages then 140 true is returned. False is returned only when none of the given languages matches. 141 142 - When the left hand side consists only of upper case letters, digits, and '_' then it is 143 interpreted as the name of a environment variable. It is replaced by its value before the term 144 is evaluated. 145 146 - Any other left hand side is an error (at the moment.) 147=cut 148sub EvaluateTerm ($$) 149{ 150 my $term = shift; 151 my $languages = shift; 152 153 my $result; 154 155 if ($term =~ /^\s*(\w+)\s*(\W+)\s*(.*?)\s*$/) 156 { 157 my ($left,$operator,$right) = ($1,$2,$3); 158 159 if ($operator !~ /^=|==|eq$/) 160 { 161 die "unsupported operator $operator on line $LineNo"; 162 } 163 164 die "no right side in condition on line $LineNo ($term)" if ! defined $right; 165 166 if ($left =~ /^[A-Z_0-9]+$/) 167 { 168 # Uppercase words are interpreted as environment variables. 169 my $left_value = $ENV{$left}; 170 $left_value = "" if ! defined $left_value; 171 172 # We can check whether the condition is fulfilled right now. 173 $result = EvaluateOperator($left_value, $operator, $right); 174 } 175 elsif ($left eq "language") 176 { 177 if ($right eq "all") 178 { 179 $result = 1; 180 } 181 elsif ($#$languages>=0) 182 { 183 $result = 0; 184 for my $language (@$languages) 185 { 186 # Unify naming schemes. 187 $language =~ s/_/-/g; 188 $right =~ s/_/-/g; 189 190 # Evaluate language regexp. 191 $result = EvaluateOperator($language, $operator, $right) ? 1 : 0; 192 last if $result; 193 } 194 } 195 else 196 { 197 # The set of languages is not yet known. Return true 198 # to include the following entries. 199 $result = 1; 200 } 201 } 202 elsif ($left eq "platform") 203 { 204 if ($right eq "all") 205 { 206 $result = 1; 207 } 208 else 209 { 210 # Evaluate platform regexp. 211 $result = EvaluateOperator($ENV{'INPATH'}, $operator, $right) ? 1 : 0; 212 } 213 } 214 else 215 { 216 die "can not handle left hand side $left on line $LineNo"; 217 } 218 } 219 else 220 { 221 die "syntax error in expression on line $LineNo"; 222 } 223 224 return $result; 225} 226 227 228 229 230=head3 EvaluateSelector 231 Evaluate the given expression that is expected to be list of terms of the form 232 left-hand-side operator right-hand-side 233 that are separated by logical operators 234 && || 235 The expression is lazy evaluated left to right. 236=cut 237sub EvaluateSelector($$); 238sub EvaluateSelector($$) 239{ 240 my $expression = shift; 241 my $languages = shift; 242 243 my $result = ""; 244 245 if ($expression =~ /^\s*$/) 246 { 247 # Empty selector is always true. 248 return 1; 249 } 250 elsif ($expression =~ /^\s*(.*?)(&&|\|\|)\s*(.*)$/) 251 { 252 my ($term, $operator) = ($1,$2); 253 $expression = $3; 254 255 my $left_result = EvaluateTerm($term, $languages); 256 # Lazy evaluation of && 257 return 0 if ($operator eq "&&" && !$left_result); 258 # Lazy evaluation of || 259 return 1 if ($operator eq "||" && $left_result); 260 my $right_result = EvaluateSelector($expression, $languages); 261 262 if ($operator eq "&&") 263 { 264 return $left_result && $right_result; 265 } 266 else 267 { 268 return $left_result || $right_result; 269 } 270 } 271 elsif ($expression =~ /^\s*(.+?)\s*$/) 272 { 273 return EvaluateTerm($1, $languages); 274 } 275 else 276 { 277 die "invalid expression syntax on line $LineNo ($expression)"; 278 } 279} 280 281 282 283 284=head3 ProcessURL 285 Check that the given line contains an optional MD5 sum followed by 286 a URL for one of the protocols file, http, https, 287 followed by an optional file name (which is necessary when it is not the last part of the URL.) 288 Return an array that contains the protocol, the name, the original 289 URL, and the MD5 sum from the beginning of the line. 290 The name of the URL depends on its protocol: 291 - for http(s) the part of the URL after the last '/'. 292 - for file URLS it is everything after the protocol:// 293=cut 294sub ProcessURL ($) 295{ 296 my $line = shift; 297 298 # Check that we are looking at a valid URL. 299 if ($line =~ /^\s*((\w{32})\s+)?([a-zA-Z]+)(:\/\/.*?\/)([^\/ \t]+)(\s+\"[^\"]+\")?\s*$/) 300 { 301 my ($md5, $protocol, $url_name, $optional_name) = ($2,$3,$5,$6); 302 my $URL = $3.$4.$5; 303 304 die "invalid URL protocol on line $LineNo:\n$line\n" if $protocol !~ /(file|http|https)/; 305 306 # Determine the name. If an optional name is given then use that. 307 if (defined $optional_name) 308 { 309 die if $optional_name !~ /^\s+\"([^\"]+)\"$/; 310 $name = $1; 311 } 312 else 313 { 314 if ($protocol eq "file") 315 { 316 # For file URLs we use everything after :// as name, or the . 317 $URL =~ /:\/\/(.*)$/; 318 $name = $1; 319 } 320 else 321 { 322 # For http and https use the last part of the URL. 323 $name = $url_name; 324 } 325 } 326 327 return [$protocol, $name, $URL, $md5]; 328 } 329 else 330 { 331 die "invalid URL at line $LineNo:\n$line\n"; 332 } 333} 334 335 336 337 338=head3 ParseExtensionsLst 339 Parse the extensions.lst file. 340 341 Lines that contain only spaces or comments or are empty are 342 ignored. 343 344 Lines that contain a selector, ie a test enclosed in brackets, are 345 evaluated. The following lines, until the next selector, are 346 ignored when the selector evaluates to false. When an empty list 347 of languages is given then any 'language=...' test is evaluated as 348 true. 349 350 All other lines are expected to contain a URL optionally preceded 351 by an MD5 sum. 352=cut 353sub ParseExtensionsLst ($$) 354{ 355 my $file_name = shift; 356 my $languages = shift; 357 358 open my $in, "$file_name"; 359 360 my $current_selector_value = 1; 361 my @URLs = (); 362 363 while (<$in>) 364 { 365 my $line = $_; 366 $line =~ s/[\r\n]+//g; 367 ++$LineNo; 368 369 # Strip away comments. 370 next if $line =~ /^\s*#/; 371 372 # Ignore empty lines. 373 next if $line =~ /^\s*$/; 374 375 # Process selectors 376 if ($line =~ /^\s*\[\s*(.*)\s*\]\s*$/) 377 { 378 $current_selector_value = EvaluateSelector($1, $languages); 379 } 380 else 381 { 382 if ($current_selector_value) 383 { 384 push @URLs, ProcessURL($line); 385 } 386 } 387 } 388 389 close $in; 390 391 return @URLs; 392} 393 394 395 396 397=head3 Download 398 Download a set of files that are specified via URLs. 399 400 File URLs are ignored here because they point to extensions that have not yet been built. 401 402 For http URLs there may be an optional MD5 checksum. If it is present then downloaded 403 files that do not match that checksum are an error and lead to abortion of the current process. 404 Files that have already been downloaded are not downloaded again. 405=cut 406sub Download (@) 407{ 408 my @urls = @_; 409 410 my @missing = (); 411 my $download_path = $ENV{'TARFILE_LOCATION'}; 412 413 # First check which (if any) extensions have already been downloaded. 414 for my $entry (@urls) 415 { 416 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 417 418 # We can not check the existence of file URLs because they point to extensions that 419 # have yet to be built. 420 421 next if $protocol ne "http"; 422 my $candidate = File::Spec->catfile($download_path, $name); 423 if ( ! -f $candidate) 424 { 425 push @missing, $entry; 426 } 427 elsif (defined $md5sum) 428 { 429 # Check that the MD5 sum is still correct. 430 # The datafile may have been updated with a new version of the extension that 431 # still has the same name but a different MD5 sum. 432 my $cur_oxt; 433 if ( ! open($cur_oxt, $candidate)) 434 { 435 # Can not read the extension. Download extension again. 436 push @missing, $entry; 437 unlink($candidate); 438 } 439 binmode($cur_oxt); 440 my $file_md5 = Digest::MD5->new->addfile(*$cur_oxt)->hexdigest; 441 close($cur_oxt); 442 if ($md5sum ne $file_md5) 443 { 444 # MD5 does not match. Download extension again. 445 print "extension $name has wrong MD5 and will be updated\n"; 446 push @missing, $entry; 447 unlink($candidate); 448 } 449 } 450 } 451 if ($#missing >= 0) 452 { 453 printf "downloading/updating %d extension%s\n", $#missing+1, $#missing>0 ? "s" : ""; 454 if ( ! -d $download_path) 455 { 456 mkdir File::Spec->catpath($download_path, "tmp") 457 || die "can not create tmp subdirectory of $download_path"; 458 } 459 } 460 else 461 { 462 print "all downloadable extensions present\n"; 463 return; 464 } 465 466 # Download the missing files. 467 for my $entry (@missing) 468 { 469 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 470 471 system( 472 $ENV{'JAVAINTERPRETER'}, 473 "-cp", 474 File::Spec->catfile( 475 File::Spec->catfile($ENV{'SOLARENV'}, $ENV{'INPATH'}), 476 "class"), 477 "AOOJavaDownloader", 478 $name, 479 $URL, 480 'MD5', 481 $md5sum); 482 483 if ($? != 0) 484 { 485 die "failed to download $URL"; 486 } 487 } 488} 489 490 491 492 493=head3 DownloadExtensions 494 This function is intended to be called during bootstrapping. It extracts the set of extensions 495 that will be used later, when the installation sets are built. 496 The set of languages is taken from the WITH_LANG environment variable. 497=cut 498sub DownloadExtensions () 499{ 500 if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'} 501 && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES") 502 { 503 my $full_file_name = Prepare(); 504 my $languages = [ "en_US" ]; 505 if (defined $ENV{'WITH_LANG'}) 506 { 507 @$languages = split(/\s+/, $ENV{'WITH_LANG'}); 508 foreach my $l (@$languages) 509 { 510 print "$l\n"; 511 } 512 } 513 my @urls = ParseExtensionsLst($full_file_name, $languages); 514 Download(@urls); 515 } 516 else 517 { 518 print "bundling of dictionaries is disabled.\n"; 519 } 520} 521 522 523 524 525=head3 GetExtensionList 526 This function is intended to be called when installation sets are built. 527 It expects two arguments: 528 - A protocol selector. Http URLs reference remotely located 529 extensions that will be bundled as-is into the installation 530 sets due to legal reasons. They are installed on first start 531 of the office. 532 File URLs reference extensions whose source code is part of 533 the repository. They are pre-registered when installation 534 sets are created. Their installation is finished when the 535 office is first started. 536 - A set of languages. This set determines which extensions 537 are returned and then included in an installation set. 538=cut 539sub GetExtensionList ($@) 540{ 541 my $protocol_selector = shift; 542 my @language_list = @_; 543 544 if (defined $ENV{'ENABLE_BUNDLED_DICTIONARIES'} 545 && $ENV{'ENABLE_BUNDLED_DICTIONARIES'} eq "YES") 546 { 547 my $full_file_name = Prepare(); 548 my @urls = ParseExtensionsLst($full_file_name, \@language_list); 549 550 my @result = (); 551 for my $entry (@urls) 552 { 553 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 554 if ($protocol =~ /^$protocol_selector$/) 555 { 556 push @result, $name; 557 } 558 } 559 560 return @result; 561 } 562 else 563 { 564 # Bundling of dictionaires is disabled. 565 } 566 567 return (); 568} 569 570 5711; 572