1#************************************************************** 2# 3# Licensed to the Apache Software Foundation (ASF) under one 4# or more contributor license agreements. See the NOTICE file 5# distributed with this work for additional information 6# regarding copyright ownership. The ASF licenses this file 7# to you under the Apache License, Version 2.0 (the 8# "License"); you may not use this file except in compliance 9# with the License. You may obtain a copy of the License at 10# 11# http://www.apache.org/licenses/LICENSE-2.0 12# 13# Unless required by applicable law or agreed to in writing, 14# software distributed under the License is distributed on an 15# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16# KIND, either express or implied. See the License for the 17# specific language governing permissions and limitations 18# under the License. 19# 20#************************************************************** 21 22package ExtensionsLst; 23 24use File::Spec; 25use LWP::UserAgent; 26use Digest::MD5; 27 28use base 'Exporter'; 29our @EXPORT = qw(DownloadExtensions GetExtensionList); 30 31 32=head1 NAME 33 34 ExtensionLst.pm - Functionality for the interpretation of the main/extensions.lst file. 35 36=head1 SYNOPSIS 37 38 For downloading extensions during build setup: 39 40 use ExtensionsLst; 41 ExtensionsLst::DownloadExtensions(); 42 43 For including extensions into the pack set: 44 45 use ExtensionsLst; 46 ExtensionsLst::GetExtensionList(@language_list); 47 48=head1 DESCRIPTION 49 50 The contents of the extensions.lst file are used at two times in 51 the process of building pack sets. 52 53 Once at the beginning right after configure is run the 54 DownloadExtensions() function determines the list of extensions 55 that are not present locally and downloads them. 56 57 The second time is after all modules are built (and the locally 58 built extensions are present) and the pack sets are created. For 59 every language (or sets of lanugages) a set of extensions is 60 collected and included into the pack set. 61 62 The content of the extensions.lst file is ignored when the --with-extensions option is given to configure. 63 64=cut 65 66 67# Number of the line in extensions.lst that is currently being processed. 68my $LineNo = 0; 69 70 71=head3 Prepare 72 Check that some environment variables are properly set and then return the file name 73 of the 'extensions.lst' file, typically located in main/ beside 'ooo.lst'. 74=cut 75sub Prepare () 76{ 77 die "can not access environment varianle SRC_ROOT" if ! defined $ENV{'SRC_ROOT'}; 78 die "can not determine the platform: INPATH is not set" if ! defined $ENV{'INPATH'}; 79 die "can not determine solver directory: OUTDIR is not set" if ! defined $ENV{'OUTDIR'}; 80 die "can not determine download directory: TARFILE_LOCATION is not set" if ! defined $ENV{'TARFILE_LOCATION'}; 81 82 my $candidate = File::Spec->catfile($ENV{SRC_ROOT}, "extensions.lst"); 83 die "can not read file $candidate" if ! -r $candidate; 84 return $candidate; 85} 86 87 88 89=head 3 EvaluateOperator 90 Evaluate a single test statement like 'language = en.*'. 91 Special handling for operators '=', '==', and 'eq' which are all mapped to '=~'. 92 Therefore the right hand side may be a perl regexp. It is prefixed with '^'. 93 94 Other operators are at the moment only supported in the way that they are evaluated via eval(). 95=cut 96sub EvaluateOperator ($$$) 97{ 98 my ($left,$operator,$right) = @_; 99 100 my $result; 101 102 if ($operator =~ /^(=|==|eq)$/) 103 { 104 if ($left =~ /^$right$/) 105 { 106 $result = 1; 107 } 108 else 109 { 110 $result = 0; 111 } 112 } 113 elsif (eval($left.$operator.$right)) 114 { 115 $result = 1; 116 } 117 else 118 { 119 $result = 0; 120 } 121 122 return $result; 123} 124 125 126 127 128=head EvaluateTerm 129 Evaluate a string that contains a simple test term of the form 130 left operator right 131 with arbitrary spacing allowed around and between the three parts. 132 133 The left hand side is specially handled: 134 135 - When the left hand side is 'language' then it is replaced by 136 any of the given languages in turn. When the term evaluates to true for any of the languages then 137 true is returned. False is returned only when none of the given languages matches. 138 139 - When the left hand side consists only of upper case letters, digits, and '_' then it is 140 interpreted as the name of a environment variable. It is replaced by its value before the term 141 is evaluated. 142 143 - Any other left hand side is an error (at the moment.) 144=cut 145sub EvaluateTerm ($$) 146{ 147 my $term = shift; 148 my $languages = shift; 149 150 my $result; 151 152 if ($term =~ /^\s*(\w+)\s*(\W+)\s*(.*?)\s*$/) 153 { 154 my ($left,$operator,$right) = ($1,$2,$3); 155 156 if ($operator !~ /^=|==|eq$/) 157 { 158 die "unsupported operator $operator on line $LineNo"; 159 } 160 161 die "no right side in condition on line $LineNo ($term)" if ! defined $right; 162 163 if ($left =~ /^[A-Z_0-9]+$/) 164 { 165 # Uppercase words are interpreted as environment variables. 166 my $left_value = $ENV{$left}; 167 $left_value = "" if ! defined $left_value; 168 169 # We can check whether the condition is fullfilled right now. 170 $result = EvaluateOperator($left_value, $operator, $right); 171 } 172 elsif ($left eq "language") 173 { 174 if ($right eq "all") 175 { 176 $result = 1; 177 } 178 elsif ($#$languages>=0) 179 { 180 $result = 0; 181 for my $language (@$languages) 182 { 183 # Unify naming schemes. 184 $language =~ s/_/-/g; 185 $right =~ s/_/-/g; 186 187 # Evaluate language regexp. 188 $result = EvaluateOperator($language, $operator, $right) ? 1 : 0; 189 last if $result; 190 } 191 } 192 else 193 { 194 # The set of languages is not yet known. Return true 195 # to include the following entries. 196 $result = 1; 197 } 198 } 199 elsif ($left eq "platform") 200 { 201 if ($right eq "all") 202 { 203 $result = 1; 204 } 205 else 206 { 207 # Evaluate platform regexp. 208 $result = EvaluateOperator($ENV{'INPATH'}, $operator, $right) ? 1 : 0; 209 } 210 } 211 else 212 { 213 die "can not handle left hand side $left on line $LineNo"; 214 } 215 } 216 else 217 { 218 die "syntax error in expression on line $LineNo"; 219 } 220 221 return $result; 222} 223 224 225 226 227=head3 EvaluateSelector 228 Evaluate the given expression that is expected to be list of terms of the form 229 left-hand-side operator right-hand-side 230 that are separated by logical operators 231 && || 232 The expression is lazy evaluated left to right. 233=cut 234sub EvaluateSelector($$); 235sub EvaluateSelector($$) 236{ 237 my $expression = shift; 238 my $languages = shift; 239 240 my $result = ""; 241 242 if ($expression =~ /^\s*$/) 243 { 244 # Empty selector is always true. 245 return 1; 246 } 247 elsif ($expression =~ /^\s*(.*?)(&&|\|\|)\s*(.*)$/) 248 { 249 my ($term, $operator) = ($1,$2); 250 $expression = $3; 251 252 my $left_result = EvaluateTerm($term, $languages); 253 # Lazy evaluation of && 254 return 0 if ($operator eq "&&" && !$left_result); 255 # Lazy evaluation of || 256 return 1 if ($operator eq "||" && $left_result); 257 my $right_result = EvaluateSelector($expression, $languages); 258 259 if ($operator eq "&&") 260 { 261 return $left_result && $right_result; 262 } 263 else 264 { 265 return $left_result || $right_result; 266 } 267 } 268 elsif ($expression =~ /^\s*(.+?)\s$/) 269 { 270 return EvaluateTerm($1, $languages); 271 } 272 else 273 { 274 die "invalid expression syntax on line $LineNo ($expression)"; 275 } 276} 277 278 279 280 281=head3 ProcessURL 282 Check that the given line contains an optional MD5 sum followed by 283 a URL for one of the protocols file, http, https, 284 followed by an optional file name (which is necessary when it is not the last part of the URL.) 285 Return an array that contains the protocol, the name, the original 286 URL, and the MD5 sum from the beginning of the line. 287 The name of the URL depends on its protocol: 288 - for http(s) the part of the URL after the last '/'. 289 - for file URLS it is everything after the protocol:// 290=cut 291sub ProcessURL ($) 292{ 293 my $line = shift; 294 295 # Check that we are looking at a valid URL. 296 if ($line =~ /^\s*((\w{32})\s+)?([a-zA-Z]+)(:\/\/.*?\/)([^\/ \t]+)(\s+\"[^\"]+\")?\s*$/) 297 { 298 my ($md5, $protocol, $url_name, $optional_name) = ($2,$3,$5,$6); 299 my $URL = $3.$4.$5; 300 301 die "invalid URL protocol on line $LineNo:\n$line\n" if $protocol !~ /(file|http|https)/; 302 303 # Determine the name. If an optional name is given then use that. 304 if (defined $optional_name) 305 { 306 die if $optional_name !~ /^\s+\"([^\"]+)\"$/; 307 $name = $1; 308 } 309 else 310 { 311 if ($protocol eq "file") 312 { 313 # For file URLs we use everything after :// as name, or the . 314 $URL =~ /:\/\/(.*)$/; 315 $name = $1; 316 } 317 else 318 { 319 # For http and https use the last part of the URL. 320 $name = $url_name; 321 } 322 } 323 324 return [$protocol, $name, $URL, $md5]; 325 } 326 else 327 { 328 die "invalid URL at line $LineNo:\n$line\n"; 329 } 330} 331 332 333 334 335=head3 ParseExtensionsLst 336 Parse the extensions.lst file. 337 338 Lines that contain only spaces or comments or are empty are 339 ignored. 340 341 Lines that contain a selector, ie a test enclosed in brackets, are 342 evaluated. The following lines, until the next selector, are 343 ignored when the selector evaluates to false. When an empty list 344 of languages is given then any 'language=...' test is evaluated as 345 true. 346 347 All other lines are expected to contain a URL optionally preceded 348 by an MD5 sum. 349=cut 350sub ParseExtensionsLst ($$) 351{ 352 my $file_name = shift; 353 my $languages = shift; 354 355 open my $in, "$file_name"; 356 357 my $current_selector_value = 1; 358 my @URLs = (); 359 360 while (<$in>) 361 { 362 my $line = $_; 363 $line =~ s/[\r\n]+//g; 364 ++$LineNo; 365 366 # Strip away comments. 367 next if $line =~ /^\s*#/; 368 369 # Ignore empty lines. 370 next if $line =~ /^\s*$/; 371 372 # Process selectors 373 if ($line =~ /^\s*\[\s*(.*)\s*\]\s*$/) 374 { 375 $current_selector_value = EvaluateSelector($1, $languages); 376 } 377 else 378 { 379 if ($current_selector_value) 380 { 381 push @URLs, ProcessURL($line); 382 } 383 } 384 } 385 386 close $in; 387 388 return @URLs; 389} 390 391 392 393 394=head3 Download 395 Download a set of files that are specified via URLs. 396 397 File URLs are ignored here because they point to extensions that have not yet been built. 398 399 For http URLs there may be an optional MD5 checksum. If it is present then downloaded 400 files that do not match that checksum are an error and lead to abortion of the current process. 401 Files that have already been downloaded are not downloaded again. 402=cut 403sub Download (@) 404{ 405 my @urls = @_; 406 407 my @missing = (); 408 my $download_path = $ENV{'TARFILE_LOCATION'}; 409 410 # First check which (if any) extensions have already been downloaded. 411 for my $entry (@urls) 412 { 413 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 414 415 # We can not check the existence of file URLs because they point to extensions that 416 # have yet to be built. 417 418 next if $protocol ne "http"; 419 my $candidate = File::Spec->catfile($download_path, $name); 420 if ( ! -f $candidate) 421 { 422 push @missing, $entry; 423 } 424 } 425 if ($#missing >= 0) 426 { 427 printf "downloading %d missing extension%s\n", $#missing+1, $#missing>0 ? "s" : ""; 428 if ( ! -d $download_path) 429 { 430 mkdir File::Spec->catpath($download_path, "tmp") 431 || die "can not create tmp subdirectory of $download_path"; 432 } 433 } 434 else 435 { 436 print "all downloadable extensions present\n"; 437 return; 438 } 439 440 # Download the missing files. 441 for my $entry (@missing) 442 { 443 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 444 445 # Open a .part file for writing. 446 my $filename = File::Spec->catfile($download_path, $name); 447 my $temporary_filename = $filename . ".part"; 448 print "downloading to $temporary_filename\n"; 449 open my $out, ">$temporary_filename"; 450 binmode($out); 451 452 # Prepare md5 453 my $md5 = Digest::MD5->new(); 454 455 # Download the extension. 456 my $agent = LWP::UserAgent->new(); 457 $agent->timeout(10); 458 $agent->show_progress(1); 459 my $last_was_redirect = 0; 460 $agent->add_handler('response_redirect' 461 => sub{ 462 $last_was_redirect = 1; 463 return; 464 }); 465 $agent->add_handler('response_data' 466 => sub{ 467 if ($last_was_redirect) 468 { 469 $last_was_redirect = 0; 470 # Throw away the data we got so far. 471 $md5->reset(); 472 close $out; 473 open $out, ">$temporary_filename"; 474 binmode($out); 475 } 476 my($response,$agent,$h,$data)=@_; 477 print $out $data; 478 $md5->add($data); 479 }); 480 my $response = $agent->get($URL); 481 close $out; 482 483 # When download was successfull then check the md5 checksum and rename the .part file 484 # into the actual extension name. 485 if ($response->is_success()) 486 { 487 if (defined $md5sum && length($md5sum)==32) 488 { 489 my $file_md5 = $md5->hexdigest(); 490 if ($md5sum eq $file_md5) 491 { 492 print "md5 is OK\n"; 493 } 494 else 495 { 496 unlink($temporary_filename); 497 die "downloaded file has the wrong md5 checksum: $file_md5 instead of $md5sum"; 498 } 499 } 500 else 501 { 502 print "md5 is not present\n"; 503 printf " is %s, length is %d\n", $md5sum, length(md5sum); 504 } 505 506 rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename"; 507 } 508 else 509 { 510 die "failed to download $URL"; 511 } 512 } 513} 514 515 516 517 518=head3 DownloadExtensions 519 This function is intended to be called during bootstrapping. It extracts the set of extensions 520 that will be used later, when the installation sets are built. 521=cut 522sub DownloadExtensions () 523{ 524 my $full_file_name = Prepare(); 525 my @urls = ParseExtensionsLst($full_file_name, []); 526 Download(@urls); 527} 528 529 530 531 532=head3 GetExtensionList 533 This function is intended to be called when installation sets are built. 534 It expects two arguments: 535 - A protocol selector. Http URLs reference remotely located 536 extensions that will be bundled as-is into the installation 537 sets due to legal reasons. They are installed on first start 538 of the office. 539 File URLs reference extensions whose source code is part of 540 the repository. They are pre-registered when installation 541 sets are created. Their installation is finished when the 542 office is first started. 543 - A set of languages. This set determines which extensions 544 are returned and then included in an installation set. 545=cut 546sub GetExtensionList ($@) 547{ 548 my $protocol_selector = shift; 549 my @language_list = @_; 550 551 my $full_file_name = Prepare(); 552 my @urls = ParseExtensionsLst($full_file_name, \@language_list); 553 554 my @result = (); 555 for my $entry (@urls) 556 { 557 my ($protocol, $name, $URL, $md5sum) = @{$entry}; 558 if ($protocol =~ /^$protocol_selector$/) 559 { 560 push @result, $name; 561 } 562 } 563 564 return @result; 565} 566 567 5681; 569