#!/usr/bin/perl

#**************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#**************************************************************

=head1 NAME

    download_external_libraries.pl - Load missing tarballs specified in main/external_libs.lst.

=head1 SYNOPSIS

    For downloading external libraries (typically from the main/bootstrap script):

    download_external_libraries(<data-file-name>);

=head1 DESCRIPTION

    The contents of the main/external_libs.lst file are used to determine the
    external library tarballs that are missing from ext_sources/.

    Individual libraries can be ignored depending on the values of environment variables.

    Format of the main/external_libs.lst file:

    The file is line based.
    Comments start with a # and go to the end of the line and are ignored.
    Lines that are empty or contain only spaces and/or comments are ignored.

    All other lines can have one of two forms:
    - A variable definition of the form <name>=<value>.
    - A conditional block start in the form "if (<expression>)"

    Variables defined in a conditional block are only visible in this block and
    replace the definition of global variables and variables earlier in the same
    block.
    Some variables have special names:
    - MD5 is the expected MD5 checksum of the library tarball.
    - SHA1 is the expected SHA1 checksum of the library tarball.
    - name is the base name under which the tarball is stored.
    - URL1 to URL9 specify from where to download the tarball.  The urls are tried in order.
      The first successful download (download completed and checksum match) stops the iteration.

    Expressions are explained below in the comment of EvaluateExpression().

    A library is only regarded if its conditional expression evaluates to 1.

    Example:

    DefaultSite=http://some-internet-site.org/
    if ( true )
        MD5 = 0123456789abcdef0123456789abcdef
        name = library-1.0.tar.gz
        URL1 = http://some-other-internet-site.org/another-name.tgz
        URL2 = $(DefaultSite)$(MD5)-$(name)

    This tries to load a library first from some-other-internet-site.org and if
    that fails from some-internet-site.org.  The library is stored as $(MD5)-$(name)
    even when it is loaded as another-name.tgz.

=cut


use strict;
use warnings;
# The subs in this file carry prototypes and are called before they are
# defined, so perl can not check those calls.  Silence only that notice.
no warnings 'prototype';

use File::Spec;
use File::Path;
use File::Basename;
use Digest::MD5;
use Digest::SHA;
use URI;

# Selects the download implementation in DownloadFile(): LWP::Simple when
# true, LWP::UserAgent otherwise.
my $simple = 1;

# "use" is executed at compile time, so wrapping it into a run time
# "if ($simple)" never had an effect: both modules are always loaded.
use LWP::Simple;
use LWP::UserAgent;

my $Debug = 1;

# Variables of the "if" block that is currently being read (undef outside of any block).
my $LocalEnvironment = undef;
# Variables that are defined before the first "if" block.
my $GlobalEnvironment = {};
# Download requests, one [$name, $checksum, \@urls] entry per missing library.
my @Missing = ();




=head3 ProcessDataFile

    Read the data file, typically named main/external_libs.lst, find the external
    library tarballs that are not yet present in ext_sources/ and download them.

    Dies when the file does not exist, can not be opened or contains a line
    that is neither an "if" statement nor a variable definition.

=cut
sub ProcessDataFile ($)
{
    my $filename = shift;

    die "can not open data file $filename" if ! -e $filename;

    open(my $in, '<', $filename)
        or die "can not open data file $filename: $!";
    while (my $line = <$in>)
    {
        # Remove leading and trailing space and comments
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        $line =~ s/\s*#.*$//;

        # Ignore empty lines.
        next if $line eq "";

        # An "if" statement starts a new block.
        if ($line =~ /^\s*if\s*\(\s*(.*?)\s*\)\s*$/)
        {
            my $selector = $1;

            # Finish the previous block before its variables are replaced.
            ProcessLastBlock();

            $LocalEnvironment = { 'selector' => $selector };
        }

        # Lines of the form name = value define a variable.  The name must not
        # contain a "=" so that a value like "http://host/path?a=b" is not
        # split at its last "=".
        elsif ($line =~ /^\s*([^\s=]+)\s*=\s*(.*?)\s*$/)
        {
            my ($variable_name, $value) = ($1, $2);
            if (defined $LocalEnvironment)
            {
                $LocalEnvironment->{$variable_name} = $value;
            }
            else
            {
                $GlobalEnvironment->{$variable_name} = $value;
            }
        }
        else
        {
            die "can not parse line $line\n";
        }
    }
    close($in);

    # The last block is not followed by another "if" statement.
    ProcessLastBlock();

    Download();
}




=head3 ProcessLastBlock

    Process the last definition of an external library.
    If there is no last block, which is the case for the first "if" statement,
    then the call is ignored.

    Dies when the block is selected but does not define the variable 'name'.

=cut
sub ProcessLastBlock ()
{
    # Return if no block is defined.
    return if ! defined $LocalEnvironment;

    my $name = GetValue('name');

    # Ignore the block if the selector does not match.
    if ( ! EvaluateExpression(SubstituteVariables($LocalEnvironment->{'selector'})))
    {
        printf("ignoring %s because its prerequisites are not fulfilled\n",
               defined $name ? $name : "<unnamed library>");
    }
    else
    {
        # Without a name neither the presence check nor the download can
        # address the right file.
        die "block 'if (".$LocalEnvironment->{'selector'}.")' does not define a name\n"
            if ! defined $name;

        my $checksum = GetChecksum();

        if ( ! IsPresent($name, $checksum))
        {
            AddDownloadRequest($name, $checksum);
        }
    }
}




=head3 AddDownloadRequest($name, $checksum)

    Add a request for downloading the library $name to @Missing.
    Collect all available URL[1-9] variables as source URLs.

=cut
sub AddDownloadRequest ($$)
{
    my ($name, $checksum) = @_;

    print "adding download request for $name\n";

    # The plain URL variable comes first, followed by URL1 up to URL9.
    my @variable_names = ('URL');
    push @variable_names, 'URL'.$_ foreach 1 .. 9;

    # Keep only the variables that are defined and expand their values.
    my @sources = ();
    foreach my $variable_name (@variable_names)
    {
        my $template = GetValue($variable_name);
        push @sources, SubstituteVariables($template) if defined $template;
    }

    push @Missing, [$name, $checksum, \@sources];
}




=head3 GetChecksum()

    Look up the checksum of the library in the current scope.  MD5 is looked
    up first, SHA1 second; a variable that is not defined or empty is skipped.
    Returns a reference to a hash with two entries:
        'type' is either 'MD5' or 'SHA1', the type or algorithm of the checksum,
        'value' is the actual checksum
    When neither variable has a value then undef is returned.

=cut
sub GetChecksum()
{
    foreach my $type ('MD5', 'SHA1')
    {
        my $value = GetValue($type);
        next unless defined $value && $value ne "";

        return { 'type' => $type, 'value' => $value };
    }

    return undef;
}




=head3 GetValue($variable_name)

    Return the value of the variable with name $variable_name.  A definition
    in the local environment takes precedence over one in the global
    environment.  The result is undef when the variable is defined in neither.

=cut
sub GetValue ($)
{
    my ($variable_name) = @_;

    my $local_value = $LocalEnvironment->{$variable_name};

    return defined $local_value
        ? $local_value
        : $GlobalEnvironment->{$variable_name};
}



=head3 SubstituteVariables($text)

    Replace all references to variables in $text with the respective variable values.
    This is done repeatedly until no variable reference remains.

=cut
sub SubstituteVariables ($)
{
    my ($text) = @_;

    # Upper bound for the number of replacements; protects against variables
    # that refer to each other.
    my $remaining_substitutions = 100;

    # Each round replaces the first $(name) reference that is left in $text.
    while (my ($head, $name, $tail) = $text =~ /^(.*?)\$\(([^)]+)\)(.*)$/)
    {
        my $value = GetValue($name);
        defined $value
            or die "can not evaluate variable $name";

        $text = $head . $value . $tail;

        --$remaining_substitutions > 0
            or die "(probably) detected an infinite recursion in variable definitions";
    }

    return $text;
}




=head3 EvaluateExpression($expression)

    Evaluate the $expression of an "if" statement to either 0 or 1.  It can
    be a single term (see EvaluateTerm for a description), or several terms
    separated by either all ||s or &&s.  A term can also be an expression
    enclosed in parentheses.

    Dies when && and || are mixed on the same level.

=cut
sub EvaluateExpression ($)
{
    my $expression = shift;

    # Replace innermost sub expressions enclosed in parentheses by their
    # value until no parentheses are left.
    while ($expression =~ /^(.*)\(([^\(\)]+)\)(.*)$/)
    {
        my ($before, $inner, $after) = ($1, $2, $3);
        my $replacement = EvaluateExpression($inner) ? " true " : " false ";
        $expression = $before . $replacement . $after;
    }

    my $has_and = $expression =~ /&&/;
    my $has_or = $expression =~ /\|\|/;

    die "expression can contain either && or || but not both at the same time"
        if $has_and && $has_or;

    if ($has_and)
    {
        # A conjunction is decided by its first term that is not fulfilled.
        foreach my $term (split (/\s*&&\s*/,$expression))
        {
            return 0 if ! EvaluateTerm($term);
        }
        return 1;
    }

    if ($has_or)
    {
        # A disjunction is decided by its first term that is fulfilled.
        foreach my $term (split (/\s*\|\|\s*/,$expression))
        {
            return 1 if EvaluateTerm($term);
        }
        return 0;
    }

    # Neither && nor ||: the expression is a single term.
    return EvaluateTerm($expression);
}




=head3 EvaluateTerm($term)

    Evaluate the $term to either 0 or 1.
    A term is either the literal "true", which evaluates to 1, the literal
    "false", which evaluates to 0, or an expression of the form NAME==VALUE
    or NAME!=VALUE.  NAME is the name of an environment variable and VALUE
    any string.  VALUE may be empty.

=cut
sub EvaluateTerm ($)
{
    my $term = shift;

    # The value is matched non-greedily so that whitespace after it, as in
    # "( NAME == value )", does not become a part of the value.
    if ($term =~ /^\s*([a-zA-Z_0-9]+)\s*(==|!=)\s*(.*?)\s*$/)
    {
        my ($variable_name, $operator, $given_value) = ($1,$2,$3);

        # An environment variable that is not set compares like an empty one.
        my $variable_value = $ENV{$variable_name};
        $variable_value = "" if ! defined $variable_value;

        my $is_equal = $variable_value eq $given_value;
        if ($operator eq "==")
        {
            return $is_equal ? 1 : 0;
        }
        else
        {
            # The pattern above admits only == and !=.
            return $is_equal ? 0 : 1;
        }
    }
    elsif ($term =~ /^\s*true\s*$/i)
    {
        return 1;
    }
    elsif ($term =~ /^\s*false\s*$/i)
    {
        return 0;
    }
    else
    {
        die "term $term is not of the form <environment-variable> (==|!=) <value>";
    }
}




=head3 IsPresent($name, $given_checksum)

    Check if an external library tar ball with the basename $name already
    exists in the target directory TARFILE_LOCATION.  The basename is
    prefixed with the MD5 or SHA1 checksum.
    If the file exists then its checksum is compared to the given one.  A file
    with a wrong checksum is deleted.

    $given_checksum is a hash reference as returned by GetChecksum() or undef.
    Without a checksum any file named <something>-$name counts as present
    because its content can not be verified.

    Returns 1 when the tar ball is present (and valid) and 0 otherwise.
    Dies when the checksum type is neither MD5 nor SHA1 or when an existing
    file can not be read.

=cut
sub IsPresent ($$)
{
    my ($name, $given_checksum) = @_;

    if ( ! defined $given_checksum)
    {
        # DownloadFile() stores files without a given checksum under the MD5
        # of their content, which is not known here.  Accept any prefix, like
        # ProvideSpecialTarball() does.
        my @candidates = grep { -f $_ }
            glob(File::Spec->catfile($ENV{'TARFILE_LOCATION'}, "*-" . $name));
        return 0 unless scalar @candidates > 0;

        print "no checksum given, can not verify\n";
        return 1;
    }

    my $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $given_checksum->{'value'}."-".$name);
    return 0 unless -f $filename;

    # File exists.  Check if its checksum is correct.
    my $digest;
    if ($given_checksum->{'type'} eq "MD5")
    {
        $digest = Digest::MD5->new();
    }
    elsif ($given_checksum->{'type'} eq "SHA1")
    {
        $digest = Digest::SHA->new("1");
    }
    else
    {
        die "unsupported checksum type (not MD5 or SHA1)";
    }

    open(my $in, '<', $filename)
        or die "can not open $filename for reading: $!";
    # Tar balls are binary data; line end translation would change the digest.
    binmode($in);
    $digest->addfile($in);
    close($in);
    my $checksum = $digest->hexdigest();

    # hexdigest() returns lower case digits; accept upper case ones in the data file.
    if (lc($given_checksum->{'value'}) ne $checksum)
    {
        # Checksum does not match.  Delete the file.
        print "$name exists, but checksum does not match => deleting\n";
        unlink($filename);
        return 0;
    }
    else
    {
        printf("%s exists, %s checksum is OK\n", $name, $given_checksum->{'type'});
        return 1;
    }
}




=head3 Download

    Download a set of files specified by @Missing.

    The URLs of a file are tried in order until DownloadFile() reports success.
    When a checksum is given then a downloaded file that does not match it is
    discarded by DownloadFile() and the next URL is tried.
    A warning is printed for every file that could not be loaded from any of its URLs.

=cut
sub Download ()
{
    my $download_path = $ENV{'TARFILE_LOCATION'};

    if (scalar @Missing == 0)
    {
        print "all external libraries present\n";
        return;
    }

    printf("downloading %d missing tar ball%s to %s\n",
           scalar @Missing, scalar @Missing>1 ? "s" : "",
           $download_path);

    # Download the missing files.
    for my $item (@Missing)
    {
        my ($name, $checksum, $urls) = @$item;

        # Files with a known checksum are stored with the checksum as prefix.
        my $target_name = defined $checksum
            ? $checksum->{'value'}."-".$name
            : $name;

        my $downloaded = 0;
        foreach my $url (@$urls)
        {
            if (DownloadFile($target_name, $url, $checksum))
            {
                $downloaded = 1;
                last;
            }
        }

        warn "could not download $name from any of its URLs\n" if ! $downloaded;
    }
}




=head3 DownloadFile($name,$URL,$checksum)

    Download a single external library tarball.  Its origin is given by $URL.

Its destination is $(TARFILE_LOCATION)/$name.  When no checksum is given then
the MD5 of the downloaded data is prepended to $name.  The data is written to a
".part" file first, which is renamed only after the download (and the checksum
comparison, when a checksum is given) succeeded.

Returns 1 on success and 0 when the download failed or the checksum does not
match.  Dies when the checksum type is not supported or when the ".part" file
can not be created or renamed.

=cut
sub DownloadFile ($$$)
{
    my $name = shift;
    my $URL = shift;
    my $checksum = shift;

    my $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $name);

    my $temporary_filename = $filename . ".part";

    print "downloading to $temporary_filename\n";

    # Prepare checksum
    my $digest;
    if (defined $checksum && $checksum->{'type'} eq "SHA1")
    {
        # Use SHA1 only when explicitly requested (by the presence of a "SHA1=..." line.)
        $digest = Digest::SHA->new("1");
    }
    elsif ( ! defined $checksum || $checksum->{'type'} eq "MD5")
    {
        # Use MD5 when explicitly requested or when no checksum type is given.
        $digest = Digest::MD5->new();
    }
    else
    {
        die "checksum type ".$checksum->{'type'}." is not supported";
    }

    # Download the extension.
    my $success = 0;
    if ($simple)
    {
        my $content = LWP::Simple::get($URL);
        $success = defined $content;
        if ($success)
        {
            open(my $out, '>', $temporary_filename)
                or die "can not open $temporary_filename for writing: $!";
            binmode($out);
            my $written = print $out $content;
            # Buffered write errors may show up only when the file is closed.
            my $closed = close($out);
            if ($written && $closed)
            {
                $digest->add($content);
            }
            else
            {
                print "can not write $temporary_filename: $!\n";
                $success = 0;
            }
        }
        else
        {
            print "download from $URL failed\n";
        }
    }
    else
    {
        open(my $out, '>', $temporary_filename)
            or die "can not open $temporary_filename for writing: $!";
        binmode($out);

        my $agent = LWP::UserAgent->new();
        $agent->timeout(120);
        $agent->env_proxy;
        $agent->show_progress(1);
        my $last_was_redirect = 0;
        $agent->add_handler('response_redirect'
                            => sub{
                                $last_was_redirect = 1;
                                return;
                            });
        $agent->add_handler('response_data'
                            => sub{
                                my ($response, $ua, $handler, $data) = @_;
                                if ($last_was_redirect)
                                {
                                    $last_was_redirect = 0;
                                    # Throw away the data we got so far.
                                    $digest->reset();
                                    close $out;
                                    open($out, '>', $temporary_filename)
                                        or die "can not open $temporary_filename for writing: $!";
                                    binmode($out);
                                }
                                print $out $data
                                    or die "can not write $temporary_filename: $!";
                                $digest->add($data);
                                # The handler is called for the next chunk only
                                # when it returns a true value.
                                return 1;
                            });

        my $response = $agent->get($URL);
        # NOTE(review): LWP is expected to record a handler that died in the
        # X-Died response header instead of propagating the exception - confirm.
        $success = $response->is_success() && ! defined $response->header('X-Died');
        if ( ! close($out))
        {
            print "can not write $temporary_filename: $!\n";
            $success = 0;
        }
    }

    # When download was successful then check the checksum and rename the .part file
    # into the actual extension name.
    if ($success)
    {
        my $file_checksum = $digest->hexdigest();
        if (defined $checksum)
        {
            # hexdigest() returns lower case digits; accept upper case ones in the data file.
            if (lc($checksum->{'value'}) eq $file_checksum)
            {
                printf("%s checksum is OK\n", $checksum->{'type'});
            }
            else
            {
                unlink($temporary_filename);
                printf("    %s checksum does not match (%s instead of %s)\n",
                       $checksum->{'type'},
                       $file_checksum,
                       $checksum->{'value'});
                return 0;
            }
        }
        else
        {
            # The datafile does not contain a checksum to match against.
            # Display the one that was calculated for the downloaded file so that
            # it can be integrated manually into the data file.
            printf("checksum not given, md5 of file is %s\n", $file_checksum);
            $filename = File::Spec->catfile($ENV{'TARFILE_LOCATION'}, $file_checksum . "-" . $name);
        }

        rename($temporary_filename, $filename) || die "can not rename $temporary_filename to $filename";
        return 1;
    }
    else
    {
        unlink($temporary_filename);
        print "    download failed\n";
        return 0;
    }
}




=head3 CheckDownloadDestination ()

    Make sure that the download destination $TARFILE_LOCATION does exist.  If
    not, then the directory is created.

    Dies when TARFILE_LOCATION is not set or empty or when the directory does
    not exist after the attempt to create it.

=cut
sub CheckDownloadDestination ()
{
    my $destination = $ENV{'TARFILE_LOCATION'};
    die "ERROR: no destination defined! please set TARFILE_LOCATION!"
        if ! defined $destination || $destination eq "";

    if ( ! -d $destination)
    {
        File::Path::make_path($destination);
        die "ERROR: can't create \$TARFILE_LOCATION" if ! -d $destination;
    }
}




=head3 ProvideSpecialTarball ($url,$name,$name_converter)

    A few tarballs need special handling.  That is done here.

    Nothing is done when $url is not defined or empty, or when an executable
    named $name (plus $EXEEXT) is found in $SOLARENV/$OUTPATH/bin.
    Otherwise the last path segment of $url, passed through the optional code
    reference $name_converter, is the base name of the tarball.  The tarball is
    downloaded from $url unless TARFILE_LOCATION already contains a file named
    <something>-<base name>.

=cut
sub ProvideSpecialTarball ($$$)
{
    my ($url, $name, $name_converter) = @_;

    return unless defined $url && $url ne "";

    # See if we can find the executable.
    my ($SOLARENV,$OUTPATH,$EXEEXT) = ($ENV{'SOLARENV'},$ENV{'OUTPATH'},$ENV{'EXEEXT'});
    $SOLARENV = "" unless defined $SOLARENV;
    $OUTPATH = "" unless defined $OUTPATH;
    $EXEEXT = "" unless defined $EXEEXT;
    if (-x File::Spec->catfile($SOLARENV, $OUTPATH, "bin", $name.$EXEEXT))
    {
        print "found $name executable\n";
        return;
    }

    # Download the source from the URL.
    my $basename = basename(URI->new($url)->path());
    die "can not determine the file name of $url"
        unless defined $basename && $basename ne "";

    if (defined $name_converter)
    {
        $basename = $name_converter->($basename);
    }

    # Has the source tar ball already been downloaded?
    my @candidates = glob(File::Spec->catfile($ENV{'TARFILE_LOCATION'}, "*-" . $basename));
    if (scalar @candidates > 0)
    {
        # Yes.
        print "$basename exists\n";
        return;
    }
    else
    {
        # No, download it.
        print "downloading $basename\n";
        DownloadFile($basename, $url, undef);
    }
}





# The main() functionality.

die "usage: $0 <data-file-name>\n" if scalar @ARGV != 1;
my $data_file = $ARGV[0];
CheckDownloadDestination();
ProcessDataFile($data_file);
ProvideSpecialTarball($ENV{'DMAKE_URL'}, "dmake", undef);
ProvideSpecialTarball(
    $ENV{'EPM_URL'},
    "epm",
    # Work on a copy of the name so that the caller's variable is not
    # modified through @_.
    sub{ my $basename = shift; $basename =~ s/-source//; return $basename; });