Patch for mythes-1.2.0 th_gen_idx.pl: replaces the script header with the
ASF license header and makes the script write the generated index to the
file named by "-o <outfile>" instead of STDOUT (input is still read from
STDIN).  The output file is opened with three-argument open, and failures
of open/close are reported together with the system error text.
NOTE(review): whitespace inside context and '-' lines was re-derived from a
whitespace-collapsed copy of this patch; verify against the pristine
mythes-1.2.0 source (or apply with "patch -l") before relying on it.

--- misc/mythes-1.2.0.orig/th_gen_idx.pl	2010-02-27 12:52:58.000000000 -0300
+++ misc/build/mythes-1.2.0/th_gen_idx.pl	2012-01-12 04:13:15.149371123 -0300
@@ -1,11 +1,26 @@
-#!/usr/bin/perl
-
-# perl program to take a thesaurus structured text data file
-# and create the proper sorted index file (.idx)
+:
+eval 'exec perl -wS $0 ${1+"$@"}'
+    if 0;
+#**************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
 #
-# typcially invoked as follows:
-# cat th_en_US_new.dat | ./th_gen_idx.pl > th_en_US_new.idx
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
 #
+#**************************************************************
 
 sub by_entry {
     my ($aent, $aoff) = split('\|',$a);
@@ -13,6 +28,30 @@ sub by_entry {
     $aent cmp $bent;
 }
 
+# Scan @ARGV for "-o <file>" and return the argument that follows the
+# first "-o"; return "" when "-o" is missing or is the last argument.
+#FIXME: someone may want "infile" or even parameter parsing
+sub get_outfile {
+    my $next_is_file = 0;
+    foreach ( @ARGV ) {
+        if ( $next_is_file ) {
+            return $_;
+        }
+        if ( $_ eq "-o" ) {
+            $next_is_file = 1;
+        }
+    }
+    return "";
+}
+
+# Print a short usage message to STDERR and terminate with exit status 99.
+sub usage {
+    print STDERR "usage:\n";
+    print STDERR "$0 -o outfile < input\n";
+
+    exit 99;
+}
+
 # main routine
 my $ne = 0;       # number of entries in index
 my @tindex=();    # the index itself
@@ -24,6 +63,10 @@ my $nm=0;         # number of meaning fo
 my $meaning="";   # current meaning and synonyms
 my $p;            # misc uses
 my $encoding;     # encoding used by text file
+my $outfile = ""; # index file to write, taken from "-o"
+
+$outfile = get_outfile();
+usage() if ( $outfile eq "" );
 
 # top line of thesaurus provides encoding
 $encoding=<STDIN>;
@@ -51,9 +94,15 @@ while ($rec=<STDIN>){
 # now we have all of the information
 # so sort it and then output the encoding, count and index data
 @tindex = sort by_entry @tindex;
-print STDOUT "$encoding\n";
-print STDOUT "$ne\n";
+
+print "$outfile\n";
+# three-argument open: the file name is never parsed for a mode or a pipe
+open(OUTFILE, '>', $outfile) or die "ERROR: Can't open $outfile for writing: $!";
+print OUTFILE "$encoding\n";
+print OUTFILE "$ne\n";
 foreach $one (@tindex) {
-    print STDOUT "$one\n";
+    print OUTFILE "$one\n";
 }
+# buffered write errors (e.g. a full disk) only surface when closing
+close(OUTFILE) or die "ERROR: Can't finish writing $outfile: $!";
 