Patch for mythes-1.2.0 th_gen_idx.pl: write the generated thesaurus index
to a file named on the command line ("-o outfile") instead of to STDOUT.

What the hunks do:
  1. Replace the "#!/usr/bin/perl" line and the leading description comment
     with a ":" line followed by "eval 'exec perl -wS ...' if 0;", and add
     the Apache License 2.0 header.
  2. Add get_outfile(), which scans @ARGV and returns the argument that
     follows "-o" (or "" when there is none), and usage(), which prints a
     usage message and exits with status 99.
  3. Declare $outfile, fill it from get_outfile(), and call usage() when it
     is empty.
  4. Print the output file name to STDOUT, then write the encoding line,
     the entry count and the sorted index entries to that file.
     The file is opened with three-argument open on a lexical handle so the
     file name can never be parsed as part of the open mode; the open
     failure message includes $! and close is checked, because buffered
     write errors only surface when the handle is closed.

NOTE(review): the header of hunk 1 declares 11 old / 32 new lines, but its
body contains 7 old / 26 new lines; hunks 2-4 are consistent with their
headers. Confirm hunk 1 against the pristine patch before relying on it.
NOTE(review): leading indentation and intra-line spacing of context and
removed lines were reconstructed; verify them against the upstream file,
or apply with "patch -l".

--- misc/mythes-1.2.0.orig/th_gen_idx.pl
+++ misc/build/mythes-1.2.0/th_gen_idx.pl
@@ -1,11 +1,32 @@
-#!/usr/bin/perl
-
-# perl program to take a thesaurus structured text data file
-# and create the proper sorted index file (.idx)
+:
+eval 'exec perl -wS $0 ${1+"$@"}'
+    if 0;
+#**************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#**************************************************************
 
 sub by_entry {
     my ($aent, $aoff) = split('\|',$a);
@@ -13,6 +34,27 @@ sub by_entry {
     $aent cmp $bent;
 }
 
+#FIXME: someone may want "infile" or even parameter parsing
+sub get_outfile {
+    my $next_is_file = 0;
+    foreach ( @ARGV ) {
+        if ( $next_is_file ) {
+            return $_
+        }
+        if ( $_ eq "-o" ) {
+            $next_is_file = 1;
+        }
+    }
+    return "";
+}
+
+sub usage {
+    print "usage:\n";
+    print "$0 -o outfile < input\n";
+
+    exit 99;
+}
+
 # main routine
 my $ne = 0; # number of entries in index
 my @tindex=(); # the index itself
@@ -24,6 +66,10 @@ my $nm=0; # number of meaning fo
 my $meaning=""; # current meaning and synonyms
 my $p; # misc uses
 my $encoding; # encoding used by text file
+my $outfile = "";
+
+$outfile = get_outfile();
+usage() if ( $outfile eq "" );
 
 # top line of thesaurus provides encoding
 $encoding=<STDIN>;
@@ -51,9 +97,13 @@ while ($rec=<STDIN>){
 # now we have all of the information
 # so sort it and then output the encoding, count and index data
 @tindex = sort by_entry @tindex;
-print STDOUT "$encoding\n";
-print STDOUT "$ne\n";
+
+print "$outfile\n";
+open my $out_fh, '>', $outfile or die "ERROR: Can't open $outfile for writing: $!";
+print $out_fh "$encoding\n";
+print $out_fh "$ne\n";
 foreach $one (@tindex) {
-    print STDOUT "$one\n";
+    print $out_fh "$one\n";
 }
+close $out_fh or die "ERROR: Can't close $outfile: $!";
 