1cdf0e10cSrcweir--- misc/mythes-1.2.0.orig/th_gen_idx.pl
2cdf0e10cSrcweir+++ misc/build/mythes-1.2.0/th_gen_idx.pl
3cdf0e10cSrcweir@@ -1,11 +1,32 @@
4cdf0e10cSrcweir-#!/usr/bin/perl
5cdf0e10cSrcweir-
6cdf0e10cSrcweir-# perl program to take a thesaurus structured text data file
7cdf0e10cSrcweir-# and create the proper sorted index file (.idx)
8cdf0e10cSrcweir+:
9cdf0e10cSrcweir+eval 'exec perl -wS $0 ${1+"$@"}'
10cdf0e10cSrcweir+    if 0;
11*e76eebc6SAndrew Rist+#**************************************************************
12*e76eebc6SAndrew Rist+#
13*e76eebc6SAndrew Rist+#  Licensed to the Apache Software Foundation (ASF) under one
14*e76eebc6SAndrew Rist+#  or more contributor license agreements.  See the NOTICE file
15*e76eebc6SAndrew Rist+#  distributed with this work for additional information
16*e76eebc6SAndrew Rist+#  regarding copyright ownership.  The ASF licenses this file
17*e76eebc6SAndrew Rist+#  to you under the Apache License, Version 2.0 (the
18*e76eebc6SAndrew Rist+#  "License"); you may not use this file except in compliance
19*e76eebc6SAndrew Rist+#  with the License.  You may obtain a copy of the License at
20*e76eebc6SAndrew Rist+#
21*e76eebc6SAndrew Rist+#    http://www.apache.org/licenses/LICENSE-2.0
22*e76eebc6SAndrew Rist+#
23*e76eebc6SAndrew Rist+#  Unless required by applicable law or agreed to in writing,
24*e76eebc6SAndrew Rist+#  software distributed under the License is distributed on an
25*e76eebc6SAndrew Rist+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
26*e76eebc6SAndrew Rist+#  KIND, either express or implied.  See the License for the
27*e76eebc6SAndrew Rist+#  specific language governing permissions and limitations
28*e76eebc6SAndrew Rist+#  under the License.
29*e76eebc6SAndrew Rist+#
30*e76eebc6SAndrew Rist+#**************************************************************
31cdf0e10cSrcweir
32cdf0e10cSrcweir sub by_entry {
33cdf0e10cSrcweir     my ($aent, $aoff) = split('\|',$a);
34cdf0e10cSrcweir@@ -13,6 +34,27 @@ sub by_entry {
35cdf0e10cSrcweir     $aent cmp $bent;
36cdf0e10cSrcweir }
37cdf0e10cSrcweir
38cdf0e10cSrcweir+#FIXME: someone may want "infile" or even parameter parsing
39cdf0e10cSrcweir+sub get_outfile {  # scan @ARGV and return the argument that follows the first "-o"
40cdf0e10cSrcweir+	my $next_is_file = 0;  # set once "-o" has been seen
41cdf0e10cSrcweir+	foreach ( @ARGV ) {
42cdf0e10cSrcweir+		if ( $next_is_file ) {
43cdf0e10cSrcweir+			return $_  # the argument right after "-o" is returned as-is
44cdf0e10cSrcweir+		}
45cdf0e10cSrcweir+		if ( $_ eq "-o" ) {
46cdf0e10cSrcweir+			$next_is_file = 1;
47cdf0e10cSrcweir+		}
48cdf0e10cSrcweir+	}
49cdf0e10cSrcweir+	return "";  # no "-o" given, or "-o" was the last argument
50cdf0e10cSrcweir+}
51cdf0e10cSrcweir+
52cdf0e10cSrcweir+sub usage {  # print a short usage message on STDOUT and terminate the script
53cdf0e10cSrcweir+	print "usage:\n";
54cdf0e10cSrcweir+	print "$0 -o outfile < input\n";
55cdf0e10cSrcweir+
56cdf0e10cSrcweir+	exit 99;  # does not return: the process ends with exit status 99
57cdf0e10cSrcweir+}
58cdf0e10cSrcweir+
59cdf0e10cSrcweir # main routine
60cdf0e10cSrcweir my $ne = 0;       # number of entries in index
61cdf0e10cSrcweir my @tindex=();    # the index itself
62cdf0e10cSrcweir@@ -24,6 +66,10 @@ my $nm=0;         # number of meaning fo
63cdf0e10cSrcweir my $meaning="";   # current meaning and synonyms
64cdf0e10cSrcweir my $p;            # misc uses
65cdf0e10cSrcweir my $encoding;     # encoding used by text file
66cdf0e10cSrcweir+my $outfile = "";  # path of the index file to write, taken from the -o option
67cdf0e10cSrcweir+
68cdf0e10cSrcweir+$outfile = get_outfile();
69cdf0e10cSrcweir+usage() if ( $outfile eq "" );  # -o is mandatory: usage() prints help and exits when it is missing
70cdf0e10cSrcweir
71cdf0e10cSrcweir # top line of thesaurus provides encoding
72cdf0e10cSrcweir $encoding=<STDIN>;
73cdf0e10cSrcweir@@ -51,9 +97,13 @@ while ($rec=<STDIN>){
74cdf0e10cSrcweir # now we have all of the information
75cdf0e10cSrcweir # so sort it and then output the encoding, count and index data
76cdf0e10cSrcweir @tindex = sort by_entry @tindex;
77cdf0e10cSrcweir-print STDOUT "$encoding\n";
78cdf0e10cSrcweir-print STDOUT "$ne\n";
79cdf0e10cSrcweir+
80cdf0e10cSrcweir+print "$outfile\n";  # echo the target path on STDOUT; the index itself now goes to the file
81cdf0e10cSrcweir+open(OUTFILE, '>', $outfile) or die "ERROR: Can't open $outfile for writing! ($!)";  # 3-arg open: the name comes from @ARGV and must not be parsed as a mode
82cdf0e10cSrcweir+print OUTFILE "$encoding\n";
83cdf0e10cSrcweir+print OUTFILE "$ne\n";
84cdf0e10cSrcweir foreach $one (@tindex) {
85cdf0e10cSrcweir-    print STDOUT "$one\n";
86cdf0e10cSrcweir+    print OUTFILE "$one\n";
87cdf0e10cSrcweir }
88cdf0e10cSrcweir+close(OUTFILE) or die "ERROR: Can't close $outfile: $!";  # buffered write errors only surface at close
89cdf0e10cSrcweir
90