1*cdf0e10cSrcweir--- misc/mythes-1.2.0.orig/th_gen_idx.pl
2*cdf0e10cSrcweir+++ misc/build/mythes-1.2.0/th_gen_idx.pl
3*cdf0e10cSrcweir@@ -1,11 +1,32 @@
4*cdf0e10cSrcweir-#!/usr/bin/perl
5*cdf0e10cSrcweir-
6*cdf0e10cSrcweir-# perl program to take a thesaurus structured text data file
7*cdf0e10cSrcweir-# and create the proper sorted index file (.idx)
8*cdf0e10cSrcweir+:
9*cdf0e10cSrcweir+eval 'exec perl -wS $0 ${1+"$@"}'
10*cdf0e10cSrcweir+    if 0;
11*cdf0e10cSrcweir+#*************************************************************************
12*cdf0e10cSrcweir+#
13*cdf0e10cSrcweir+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
14*cdf0e10cSrcweir+#
15*cdf0e10cSrcweir+# Copyright 2000, 2010 Oracle and/or its affiliates.
16*cdf0e10cSrcweir+#
17*cdf0e10cSrcweir+# OpenOffice.org - a multi-platform office productivity suite
18*cdf0e10cSrcweir+#
19*cdf0e10cSrcweir+# This file is part of OpenOffice.org.
20*cdf0e10cSrcweir+#
21*cdf0e10cSrcweir+# OpenOffice.org is free software: you can redistribute it and/or modify
22*cdf0e10cSrcweir+# it under the terms of the GNU Lesser General Public License version 3
23*cdf0e10cSrcweir+# only, as published by the Free Software Foundation.
24*cdf0e10cSrcweir+#
25*cdf0e10cSrcweir+# OpenOffice.org is distributed in the hope that it will be useful,
26*cdf0e10cSrcweir+# but WITHOUT ANY WARRANTY; without even the implied warranty of
27*cdf0e10cSrcweir+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
28*cdf0e10cSrcweir+# GNU Lesser General Public License version 3 for more details
29*cdf0e10cSrcweir+# (a copy is included in the LICENSE file that accompanied this code).
30*cdf0e10cSrcweir #
31*cdf0e10cSrcweir-# typcially invoked as follows:
32*cdf0e10cSrcweir-# cat th_en_US_new.dat | ./th_gen_idx.pl > th_en_US_new.idx
33*cdf0e10cSrcweir+# You should have received a copy of the GNU Lesser General Public License
34*cdf0e10cSrcweir+# version 3 along with OpenOffice.org.  If not, see
35*cdf0e10cSrcweir+# <http://www.openoffice.org/license.html>
36*cdf0e10cSrcweir+# for a copy of the LGPLv3 License.
37*cdf0e10cSrcweir #
38*cdf0e10cSrcweir+#*************************************************************************
39*cdf0e10cSrcweir
40*cdf0e10cSrcweir sub by_entry {
41*cdf0e10cSrcweir     my ($aent, $aoff) = split('\|',$a);
42*cdf0e10cSrcweir@@ -13,6 +34,27 @@ sub by_entry {
43*cdf0e10cSrcweir     $aent cmp $bent;
44*cdf0e10cSrcweir }
45*cdf0e10cSrcweir
46*cdf0e10cSrcweir+#FIXME: someone may want "infile" or even parameter parsing
47*cdf0e10cSrcweir+# Return the argument that follows the first "-o" in @ARGV; the result
48*cdf0e10cSrcweir+# is "" when "-o" is absent or is the very last argument.
49*cdf0e10cSrcweir+sub get_outfile {
50*cdf0e10cSrcweir+	my $last_idx = $#ARGV;
51*cdf0e10cSrcweir+	# stop one short of the end: a trailing "-o" has no file name
52*cdf0e10cSrcweir+	foreach my $i ( 0 .. $last_idx - 1 ) {
53*cdf0e10cSrcweir+		if ( $ARGV[$i] eq "-o" ) {
54*cdf0e10cSrcweir+			return $ARGV[$i + 1];
55*cdf0e10cSrcweir+		}
56*cdf0e10cSrcweir+	}
57*cdf0e10cSrcweir+	return "";
58*cdf0e10cSrcweir+}
59*cdf0e10cSrcweir+
60*cdf0e10cSrcweir+sub usage {
61*cdf0e10cSrcweir+	# Error path: report the calling convention on STDERR, then exit 99.
62*cdf0e10cSrcweir+	print STDERR "usage:\n";
63*cdf0e10cSrcweir+	print STDERR "$0 -o outfile < input\n";
64*cdf0e10cSrcweir+	exit 99;
65*cdf0e10cSrcweir+}
66*cdf0e10cSrcweir+
67*cdf0e10cSrcweir # main routine
68*cdf0e10cSrcweir my $ne = 0;       # number of entries in index
69*cdf0e10cSrcweir my @tindex=();    # the index itself
70*cdf0e10cSrcweir@@ -24,6 +66,10 @@ my $nm=0;         # number of meaning fo
71*cdf0e10cSrcweir my $meaning="";   # current meaning and synonyms
72*cdf0e10cSrcweir my $p;            # misc uses
73*cdf0e10cSrcweir my $encoding;     # encoding used by text file
74*cdf0e10cSrcweir+
75*cdf0e10cSrcweir+# target of the generated index: the argument given after "-o"
76*cdf0e10cSrcweir+my $outfile = get_outfile();
77*cdf0e10cSrcweir+usage() unless ( length $outfile );
78*cdf0e10cSrcweir
79*cdf0e10cSrcweir # top line of thesaurus provides encoding
80*cdf0e10cSrcweir $encoding=<STDIN>;
81*cdf0e10cSrcweir@@ -51,9 +97,13 @@ while ($rec=<STDIN>){
82*cdf0e10cSrcweir # now we have all of the information
83*cdf0e10cSrcweir # so sort it and then output the encoding, count and index data
84*cdf0e10cSrcweir @tindex = sort by_entry @tindex;
85*cdf0e10cSrcweir-print STDOUT "$encoding\n";
86*cdf0e10cSrcweir-print STDOUT "$ne\n";
87*cdf0e10cSrcweir+# write encoding, entry count and the sorted entries to $outfile
88*cdf0e10cSrcweir+print "$outfile\n";    # progress note on STDOUT
89*cdf0e10cSrcweir+open my $out, '>', $outfile or die "ERROR: Can't open $outfile for writing: $!";
90*cdf0e10cSrcweir+print $out "$encoding\n";
91*cdf0e10cSrcweir+print $out "$ne\n";
92*cdf0e10cSrcweir foreach $one (@tindex) {
93*cdf0e10cSrcweir-    print STDOUT "$one\n";
94*cdf0e10cSrcweir+    print $out "$one\n";
95*cdf0e10cSrcweir }
96*cdf0e10cSrcweir+close $out or die "ERROR: Can't finish writing $outfile: $!";    # buffered write errors show up here
97*cdf0e10cSrcweir
98