--- misc/mythes-1.2.0.orig/th_gen_idx.pl
+++ misc/build/mythes-1.2.0/th_gen_idx.pl
@@ -1,11 +1,32 @@
-#!/usr/bin/perl
-
-# perl program to take a thesaurus structured text data file
-# and create the proper sorted index file (.idx)
+:
+eval 'exec perl -wS $0 ${1+"$@"}'
+    if 0;
+#*************************************************************************
+#
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# Copyright 2000, 2010 Oracle and/or its affiliates.
+#
+# OpenOffice.org - a multi-platform office productivity suite
+#
+# This file is part of OpenOffice.org.
+#
+# OpenOffice.org is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version 3
+# only, as published by the Free Software Foundation.
+#
+# OpenOffice.org is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License version 3 for more details
+# (a copy is included in the LICENSE file that accompanied this code).
 #
-# typcially invoked as follows:
-# cat th_en_US_new.dat | ./th_gen_idx.pl > th_en_US_new.idx
+# You should have received a copy of the GNU Lesser General Public License
+# version 3 along with OpenOffice.org. If not, see
+# <http://www.openoffice.org/license.html>
+# for a copy of the LGPLv3 License.
 #
+#*************************************************************************
 
 sub by_entry {
     my ($aent, $aoff) = split('\|',$a);
@@ -13,6 +34,30 @@ sub by_entry {
     $aent cmp $bent;
 }
 
+#FIXME: someone may want "infile" or even parameter parsing
+# Scan @ARGV for "-o" and return the argument that follows it,
+# or "" when "-o" is absent or is the last argument.
+sub get_outfile {
+    my $next_is_file = 0;
+    foreach ( @ARGV ) {
+        if ( $next_is_file ) {
+            return $_
+        }
+        if ( $_ eq "-o" ) {
+            $next_is_file = 1;
+        }
+    }
+    return "";
+}
+
+# Print the command-line synopsis and terminate with exit status 99.
+sub usage {
+    print "usage:\n";
+    print "$0 -o outfile < input\n";
+
+    exit 99;
+}
+
 # main routine
 my $ne = 0; # number of entries in index
 my @tindex=(); # the index itself
@@ -24,6 +69,10 @@ my $nm=0; # number of meaning fo
 my $meaning=""; # current meaning and synonyms
 my $p; # misc uses
 my $encoding; # encoding used by text file
+my $outfile = "";
+
+$outfile = get_outfile();
+usage() if ( $outfile eq "" );
 
 # top line of thesaurus provides encoding
 $encoding=<STDIN>;
@@ -51,9 +100,15 @@ while ($rec=<STDIN>){
 # now we have all of the information
 # so sort it and then output the encoding, count and index data
 @tindex = sort by_entry @tindex;
-print STDOUT "$encoding\n";
-print STDOUT "$ne\n";
+
+print "$outfile\n";
+# three-argument open: the file name is never parsed for mode characters
+open my $out, '>', $outfile or die "ERROR: Can't open $outfile for writing: $!";
+print $out "$encoding\n";
+print $out "$ne\n";
 foreach $one (@tindex) {
-    print STDOUT "$one\n";
+    print $out "$one\n";
 }
+# buffered write errors only surface at close, so check it
+close $out or die "ERROR: Can't close $outfile: $!";
 