xref: /aoo41x/main/sc/workben/celltrans/parse.py (revision cdf0e10c)
#!/usr/bin/env python
#***********************************************************************
#
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# Copyright 2000, 2010 Oracle and/or its affiliates.
#
# OpenOffice.org - a multi-platform office productivity suite
#
# This file is part of OpenOffice.org.
#
# OpenOffice.org is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License version 3
# only, as published by the Free Software Foundation.
#
# OpenOffice.org is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License version 3 for more details
# (a copy is included in the LICENSE file that accompanied this code).
#
# You should have received a copy of the GNU Lesser General Public License
# version 3 along with OpenOffice.org.  If not, see
# <http://www.openoffice.org/license.html>
# for a copy of the LGPLv3 License.
#
#***********************************************************************
28*cdf0e10cSrcweir
29*cdf0e10cSrcweirimport sys
30*cdf0e10cSrcweir
# Maps the locale codes that can appear in the keyword file to the language
# names printed in the banner comment of the generated code.
localeNames = {'fr': 'French', 'hu': 'Hungarian', 'de': 'German'}


def getLocaleName(code):
    """Return the language name registered for a locale code.

    code -- locale code to look up in ``localeNames`` (e.g. 'fr').

    Returns the matching language name, or the placeholder string
    "(unknown locale)" when the code is not registered.
    """
    # dict.get() performs a single lookup and, unlike dict.has_key(),
    # exists on both Python 2 and Python 3.
    return localeNames.get(code, "(unknown locale)")
38*cdf0e10cSrcweir
def getAscii(ords):
    """Build a string from an iterable of integer character codes.

    ords -- iterable of integers; each one is converted with chr().

    Returns the concatenation of the converted characters, or an empty
    string when ``ords`` is empty.
    """
    # A single join avoids the repeated string copies of a += loop.
    return ''.join(chr(code) for code in ords)
44*cdf0e10cSrcweir
class LocaleData(object):
    """Keyword translations collected for a single locale.

    Attributes:
      locale   -- the locale code this object was created with (e.g. 'fr').
      funcList -- dict mapping a function name to a list of
                  [localeName, engName] pairs.  Each name is a sequence of
                  integer character codes, as passed to addKeywordMap().
    """

    def __init__(self, locale):
        self.locale = locale
        self.funcList = {}

    def addKeywordMap(self, funcName, localeName, engName):
        """Record one localized/English keyword pair under ``funcName``.

        funcName   -- name of the function the keyword belongs to.
        localeName -- localized keyword as a sequence of character codes.
        engName    -- English keyword as a sequence of character codes.
        """
        # setdefault() creates the per-function list on first use; it
        # replaces the has_key() test, which does not exist on Python 3.
        self.funcList.setdefault(funcName, []).append([localeName, engName])

    def getLocaleFuncVarName(self, func, pair):
        """Return the C identifier used for one keyword's UTF-16 array.

        The identifier has the form <func>_<english keyword>_<locale>, with
        the function name and the English keyword (pair[1]) lower-cased.
        """
        return "_".join([func.lower(), getAscii(pair[1]).lower(), self.locale])

    def dumpCode(self):
        """Return the generated C++ source text for this locale.

        The text consists of, in order: a banner comment, the Locale
        constant, one null-terminated sal_Unicode array per keyword pair,
        the TransItem table referencing those arrays (closed by a
        {NULL, NULL, ocNone} sentinel row), and the addToMap() call.
        Functions are emitted in sorted order of their names.
        """
        localeVar = self.locale.capitalize()
        rule = "// " + "-" * 75 + "\n"
        # sorted() works on both Python 2 lists and Python 3 key views.
        funcs = sorted(self.funcList)

        # Collect the pieces and join once at the end instead of growing a
        # string with +=.
        parts = [
            # locale output
            rule,
            "// %s language locale (automatically generated)\n"
            % getLocaleName(self.locale),
            rule,
            "static const Locale a" + localeVar
            + "(OUString::createFromAscii(\"",
            self.locale,
            "\"), OUString(), OUString());\n\n",
            # pre instantiations of localized function names.
            "// pre instantiations of localized function names\n",
        ]

        for func in funcs:
            for item in self.funcList[func]:
                # Dump the code units of the localized name; the appended
                # 0 null-terminates the array.
                codeUnits = list(item[0]) + [0]
                parts.append("static const sal_Unicode "
                             + self.getLocaleFuncVarName(func, item)
                             + "[] = {\n")
                parts.append("    "
                             + ", ".join("0x%.4X" % uval for uval in codeUnits)
                             + "};\n")

        # map item instantiations
        parts.append("\n")
        parts.append("static const TransItem p" + localeVar + "[] = {\n")
        for func in funcs:
            for item in self.funcList[func]:
                parts.append("    {%s, \"%s\", %s},\n"
                             % (self.getLocaleFuncVarName(func, item),
                                getAscii(item[1]),
                                "oc" + func.capitalize()))
        parts.append("    {NULL, NULL, ocNone}\n")
        parts.append("};\n\n")

        # addToMap call
        parts.append("addToMap(%s, %s);\n" % ("p" + localeVar, "a" + localeVar))

        return "".join(parts)
112*cdf0e10cSrcweir
class Parser(object):
    """Convert a UTF-16 keyword file into generated C++ keyword tables.

    The input is read as little-endian UTF-16 code units.  While scanning:
      * a leading 0xFEFF byte-order mark is skipped;
      * '$' starts a locale declaration that runs to the end of the line;
      * '@' starts a function-name declaration that runs to the end of
        the line;
      * any other line is split on ',' into words, and when a line yields
        at least two words the first two are stored as the
        (localized, English) keyword pair of the current function in the
        most recently declared locale;
      * tabs, spaces and carriage returns are ignored.
    """

    def __init__(self, args):
        """Pick the input and output paths from an argv-style list.

        args -- sequence shaped like sys.argv; args[1] overrides the input
                file and args[2] the output file when they are present.
        """
        # default input & output files.
        self.infile = "./keywords_utf16.txt"
        self.outfile = "../../source/core/tool/cellkeywords.inl"

        if len(args) >= 2:
            self.infile = args[1]
        if len(args) >= 3:
            self.outfile = args[2]

    def getDByte(self):
        """Return the 16-bit code unit at the cursor and advance past it.

        Reads two bytes of ``self.bytes`` at offset ``self.i`` (little
        endian is assumed) and moves ``self.i`` forward by two.

        Raises IndexError when fewer than two bytes remain.
        """
        # bytearray() yields integers no matter whether self.bytes is a
        # Python 2 str, a Python 3 bytes object or a bytearray.
        pair = bytearray(self.bytes[self.i:self.i + 2])
        if len(pair) < 2:
            raise IndexError(
                "truncated UTF-16 code unit at byte offset %d" % self.i)
        low, high = pair
        self.i += 2
        return high * 256 + low

    def parseLine(self):
        """Consume the rest of the current line and return its code units.

        Reading stops after the next linefeed (which is consumed but not
        returned) or at the end of the data.  A carriage return directly
        before the line end is dropped so CRLF files give the same result
        as LF files.
        """
        buf = []
        while self.i < self.size:
            dbyte = self.getDByte()
            if dbyte == 0x000A:
                break
            buf.append(dbyte)
        if buf and buf[-1] == 0x000D:
            buf.pop()
        return buf

    def dumpBuf(self, buf, linefeed=True):
        """Write the code units in ``buf`` to stdout as characters.

        buf      -- iterable of integer character codes.
        linefeed -- when true, a newline is written after the characters.
        """
        for item in buf:
            sys.stdout.write(chr(item))
        if linefeed:
            # Same output as the Python-2-only "print ''" statement.
            sys.stdout.write('\n')

    def _storePair(self, localeList, funcName, word, wordPair):
        """Finish the current line and record its keyword pair, if any."""
        if word:
            wordPair.append(word)
        if len(wordPair) >= 2:
            if not localeList:
                raise IndexError(
                    "keyword pair found before any '$' locale line")
            localeList[-1].addKeywordMap(funcName, wordPair[0], wordPair[1])

    def _parseLocales(self):
        """Scan the loaded data and return the list of LocaleData objects."""
        localeList = []  # stores an array of locale data objects.
        funcName = None
        word = []
        wordPair = []

        while self.i < self.size:
            dbyte = self.getDByte()
            if dbyte == 0xFEFF and self.i == 2:
                # unicode signature - ignore it.
                pass
            elif dbyte == 0x0024:
                # $ - locale name
                localeList.append(LocaleData(getAscii(self.parseLine())))
            elif dbyte == 0x0040:
                # @ - function name
                funcName = getAscii(self.parseLine())
            elif dbyte == 0x002C:
                # , - comma separator
                if word:
                    wordPair.append(word)
                    word = []
            elif dbyte == 0x000A:
                # linefeed
                self._storePair(localeList, funcName, word, wordPair)
                word = []
                wordPair = []
            elif dbyte in (0x0009, 0x000D, 0x0020):
                # whitespace (including the CR of a CRLF pair) - ignore it.
                pass
            else:
                word.append(dbyte)

        # The last line may lack a terminating linefeed; flush it so its
        # keyword pair is not silently dropped.
        self._storePair(localeList, funcName, word, wordPair)
        return localeList

    def parse(self):
        """Read ``self.infile`` and write the generated code to ``self.outfile``.

        Sets ``self.bytes``, ``self.size`` and ``self.i`` as a side effect.
        """
        # The input is raw UTF-16 data, so it must be read in binary mode;
        # text mode may translate line endings or attempt to decode it.
        with open(self.infile, 'rb') as infile:
            self.bytes = bytearray(infile.read())

        self.size = len(self.bytes)
        self.i = 0

        localeList = self._parseLocales()

        chars = "// This file has been automatically generated.  Do not hand-edit this!\n"
        for obj in localeList:
            chars += "\n" + obj.dumpCode()

        # Write to output file.
        with open(self.outfile, 'w') as outfile:
            outfile.write(chars)
205*cdf0e10cSrcweir
if __name__ == '__main__':
    # Script entry point: the command-line arguments may override the
    # default input file (argv[1]) and output file (argv[2]).
    parser = Parser(sys.argv)
    parser.parse()
209*cdf0e10cSrcweir
210