xref: /aoo41x/main/l10ntools/scripts/tool/xhtex.py (revision a0428e9e)
#**************************************************************
#
#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing,
#  software distributed under the License is distributed on an
#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#  KIND, either express or implied.  See the License for the
#  specific language governing permissions and limitations
#  under the License.
#
#**************************************************************
21cdf0e10cSrcweir
22cdf0e10cSrcweirfrom l10ntool import AbstractL10nTool
23cdf0e10cSrcweirfrom sdf import SdfEntity
24cdf0e10cSrcweirimport sys
25cdf0e10cSrcweirimport xml.dom.minidom
26cdf0e10cSrcweir
class Xhtex(AbstractL10nTool):
    """L10n tool for resources of type "xht" (XML parsed with minidom).

    Extraction turns the leading text of every <topic> element and the
    "title" attribute of every <help_section> and <node> element into SDF
    lines.  Merging writes the translated strings found in an SDF data set
    back into a clone of the parsed document and saves it as UTF-8.

    NOTE: this class targets Python 2; the merge path relies on the
    ``unicode`` builtin.
    """

    _resource_type = "xht"
    _sdfdata       = ()
    _lang          = ""

    # ----------------------------------------------------------- extract ----
    def extract_topic(self, list, inputfile):
        """Return SDF entities for the given <topic> elements.

        An element contributes only when its first child is a text node whose
        data is not pure whitespace.  The text is passed on unstripped; the
        "id" attribute is stripped.  Elements without any child are skipped
        (previously they raised IndexError).

        list      -- iterable of DOM elements (name kept for compatibility)
        inputfile -- path of the file the elements come from
        """
        topics = []
        for elem in list:
            first = elem.firstChild          # None for an empty element
            if first is None or first.nodeType != elem.TEXT_NODE:
                continue
            if not first.data.strip():
                continue
            topics.append(self.prepare_sdf_line(id=elem.getAttribute("id").strip(),
                                                text=first.data,
                                                inputfile=inputfile))
        return topics

    def extract_title(self, list, inputfile):
        """Return SDF entities for the non-blank "title" attributes of the elements.

        list      -- iterable of DOM elements (name kept for compatibility)
        inputfile -- path of the file the elements come from
        """
        titles = []
        for elem in list:
            title = elem.getAttribute("title").strip()
            if title:
                titles.append(self.prepare_sdf_line(id=elem.getAttribute("id").strip(),
                                                    text=title,
                                                    inputfile=inputfile))
        return titles

    # ------------------------------------------------------------- merge ----
    def _lookup_translation(self, sdfdata, lang, inputfilename, gid):
        """Return the text stored in sdfdata for gid as unicode, or None.

        The lookup key is the id of an SDF entity built for this file,
        language and group id.  A falsy entry counts as "no translation".
        """
        key = self.prepare_sdf_line(inputfile=inputfilename, lang=lang, id=gid).get_id()
        entry = sdfdata[key]
        if not entry:
            return None
        # The entry text is decoded as UTF-8 (Python 2 byte string -> unicode).
        return unicode(str(entry.text), "utf8")

    def merge_topic(self, list, sdfdata, lang, inputfilename, dom):
        """Replace the leading text of each <topic> element with its translation.

        Elements are skipped when they have no child, when the first child is
        not a text node, when the "id" attribute is blank, or when sdfdata
        holds no entry for them.

        list          -- iterable of DOM elements (name kept for compatibility)
        sdfdata       -- container indexed by SDF entity id
        lang          -- target language id
        inputfilename -- path of the source file, used to build the lookup key
        dom           -- unused; kept so existing callers keep working
        """
        for elem in list:
            first = elem.firstChild          # None for an empty element
            gid = elem.getAttribute("id").strip()
            if first is None or first.nodeType != elem.TEXT_NODE or not gid:
                continue
            translated = self._lookup_translation(sdfdata, lang, inputfilename, gid)
            if translated is not None:
                first.data = translated

    def merge_title(self, list, sdfdata, lang, inputfilename):
        """Set the "title" attribute of each element to its translation.

        Elements with a blank "id" attribute or without an entry in sdfdata
        are left untouched.

        list          -- iterable of DOM elements (name kept for compatibility)
        sdfdata       -- container indexed by SDF entity id
        lang          -- target language id
        inputfilename -- path of the source file, used to build the lookup key
        """
        for elem in list:
            gid = elem.getAttribute("id").strip()
            if not gid:
                continue
            translated = self._lookup_translation(sdfdata, lang, inputfilename, gid)
            if translated is not None:
                elem.setAttribute("title", translated)

    # --------------------------------------------------------- L10N tool ----
    def __init__(self):
        AbstractL10nTool.__init__(self)

    def merge_file(self, inputfilename, outputfilename, parsed_file_ref, lang,is_forced_lang, sdfdata):
        """Write the localized version of inputfilename to outputfilename.

        For lang "en-US" the input file is copied unchanged to the output
        path with "_en-US" removed from it.  For any other language a deep
        clone of parsed_file_ref receives the translations from sdfdata and
        is serialized as UTF-8.  The process exits with status -1 when the
        output file cannot be written.

        is_forced_lang is accepted but not used by this tool.
        """
        if lang == "en-US":
            mod_outputfilename = outputfilename.replace("_en-US",'')
            self.make_dirs(mod_outputfilename)
            self.copy_file(inputfilename, mod_outputfilename)
            return

        # Work on a copy so the shared parsed document stays untouched.
        dom = parsed_file_ref.cloneNode(True)

        self.merge_topic(dom.getElementsByTagName("topic"), sdfdata, lang, inputfilename, dom)
        self.merge_title(dom.getElementsByTagName("node"), sdfdata, lang, inputfilename)
        self.merge_title(dom.getElementsByTagName("help_section"), sdfdata, lang, inputfilename)
        self.make_dirs(outputfilename)

        serialized = dom.toxml().encode("utf-8")
        try:
            # "with" closes the handle even when write() fails.
            with open(outputfilename, "w+") as outfile:
                outfile.write(serialized)
        except IOError:
            # Parenthesised single argument: same output under the Python 2
            # print statement, and still valid syntax for Python 3.
            print("ERROR: Can not write file " + outputfilename)
            sys.exit(-1)

    def parse_file(self, filename):
        """Parse filename once and return the minidom document.

        This is the single definition of the parse helper; the class used to
        define it twice and the later copy went on to parse an empty string
        after a read error.  A read error is now reported and the process
        exits with status -1, the same way merge_file treats write errors.
        """
        try:
            # "with" closes the handle even when read() fails.
            with open(filename, "r") as infile:
                document = infile.read()
        except IOError:
            print("ERROR: Can not read file " + filename)
            sys.exit(-1)
        return xml.dom.minidom.parseString(document)

    def extract_file(self, inputfile):
        """Extract a single file and return its SDF lines as one string.

        Topics come first, then <help_section> titles, then <node> titles;
        every line is terminated by a newline.
        """
        dom = self.parse_file(inputfile)
        sdf_data = []
        sdf_data.extend(self.extract_topic(dom.getElementsByTagName("topic"), inputfile))
        sdf_data.extend(self.extract_title(dom.getElementsByTagName("help_section"), inputfile))
        sdf_data.extend(self.extract_title(dom.getElementsByTagName("node"), inputfile))
        return ''.join([str(line)+"\n" for line in sdf_data])

    def prepare_sdf_line(self, inputfile="", lang="" , id="" , text=""):
        """Build the SdfEntity for one string of this resource type.

        An empty lang falls back to self._source_language.  id is stored as
        the entity's gid; lid is always empty.
        """
        if lang == "":
            lang = self._source_language
        return SdfEntity(project=self._options.project_name, source_file=self.get_filename_string(inputfile),
                         resource_type=self._resource_type, gid=id, lid="", langid=lang,text=text)
# Module-level tool instance.  Constructing it calls AbstractL10nTool.__init__
# (defined in l10ntool, not shown here) -- presumably that is what starts the
# tool; confirm against l10ntool before relying on it.
run = Xhtex()
130cdf0e10cSrcweir
131