# xref: /aoo41x/main/l10ntools/scripts/tool/xhtex.py (revision cdf0e10c)
#*************************************************************************
#
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# Copyright 2000, 2010 Oracle and/or its affiliates.
#
# OpenOffice.org - a multi-platform office productivity suite
#
# This file is part of OpenOffice.org.
#
# OpenOffice.org is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License version 3
# only, as published by the Free Software Foundation.
#
# OpenOffice.org is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License version 3 for more details
# (a copy is included in the LICENSE file that accompanied this code).
#
# You should have received a copy of the GNU Lesser General Public License
# version 3 along with OpenOffice.org.  If not, see
# <http://www.openoffice.org/license.html>
# for a copy of the LGPLv3 License.
#
#*************************************************************************
27*cdf0e10cSrcweir
28*cdf0e10cSrcweirfrom l10ntool import AbstractL10nTool
29*cdf0e10cSrcweirfrom sdf import SdfEntity
30*cdf0e10cSrcweirimport sys
31*cdf0e10cSrcweirimport xml.dom.minidom
32*cdf0e10cSrcweir
class Xhtex(AbstractL10nTool):
    """L10n tool for files of resource type "xht".

    Translatable content handled by this class:
      * the first text child of every <topic> element, and
      * the "title" attribute of every <node> and <help_section> element.
    Each string is identified by the "id" attribute of its element.

    NOTE(review): the code relies on ``unicode`` and therefore targets
    Python 2; ``print`` is written as a single parenthesised expression so
    that it behaves the same under Python 2.
    """

    # Passed as resource_type to every SdfEntity built by prepare_sdf_line.
    _resource_type = "xht"
    # Not referenced inside this class.
    # NOTE(review): presumably consumed by AbstractL10nTool -- confirm.
    _sdfdata       = ()
    _lang          = ""

    # Extract methods
    def extract_topic(self, list, inputfile):
        """Return SDF entities for the text of the given elements.

        list      -- iterable of DOM elements (extract_file passes <topic>)
        inputfile -- file name recorded in each entity

        An element contributes an entity only when its first child is a
        text node whose stripped content is non-empty.  The text itself is
        stored unstripped; the id is stripped.
        """
        topics = []
        for elem in list:
            first = elem.firstChild
            # firstChild is None for an empty element such as
            # <topic id="x"/>; indexing childNodes[0] would raise
            # IndexError there, so such elements are skipped.
            if first is None or first.nodeType != elem.TEXT_NODE:
                continue
            if first.data.strip():
                topics.append(self.prepare_sdf_line(
                    id=elem.getAttribute("id").strip(),
                    text=first.data,
                    inputfile=inputfile))
        return topics

    def extract_title(self, list, inputfile):
        """Return SDF entities for the "title" attribute of the elements.

        list      -- iterable of DOM elements
        inputfile -- file name recorded in each entity

        Elements whose stripped title is empty are skipped.  Both the id
        and the title are stored stripped.
        """
        titles = []
        for elem in list:
            title = elem.getAttribute("title").strip()
            if title:
                titles.append(self.prepare_sdf_line(
                    id=elem.getAttribute("id").strip(),
                    text=title,
                    inputfile=inputfile))
        return titles

    # Merge methods
    def merge_topic(self, list, sdfdata, lang, inputfilename, dom):
        """Replace the leading text of each element with its translation.

        list          -- iterable of DOM elements, modified in place
        sdfdata       -- container indexed by SDF id; a falsy result means
                         "no translation" and leaves the element untouched
        lang          -- language id used to build the lookup key
        inputfilename -- file name used to build the lookup key
        dom           -- unused; kept for interface compatibility

        Only elements with a non-empty id whose first child is a text node
        are considered.
        """
        for elem in list:
            first = elem.firstChild
            # Empty elements have no text node to overwrite.
            if first is None or first.nodeType != elem.TEXT_NODE:
                continue
            gid = elem.getAttribute("id").strip()
            if not gid:
                continue
            key = self.prepare_sdf_line(inputfile=inputfilename, lang=lang, id=gid).get_id()
            entry = sdfdata[key]
            if entry:
                first.data = unicode(str(entry.text), "utf8")

    def merge_title(self, list, sdfdata, lang, inputfilename):
        """Replace the "title" attribute of each element with its translation.

        list          -- iterable of DOM elements, modified in place
        sdfdata       -- container indexed by SDF id; a falsy result means
                         "no translation" and leaves the element untouched
        lang          -- language id used to build the lookup key
        inputfilename -- file name used to build the lookup key

        Elements with an empty id are skipped.
        """
        for elem in list:
            gid = elem.getAttribute("id").strip()
            if not gid:
                continue
            key = self.prepare_sdf_line(inputfile=inputfilename, lang=lang, id=gid).get_id()
            entry = sdfdata[key]
            if entry:
                elem.setAttribute("title", unicode(str(entry.text), "utf8"))

    # L10N tool
    def __init__(self):
        """Initialise the tool by delegating to AbstractL10nTool.__init__."""
        AbstractL10nTool.__init__(self)

    ##### Helper for parse-once-use-often: parse the XML file a single time
    def parse_file(self, filename):
        """Read *filename* and return it parsed as a minidom document.

        The file is opened read-only.  If it cannot be read, an error
        message is printed and the process exits with status -1, matching
        the handling of write errors in merge_file; continuing would only
        hand an empty string to the XML parser and fail with an unrelated
        parse error.
        """
        try:
            # 'with' closes the handle even when read() itself fails.
            with open(filename, "r") as f:
                document = f.read()
        except IOError:
            print("ERROR: Can not read file " + filename)
            sys.exit(-1)
        return xml.dom.minidom.parseString(document)

    def merge_file(self, inputfilename, outputfilename, parsed_file_ref, lang, is_forced_lang, sdfdata):
        """Write the localized variant of *inputfilename* to *outputfilename*.

        inputfilename   -- path of the source file
        outputfilename  -- path of the file to create
        parsed_file_ref -- DOM document of the source file; it is cloned,
                           not modified
        lang            -- target language id
        is_forced_lang  -- unused by this implementation
        sdfdata         -- translation container, see merge_topic

        For "en-US" the source file is copied unchanged to the output path
        with "_en-US" removed from it.  For any other language the merged
        document is serialized as UTF-8.  If the output file cannot be
        written, an error is printed and the process exits with status -1.
        """
        if lang == "en-US":
            mod_outputfilename = outputfilename.replace("_en-US", '')
            self.make_dirs(mod_outputfilename)
            self.copy_file(inputfilename, mod_outputfilename)
            return

        # Deep copy, so the merge below never touches parsed_file_ref.
        # NOTE(review): presumably parsed_file_ref is reused for several
        # languages -- confirm.  Re-parsing via self.parse_file(inputfilename)
        # is the fallback should cloneNode ever misbehave.
        dom = parsed_file_ref.cloneNode(True)

        self.merge_topic(dom.getElementsByTagName("topic"), sdfdata, lang, inputfilename, dom)
        self.merge_title(dom.getElementsByTagName("node"), sdfdata, lang, inputfilename)
        self.merge_title(dom.getElementsByTagName("help_section"), sdfdata, lang, inputfilename)
        self.make_dirs(outputfilename)

        encoded = dom.toxml().encode("utf-8")
        try:
            # 'with' closes the handle even when write() itself fails.
            with open(outputfilename, "w+") as f:
                f.write(encoded)
        except IOError:
            print("ERROR: Can not write file " + outputfilename)
            sys.exit(-1)

    ##### Extract a single File
    def extract_file(self, inputfile):
        """Return all SDF lines of *inputfile* as one string.

        Entities are emitted in this order: <topic> texts, <help_section>
        titles, <node> titles.  Each entity is converted with str() and
        terminated by a newline.
        """
        dom = self.parse_file(inputfile)
        sdf_data = []
        sdf_data.extend(self.extract_topic(dom.getElementsByTagName("topic"), inputfile))
        sdf_data.extend(self.extract_title(dom.getElementsByTagName("help_section"), inputfile))
        sdf_data.extend(self.extract_title(dom.getElementsByTagName("node"), inputfile))
        return ''.join([str(line) + "\n" for line in sdf_data])

    def prepare_sdf_line(self, inputfile="", lang="" , id="" , text=""):
        """Build the SdfEntity describing one string of *inputfile*.

        inputfile -- file name, converted with self.get_filename_string
        lang      -- language id; an empty value selects self._source_language
        id        -- stored as the entity's gid (lid is always empty)
        text      -- the string itself
        """
        if lang == "":
            lang = self._source_language
        return SdfEntity(project=self._options.project_name,
                         source_file=self.get_filename_string(inputfile),
                         resource_type=self._resource_type,
                         gid=id, lid="", langid=lang, text=text)
134*cdf0e10cSrcweir
# Instantiate the tool when this module is loaded.
# NOTE(review): presumably AbstractL10nTool.__init__ drives the actual
# extract/merge run -- confirm against l10ntool.
run = Xhtex()
136*cdf0e10cSrcweir
137