xref: /aoo41x/main/l10ntools/scripts/tool/xhtex.py (revision cdf0e10c)
1#*************************************************************************
2#
3# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4#
5# Copyright 2000, 2010 Oracle and/or its affiliates.
6#
7# OpenOffice.org - a multi-platform office productivity suite
8#
9# This file is part of OpenOffice.org.
10#
11# OpenOffice.org is free software: you can redistribute it and/or modify
12# it under the terms of the GNU Lesser General Public License version 3
13# only, as published by the Free Software Foundation.
14#
15# OpenOffice.org is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU Lesser General Public License version 3 for more details
19# (a copy is included in the LICENSE file that accompanied this code).
20#
21# You should have received a copy of the GNU Lesser General Public License
22# version 3 along with OpenOffice.org.  If not, see
23# <http://www.openoffice.org/license.html>
24# for a copy of the LGPLv3 License.
25#
26#*************************************************************************
27
28from l10ntool import AbstractL10nTool
29from sdf import SdfEntity
30import sys
31import xml.dom.minidom
32
class Xhtex(AbstractL10nTool):
    """Localisation tool for resources of type "xht".

    Translatable content handled by this class:
      * the leading text node of every <topic> element, and
      * the "title" attribute of every <node> and <help_section> element.

    Each string is identified by the "id" attribute of its element, which is
    passed on as the gid of the SDF entity.

    This code targets Python 2: it relies on the ``unicode`` builtin.
    """

    _resource_type = "xht"
    _sdfdata       = ()
    _lang          = ""

    # Extract methods
    def extract_topic(self, list, inputfile):
        """Return one SDF entity per element whose first child is non-blank text.

        list      -- iterable of DOM elements (extract_file passes the result
                     of getElementsByTagName("topic"))
        inputfile -- name of the file being extracted; forwarded to
                     prepare_sdf_line

        The text is stored exactly as found in the document (only the
        emptiness test strips whitespace). Elements without any child node are
        skipped rather than raising IndexError.
        """
        topics = []
        for elem in list:
            if not elem.childNodes:
                continue
            first = elem.childNodes[0]
            if first.nodeType == elem.TEXT_NODE and first.data.strip():
                topics.append(self.prepare_sdf_line(id=elem.getAttribute("id").strip(),
                                                    text=first.data,
                                                    inputfile=inputfile))
        return topics

    def extract_title(self, list, inputfile):
        """Return one SDF entity per element that has a non-blank "title" attribute.

        list      -- iterable of DOM elements (extract_file passes the
                     <help_section> and <node> elements)
        inputfile -- name of the file being extracted; forwarded to
                     prepare_sdf_line

        The title is stored with surrounding whitespace stripped.
        """
        titles = []
        for elem in list:
            title = elem.getAttribute("title").strip()
            if title:
                titles.append(self.prepare_sdf_line(id=elem.getAttribute("id").strip(),
                                                    text=title,
                                                    inputfile=inputfile))
        return titles

    # Merge methods
    def _lookup_translation(self, sdfdata, lang, inputfilename, gid):
        """Return the text stored in sdfdata for gid as unicode, or None.

        The lookup key is the id of an SDF entity built for
        (inputfilename, lang, gid).
        NOTE(review): assumes sdfdata[...] yields a falsy value, rather than
        raising, when no entry exists -- confirm against the sdf module.
        """
        key = self.prepare_sdf_line(inputfile=inputfilename, lang=lang, id=gid).get_id()
        entry = sdfdata[key]
        if not entry:
            return None
        # The entry text is decoded from UTF-8 before it is put into the DOM.
        return unicode(str(entry.text), "utf8")

    def merge_topic(self, list, sdfdata, lang, inputfilename, dom):
        """Replace the leading text of each element with its translation.

        list          -- iterable of DOM elements (merge_file passes the
                         <topic> elements of the cloned document)
        sdfdata       -- container of translations, indexed by SDF entity id
        lang          -- language id used to build the lookup key
        inputfilename -- source file name used to build the lookup key
        dom           -- not used by this method; kept so existing callers
                         keep working

        Elements are left untouched when they have no "id", no child nodes,
        a first child that is not a text node, or no entry in sdfdata.
        """
        for elem in list:
            gid = elem.getAttribute("id").strip()
            if not gid or not elem.childNodes:
                continue
            first = elem.childNodes[0]
            if first.nodeType != elem.TEXT_NODE:
                continue
            translated = self._lookup_translation(sdfdata, lang, inputfilename, gid)
            if translated is not None:
                first.data = translated

    def merge_title(self, list, sdfdata, lang, inputfilename):
        """Set the "title" attribute of each element to its translation.

        list          -- iterable of DOM elements (merge_file passes the
                         <node> and <help_section> elements)
        sdfdata       -- container of translations, indexed by SDF entity id
        lang          -- language id used to build the lookup key
        inputfilename -- source file name used to build the lookup key

        Elements without an "id" or without an entry in sdfdata are left
        untouched.
        """
        for elem in list:
            gid = elem.getAttribute("id").strip()
            if not gid:
                continue
            translated = self._lookup_translation(sdfdata, lang, inputfilename, gid)
            if translated is not None:
                elem.setAttribute("title", translated)

    # L10N tool
    def __init__(self):
        """Initialise the tool by delegating to AbstractL10nTool.__init__."""
        AbstractL10nTool.__init__(self)

    def merge_file(self, inputfilename, outputfilename, parsed_file_ref, lang, is_forced_lang, sdfdata):
        """Write the localised variant of inputfilename to outputfilename.

        inputfilename   -- path of the source file
        outputfilename  -- path of the file to create
        parsed_file_ref -- DOM returned by parse_file for inputfilename
        lang            -- target language id
        is_forced_lang  -- not used by this method
        sdfdata         -- container of translations, indexed by SDF entity id

        For "en-US" the source file is copied unchanged to outputfilename with
        every "_en-US" removed from the path. For any other language a deep
        copy of the DOM is merged with the translations and written as UTF-8.
        Exits the process with status -1 if the output cannot be written.
        """
        if lang == "en-US":
            mod_outputfilename = outputfilename.replace("_en-US", '')
            self.make_dirs(mod_outputfilename)
            self.copy_file(inputfilename, mod_outputfilename)
            return

        # Merge into a deep copy so parsed_file_ref itself is never modified.
        # Should cloneNode ever misbehave, re-parsing inputfilename with
        # parse_file is the fallback.
        dom = parsed_file_ref.cloneNode(True)

        self.merge_topic(dom.getElementsByTagName("topic"), sdfdata, lang, inputfilename, dom)
        self.merge_title(dom.getElementsByTagName("node"), sdfdata, lang, inputfilename)
        self.merge_title(dom.getElementsByTagName("help_section"), sdfdata, lang, inputfilename)

        # Serialise before touching the file system so that a failure here
        # cannot leave a truncated output file behind.
        payload = dom.toxml().encode("utf-8")

        self.make_dirs(outputfilename)
        try:
            f = open(outputfilename, "w")
            try:
                f.write(payload)
            finally:
                # Close the handle even when write() fails.
                f.close()
        except IOError:
            sys.stdout.write("ERROR: Can not write file " + outputfilename + "\n")
            sys.exit(-1)

    ##### Helper for parse-once-use-often: parse the XML file once so the
    ##### resulting DOM can be handed to merge_file as parsed_file_ref.
    def parse_file(self, filename):
        """Parse filename and return it as an xml.dom.minidom Document.

        Prints an error and exits the process with status -1 when the file
        cannot be read; without the exit, the parser would be handed an empty
        string and fail with an unrelated "no element found" error. Errors
        raised by the XML parser for malformed content are not caught here.
        """
        try:
            # Read-only access is sufficient; the file is never written here.
            f = open(filename, "r")
            try:
                document = f.read()
            finally:
                # Close the handle even when read() fails.
                f.close()
        except IOError:
            sys.stdout.write("ERROR: Can not read file " + filename + "\n")
            sys.exit(-1)
        return xml.dom.minidom.parseString(document)

    ##### Extract a single File
    def extract_file(self, inputfile):
        """Return the SDF lines extracted from inputfile as one string.

        Entities are emitted in this order: <topic> texts, <help_section>
        titles, <node> titles. Each entity is rendered with str() and
        terminated by a newline.
        """
        sdf_data = []
        dom = self.parse_file(inputfile)
        sdf_data.extend(self.extract_topic(dom.getElementsByTagName("topic"), inputfile))
        sdf_data.extend(self.extract_title(dom.getElementsByTagName("help_section"), inputfile))
        sdf_data.extend(self.extract_title(dom.getElementsByTagName("node"), inputfile))
        return ''.join([str(line) + "\n" for line in sdf_data])

    def prepare_sdf_line(self, inputfile="", lang="", id="", text=""):
        """Build the SdfEntity describing one string of inputfile.

        inputfile -- file name; converted with the inherited
                     get_filename_string before it is stored
        lang      -- language id; an empty value selects self._source_language
        id        -- identifier stored as the entity's gid (lid is left empty)
        text      -- the string itself
        """
        if lang == "":
            lang = self._source_language
        return SdfEntity(project=self._options.project_name,
                         source_file=self.get_filename_string(inputfile),
                         resource_type=self._resource_type,
                         gid=id, lid="", langid=lang, text=text)
134
# Instantiate the tool as soon as this module is executed or imported.
# NOTE(review): AbstractL10nTool.__init__ presumably reads the command line
# and drives the extract/merge run -- confirm in l10ntool.py.
run = Xhtex()
136
137