1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir #ifndef HELPCOMPILER_HXX
29*cdf0e10cSrcweir #define HELPCOMPILER_HXX
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include <string>
32*cdf0e10cSrcweir #include <hash_map>
33*cdf0e10cSrcweir #include <vector>
34*cdf0e10cSrcweir #include <list>
35*cdf0e10cSrcweir #include <fstream>
36*cdf0e10cSrcweir #include <iostream>
37*cdf0e10cSrcweir #include <sstream>
38*cdf0e10cSrcweir #include <algorithm>
39*cdf0e10cSrcweir #include <ctype.h>
40*cdf0e10cSrcweir #ifdef SYSTEM_DB
41*cdf0e10cSrcweir #include <db.h>
42*cdf0e10cSrcweir #else
43*cdf0e10cSrcweir #include <berkeleydb/db.h>
44*cdf0e10cSrcweir #endif
45*cdf0e10cSrcweir 
46*cdf0e10cSrcweir #include <boost/shared_ptr.hpp>
47*cdf0e10cSrcweir 
48*cdf0e10cSrcweir #include <libxml/xmlmemory.h>
49*cdf0e10cSrcweir #include <libxml/debugXML.h>
50*cdf0e10cSrcweir #include <libxml/HTMLtree.h>
51*cdf0e10cSrcweir #include <libxml/xmlIO.h>
52*cdf0e10cSrcweir #include <libxml/xinclude.h>
53*cdf0e10cSrcweir #include <libxml/catalog.h>
54*cdf0e10cSrcweir 
55*cdf0e10cSrcweir #include <rtl/ustring.hxx>
56*cdf0e10cSrcweir #include <osl/thread.h>
57*cdf0e10cSrcweir #include <osl/process.h>
58*cdf0e10cSrcweir #include <osl/file.hxx>
59*cdf0e10cSrcweir 
60*cdf0e10cSrcweir #include <compilehelp.hxx>
61*cdf0e10cSrcweir 
62*cdf0e10cSrcweir #define EMULATEORIGINAL 1
63*cdf0e10cSrcweir 
64*cdf0e10cSrcweir #ifdef CMCDEBUG
65*cdf0e10cSrcweir     #define HCDBG(foo) do { if (1) foo; } while(0)
66*cdf0e10cSrcweir #else
67*cdf0e10cSrcweir     #define HCDBG(foo) do { if (0) foo; } while(0)
68*cdf0e10cSrcweir #endif
69*cdf0e10cSrcweir 
70*cdf0e10cSrcweir namespace fs
71*cdf0e10cSrcweir {
72*cdf0e10cSrcweir 	rtl_TextEncoding getThreadTextEncoding( void );
73*cdf0e10cSrcweir 
74*cdf0e10cSrcweir     enum convert { native };
75*cdf0e10cSrcweir     class path
76*cdf0e10cSrcweir     {
77*cdf0e10cSrcweir     public:
78*cdf0e10cSrcweir         ::rtl::OUString data;
79*cdf0e10cSrcweir     public:
80*cdf0e10cSrcweir         path() {}
81*cdf0e10cSrcweir         path(const path &rOther) : data(rOther.data) {}
82*cdf0e10cSrcweir         path(const std::string &in, convert)
83*cdf0e10cSrcweir         {
84*cdf0e10cSrcweir             rtl::OUString sWorkingDir;
85*cdf0e10cSrcweir             osl_getProcessWorkingDir(&sWorkingDir.pData);
86*cdf0e10cSrcweir 
87*cdf0e10cSrcweir             rtl::OString tmp(in.c_str());
88*cdf0e10cSrcweir             rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding()));
89*cdf0e10cSrcweir             osl::File::getFileURLFromSystemPath(ustrSystemPath, data);
90*cdf0e10cSrcweir             osl::File::getAbsoluteFileURL(sWorkingDir, data, data);
91*cdf0e10cSrcweir         }
92*cdf0e10cSrcweir         path(const std::string &FileURL)
93*cdf0e10cSrcweir 		{
94*cdf0e10cSrcweir             rtl::OString tmp(FileURL.c_str());
95*cdf0e10cSrcweir             data = rtl::OStringToOUString(tmp, getThreadTextEncoding());
96*cdf0e10cSrcweir 		}
97*cdf0e10cSrcweir         std::string native_file_string() const
98*cdf0e10cSrcweir         {
99*cdf0e10cSrcweir             ::rtl::OUString ustrSystemPath;
100*cdf0e10cSrcweir             osl::File::getSystemPathFromFileURL(data, ustrSystemPath);
101*cdf0e10cSrcweir             rtl::OString tmp(rtl::OUStringToOString(ustrSystemPath, getThreadTextEncoding()));
102*cdf0e10cSrcweir             HCDBG(std::cerr << "native_file_string is " << tmp.getStr() << std::endl);
103*cdf0e10cSrcweir             return std::string(tmp.getStr());
104*cdf0e10cSrcweir         }
105*cdf0e10cSrcweir #ifdef WNT
106*cdf0e10cSrcweir         wchar_t const * native_file_string_w() const
107*cdf0e10cSrcweir         {
108*cdf0e10cSrcweir             ::rtl::OUString ustrSystemPath;
109*cdf0e10cSrcweir             osl::File::getSystemPathFromFileURL(data, ustrSystemPath);
110*cdf0e10cSrcweir             return reinterpret_cast< wchar_t const * >(ustrSystemPath.getStr());
111*cdf0e10cSrcweir         }
112*cdf0e10cSrcweir #endif
113*cdf0e10cSrcweir         std::string native_directory_string() const { return native_file_string(); }
114*cdf0e10cSrcweir         std::string toUTF8() const
115*cdf0e10cSrcweir         {
116*cdf0e10cSrcweir             rtl::OString tmp(rtl::OUStringToOString(data, RTL_TEXTENCODING_UTF8));
117*cdf0e10cSrcweir             return std::string(tmp.getStr());
118*cdf0e10cSrcweir         }
119*cdf0e10cSrcweir         bool empty() const { return data.getLength() == 0; }
120*cdf0e10cSrcweir         path operator/(const std::string &in) const
121*cdf0e10cSrcweir         {
122*cdf0e10cSrcweir             path ret(*this);
123*cdf0e10cSrcweir             HCDBG(std::cerr << "orig was " <<
124*cdf0e10cSrcweir                 rtl::OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl);
125*cdf0e10cSrcweir             rtl::OString tmp(in.c_str());
126*cdf0e10cSrcweir             rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding()));
127*cdf0e10cSrcweir             ret.data += rtl::OUString(sal_Unicode('/'));
128*cdf0e10cSrcweir             ret.data += ustrSystemPath;
129*cdf0e10cSrcweir             HCDBG(std::cerr << "final is " <<
130*cdf0e10cSrcweir                 rtl::OUStringToOString(ret.data, RTL_TEXTENCODING_UTF8).getStr() << std::endl);
131*cdf0e10cSrcweir             return ret;
132*cdf0e10cSrcweir         }
133*cdf0e10cSrcweir         void append(const char *in)
134*cdf0e10cSrcweir         {
135*cdf0e10cSrcweir             rtl::OString tmp(in);
136*cdf0e10cSrcweir             rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding()));
137*cdf0e10cSrcweir             data = data + ustrSystemPath;
138*cdf0e10cSrcweir         }
139*cdf0e10cSrcweir         void append(const std::string &in) { append(in.c_str()); }
140*cdf0e10cSrcweir     };
141*cdf0e10cSrcweir 
142*cdf0e10cSrcweir     void create_directory(const fs::path indexDirName);
143*cdf0e10cSrcweir     void rename(const fs::path &src, const fs::path &dest);
144*cdf0e10cSrcweir     void copy(const fs::path &src, const fs::path &dest);
145*cdf0e10cSrcweir     bool exists(const fs::path &in);
146*cdf0e10cSrcweir     void remove_all(const fs::path &in);
147*cdf0e10cSrcweir     void remove(const fs::path &in);
148*cdf0e10cSrcweir }
149*cdf0e10cSrcweir 
/**
 * Hash functor for std::string implementing the "one-at-a-time" mixing
 * scheme: every byte is added to the accumulator and stirred with a
 * shift/add and a shift/xor, followed by three finishing rounds.
 *
 * Bytes are added as plain char values, so on platforms where char is
 * signed, bytes >= 0x80 are sign-extended before being added.
 */
struct joaat_hash
{
    size_t operator()(const std::string &str) const
    {
        size_t acc = 0;

        // Per-byte mixing.
        for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
        {
            acc += *it;
            acc += acc << 10;
            acc ^= acc >> 6;
        }

        // Finishing rounds.
        acc += acc << 3;
        acc ^= acc >> 11;
        acc += acc << 15;

        return acc;
    }
};
168*cdf0e10cSrcweir 
169*cdf0e10cSrcweir #define get16bits(d) ((((sal_uInt32)(((const sal_uInt8 *)(d))[1])) << 8)\
170*cdf0e10cSrcweir                        +(sal_uInt32)(((const sal_uInt8 *)(d))[0]) )
171*cdf0e10cSrcweir 
172*cdf0e10cSrcweir struct SuperFastHash
173*cdf0e10cSrcweir {
174*cdf0e10cSrcweir     size_t operator()(const std::string &str) const
175*cdf0e10cSrcweir     {
176*cdf0e10cSrcweir         const char * data = str.data();
177*cdf0e10cSrcweir         int len = str.size();
178*cdf0e10cSrcweir         size_t hash = len, tmp;
179*cdf0e10cSrcweir         if (len <= 0 || data == NULL) return 0;
180*cdf0e10cSrcweir 
181*cdf0e10cSrcweir         int rem = len & 3;
182*cdf0e10cSrcweir         len >>= 2;
183*cdf0e10cSrcweir 
184*cdf0e10cSrcweir         /* Main loop */
185*cdf0e10cSrcweir         for (;len > 0; len--)
186*cdf0e10cSrcweir         {
187*cdf0e10cSrcweir             hash  += get16bits (data);
188*cdf0e10cSrcweir             tmp    = (get16bits (data+2) << 11) ^ hash;
189*cdf0e10cSrcweir             hash   = (hash << 16) ^ tmp;
190*cdf0e10cSrcweir             data  += 2*sizeof (sal_uInt16);
191*cdf0e10cSrcweir             hash  += hash >> 11;
192*cdf0e10cSrcweir         }
193*cdf0e10cSrcweir 
194*cdf0e10cSrcweir         /* Handle end cases */
195*cdf0e10cSrcweir         switch (rem)
196*cdf0e10cSrcweir         {
197*cdf0e10cSrcweir             case 3: hash += get16bits (data);
198*cdf0e10cSrcweir                     hash ^= hash << 16;
199*cdf0e10cSrcweir                     hash ^= data[sizeof (sal_uInt16)] << 18;
200*cdf0e10cSrcweir                     hash += hash >> 11;
201*cdf0e10cSrcweir                     break;
202*cdf0e10cSrcweir             case 2: hash += get16bits (data);
203*cdf0e10cSrcweir                     hash ^= hash << 11;
204*cdf0e10cSrcweir                     hash += hash >> 17;
205*cdf0e10cSrcweir                     break;
206*cdf0e10cSrcweir             case 1: hash += *data;
207*cdf0e10cSrcweir                     hash ^= hash << 10;
208*cdf0e10cSrcweir                     hash += hash >> 1;
209*cdf0e10cSrcweir         }
210*cdf0e10cSrcweir 
211*cdf0e10cSrcweir         /* Force "avalanching" of final 127 bits */
212*cdf0e10cSrcweir         hash ^= hash << 3;
213*cdf0e10cSrcweir         hash += hash >> 5;
214*cdf0e10cSrcweir         hash ^= hash << 4;
215*cdf0e10cSrcweir         hash += hash >> 17;
216*cdf0e10cSrcweir         hash ^= hash << 25;
217*cdf0e10cSrcweir         hash += hash >> 6;
218*cdf0e10cSrcweir 
219*cdf0e10cSrcweir         return hash;
220*cdf0e10cSrcweir     }
221*cdf0e10cSrcweir };
222*cdf0e10cSrcweir 
223*cdf0e10cSrcweir #define pref_hash joaat_hash
224*cdf0e10cSrcweir 
225*cdf0e10cSrcweir typedef std::hash_map<std::string, std::string, pref_hash> Stringtable;
226*cdf0e10cSrcweir typedef std::list<std::string> LinkedList;
227*cdf0e10cSrcweir typedef std::vector<std::string> HashSet;
228*cdf0e10cSrcweir 
229*cdf0e10cSrcweir typedef std::hash_map<std::string, LinkedList, pref_hash> Hashtable;
230*cdf0e10cSrcweir 
231*cdf0e10cSrcweir class StreamTable
232*cdf0e10cSrcweir {
233*cdf0e10cSrcweir public:
234*cdf0e10cSrcweir     std::string document_id;
235*cdf0e10cSrcweir     std::string document_path;
236*cdf0e10cSrcweir     std::string document_module;
237*cdf0e10cSrcweir     std::string document_title;
238*cdf0e10cSrcweir 
239*cdf0e10cSrcweir     HashSet *appl_hidlist;
240*cdf0e10cSrcweir     Hashtable *appl_keywords;
241*cdf0e10cSrcweir     Stringtable *appl_helptexts;
242*cdf0e10cSrcweir     xmlDocPtr appl_doc;
243*cdf0e10cSrcweir 
244*cdf0e10cSrcweir     HashSet *default_hidlist;
245*cdf0e10cSrcweir     Hashtable *default_keywords;
246*cdf0e10cSrcweir     Stringtable *default_helptexts;
247*cdf0e10cSrcweir     xmlDocPtr default_doc;
248*cdf0e10cSrcweir 
249*cdf0e10cSrcweir     StreamTable() :
250*cdf0e10cSrcweir         appl_hidlist(NULL), appl_keywords(NULL), appl_helptexts(NULL), appl_doc(NULL),
251*cdf0e10cSrcweir         default_hidlist(NULL), default_keywords(NULL), default_helptexts(NULL), default_doc(NULL)
252*cdf0e10cSrcweir     {}
253*cdf0e10cSrcweir     void dropdefault()
254*cdf0e10cSrcweir     {
255*cdf0e10cSrcweir         delete default_hidlist;
256*cdf0e10cSrcweir         delete default_keywords;
257*cdf0e10cSrcweir         delete default_helptexts;
258*cdf0e10cSrcweir         if (default_doc) xmlFreeDoc(default_doc);
259*cdf0e10cSrcweir     }
260*cdf0e10cSrcweir     void dropappl()
261*cdf0e10cSrcweir     {
262*cdf0e10cSrcweir         delete appl_hidlist;
263*cdf0e10cSrcweir         delete appl_keywords;
264*cdf0e10cSrcweir         delete appl_helptexts;
265*cdf0e10cSrcweir         if (appl_doc) xmlFreeDoc(appl_doc);
266*cdf0e10cSrcweir     }
267*cdf0e10cSrcweir     ~StreamTable()
268*cdf0e10cSrcweir     {
269*cdf0e10cSrcweir         dropappl();
270*cdf0e10cSrcweir         dropdefault();
271*cdf0e10cSrcweir     }
272*cdf0e10cSrcweir };
273*cdf0e10cSrcweir 
274*cdf0e10cSrcweir struct HelpProcessingException
275*cdf0e10cSrcweir {
276*cdf0e10cSrcweir 	HelpProcessingErrorClass		m_eErrorClass;
277*cdf0e10cSrcweir 	std::string						m_aErrorMsg;
278*cdf0e10cSrcweir 	std::string						m_aXMLParsingFile;
279*cdf0e10cSrcweir 	int								m_nXMLParsingLine;
280*cdf0e10cSrcweir 
281*cdf0e10cSrcweir 	HelpProcessingException( HelpProcessingErrorClass eErrorClass, const std::string& aErrorMsg )
282*cdf0e10cSrcweir 		: m_eErrorClass( eErrorClass )
283*cdf0e10cSrcweir 		, m_aErrorMsg( aErrorMsg )
284*cdf0e10cSrcweir 	{}
285*cdf0e10cSrcweir 	HelpProcessingException( const std::string& aErrorMsg, const std::string& aXMLParsingFile, int nXMLParsingLine )
286*cdf0e10cSrcweir 		: m_eErrorClass( HELPPROCESSING_XMLPARSING_ERROR )
287*cdf0e10cSrcweir 		, m_aErrorMsg( aErrorMsg )
288*cdf0e10cSrcweir 		, m_aXMLParsingFile( aXMLParsingFile )
289*cdf0e10cSrcweir 		, m_nXMLParsingLine( nXMLParsingLine )
290*cdf0e10cSrcweir 	{}
291*cdf0e10cSrcweir };
292*cdf0e10cSrcweir 
293*cdf0e10cSrcweir class HelpCompiler
294*cdf0e10cSrcweir {
295*cdf0e10cSrcweir public:
296*cdf0e10cSrcweir     HelpCompiler(StreamTable &streamTable,
297*cdf0e10cSrcweir                 const fs::path &in_inputFile,
298*cdf0e10cSrcweir                 const fs::path &in_src,
299*cdf0e10cSrcweir                 const fs::path &in_resEmbStylesheet,
300*cdf0e10cSrcweir                 const std::string &in_module,
301*cdf0e10cSrcweir                 const std::string &in_lang,
302*cdf0e10cSrcweir 				bool in_bExtensionMode);
303*cdf0e10cSrcweir     bool compile( void ) throw (HelpProcessingException);
304*cdf0e10cSrcweir     void addEntryToJarFile(const std::string &prefix,
305*cdf0e10cSrcweir         const std::string &entryName, const std::string &bytesToAdd);
306*cdf0e10cSrcweir     void addEntryToJarFile(const std::string &prefix,
307*cdf0e10cSrcweir                 const std::string &entryName, const HashSet &bytesToAdd);
308*cdf0e10cSrcweir     void addEntryToJarFile(const std::string &prefix,
309*cdf0e10cSrcweir                 const std::string &entryName, const Stringtable &bytesToAdd);
310*cdf0e10cSrcweir     void addEntryToJarFile(const std::string &prefix,
311*cdf0e10cSrcweir                 const std::string &entryName, const Hashtable &bytesToAdd);
312*cdf0e10cSrcweir private:
313*cdf0e10cSrcweir     xmlDocPtr getSourceDocument(const fs::path &filePath);
314*cdf0e10cSrcweir     HashSet switchFind(xmlDocPtr doc);
315*cdf0e10cSrcweir     xmlNodePtr clone(xmlNodePtr node, const std::string& appl);
316*cdf0e10cSrcweir     StreamTable &streamTable;
317*cdf0e10cSrcweir     const fs::path inputFile, src;
318*cdf0e10cSrcweir     const std::string module, lang;
319*cdf0e10cSrcweir     const fs::path resEmbStylesheet;
320*cdf0e10cSrcweir 	bool bExtensionMode;
321*cdf0e10cSrcweir };
322*cdf0e10cSrcweir 
323*cdf0e10cSrcweir #endif
324*cdf0e10cSrcweir 
325*cdf0e10cSrcweir /* vi:set tabstop=4 shiftwidth=4 expandtab: */
326