1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;
25 
26 import org.w3c.dom.NodeList;
27 import org.w3c.dom.Node;
28 import org.w3c.dom.Element;
29 import org.w3c.dom.Text;
30 
31 import java.io.IOException;
32 import java.util.Enumeration;
33 
34 import org.openoffice.xmerge.Document;
35 import org.openoffice.xmerge.ConvertData;
36 import org.openoffice.xmerge.ConvertException;
37 import org.openoffice.xmerge.DocumentDeserializer;
38 import org.openoffice.xmerge.converter.xml.OfficeConstants;
39 import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
40 import org.openoffice.xmerge.converter.palm.PalmDB;
41 import org.openoffice.xmerge.converter.palm.Record;
42 import org.openoffice.xmerge.converter.palm.PalmDocument;
43 import org.openoffice.xmerge.util.Debug;
44 
45 /**
46  *  <p>AportisDoc implementation of <code>DocumentDeserializer</code>
47  *  for the {@link
48  *  org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
49  *  PluginFactoryImpl}.</p>
50  *
51  *  <p>This converts an file in AportisDoc PDB format to StarOffice
52  *  XML format.</p>
53  *
54  *  <p>The <code>deserialize</code> method uses a <code>DocDecoder</code>
55  *  to read the AportisDoc format into a <code>String</code> object, then
56  *  it calls <code>buildDocument</code> to create a <code>SxwDocument</code>
57  *  object from it.</p>
58  *
59  *  @author      Herbie Ong
60  */
61 public final class DocumentDeserializerImpl
62     implements OfficeConstants, DocConstants, DocumentDeserializer {
63 
64     /**  A <code>ConvertData</code> object assigned to this object. */
65     private ConvertData cd = null;
66 
67 
68     /**
69      *  Constructor that assigns the given <code>ConvertData</code>
70      *  to this object as input.
71      *
72      *  @param  cd  A <code>ConvertData</code> object to read data for
73      *              the conversion process by the <code>deserialize</code>
74      *              method.
75      */
DocumentDeserializerImpl(ConvertData cd)76     public DocumentDeserializerImpl(ConvertData cd) {
77         this.cd = cd;
78     }
79 
80 
81     /**
82      *  Convert the given <code>ConvertData</code> object
83      *  into a <code>SxwDocument</code> object.
84      *
85      *  @return  Resulting <code>SxwDocument</code> object.
86      *
87      *  @throws  ConvertException   If any conversion error occurs.
88      *  @throws  IOException        If any I/O error occurs.
89      */
deserialize()90     public Document deserialize() throws IOException, ConvertException {
91 
92         int numberOfPDBs = cd.getNumDocuments();
93         Document doc = null;
94         int i=0;
95         ConvertData cdOut;
96         Enumeration e = cd.getDocumentEnumeration();
97         while (e.hasMoreElements()) {
98             PalmDocument palmDoc = (PalmDocument) e.nextElement();
99             PalmDB pdb = palmDoc.getPdb();
100 
101             log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
102             log("<AportisDoc>");
103 
104             Record[] recs = pdb.getRecords();
105             String docName = palmDoc.getName();
106             DocDecoder decoder = new DocDecoder();
107             String text = decoder.parseRecords(recs);
108             doc = buildDocument(docName, text);
109 
110             log("</AportisDoc>");
111         }
112 
113         return doc;
114     }
115 
116 
117     /**
118      *  Parses the text content of an AportisDoc format and build a
119      *  <code>SxwDocument</code>.
120      *
121      *  @param  docName  Name of <code>Document</code>.
122      *  @param  str      Text content of AportisDoc format.
123      *
124      *  @return  Resulting <code>SxwDocument</code> object.
125      *
126      *  @throws  IOException  If any I/O error occurs.
127      */
buildDocument(String docName, String str)128     private SxwDocument buildDocument(String docName, String str)
129         throws IOException {
130 
131         // create minimum office xml document.
132         SxwDocument sxwDoc = new SxwDocument(docName);
133         sxwDoc.initContentDOM();
134 
135         org.w3c.dom.Document doc = sxwDoc.getContentDOM();
136 
137         // Grab hold of the office:body tag,
138         // Assume there should be one.
139         // This is where top level paragraphs will append to.
140         NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
141         Node bodyNode = list.item(0);
142 
143         // Store all the text in a character array.
144         char[] text = str.toCharArray();
145 
146         // startIndex has 2 purposes:
147         // if value is -1, it means that there are no text characters
148         // needed to be processed for a Text node.  if value >= 0, it
149         // is the index of the starting position of a text section
150         // for a Text node.
151         int startIndex = -1;
152 
153         // Create a paragraph node to start with.
154         Element paraNode = doc.createElement(TAG_PARAGRAPH);
155 
156         log("<PARA>");
157 
158         for (int i = 0; i < text.length; i++) {
159 
160             switch (text[i]) {
161 
162                 case TAB_CHAR:
163 
164                     // Check if there are text to be processed first.
165                     if (startIndex >= 0) {
166                         addTextNode(doc, paraNode, text, startIndex, i - 1);
167                         startIndex = -1;
168                     }
169 
170                     // Then, add tab element.
171                     Element tabNode = doc.createElement(TAG_TAB_STOP);
172                     paraNode.appendChild(tabNode);
173 
174                     log("<TAB/>");
175                     break;
176 
177                 case EOL_CHAR:
178 
179                     // Check if there are text to be processed first.
180                     if (startIndex >= 0) {
181                         addTextNode(doc, paraNode, text, startIndex, i - 1);
182                         startIndex = -1;
183                     }
184 
185                     // Then, add the current paragraph to body.
186                     bodyNode.appendChild(paraNode);
187 
188                     // Create another paragraph element.
189                     paraNode = doc.createElement(TAG_PARAGRAPH);
190 
191                     log("</PARA>");
192                     log("<PARA>");
193                     break;
194 
195                 case SPACE_CHAR:
196 
197                     // count is the number of space chars from i
198                     int count = 0;
199 
200                     // Do a look ahead and count the number of space chars
201                     while (text[i + 1 + count] == SPACE_CHAR) {
202                         count++;
203                     }
204 
205                     // Need to build a space node ONLY if count is > 1.
206 
207                     if (count > 0) {
208 
209                         // Check if there are text to be processed first
210                         if (startIndex >= 0) {
211                             addTextNode(doc, paraNode, text,
212                                         startIndex, i);
213                             startIndex = -1;
214                         }
215 
216                         // Then, create a space element
217                         // with the proper attribute.
218                         Element spaceNode = doc.createElement(TAG_SPACE);
219                         spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT,
220                             Integer.toString(count));
221 
222                         paraNode.appendChild(spaceNode);
223 
224                         // reposition i to the last space character.
225                         i += count;
226 
227                         log("<SPACE count=\"" + count + "\" />");
228 
229                     } else {
230 
231                         // If there are no chars for text node yet,
232                         // consider this one.
233                         if (startIndex < 0) {
234 
235                             startIndex = i;
236                             log("<TEXT>");
237                         }
238                     }
239 
240                     break;
241 
242                 default:
243 
244                     // If there are no chars for text node yet,
245                     // this should be the start.
246                     if (startIndex < 0) {
247 
248                         startIndex = i;
249                         log("<TEXT>");
250                     }
251 
252                     break;
253             }
254         }
255 
256         int lastIndex = text.length - 1;
257 
258         // Check if there are text to be processed first.
259 
260         if (startIndex >= 0) {
261             addTextNode(doc, paraNode, text, startIndex, lastIndex);
262         }
263 
264         // Then, add the last paragraph element if it is not added yet.
265         if (text[lastIndex] != EOL_CHAR) {
266             bodyNode.appendChild(paraNode);
267         }
268 
269         log("</PARA>");
270 
271         return sxwDoc;
272     }
273 
274 
275     /**
276      *  Add a Text <code>Node</code> to the given paragraph node with the
277      *  text starting at the given <code>startPos</code> until
278      *  <code>endPos</code>.
279      *
280      *  @param  doc       <code>org.w3c.dom.Document</code> object for creating
281      *                    <code>Node</code> objects.
282      *  @param  para      The current paragraph <code>Node</code> to append
283      *                    text <code>Node</code>.
284      *  @param  text      Array of characters containing text.
285      *  @param  startPos  Starting index position for text value.
286      *  @param  endPos    End index position for text value.
287      */
addTextNode(org.w3c.dom.Document doc, Node para, char text[], int startPos, int endPos)288     private void addTextNode(org.w3c.dom.Document doc, Node para, char text[],
289         int startPos, int endPos) {
290 
291         String str = new String(text, startPos, endPos - startPos + 1);
292         Text textNode = doc.createTextNode(str);
293         para.appendChild(textNode);
294         log(str);
295         log("</TEXT>");
296     }
297 
298     /**
299      *  Sends message to the log object.
300      *
301      *  @param  str  Debug message.
302      */
log(String str)303     private void log(String str) {
304 
305         Debug.log(Debug.TRACE, str);
306     }
307 }
308 
309