/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/



package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;

import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import org.w3c.dom.Text;

import java.io.IOException;
import java.util.Enumeration;

import org.openoffice.xmerge.Document;
import org.openoffice.xmerge.ConvertData;
import org.openoffice.xmerge.ConvertException;
import org.openoffice.xmerge.DocumentDeserializer;
import org.openoffice.xmerge.converter.xml.OfficeConstants;
import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
import org.openoffice.xmerge.converter.palm.PalmDB;
import org.openoffice.xmerge.converter.palm.Record;
import org.openoffice.xmerge.converter.palm.PalmDocument;
import org.openoffice.xmerge.util.Debug;

/**
 *  <p>AportisDoc implementation of <code>DocumentDeserializer</code>
 *  for the {@link
 *  org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
 *  PluginFactoryImpl}.</p>
 *
 *  <p>This converts a file in AportisDoc PDB format to StarOffice
 *  XML format.</p>
 *
 *  <p>The <code>deserialize</code> method uses a <code>DocDecoder</code>
 *  to read the AportisDoc format into a <code>String</code> object, then
 *  it calls <code>buildDocument</code> to create a <code>SxwDocument</code>
 *  object from it.</p>
 *
 *  @author      Herbie Ong
 */
public final class DocumentDeserializerImpl
    implements OfficeConstants, DocConstants, DocumentDeserializer {

    /**  A <code>ConvertData</code> object assigned to this object. */
    private final ConvertData cd;


    /**
     *  Constructor that assigns the given <code>ConvertData</code>
     *  to this object as input.
     *
     *  @param  cd  A <code>ConvertData</code> object to read data for
     *              the conversion process by the <code>deserialize</code>
     *              method.
     */
    public DocumentDeserializerImpl(ConvertData cd) {
        this.cd = cd;
    }


    /**
     *  Convert the given <code>ConvertData</code> object
     *  into a <code>SxwDocument</code> object.
     *
     *  <p>Every <code>PalmDocument</code> in the <code>ConvertData</code>
     *  is decoded and converted in turn, but only the document built
     *  from the last one is returned.  If the <code>ConvertData</code>
     *  enumerates no documents, <code>null</code> is returned.</p>
     *
     *  @return  Resulting <code>SxwDocument</code> object, or
     *           <code>null</code> if there was nothing to convert.
     *
     *  @throws  ConvertException   If any conversion error occurs.
     *  @throws  IOException        If any I/O error occurs.
     */
    public Document deserialize() throws IOException, ConvertException {

        Document doc = null;

        Enumeration e = cd.getDocumentEnumeration();
        while (e.hasMoreElements()) {
            PalmDocument palmDoc = (PalmDocument) e.nextElement();
            PalmDB pdb = palmDoc.getPdb();

            log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
            log("<AportisDoc>");

            Record[] recs = pdb.getRecords();
            String docName = palmDoc.getName();
            DocDecoder decoder = new DocDecoder();
            String text = decoder.parseRecords(recs);
            doc = buildDocument(docName, text);

            log("</AportisDoc>");
        }

        return doc;
    }


    /**
     *  Parses the text content of an AportisDoc format and builds a
     *  <code>SxwDocument</code>.
     *
     *  <p>The text is scanned character by character.  A
     *  <code>TAB_CHAR</code> becomes a tab-stop element, an
     *  <code>EOL_CHAR</code> closes the current paragraph and opens a new
     *  one, and a run of two or more <code>SPACE_CHAR</code>s becomes a
     *  space element carrying a count attribute.  Everything else
     *  (including isolated single spaces) is collected into text nodes.
     *  Empty text produces a document with a single empty paragraph.</p>
     *
     *  @param  docName  Name of <code>Document</code>.
     *  @param  str      Text content of AportisDoc format.
     *
     *  @return  Resulting <code>SxwDocument</code> object.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    private SxwDocument buildDocument(String docName, String str)
        throws IOException {

        // create minimum office xml document.
        SxwDocument sxwDoc = new SxwDocument(docName);
        sxwDoc.initContentDOM();

        org.w3c.dom.Document doc = sxwDoc.getContentDOM();

        // Grab hold of the office:body tag,
        // Assume there should be one.
        // This is where top level paragraphs will append to.
        NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
        Node bodyNode = list.item(0);

        // Store all the text in a character array.
        char[] text = str.toCharArray();

        // startIndex has 2 purposes:
        // if value is -1, it means that there are no text characters
        // needed to be processed for a Text node.  if value >= 0, it
        // is the index of the starting position of a text section
        // for a Text node.
        int startIndex = -1;

        // Create a paragraph node to start with.
        Element paraNode = doc.createElement(TAG_PARAGRAPH);

        log("<PARA>");

        for (int i = 0; i < text.length; i++) {

            switch (text[i]) {

                case TAB_CHAR:

                    // Check if there are text to be processed first.
                    if (startIndex >= 0) {
                        addTextNode(doc, paraNode, text, startIndex, i - 1);
                        startIndex = -1;
                    }

                    // Then, add tab element.
                    Element tabNode = doc.createElement(TAG_TAB_STOP);
                    paraNode.appendChild(tabNode);

                    log("<TAB/>");
                    break;

                case EOL_CHAR:

                    // Check if there are text to be processed first.
                    if (startIndex >= 0) {
                        addTextNode(doc, paraNode, text, startIndex, i - 1);
                        startIndex = -1;
                    }

                    // Then, add the current paragraph to body.
                    bodyNode.appendChild(paraNode);

                    // Create another paragraph element.
                    paraNode = doc.createElement(TAG_PARAGRAPH);

                    log("</PARA>");
                    log("<PARA>");
                    break;

                case SPACE_CHAR:

                    // count is the number of space chars following i
                    int count = 0;

                    // Do a look ahead and count the number of space chars.
                    // The bounds check keeps the look ahead inside the
                    // array when the text ends with a space.
                    while (i + 1 + count < text.length
                           && text[i + 1 + count] == SPACE_CHAR) {
                        count++;
                    }

                    // Need to build a space node ONLY if this space is
                    // followed by at least one more space (count > 0).

                    if (count > 0) {

                        // Number of spaces the space element stands for.
                        int spaceCount = count;

                        if (startIndex >= 0) {

                            // Pending text absorbs the space at i, so
                            // the element covers the remaining ones.
                            addTextNode(doc, paraNode, text,
                                        startIndex, i);
                            startIndex = -1;

                        } else {

                            // No pending text to carry the space at i,
                            // so the element must account for it too;
                            // otherwise one space would be lost.
                            spaceCount = count + 1;
                        }

                        // Then, create a space element
                        // with the proper attribute.
                        Element spaceNode = doc.createElement(TAG_SPACE);
                        spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT,
                                               Integer.toString(spaceCount));

                        paraNode.appendChild(spaceNode);

                        // reposition i to the last space character.
                        i += count;

                        log("<SPACE count=\"" + spaceCount + "\" />");

                    } else {

                        // If there are no chars for text node yet,
                        // consider this one.
                        if (startIndex < 0) {

                            startIndex = i;
                            log("<TEXT>");
                        }
                    }

                    break;

                default:

                    // If there are no chars for text node yet,
                    // this should be the start.
                    if (startIndex < 0) {

                        startIndex = i;
                        log("<TEXT>");
                    }

                    break;
            }
        }

        int lastIndex = text.length - 1;

        // Check if there are text to be processed first.

        if (startIndex >= 0) {
            addTextNode(doc, paraNode, text, startIndex, lastIndex);
        }

        // Then, add the last paragraph element if it is not added yet.
        // Empty text has no last character to inspect; its (empty)
        // paragraph has not been added, so add it.
        if (text.length == 0 || text[lastIndex] != EOL_CHAR) {
            bodyNode.appendChild(paraNode);
        }

        log("</PARA>");

        return sxwDoc;
    }


    /**
     *  Add a Text <code>Node</code> to the given paragraph node with the
     *  text starting at the given <code>startPos</code> until
     *  <code>endPos</code> (both inclusive).
     *
     *  @param  doc       <code>org.w3c.dom.Document</code> object for creating
     *                    <code>Node</code> objects.
     *  @param  para      The current paragraph <code>Node</code> to append
     *                    text <code>Node</code>.
     *  @param  text      Array of characters containing text.
     *  @param  startPos  Starting index position for text value.
     *  @param  endPos    End index position for text value.
     */
    private void addTextNode(org.w3c.dom.Document doc, Node para, char[] text,
                             int startPos, int endPos) {

        // endPos is inclusive, hence the + 1 on the length.
        String str = new String(text, startPos, endPos - startPos + 1);
        Text textNode = doc.createTextNode(str);
        para.appendChild(textNode);
        log(str);
        log("</TEXT>");
    }

    /**
     *  Sends message to the log object.
     *
     *  @param  str  Debug message.
     */
    private void log(String str) {

        Debug.log(Debug.TRACE, str);
    }
}
