/************************************************************** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * *************************************************************/ package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; import org.w3c.dom.NodeList; import org.w3c.dom.Node; import org.w3c.dom.Element; import org.w3c.dom.Text; import java.io.IOException; import java.util.Enumeration; import org.openoffice.xmerge.Document; import org.openoffice.xmerge.ConvertData; import org.openoffice.xmerge.ConvertException; import org.openoffice.xmerge.DocumentDeserializer; import org.openoffice.xmerge.converter.xml.OfficeConstants; import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; import org.openoffice.xmerge.converter.palm.PalmDB; import org.openoffice.xmerge.converter.palm.Record; import org.openoffice.xmerge.converter.palm.PalmDocument; import org.openoffice.xmerge.util.Debug; /** *

AportisDoc implementation of DocumentDeserializer * for the {@link * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl * PluginFactoryImpl}.

* *

This converts a file in AportisDoc PDB format to StarOffice * XML format.

* *

The deserialize method uses a DocDecoder * to read the AportisDoc format into a String object, then * it calls buildDocument to create a SxwDocument * object from it.

*
* @author Herbie Ong
*/
public final class DocumentDeserializerImpl
    implements OfficeConstants, DocConstants, DocumentDeserializer {

    /** The <code>ConvertData</code> object supplying the input documents. */
    private final ConvertData cd;


    /**
     * Constructor that assigns the given <code>ConvertData</code>
     * to this object as input.
     *
     * @param  cd  A <code>ConvertData</code> object to read data for
     *             the conversion process by the <code>deserialize</code>
     *             method.
     */
    public DocumentDeserializerImpl(ConvertData cd) {
        this.cd = cd;
    }


    /**
     * Convert the given <code>ConvertData</code> object
     * into a <code>SxwDocument</code> object.
     *
     * <p>Every <code>PalmDocument</code> in the enumeration is decoded
     * and converted in turn; the document built from the last one is
     * the one returned.</p>
     *
     * @return  Resulting <code>SxwDocument</code> object, or
     *          <code>null</code> if the <code>ConvertData</code>
     *          enumeration yields no documents.
     *
     * @throws  ConvertException  If any conversion error occurs.
     * @throws  IOException       If any I/O error occurs.
     */
    public Document deserialize() throws IOException, ConvertException {

        Document doc = null;

        Enumeration e = cd.getDocumentEnumeration();
        while (e.hasMoreElements()) {
            PalmDocument palmDoc = (PalmDocument) e.nextElement();
            PalmDB pdb = palmDoc.getPdb();

            log("");
            log("");

            Record[] recs = pdb.getRecords();
            String docName = palmDoc.getName();
            DocDecoder decoder = new DocDecoder();
            String text = decoder.parseRecords(recs);
            doc = buildDocument(docName, text);

            log("");
        }

        return doc;
    }


    /**
     * Parses the text content of an AportisDoc format and builds a
     * <code>SxwDocument</code>.
     *
     * <p>The text is scanned one character at a time:</p>
     * <ul>
     *   <li><code>TAB_CHAR</code> becomes a <code>TAG_TAB_STOP</code>
     *       element;</li>
     *   <li><code>EOL_CHAR</code> closes the current paragraph and
     *       starts a new one;</li>
     *   <li>a run of two or more <code>SPACE_CHAR</code> is written as
     *       a <code>TAG_SPACE</code> element (see the inline comments
     *       for how the count is chosen);</li>
     *   <li>everything else is accumulated into text nodes.</li>
     * </ul>
     *
     * <p>Empty input produces a document whose body holds a single
     * empty paragraph.</p>
     *
     * @param  docName  Name of <code>Document</code>.
     * @param  str      Text content of AportisDoc format.
     *
     * @return  Resulting <code>SxwDocument</code> object.
     *
     * @throws  IOException  If any I/O error occurs, or if the content
     *                       DOM has no <code>TAG_OFFICE_BODY</code>
     *                       element to append paragraphs to.
     */
    private SxwDocument buildDocument(String docName, String str)
        throws IOException {

        // Create minimum office xml document.
        SxwDocument sxwDoc = new SxwDocument(docName);
        sxwDoc.initContentDOM();

        org.w3c.dom.Document doc = sxwDoc.getContentDOM();

        // Grab hold of the office:body tag; this is where top level
        // paragraphs are appended.  Fail with a declared exception
        // instead of a NullPointerException further down if it is absent.
        NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
        Node bodyNode = list.item(0);
        if (bodyNode == null) {
            throw new IOException("No " + TAG_OFFICE_BODY
                                  + " element found in content DOM");
        }

        // Store all the text in a character array.
        char[] text = str.toCharArray();

        // startIndex has 2 purposes:
        // if value is -1, there are no text characters pending for a
        // Text node.  If value >= 0, it is the index of the first
        // character of the pending text section.
        int startIndex = -1;

        // Create a paragraph node to start with.
        Element paraNode = doc.createElement(TAG_PARAGRAPH);

        log("");

        for (int i = 0; i < text.length; i++) {

            switch (text[i]) {

                case TAB_CHAR: {

                    // Flush pending text (up to the char before the tab).
                    if (startIndex >= 0) {
                        addTextNode(doc, paraNode, text, startIndex, i - 1);
                        startIndex = -1;
                    }

                    // Then, add tab element.
                    Element tabNode = doc.createElement(TAG_TAB_STOP);
                    paraNode.appendChild(tabNode);

                    log("");
                    break;
                }

                case EOL_CHAR: {

                    // Flush pending text (up to the char before the EOL).
                    if (startIndex >= 0) {
                        addTextNode(doc, paraNode, text, startIndex, i - 1);
                        startIndex = -1;
                    }

                    // Then, add the current paragraph to body.
                    bodyNode.appendChild(paraNode);

                    // Create another paragraph element.
                    paraNode = doc.createElement(TAG_PARAGRAPH);

                    log("");
                    log("");
                    break;
                }

                case SPACE_CHAR: {

                    // count is the number of space chars FOLLOWING i.
                    // The bounds check keeps the look-ahead inside the
                    // array when the text ends with a space.
                    int count = 0;
                    while (i + 1 + count < text.length
                           && text[i + 1 + count] == SPACE_CHAR) {
                        count++;
                    }

                    // Need a space element ONLY for a run of 2+ spaces.
                    if (count > 0) {

                        int spaceCount = count;

                        if (startIndex >= 0) {
                            // The pending text node absorbs the first
                            // space of the run; the element carries
                            // the remaining ones.
                            addTextNode(doc, paraNode, text, startIndex, i);
                            startIndex = -1;
                        } else {
                            // No pending text to carry the first space,
                            // so the element must represent the whole
                            // run or one space would be dropped.
                            spaceCount = count + 1;
                        }

                        // Then, create a space element
                        // with the proper attribute.
                        Element spaceNode = doc.createElement(TAG_SPACE);
                        spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT,
                                               Integer.toString(spaceCount));
                        paraNode.appendChild(spaceNode);

                        // Reposition i to the last space character.
                        i += count;

                        log("");

                    } else if (startIndex < 0) {

                        // Single space with no pending text: it starts
                        // a new text section.
                        startIndex = i;
                        log("");
                    }
                    break;
                }

                default: {

                    // If there are no chars for text node yet,
                    // this should be the start.
                    if (startIndex < 0) {
                        startIndex = i;
                        log("");
                    }
                    break;
                }
            }
        }

        int lastIndex = text.length - 1;

        // Flush any text still pending at the end of input.
        if (startIndex >= 0) {
            addTextNode(doc, paraNode, text, startIndex, lastIndex);
        }

        // Then, add the last paragraph element if it is not added yet.
        // (Empty input has no last character, so the initial paragraph
        // is appended as-is.)
        if (text.length == 0 || text[lastIndex] != EOL_CHAR) {
            bodyNode.appendChild(paraNode);
        }

        log("");

        return sxwDoc;
    }


    /**
     * Add a Text <code>Node</code> to the given paragraph node with the
     * text starting at the given <code>startPos</code> until
     * <code>endPos</code> (both inclusive).
     *
     * @param  doc       <code>org.w3c.dom.Document</code> object for
     *                   creating <code>Node</code> objects.
     * @param  para      The current paragraph <code>Node</code> to
     *                   append the text <code>Node</code> to.
     * @param  text      Array of characters containing text.
     * @param  startPos  Starting index position for text value.
     * @param  endPos    End index position for text value.
     */
    private void addTextNode(org.w3c.dom.Document doc, Node para,
                             char[] text, int startPos, int endPos) {

        // endPos is inclusive, hence the + 1 on the length.
        String str = new String(text, startPos, endPos - startPos + 1);
        Text textNode = doc.createTextNode(str);
        para.appendChild(textNode);

        log(str);
        log("");
    }


    /**
     * Sends message to the log object at trace level.
     *
     * @param  str  Debug message.
     */
    private void log(String str) {
        Debug.log(Debug.TRACE, str);
    }
}