/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/

package org.openoffice.xmerge.converter.xml;

import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.BufferedReader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.InputStreamReader;
import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.HashMap;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Node;
import org.w3c.dom.Element;
import org.w3c.dom.Document;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.DocumentType;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.w3c.dom.NamedNodeMap;
import org.xml.sax.SAXException;

import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;

import org.openoffice.xmerge.util.Resources;
import org.openoffice.xmerge.util.Debug;

/**
 * An implementation of {@code Document} for StarOffice documents.
 *
 * <p>The document is held as up to five DOM trees (content, styles, meta,
 * settings and manifest) plus an optional {@code OfficeZip} holding the
 * package the document was read from.  It can be read from and written to
 * either the zipped package format or a single flat XML stream.</p>
 */
public abstract class OfficeDocument
    implements org.openoffice.xmerge.Document, OfficeConstants {

    /** Name of the root element of the flat (non-zipped) XML format. */
    private static final String FLAT_ROOT_NAME = "office:document";

    /** End tag of the flat XML root element, see {@link #secondHack}. */
    private static final String FLAT_ROOT_END_TAG = "</" + FLAT_ROOT_NAME + ">";

    /**
     * Factory for {@code DocumentBuilder} objects.
     *
     * <p>This is deliberately per instance: the constructor configures the
     * {@code namespaceAware} and {@code validating} flags, and a shared
     * static factory would let one document's flags silently change the
     * parsing behaviour of every other document.</p>
     */
    private final DocumentBuilderFactory factory =
        DocumentBuilderFactory.newInstance();

    /** DOM {@code Document} of content.xml. */
    private Document contentDoc = null;

    /** DOM {@code Document} of meta.xml. */
    private Document metaDoc = null;

    /** DOM {@code Document} of settings.xml. */
    private Document settingsDoc = null;

    /** DOM {@code Document} of styles.xml. */
    private Document styleDoc = null;

    /** DOM {@code Document} of META-INF/manifest.xml. */
    private Document manifestDoc = null;

    /** Document name with the standard extension removed. */
    private String documentName = null;

    /** Document name followed by the standard extension. */
    private String fileName = null;

    /** {@code Resources} object. */
    private Resources res = null;

    /**
     * {@code OfficeZip} object to store zip contents from the read
     * {@code InputStream}.  Note that this member will still be null if
     * the document was initialized using a template file instead of
     * reading from a StarOffice zipped XML file.
     */
    private OfficeZip zip = null;

    /**
     * Embedded objects of the document, keyed by name.  Stays null until
     * an object is added or the list is first built from the manifest.
     */
    private Map embeddedObjects = null;


    /**
     * Default constructor: namespace aware, not validating.
     *
     * @param  name  {@code Document} name.
     */
    public OfficeDocument(String name) {
        this(name, true, false);
    }


    /**
     * Constructor with arguments to set the {@code namespaceAware}
     * and {@code validating} flags of the parser factory.
     *
     * @param  name            {@code Document} name (may or may not
     *                         contain extension).
     * @param  namespaceAware  Value for {@code namespaceAware} flag.
     * @param  validating      Value for {@code validating} flag.
     */
    public OfficeDocument(String name, boolean namespaceAware, boolean validating) {
        res = Resources.getInstance();
        factory.setValidating(validating);
        factory.setNamespaceAware(namespaceAware);
        this.documentName = trimDocumentName(name);
        this.fileName = documentName + getFileExtension();
    }


    /**
     * Removes the file extension from the {@code Document} name.
     *
     * <p>The extension is matched case-insensitively with
     * {@code String.regionMatches}, which does not depend on the default
     * locale and never changes the length of the string being indexed
     * (unlike comparing against {@code name.toLowerCase()}).</p>
     *
     * @param  name  Full {@code Document} name with extension.
     *
     * @return  Name of {@code Document} without the extension.
     */
    private String trimDocumentName(String name) {
        String ext = getFileExtension();
        int endIndex = name.length() - ext.length();
        if (endIndex >= 0
                && name.regionMatches(true, endIndex, ext, 0, ext.length())) {
            // strip the extension
            return name.substring(0, endIndex);
        }
        return name;
    }


    /**
     * Returns the DOM {@code Document} of the content.xml file.
     * No content DOM is created by the constructor, so this returns null
     * until one of the {@code read} methods, {@code initContentDOM} or
     * {@code setContentDOM} has been called.
     *
     * @return  DOM {@code Document} object, possibly null.
     */
    public Document getContentDOM() {
        return contentDoc;
    }


    /**
     * Returns the DOM {@code Document} of the meta.xml file.
     * No meta DOM is created by the constructor, so this returns null
     * until a {@code read} method has produced one or
     * {@code setMetaDOM} has been called.
     *
     * @return  DOM {@code Document} object, possibly null.
     */
    public Document getMetaDOM() {
        return metaDoc;
    }


    /**
     * Returns the DOM {@code Document} of the settings.xml file.
     * No settings DOM is created by the constructor, so this returns null
     * until a {@code read} method has produced one or
     * {@code initSettingsDOM} / {@code setSettingsDOM} has been called.
     *
     * @return  DOM {@code Document} object, possibly null.
     */
    public Document getSettingsDOM() {
        return settingsDoc;
    }


    /**
     * Sets the content tree of the document.
     *
     * @param  newDom  {@code Node} containing the new content tree;
     *                 must actually be a DOM {@code Document}.
     */
    public void setContentDOM(Node newDom) {
        contentDoc = (Document) newDom;
    }


    /**
     * Sets the meta tree of the document.
     *
     * @param  newDom  {@code Node} containing the new meta tree;
     *                 must actually be a DOM {@code Document}.
     */
    public void setMetaDOM(Node newDom) {
        metaDoc = (Document) newDom;
    }


    /**
     * Sets the settings tree of the document.
     *
     * @param  newDom  {@code Node} containing the new settings tree;
     *                 must actually be a DOM {@code Document}.
     */
    public void setSettingsDOM(Node newDom) {
        settingsDoc = (Document) newDom;
    }


    /**
     * Sets the style tree of the document.
     *
     * @param  newDom  {@code Node} containing the new style tree;
     *                 must actually be a DOM {@code Document}.
     */
    public void setStyleDOM(Node newDom) {
        styleDoc = (Document) newDom;
    }


    /**
     * Returns the DOM {@code Document} of the styles.xml file.
     * Note that this may return null if there is no style DOM: none is
     * created by the constructor, and depending on the
     * {@code InputStream} a {@code read} method may or may not build one.
     * When creating a new style DOM, call {@code initStyleDOM} first.
     *
     * @return  DOM {@code Document} object, possibly null.
     */
    public Document getStyleDOM() {
        return styleDoc;
    }


    /**
     * Returns the name of the {@code Document}.
     *
     * @return  The name of the {@code Document}, without extension.
     */
    public String getName() {
        return documentName;
    }


    /**
     * Returns the file name of the {@code Document}, i.e. its name
     * followed by the standard extension.
     *
     * @return  The file name of the {@code Document}.
     */
    public String getFileName() {
        return fileName;
    }


    /**
     * Returns the file extension for this type of {@code Document}.
     *
     * @return  The file extension of the {@code Document}.
     */
    protected abstract String getFileExtension();


    /**
     * Returns all the embedded objects (graphics, formulae, etc.) present
     * in this document.
     *
     * <p>The list is built lazily from the manifest on first use.  When
     * there is neither a manifest nor any added object, an empty iterator
     * is returned and nothing is cached, so that a manifest read later is
     * still picked up.</p>
     *
     * @return  An {@code Iterator} of {@code EmbeddedObject} objects.
     */
    public Iterator getEmbeddedObjects() {

        if (embeddedObjects == null) {
            if (manifestDoc == null) {
                // Nothing to build the list from yet.
                return new HashMap().values().iterator();
            }
            embeddedObjects = readEmbeddedObjectsFromManifest();
        }

        return embeddedObjects.values().iterator();
    }


    /**
     * Builds the map of embedded objects described by the manifest DOM.
     *
     * @return  A new map from object name to {@code EmbeddedObject}.
     */
    private Map readEmbeddedObjectsFromManifest() {
        Map objects = new HashMap();

        NodeList entries = manifestDoc.getElementsByTagName(TAG_MANIFEST_FILE);
        int len = entries.getLength();

        for (int i = 0; i < len; i++) {
            NamedNodeMap attrs = entries.item(i).getAttributes();

            String type = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_TYPE).getNodeValue();
            String path = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_PATH).getNodeValue();

            /*
             * According to OpenOffice.org XML File Format document (ver. 1)
             * there are only two types of embedded object:
             *
             *      Objects with an XML representation.
             *      Objects without an XML representation.
             *
             * The former are represented by one or more XML files.
             * The latter are in binary form.
             */
            if (type.startsWith("application/vnd.sun.xml")) {
                if (path.equals("/")) {
                    // Exclude the main document entries
                    continue;
                }
                // Take off the trailing '/'
                String name = path.substring(0, path.length() - 1);
                objects.put(name, new EmbeddedXMLObject(name, type, zip));
            } else if (type.equals("text/xml")) {
                // XML entries are either embedded StarOffice doc entries
                // or main document entries
                continue;
            } else {
                // FIX (HJ): allows empty MIME type
                objects.put(path, new EmbeddedBinaryObject(path, type, zip));
            }
        }

        return objects;
    }


    /**
     * Returns the embedded object corresponding to the name provided.
     * The name should be stripped of any preceding path characters, such
     * as '/', '.' or '#'.
     *
     * @param  name  The name of the embedded object to retrieve.
     *
     * @return  An {@code EmbeddedObject} instance representing the named
     *          object, or null if the name is null or unknown.
     */
    public EmbeddedObject getEmbeddedObject(String name) {
        if (name == null) {
            return null;
        }

        if (embeddedObjects == null) {
            getEmbeddedObjects();
        }

        if (embeddedObjects == null) {
            // No manifest and no added objects: nothing can match.
            return null;
        }

        return (EmbeddedObject) embeddedObjects.get(name);
    }


    /**
     * Adds a new embedded object to the document.  A null argument is
     * ignored.
     *
     * @param  embObj  An instance of {@code EmbeddedObject}.
     */
    public void addEmbeddedObject(EmbeddedObject embObj) {
        if (embObj == null) {
            return;
        }

        if (embeddedObjects == null) {
            embeddedObjects = new HashMap();
        }

        embeddedObjects.put(embObj.getName(), embObj);
    }


    /**
     * Reads the Office {@code Document} from the given zipped
     * {@code InputStream}.
     *
     * <p>content.xml is mandatory; styles.xml, meta.xml, settings.xml and
     * META-INF/manifest.xml are parsed only when present.</p>
     *
     * @param  is  Office document {@code InputStream}.
     *
     * @throws  IOException  If any I/O error occurs, if content.xml is
     *                       missing, or if an entry cannot be parsed.
     */
    public void read(InputStream is) throws IOException {

        Debug.log(Debug.INFO, "reading Office file");

        DocumentBuilder builder = newBuilder();

        // read in Office zip file format
        zip = new OfficeZip();
        zip.read(is);

        // content.xml is the only mandatory entry
        byte contentBytes[] = zip.getContentXMLBytes();
        if (contentBytes == null) {
            throw new OfficeDocumentException("Entry content.xml not found in file");
        }
        contentDoc = parseEntry(builder, contentBytes);

        // the remaining entries are optional
        byte styleBytes[] = zip.getStyleXMLBytes();
        if (styleBytes != null) {
            styleDoc = parseEntry(builder, styleBytes);
        }

        byte metaBytes[] = zip.getMetaXMLBytes();
        if (metaBytes != null) {
            metaDoc = parseEntry(builder, metaBytes);
        }

        byte settingsBytes[] = zip.getSettingsXMLBytes();
        if (settingsBytes != null) {
            settingsDoc = parseEntry(builder, settingsBytes);
        }

        // Read in the META-INF/manifest.xml file
        byte manifestBytes[] = zip.getManifestXMLBytes();
        if (manifestBytes != null) {
            manifestDoc = parseEntry(builder, manifestBytes);
        }
    }


    /**
     * Reads the Office {@code Document} from the given
     * {@code InputStream}.
     *
     * <p>For a non-zipped stream the single flat XML document is split
     * into separate content, styles, settings and meta DOMs.</p>
     *
     * @param  is     Office document {@code InputStream}.
     * @param  isZip  {@code true} if the stream is a zipped package,
     *                {@code false} if it is a flat XML document.
     *
     * @throws  IOException  If any I/O or parsing error occurs.
     */
    public void read(InputStream is, boolean isZip) throws IOException {

        if (isZip) {
            read(is);
            return;
        }

        Debug.log(Debug.INFO, "reading Office file");

        DocumentBuilder builder = newBuilder();

        Document newDoc;
        try {
            newDoc = builder.parse(new InputSource(secondHack(is)));
        } catch (SAXException ex) {
            throw new OfficeDocumentException(ex);
        }

        /* content */
        contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
        // FIX (HJ): Include office:font-decls in content DOM
        copyFirstElement(newDoc, TAG_OFFICE_FONT_DECLS, contentDoc);
        copyFirstElement(newDoc, TAG_OFFICE_AUTOMATIC_STYLES, contentDoc);
        copyFirstElement(newDoc, TAG_OFFICE_BODY, contentDoc);

        /* styles */
        styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
        // FIX (HJ): Include office:font-decls in styles DOM
        copyFirstElement(newDoc, TAG_OFFICE_FONT_DECLS, styleDoc);
        copyFirstElement(newDoc, TAG_OFFICE_STYLES, styleDoc);
        // FIX (HJ): Include office:automatic-styles in styles DOM
        copyFirstElement(newDoc, TAG_OFFICE_AUTOMATIC_STYLES, styleDoc);
        // FIX (HJ): Include office:master-styles in styles DOM
        copyFirstElement(newDoc, TAG_OFFICE_MASTER_STYLES, styleDoc);

        /* settings */
        settingsDoc = createDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
        copyFirstElement(newDoc, TAG_OFFICE_SETTINGS, settingsDoc);

        /* meta */
        metaDoc = createDOM(TAG_OFFICE_DOCUMENT_META);
        copyFirstElement(newDoc, TAG_OFFICE_META, metaDoc);
    }


    /**
     * Creates a {@code DocumentBuilder} from this document's factory.
     *
     * @return  A new {@code DocumentBuilder}.
     *
     * @throws  IOException  If the parser cannot be configured.
     */
    private DocumentBuilder newBuilder() throws IOException {
        try {
            return factory.newDocumentBuilder();
        } catch (ParserConfigurationException ex) {
            throw new OfficeDocumentException(ex);
        }
    }


    /**
     * Parses one package entry, reporting parse failures as
     * {@code OfficeDocumentException}.
     *
     * @param  builder  {@code DocumentBuilder} object for parsing.
     * @param  bytes    Raw bytes of the entry.
     *
     * @return  Resulting DOM {@code Document} object.
     *
     * @throws  IOException  If any I/O or parsing error occurs.
     */
    private static Document parseEntry(DocumentBuilder builder, byte bytes[])
        throws IOException {
        try {
            return parse(builder, bytes);
        } catch (SAXException ex) {
            throw new OfficeDocumentException(ex);
        }
    }


    /**
     * Imports the first element named {@code tagName} found in
     * {@code source} and appends it to the root element of
     * {@code target}.  Does nothing if no such element exists.
     *
     * @param  source   Document to search.
     * @param  tagName  Qualified tag name to look for.
     * @param  target   Document whose root element receives the copy.
     */
    private static void copyFirstElement(Document source, String tagName,
                                         Document target) {
        NodeList matches = source.getElementsByTagName(tagName);
        if (matches.getLength() > 0) {
            Node imported = target.importNode(matches.item(0), true);
            target.getDocumentElement().appendChild(imported);
        }
    }


    /**
     * Parses the given {@code byte} array into a DOM {@code Document}
     * object using the {@code DocumentBuilder} object.  Any
     * {@code <!DOCTYPE>} declaration is filtered out first, see
     * {@link #hack}.
     *
     * @param  builder  {@code DocumentBuilder} object for parsing.
     * @param  bytes    {@code byte} array for parsing.
     *
     * @return  Resulting DOM {@code Document} object.
     *
     * @throws  SAXException  If any parsing error occurs.
     * @throws  IOException   If any I/O error occurs.
     */
    static Document parse(DocumentBuilder builder, byte bytes[])
        throws SAXException, IOException {

        ByteArrayInputStream is = new ByteArrayInputStream(bytes);

        // TODO:  replace hack with a more appropriate fix.
        Reader r = hack(is);
        return builder.parse(new InputSource(r));
    }


    /**
     * Method to return the MIME type of the document.
     *
     * @return  String  The document's MIME type.
     */
    protected abstract String getDocumentMimeType();


    /**
     * Writes out the Office ZIP file format.
     *
     * <p>A fresh manifest is generated on every call; the embedded
     * objects come first, followed by the Pictures directory entry and
     * one entry per XML part that is present.</p>
     *
     * <p>NOTE(review): the manifest is reset before the embedded object
     * list is consulted, so objects that were never materialised via
     * {@code getEmbeddedObjects} before this call appear not to be
     * re-listed; this ordering is kept unchanged - confirm whether it is
     * intended.</p>
     *
     * @param  os  XML {@code OutputStream}.
     *
     * @throws  IOException  If any I/O error occurs.
     */
    public void write(OutputStream os) throws IOException {
        if (zip == null) {
            zip = new OfficeZip();
        }

        initManifestDOM();

        // The EmbeddedObjects come first.
        Iterator embObjs = getEmbeddedObjects();
        while (embObjs.hasNext()) {
            EmbeddedObject obj = (EmbeddedObject) embObjs.next();
            obj.writeManifestData(manifestDoc);
            obj.write(zip);
        }

        // Add in the entry for the Pictures directory.  Always present.
        appendManifestEntry("Pictures/", "");

        // Write content to the Zip file and then write any of the optional
        // data, if it exists.
        zip.setContentXMLBytes(docToBytes(contentDoc));
        appendManifestEntry("content.xml", "text/xml");

        if (styleDoc != null) {
            zip.setStyleXMLBytes(docToBytes(styleDoc));
            appendManifestEntry("styles.xml", "text/xml");
        }

        if (metaDoc != null) {
            zip.setMetaXMLBytes(docToBytes(metaDoc));
            appendManifestEntry("meta.xml", "text/xml");
        }

        if (settingsDoc != null) {
            zip.setSettingsXMLBytes(docToBytes(settingsDoc));
            appendManifestEntry("settings.xml", "text/xml");
        }

        zip.setManifestXMLBytes(docToBytes(manifestDoc));

        zip.write(os);
    }


    /**
     * Appends a file entry to the root element of the manifest DOM.
     *
     * @param  path  Value of the manifest path attribute.
     * @param  type  Value of the manifest media type attribute.
     */
    private void appendManifestEntry(String path, String type) {
        Element entry = manifestDoc.createElement(TAG_MANIFEST_FILE);
        entry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, path);
        entry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, type);
        manifestDoc.getDocumentElement().appendChild(entry);
    }


    /**
     * Writes out the Office document, either as a ZIP package or as a
     * single flat XML document.
     *
     * <p>The flat document gets an {@code office:document} root and
     * receives, in this order, the first meta, styles, settings,
     * automatic-styles and body element of the respective DOMs.
     * Failures are reported to the caller instead of being printed and
     * ignored, so a truncated stream is never mistaken for success.</p>
     *
     * @param  os     XML {@code OutputStream}.
     * @param  isZip  {@code true} to write a zipped package,
     *                {@code false} to write flat XML.
     *
     * @throws  IOException  If any I/O error occurs.
     */
    public void write(OutputStream os, boolean isZip) throws IOException {

        if (isZip) {
            write(os);
            return;
        }

        // A fresh default factory is used on purpose here, independent of
        // the flags this document was constructed with.
        DocumentBuilder builder;
        try {
            builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        } catch (ParserConfigurationException ex) {
            throw new OfficeDocumentException(ex);
        }

        DOMImplementation domImpl = builder.getDOMImplementation();
        Document newDoc = domImpl.createDocument(
            "http://openoffice.org/2000/office", FLAT_ROOT_NAME, null);

        Element rootElement = newDoc.getDocumentElement();
        rootElement.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
        rootElement.setAttribute("xmlns:style", "http://openoffice.org/2000/style");
        rootElement.setAttribute("xmlns:text", "http://openoffice.org/2000/text");
        rootElement.setAttribute("xmlns:table", "http://openoffice.org/2000/table");
        rootElement.setAttribute("xmlns:draw", "http://openoffice.org/2000/drawing");
        rootElement.setAttribute("xmlns:fo", "http://www.w3.org/1999/XSL/Format");
        rootElement.setAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink");
        rootElement.setAttribute("xmlns:dc", "http://purl.org/dc/elements/1.1/");
        rootElement.setAttribute("xmlns:meta", "http://openoffice.org/2000/meta");
        rootElement.setAttribute("xmlns:number", "http://openoffice.org/2000/datastyle");
        rootElement.setAttribute("xmlns:svg", "http://www.w3.org/2000/svg");
        rootElement.setAttribute("xmlns:chart", "http://openoffice.org/2000/chart");
        rootElement.setAttribute("xmlns:dr3d", "http://openoffice.org/2000/dr3d");
        rootElement.setAttribute("xmlns:math", "http://www.w3.org/1998/Math/MathML");
        rootElement.setAttribute("xmlns:form", "http://openoffice.org/2000/form");
        rootElement.setAttribute("xmlns:script", "http://openoffice.org/2000/script");
        rootElement.setAttribute("xmlns:config", "http://openoffice.org/2001/config");

        // #i41033# OASIS format needs the "office:class" set.
        // Compare by value: identity comparison only works by accident
        // when both sides happen to be the same String instance.
        String mimeType = getDocumentMimeType();
        if (SXC_MIME_TYPE.equals(mimeType)) {
            rootElement.setAttribute("office:class", "spreadsheet");
        } else if (SXW_MIME_TYPE.equals(mimeType)) {
            rootElement.setAttribute("office:class", "text");
        }

        rootElement.setAttribute("office:version", "1.0");

        if (metaDoc != null) {
            copyFirstElement(metaDoc, TAG_OFFICE_META, newDoc);
        }
        if (styleDoc != null) {
            copyFirstElement(styleDoc, TAG_OFFICE_STYLES, newDoc);
        }
        if (settingsDoc != null) {
            copyFirstElement(settingsDoc, TAG_OFFICE_SETTINGS, newDoc);
        }
        if (contentDoc != null) {
            copyFirstElement(contentDoc, TAG_OFFICE_AUTOMATIC_STYLES, newDoc);
            copyFirstElement(contentDoc, TAG_OFFICE_BODY, newDoc);
        }

        os.write(docToBytes(newDoc));
    }


    /**
     * Writes out a {@code org.w3c.dom.Document} object into a
     * {@code byte} array.
     *
     * <p>Several XML parsers may be on the classpath and the concrete
     * type of {@code doc} depends on which one comes first, so the
     * serializer matching the DOM implementation is looked up
     * reflectively.  Unknown implementations fall back to the JAXP
     * identity {@code Transformer}.</p>
     *
     * <p>TODO: remove dependency on com.sun.xml.tree.XmlDocument
     * package!</p>
     *
     * @param  doc  DOM {@code Document} object.
     *
     * @return  {@code byte} array of DOM {@code Document} object.
     *
     * @throws  IOException  If any I/O error occurs or no serializer
     *                       could be used; the underlying failure is
     *                       attached as the cause.
     */
    static byte[] docToBytes(Document doc) throws IOException {

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        String domImpl = doc.getClass().getName();

        boolean isJaxp = domImpl.equals("com.sun.xml.tree.XmlDocument");
        boolean isCrimson = domImpl.equals("org.apache.crimson.tree.XmlDocument");
        boolean isXerces = domImpl.equals("org.apache.xerces.dom.DocumentImpl")
            || domImpl.equals("org.apache.xerces.dom.DeferredDocumentImpl");
        boolean isGnu = domImpl.equals("gnu.xml.dom.DomDocument");

        if (!(isJaxp || isCrimson || isXerces || isGnu)) {
            return transformToBytes(doc, domImpl);
        }

        try {
            if (isJaxp || isCrimson) {
                Debug.log(Debug.INFO, isJaxp ? "Using JAXP" : "Using Crimson");

                // The method is in the XmlDocument class itself, not a helper
                java.lang.reflect.Method write = doc.getClass().getMethod(
                    "write", new Class[] { OutputStream.class });
                write.invoke(doc, new Object[] { baos });
            } else if (isXerces) {
                Debug.log(Debug.INFO, "Using Xerces");

                Class xercesSer =
                    Class.forName("org.apache.xml.serialize.XMLSerializer");

                // Get the OutputStream constructor
                // May want to use the OutputFormat parameter at some stage too
                java.lang.reflect.Constructor con = xercesSer.getConstructor(
                    new Class[] { OutputStream.class,
                        Class.forName("org.apache.xml.serialize.OutputFormat") });

                java.lang.reflect.Method serialize = xercesSer.getMethod(
                    "serialize", new Class[] { Document.class });

                Object serializer = con.newInstance(new Object[] { baos, null });
                serialize.invoke(serializer, new Object[] { doc });
            } else {
                Debug.log(Debug.INFO, "Using GNU");

                Class gnuSer = Class.forName("gnu.xml.dom.ls.DomLSSerializer");

                java.lang.reflect.Method serialize = gnuSer.getMethod(
                    "serialize", new Class[] { Node.class, OutputStream.class });

                Object serializer = gnuSer.newInstance();
                serialize.invoke(serializer, new Object[] { doc, baos });
            }
        } catch (Exception e) {
            // We may get all sorts of reflection errors, but the bottom
            // line is that the steps being executed no longer work.
            throw ioException(e.toString(), e);
        }

        return baos.toByteArray();
    }


    /**
     * Serializes a DOM with the JAXP identity {@code Transformer}.
     * The output encoding is pinned to UTF-8 for both the XML declaration
     * and the returned bytes, so the two cannot disagree on platforms
     * whose default charset is not UTF-8.
     *
     * @param  doc      DOM {@code Document} object.
     * @param  domImpl  Class name of the DOM implementation, used in the
     *                  error message only.
     *
     * @return  UTF-8 encoded XML.
     *
     * @throws  IOException  If the transformation fails.
     */
    private static byte[] transformToBytes(Document doc, String domImpl)
        throws IOException {
        try {
            StringWriter writer = new StringWriter();
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.transform(new DOMSource(doc), new StreamResult(writer));
            return writer.toString().getBytes("UTF-8");
        } catch (Exception e) {
            // We don't have another parser
            throw ioException(
                "No appropriate API (JAXP/Xerces) to serialize XML document: "
                    + domImpl, e);
        }
    }


    /**
     * Creates an {@code IOException} carrying the given cause.
     *
     * @param  message  Exception message.
     * @param  cause    Underlying failure.
     *
     * @return  The new exception, ready to be thrown.
     */
    private static IOException ioException(String message, Throwable cause) {
        IOException ioe = new IOException(message);
        ioe.initCause(cause);
        return ioe;
    }


    /**
     * Initializes a new DOM {@code Document} with the content
     * containing minimum OpenOffice XML tags.
     *
     * @throws  IOException  If any I/O error occurs.
     */
    public final void initContentDOM() throws IOException {

        contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);

        // this is a work-around for a bug in Office6.0 - not really
        // needed but StarCalc 6.0 will crash without this tag.
        Element root = contentDoc.getDocumentElement();

        root.appendChild(contentDoc.createElement(TAG_OFFICE_FONT_DECLS));
        root.appendChild(contentDoc.createElement(TAG_OFFICE_AUTOMATIC_STYLES));
        root.appendChild(contentDoc.createElement(TAG_OFFICE_BODY));
    }


    /**
     * Initializes a new settings DOM {@code Document} whose root
     * contains a single, empty settings element.
     *
     * @throws  IOException  If any I/O error occurs.
     */
    public final void initSettingsDOM() throws IOException {

        settingsDoc = createSettingsDOM(TAG_OFFICE_DOCUMENT_SETTINGS);

        Element root = settingsDoc.getDocumentElement();
        root.appendChild(settingsDoc.createElement(TAG_OFFICE_SETTINGS));
    }


    /**
     * Initializes a new DOM {@code Document} with styles
     * containing minimum OpenOffice XML tags.
     *
     * @throws  IOException  If any I/O error occurs.
     */
    public final void initStyleDOM() throws IOException {
        styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
    }


    /**
     * Creates a new settings DOM {@code Document} consisting of a root
     * element with the office, xlink and config namespace declarations
     * and the {@code office:version} attribute.
     *
     * @param  rootName  root name of {@code Document}.
     *
     * @return  The new DOM {@code Document}.
     *
     * @throws  IOException  If any I/O error occurs.
     */
    private final Document createSettingsDOM(String rootName) throws IOException {

        Document doc = newBuilder().newDocument();

        Element root = (Element) doc.createElement(rootName);
        doc.appendChild(root);

        root.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
        // XLink lives in the W3C namespace, as already declared by
        // createDOM and write; there is no openoffice.org xlink namespace.
        root.setAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink");
        root.setAttribute("xmlns:config", "http://openoffice.org/2001/config");
        root.setAttribute("office:version", "1.0");

        return doc;
    }


    /**
     * Creates a new DOM {@code Document} containing minimum
     * OpenOffice.org XML tags.
     *
     * <p>This method uses the subclass {@code getOfficeClassAttribute}
     * method to get the attribute for <i>office:class</i>.</p>
     *
     * @param  rootName  root name of {@code Document}.
     *
     * @return  The new DOM {@code Document}.
     *
     * @throws  IOException  If any I/O error occurs.
     */
    private final Document createDOM(String rootName) throws IOException {

        Document doc = newBuilder().newDocument();

        Element root = (Element) doc.createElement(rootName);
        doc.appendChild(root);

        root.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
        root.setAttribute("xmlns:style", "http://openoffice.org/2000/style");
        root.setAttribute("xmlns:text", "http://openoffice.org/2000/text");
        root.setAttribute("xmlns:table", "http://openoffice.org/2000/table");
        root.setAttribute("xmlns:draw", "http://openoffice.org/2000/drawing");
        root.setAttribute("xmlns:fo", "http://www.w3.org/1999/XSL/Format");
        root.setAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink");
        root.setAttribute("xmlns:number", "http://openoffice.org/2000/datastyle");
        root.setAttribute("xmlns:svg", "http://www.w3.org/2000/svg");
        root.setAttribute("xmlns:chart", "http://openoffice.org/2000/chart");
        root.setAttribute("xmlns:dr3d", "http://openoffice.org/2000/dr3d");
        root.setAttribute("xmlns:math", "http://www.w3.org/1998/Math/MathML");
        root.setAttribute("xmlns:form", "http://openoffice.org/2000/form");
        root.setAttribute("xmlns:script", "http://openoffice.org/2000/script");
        root.setAttribute("office:class", getOfficeClassAttribute());
        root.setAttribute("office:version", "1.0");

        return doc;
    }


    /**
     * Returns the <i>office:class</i> attribute value.
     *
     * @return  The attribute value.
     */
    protected abstract String getOfficeClassAttribute();


    /**
     * Hacked code to filter the {@code <!DOCTYPE>} tag before
     * sending the stream to the parser.
     *
     * <p>This hacked code needs to be changed later on.</p>
     *
     * <p>Issue: using the current jaxp1.0 parser, there is no way
     * to turn off processing of dtds.  The current set of dtds
     * have bugs, processing them will throw exceptions.</p>
     *
     * <p>This is a simple hack that looks for the first {@code '>'}
     * after {@code <!DOCTYPE}.  This is sufficient for current
     * StarOffice 6.0 generated XML files.  Since this hack really
     * needs to go away, it is not meant to be a perfect hack.</p>
     *
     * <p>FIX (HJ): Removed requirement for DOCTYPE to be in one line<br>
     * FIX (HJ): No longer removes newlines</p>
     *
     * @param  is  {@code InputStream} to be filtered.
     *
     * @return  Reader value without the {@code <!DOCTYPE>} tag.
     *
     * @throws  IOException  If any I/O error occurs, or if a
     *                       {@code <!DOCTYPE} is never closed.
     */
    private static Reader hack(InputStream is) throws IOException {

        BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
        StringBuffer buffer = new StringBuffer();

        String str = null;

        while ((str = br.readLine()) != null) {

            int sIndex = str.indexOf("<!DOCTYPE");

            if (sIndex > -1) {

                // keep whatever precedes the declaration on this line
                buffer.append(str.substring(0, sIndex));

                int eIndex = str.indexOf('>', sIndex + 8);

                if (eIndex > -1) {

                    buffer.append(str.substring(eIndex + 1, str.length()));
                    // FIX (HJ): Preserve the newline
                    buffer.append("\n");

                } else {

                    // FIX (HJ): More than one line. Search for '>' in
                    // following lines
                    boolean bOK = false;
                    while ((str = br.readLine()) != null) {
                        eIndex = str.indexOf('>');
                        if (eIndex > -1) {
                            buffer.append(str.substring(eIndex + 1));
                            // FIX (HJ): Preserve the newline
                            buffer.append("\n");
                            bOK = true;
                            break;
                        }
                    }

                    if (!bOK) {
                        throw new IOException("Invalid XML");
                    }
                }

            } else {

                buffer.append(str);
                // FIX (HJ): Preserve the newline
                buffer.append("\n");
            }
        }

        return new StringReader(buffer.toString());
    }


    /**
     * Transforms the {@code InputStream} to a {@code Reader} stream.
     *
     * <p>This hacked code needs to be changed later on.</p>
     *
     * <p>Issue: the new oasis input file stream means
     * that the old input stream fails. see #i33702#</p>
     *
     * <p>Anything following the last {@code </office:document>} end tag
     * is dropped.  If the end tag does not occur at all the text is
     * passed on unchanged and it is left to the parser to accept or
     * reject it.</p>
     *
     * @param  is  {@code InputStream} to be filtered.
     *
     * @return  Reader value of the {@code InputStream}.
     *
     * @throws  IOException  If any I/O error occurs.
     */
    private static Reader secondHack(InputStream is) throws IOException {

        BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
        char[] charArray = new char[4096];
        StringBuffer sBuf = new StringBuffer();
        int n = 0;
        while ((n = br.read(charArray, 0, charArray.length)) > 0) {
            sBuf.append(charArray, 0, n);
        }

        // ensure there is no trailing garbage after the end of the stream.
        int sIndex = sBuf.lastIndexOf(FLAT_ROOT_END_TAG);
        if (sIndex > -1) {
            sBuf.setLength(sIndex + FLAT_ROOT_END_TAG.length());
        }

        return new StringReader(sBuf.toString());
    }


    /**
     * Method to create the initial entries in the manifest.xml file stored
     * in an SX? file.  Replaces any manifest DOM held so far.
     *
     * @throws  IOException  If the parser cannot be configured.
     */
    private void initManifestDOM() throws IOException {

        DOMImplementation domImpl = newBuilder().getDOMImplementation();

        DocumentType docType = domImpl.createDocumentType(TAG_MANIFEST_ROOT,
            "-//OpenOffice.org//DTD Manifest 1.0//EN",
            "Manifest.dtd");
        manifestDoc = domImpl.createDocument("manifest", TAG_MANIFEST_ROOT, docType);

        Element manifestRoot = manifestDoc.getDocumentElement();
        manifestRoot.setAttribute("xmlns:manifest", "http://openoffice.org/2001/manifest");

        // Add the entry describing the main document itself
        appendManifestEntry("/", getDocumentMimeType());
    }
}