1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.converter.xml;
25 
26 import java.io.InputStream;
27 import java.io.OutputStream;
28 import java.io.Reader;
29 import java.io.BufferedReader;
30 import java.io.StringReader;
31 import java.io.StringWriter;
32 import java.io.InputStreamReader;
33 import java.io.ByteArrayOutputStream;
34 import java.io.ByteArrayInputStream;
35 import java.io.IOException;
36 import java.util.Iterator;
37 import java.util.Map;
38 import java.util.HashMap;
39 
40 import javax.xml.parsers.DocumentBuilderFactory;
41 import javax.xml.parsers.DocumentBuilder;
42 import javax.xml.parsers.ParserConfigurationException;
43 
44 import org.w3c.dom.Node;
45 import org.w3c.dom.Element;
46 import org.w3c.dom.Document;
47 import org.w3c.dom.DOMImplementation;
48 import org.w3c.dom.DocumentType;
49 import org.w3c.dom.NodeList;
50 import org.xml.sax.InputSource;
51 import org.w3c.dom.NamedNodeMap;
52 import org.xml.sax.SAXException;
53 
54 import javax.xml.transform.*;
55 import javax.xml.transform.dom.*;
56 import javax.xml.transform.stream.*;
57 
58 import org.openoffice.xmerge.util.Resources;
59 import org.openoffice.xmerge.util.Debug;
60 
61 /**
62  *  An implementation of <code>Document</code> for
63  *  StarOffice documents.
64  */
65 public abstract class OfficeDocument
66     implements org.openoffice.xmerge.Document,
67                OfficeConstants {
68 
    /**
     *  Factory for <code>DocumentBuilder</code> objects.  The factory is
     *  static, so the flags a constructor sets on it are shared by all
     *  instances.
     */
    private static DocumentBuilderFactory factory =
       DocumentBuilderFactory.newInstance();

    /** DOM <code>Document</code> of content.xml. */
    private Document contentDoc = null;

    /** DOM <code>Document</code> of meta.xml. */
    private Document metaDoc = null;

    /** DOM <code>Document</code> of settings.xml. */
    private Document settingsDoc = null;

    /** DOM <code>Document</code> of styles.xml. */
    private Document styleDoc = null;

    /** DOM <code>Document</code> of META-INF/manifest.xml. */
    private Document manifestDoc = null;

    /** <code>Document</code> name with the file extension stripped. */
    private String documentName = null;

    /** <code>documentName</code> followed by the file extension. */
    private String fileName = null;

    /** Resources object. */
    private Resources res = null;

    /**
     *  <code>OfficeZip</code> object to store zip contents from
     *  read <code>InputStream</code>.  Note that this member
     *  will still be null if it was initialized using a template
     *  file instead of reading from a StarOffice zipped
     *  XML file.
     */
    private OfficeZip zip = null;

    /**
     *  Embedded objects of the document, keyed by object name.  Stays
     *  null until the objects are first requested or one is added.
     */
    private Map embeddedObjects = null;
105 
    /**
     *  Constructs a namespace-aware, non-validating
     *  <code>OfficeDocument</code> with the given name.
     *
     *  @param  name  <code>Document</code> name.
     */
OfficeDocument(String name)111     public OfficeDocument(String name)
112     {
113         this(name, true, false);
114     }
115 
116 
117     /**
118      *  Constructor with arguments to set <code>namespaceAware</code>
119      *  and <code>validating</code> flags.
120      *
121      *  @param  name            <code>Document</code> name (may or may not
122      *                          contain extension).
123      *  @param  namespaceAware  Value for <code>namespaceAware</code> flag.
124      *  @param  validating      Value for <code>validating</code> flag.
125      */
OfficeDocument(String name, boolean namespaceAware, boolean validating)126     public OfficeDocument(String name, boolean namespaceAware, boolean validating) {
127 
128         res = Resources.getInstance();
129         factory.setValidating(validating);
130         factory.setNamespaceAware(namespaceAware);
131         this.documentName = trimDocumentName(name);
132         this.fileName = documentName + getFileExtension();
133     }
134 
135 
136     /**
137      *  Removes the file extension from the <code>Document</code>
138      *  name.
139      *
140      *  @param  name  Full <code>Document</code> name with extension.
141      *
142      *  @return  Name of <code>Document</code> without the extension.
143      */
trimDocumentName(String name)144     private String trimDocumentName(String name) {
145         String temp = name.toLowerCase();
146         String ext = getFileExtension();
147 
148         if (temp.endsWith(ext)) {
149             // strip the extension
150             int nlen = name.length();
151             int endIndex = nlen - ext.length();
152             name = name.substring(0,endIndex);
153         }
154 
155         return name;
156     }
157 
158 
159     /**
160      *  Return a DOM <code>Document</code> object of the content.xml
161      *  file.  Note that a content DOM is not created when the constructor
162      *  is called.  So, either the <code>read</code> method or the
163      *  <code>initContentDOM</code> method will need to be called ahead
164      *  on this object before calling this method.
165      *
166      *  @return  DOM <code>Document</code> object.
167      */
getContentDOM()168     public Document getContentDOM() {
169 
170         return contentDoc;
171     }
172 
    /**
     *  Return a DOM <code>Document</code> object of the meta.xml
     *  file.  Note that a meta DOM is not created when the constructor
     *  is called.  So, either a <code>read</code> method or the
     *  <code>setMetaDOM</code> method will need to be called ahead
     *  on this object before calling this method; otherwise
     *  <code>null</code> is returned.
     *
     *  @return  DOM <code>Document</code> object.
     */
getMetaDOM()182     public Document getMetaDOM() {
183 
184         return metaDoc;
185     }
186 
187 
    /**
     *  Return a DOM <code>Document</code> object of the settings.xml
     *  file.  Note that a settings DOM is not created when the constructor
     *  is called.  So, either a <code>read</code> method or the
     *  <code>setSettingsDOM</code> method will need to be called ahead
     *  on this object before calling this method; otherwise
     *  <code>null</code> is returned.
     *
     *  @return  DOM <code>Document</code> object.
     */
getSettingsDOM()197     public Document getSettingsDOM() {
198 
199         return settingsDoc;
200     }
201 
202 
203     /**
204      * Sets the content tree of the document.
205      *
206      * @param   newDom  <code>Node</code> containing the new content tree.
207      */
setContentDOM( Node newDom)208     public void setContentDOM( Node newDom) {
209         contentDoc = (Document)newDom;
210     }
211 
212 
213     /**
214      * Sets the meta tree of the document.
215      *
216      * @param   newDom  <code>Node</code> containing the new meta tree.
217      */
setMetaDOM(Node newDom)218     public void setMetaDOM (Node newDom) {
219         metaDoc = (Document)newDom;
220     }
221 
222 
223     /**
224      * Sets the settings tree of the document.
225      *
226      * @param   newDom  <code>Node</code> containing the new settings tree.
227      */
setSettingsDOM(Node newDom)228     public void setSettingsDOM (Node newDom) {
229         settingsDoc = (Document)newDom;
230     }
231 
232 
233     /**
234      * Sets the style tree of the document.
235      *
236      * @param   newDom  <code>Node</code> containing the new style tree.
237      */
setStyleDOM(Node newDom)238     public void setStyleDOM (Node newDom) {
239         styleDoc = (Document)newDom;
240     }
241 
242 
243     /**
244      *  Return a DOM <code>Document</code> object of the style.xml file.
245      *  Note that this may return null if there is no style DOM.
246      *  Note that a style DOM is not created when the constructor
247      *  is called.  Depending on the <code>InputStream</code>, a
248      *  <code>read</code> method may or may not build a style DOM.  When
249      *  creating a new style DOM, call the <code>initStyleDOM</code> method
250      *  first.
251      *
252      *  @return  DOM <code>Document</code> object.
253      */
getStyleDOM()254     public Document getStyleDOM() {
255 
256         return styleDoc;
257     }
258 
259 
260     /**
261      *  Return the name of the <code>Document</code>.
262      *
263      *  @return  The name of <code>Document</code>.
264      */
getName()265     public String getName() {
266 
267         return documentName;
268     }
269 
270 
271     /**
272      *  Return the file name of the <code>Document</code>, possibly
273      *  with the standard extension.
274      *
275      *  @return  The file name of <code>Document</code>.
276      */
getFileName()277     public String getFileName() {
278 
279         return fileName;
280     }
281 
282 
283     /**
284      *  Returns the file extension for this type of
285      *  <code>Document</code>.
286      *
287      *  @return  The file extension of <code>Document</code>.
288      */
    // NOTE: invoked from the constructor (directly and through
    // trimDocumentName), so implementations must not rely on instance state
    // of the subclass.  trimDocumentName matches the returned value against
    // the lower-cased document name.
    protected abstract String getFileExtension();
290 
291 
292     /**
293      * Returns all the embedded objects (graphics, formulae, etc.) present in
294      * this document.
295      *
296      * @return An <code>Iterator</code> of <code>EmbeddedObject</code> objects.
297      */
getEmbeddedObjects()298     public Iterator getEmbeddedObjects() {
299 
300         if (embeddedObjects == null && manifestDoc != null) {
301             embeddedObjects = new HashMap();
302 
303             // Need to read the manifest file and construct a list of objects
304             NodeList nl = manifestDoc.getElementsByTagName(TAG_MANIFEST_FILE);
305 
306             // Dont create the HashMap if there are no embedded objects
307             int len = nl.getLength();
308             for (int i = 0; i < len; i++) {
309                 Node n = nl.item(i);
310 
311                 NamedNodeMap attrs = n.getAttributes();
312 
313                 String type = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_TYPE).getNodeValue();
314                 String path = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_PATH).getNodeValue();
315 
316 
317                 /*
318                  * According to OpenOffice.org XML File Format document (ver. 1)
319                  * there are only two types of embedded object:
320                  *
321                  *      Objects with an XML representation.
322                  *      Objects without an XML representation.
323                  *
324                  * The former are represented by one or more XML files.
325                  * The latter are in binary form.
326                  */
327                 if (type.startsWith("application/vnd.sun.xml"))
328                 {
329                     if (path.equals("/")) {
330                         // Exclude the main document entries
331                         continue;
332                     }
333                     // Take off the trailing '/'
334                     String name = path.substring(0, path.length() - 1);
335                     embeddedObjects.put(name, new EmbeddedXMLObject(name, type, zip));
336                 }
337                 else if (type.equals("text/xml")) {
338                     // XML entries are either embedded StarOffice doc entries or main
339                     // document entries
340                     continue;
341                 }
342                 else { // FIX (HJ): allows empty MIME type
343                     embeddedObjects.put(path, new EmbeddedBinaryObject(path, type, zip));
344                 }
345             }
346         }
347 
348         return embeddedObjects.values().iterator();
349     }
350 
351     /**
352      * Returns the embedded object corresponding to the name provided.
353      * The name should be stripped of any preceding path characters, such as
354      * '/', '.' or '#'.
355      *
356      * @param   name    The name of the embedded object to retrieve.
357      *
358      * @return  An <code>EmbeddedObject</code> instance representing the named
359      *          object.
360      */
getEmbeddedObject(String name)361     public EmbeddedObject getEmbeddedObject(String name) {
362         if (name == null) {
363             return null;
364         }
365 
366         if (embeddedObjects == null) {
367             getEmbeddedObjects();
368         }
369 
370         if (embeddedObjects.containsKey(name)) {
371             return (EmbeddedObject)embeddedObjects.get(name);
372         }
373         else {
374             return null;
375         }
376     }
377 
378 
379     /**
380      * Adds a new embedded object to the document.
381      *
382      * @param   embObj  An instance of <code>EmbeddedObject</code>.
383      */
addEmbeddedObject(EmbeddedObject embObj)384     public void addEmbeddedObject(EmbeddedObject embObj) {
385         if (embObj == null) {
386             return;
387         }
388 
389         if (embeddedObjects == null) {
390             embeddedObjects = new HashMap();
391         }
392 
393         embeddedObjects.put(embObj.getName(), embObj);
394     }
395 
396 
397     /**
398      *  Read the Office <code>Document</code> from the given
399      *  <code>InputStream</code>.
400      *
401      *  @param  is  Office document <code>InputStream</code>.
402      *
403      *  @throws  IOException  If any I/O error occurs.
404      */
read(InputStream is)405     public void read(InputStream is) throws IOException {
406 
407         Debug.log(Debug.INFO, "reading Office file");
408 
409         DocumentBuilder builder = null;
410 
411         try {
412             builder = factory.newDocumentBuilder();
413         } catch (ParserConfigurationException ex) {
414             throw new OfficeDocumentException(ex);
415         }
416 
417         // read in Office zip file format
418 
419         zip = new OfficeZip();
420         zip.read(is);
421 
422         // grab the content.xml and
423         // parse it into contentDoc.
424 
425         byte contentBytes[] = zip.getContentXMLBytes();
426 
427         if (contentBytes == null) {
428 
429             throw new OfficeDocumentException("Entry content.xml not found in file");
430         }
431 
432         try {
433 
434             contentDoc = parse(builder, contentBytes);
435 
436         } catch (SAXException ex) {
437 
438             throw new OfficeDocumentException(ex);
439         }
440 
441         // if style.xml exists, grab the style.xml
442         // parse it into styleDoc.
443 
444         byte styleBytes[] = zip.getStyleXMLBytes();
445 
446         if (styleBytes != null) {
447 
448             try {
449 
450                 styleDoc = parse(builder, styleBytes);
451 
452             } catch (SAXException ex) {
453 
454                 throw new OfficeDocumentException(ex);
455             }
456         }
457 
458 	byte metaBytes[] = zip.getMetaXMLBytes();
459 
460         if (metaBytes != null) {
461 
462             try {
463 
464                 metaDoc = parse(builder, metaBytes);
465 
466             } catch (SAXException ex) {
467 
468                 throw new OfficeDocumentException(ex);
469             }
470         }
471 
472 	byte settingsBytes[] = zip.getSettingsXMLBytes();
473 
474         if (settingsBytes != null) {
475 
476             try {
477 
478                 settingsDoc = parse(builder, settingsBytes);
479 
480             } catch (SAXException ex) {
481 
482                 throw new OfficeDocumentException(ex);
483             }
484         }
485 
486 
487         // Read in the META-INF/manifest.xml file
488         byte manifestBytes[] = zip.getManifestXMLBytes();
489 
490         if (manifestBytes != null) {
491 
492             try {
493                 manifestDoc = parse(builder, manifestBytes);
494             } catch (SAXException ex) {
495                 throw new OfficeDocumentException(ex);
496             }
497         }
498 
499     }
500 
501 
    /**
     *  Read the Office <code>Document</code> from the given
     *  <code>InputStream</code>, which holds either a zipped Office
     *  file or a single flat XML document.
     *
     *  @param  is  Office document <code>InputStream</code>.
     *  @param  isZip <code>boolean</code> <code>true</code> if the
     *                 stream is a zipped file, <code>false</code> if
     *                 it is a flat XML document.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
read(InputStream is, boolean isZip)512     public void read(InputStream is, boolean isZip) throws IOException {
513 
514         Debug.log(Debug.INFO, "reading Office file");
515 
516         DocumentBuilder builder = null;
517 
518         try {
519             builder = factory.newDocumentBuilder();
520         } catch (ParserConfigurationException ex) {
521             throw new OfficeDocumentException(ex);
522         }
523 
524 	if (isZip)
525 	{
526             read(is);
527 	}
528 	else{
529 	    try{
530 		//System.out.println("\nParsing Input stream, validating?: "+builder.isValidating());
531 		//contentDoc=  builder.parse((InputStream)is);
532 
533                Reader r = secondHack(is);
534                InputSource ins = new InputSource(r);
535 	        org.w3c.dom.Document newDoc = builder.parse(ins);
536 	        //org.w3c.dom.Document newDoc = builder.parse((InputStream)is);
537 	        Element rootElement=newDoc.getDocumentElement();
538 
539 	        NodeList nodeList;
540 	        Node tmpNode;
541 	        Node rootNode = (Node)rootElement;
542                 if (newDoc !=null){
543 		    /*content*/
544                    contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
545                    rootElement=contentDoc.getDocumentElement();
546                    rootNode = (Node)rootElement;
547 
548                    // FIX (HJ): Include office:font-decls in content DOM
549                    nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS);
550                    if (nodeList.getLength()>0){
551                        tmpNode = contentDoc.importNode(nodeList.item(0),true);
552                        rootNode.appendChild(tmpNode);
553                    }
554 
555                    nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
556                    if (nodeList.getLength()>0){
557 	              tmpNode = contentDoc.importNode(nodeList.item(0),true);
558 	              rootNode.appendChild(tmpNode);
559                    }
560 
561                     nodeList= newDoc.getElementsByTagName(TAG_OFFICE_BODY);
562                    if (nodeList.getLength()>0){
563 	              tmpNode = contentDoc.importNode(nodeList.item(0),true);
564 	              rootNode.appendChild(tmpNode);
565                    }
566 
567 		   /*Styles*/
568                    styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
569                    rootElement=styleDoc.getDocumentElement();
570                    rootNode = (Node)rootElement;
571 
572                    // FIX (HJ): Include office:font-decls in styles DOM
573                    nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS);
574                    if (nodeList.getLength()>0){
575     	              tmpNode = styleDoc.importNode(nodeList.item(0),true);
576                       rootNode.appendChild(tmpNode);
577                    }
578 
579                    nodeList= newDoc.getElementsByTagName(TAG_OFFICE_STYLES);
580                    if (nodeList.getLength()>0){
581 	              tmpNode = styleDoc.importNode(nodeList.item(0),true);
582 	              rootNode.appendChild(tmpNode);
583                    }
584 
585                    // FIX (HJ): Include office:automatic-styles in styles DOM
586                    nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
587                    if (nodeList.getLength()>0){
588                       tmpNode = styleDoc.importNode(nodeList.item(0),true);
589                       rootNode.appendChild(tmpNode);
590                    }
591 
592                    // FIX (HJ): Include office:master-styles in styles DOM
593                    nodeList= newDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES);
594                    if (nodeList.getLength()>0){
595                        tmpNode = styleDoc.importNode(nodeList.item(0),true);
596                        rootNode.appendChild(tmpNode);
597                    }
598 
599 		   /*Settings*/
600                    settingsDoc = createDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
601                    rootElement=settingsDoc.getDocumentElement();
602                    rootNode = (Node)rootElement;
603                    nodeList= newDoc.getElementsByTagName(TAG_OFFICE_SETTINGS);
604                    if (nodeList.getLength()>0){
605 	              tmpNode = settingsDoc.importNode(nodeList.item(0),true);
606 	              rootNode.appendChild(tmpNode);
607                    }
608 		   /*Meta*/
609                    metaDoc = createDOM(TAG_OFFICE_DOCUMENT_META);
610                    rootElement=metaDoc.getDocumentElement();
611                    rootNode = (Node)rootElement;
612                    nodeList= newDoc.getElementsByTagName(TAG_OFFICE_META);
613                    if (nodeList.getLength()>0){
614 	              tmpNode = metaDoc.importNode(nodeList.item(0),true);
615 	              rootNode.appendChild(tmpNode);
616                    }
617                 }
618 	    }
619 	    catch (SAXException ex) {
620 		throw new OfficeDocumentException(ex);
621 	    }
622 	}
623 
624     }
625 
626 
627 
628     /**
629      *  Parse given <code>byte</code> array into a DOM
630      *  <code>Document</code> object using the
631      *  <code>DocumentBuilder</code> object.
632      *
633      *  @param  builder  <code>DocumentBuilder</code> object for parsing.
634      *  @param  bytes    <code>byte</code> array for parsing.
635      *
636      *  @return  Resulting DOM <code>Document</code> object.
637      *
638      *  @throws  SAXException  If any parsing error occurs.
639      */
parse(DocumentBuilder builder, byte bytes[])640     static Document parse(DocumentBuilder builder, byte bytes[])
641         throws SAXException, IOException {
642 
643         Document doc = null;
644 
645         ByteArrayInputStream is = new ByteArrayInputStream(bytes);
646 
647         // TODO:  replace hack with a more appropriate fix.
648 
649         Reader r = hack(is);
650         InputSource ins = new InputSource(r);
651         doc = builder.parse(ins);
652 
653         return doc;
654     }
655 
656 
657     /**
658      * Method to return the MIME type of the document.
659      *
660      * @return  String  The document's MIME type.
661      */
    // write(OutputStream, boolean) checks the returned value against
    // SXC_MIME_TYPE and SXW_MIME_TYPE to choose the "office:class"
    // attribute of the flat XML root element.
    protected abstract String getDocumentMimeType();
663 
664 
665     /**
666      *  Write out Office ZIP file format.
667      *
668      *  @param  os  XML <code>OutputStream</code>.
669      *
670      *  @throws  IOException  If any I/O error occurs.
671      */
write(OutputStream os)672     public void write(OutputStream os) throws IOException {
673         if (zip == null) {
674             zip = new OfficeZip();
675     	}
676 
677         initManifestDOM();
678 
679         Element domEntry;
680         Element manifestRoot = manifestDoc.getDocumentElement();
681 
682         // The EmbeddedObjects come first.
683         Iterator embObjs = getEmbeddedObjects();
684         while (embObjs.hasNext()) {
685             EmbeddedObject obj = (EmbeddedObject)embObjs.next();
686             obj.writeManifestData(manifestDoc);
687 
688             obj.write(zip);
689         }
690 
691         // Add in the entry for the Pictures directory.  Always present.
692         domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
693         domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "Pictures/");
694         domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "");
695         manifestRoot.appendChild(domEntry);
696 
697 	// Write content to the Zip file and then write any of the optional
698         // data, if it exists.
699 	zip.setContentXMLBytes(docToBytes(contentDoc));
700 
701         domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
702         domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "content.xml");
703         domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
704 
705         manifestRoot.appendChild(domEntry);
706 
707 	if (styleDoc != null) {
708             zip.setStyleXMLBytes(docToBytes(styleDoc));
709 
710             domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
711             domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "styles.xml");
712             domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
713             manifestRoot.appendChild(domEntry);
714         }
715 
716         if (metaDoc != null) {
717             zip.setMetaXMLBytes(docToBytes(metaDoc));
718 
719             domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
720             domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "meta.xml");
721             domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
722             manifestRoot.appendChild(domEntry);
723         }
724 
725         if (settingsDoc != null) {
726             zip.setSettingsXMLBytes(docToBytes(settingsDoc));
727 
728             domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
729             domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "settings.xml");
730             domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
731             manifestRoot.appendChild(domEntry);
732         }
733 
734         zip.setManifestXMLBytes(docToBytes(manifestDoc));
735 
736         zip.write(os);
737     }
738 
739 
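     /**
     *  Write out the Office <code>Document</code>, either in the Office
     *  ZIP file format or as a single flat XML document.
     *
     *  @param  os  <code>OutputStream</code> to write to.
     *  @param  isZip <code>boolean</code> <code>true</code> to write the
     *                 ZIP file format, <code>false</code> to write one
     *                 <code>office:document</code> XML document.
     *
     *  @throws  IOException  If any I/O error occurs.
     */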
write(OutputStream os, boolean isZip)748     public void write(OutputStream os, boolean isZip) throws IOException {
749 
750         // Create an OfficeZip object if one does not exist.
751         if (isZip){
752             write(os);
753 	}
754 	else{
755 	    try{
756 		DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
757 		DocumentBuilder builder= builderFactory.newDocumentBuilder();
758 		DOMImplementation domImpl = builder.getDOMImplementation();
759 		DocumentType docType =domImpl.createDocumentType("office:document","-//OpenOffice.org//DTD OfficeDocument 1.0//EN",null);
760 		org.w3c.dom.Document newDoc = domImpl.createDocument("http://openoffice.org/2000/office","office:document",null);
761 
762 
763 		Element rootElement=newDoc.getDocumentElement();
764 		rootElement.setAttribute("xmlns:office","http://openoffice.org/2000/office");
765 		rootElement.setAttribute("xmlns:style","http://openoffice.org/2000/style" );
766 		rootElement.setAttribute("xmlns:text","http://openoffice.org/2000/text");
767 		rootElement.setAttribute("xmlns:table","http://openoffice.org/2000/table");
768 
769 		rootElement.setAttribute("xmlns:draw","http://openoffice.org/2000/drawing");
770 		rootElement.setAttribute("xmlns:fo","http://www.w3.org/1999/XSL/Format" );
771 		rootElement.setAttribute("xmlns:xlink","http://www.w3.org/1999/xlink" );
772 		rootElement.setAttribute("xmlns:dc","http://purl.org/dc/elements/1.1/" );
773 		rootElement.setAttribute("xmlns:meta","http://openoffice.org/2000/meta" );
774 		rootElement.setAttribute("xmlns:number","http://openoffice.org/2000/datastyle" );
775 		rootElement.setAttribute("xmlns:svg","http://www.w3.org/2000/svg" );
776 		rootElement.setAttribute("xmlns:chart","http://openoffice.org/2000/chart" );
777 		rootElement.setAttribute("xmlns:dr3d","http://openoffice.org/2000/dr3d" );
778 		rootElement.setAttribute("xmlns:math","http://www.w3.org/1998/Math/MathML" );
779 		rootElement.setAttribute("xmlns:form","http://openoffice.org/2000/form" );
780 		rootElement.setAttribute("xmlns:script","http://openoffice.org/2000/script" );
781 		rootElement.setAttribute("xmlns:config","http://openoffice.org/2001/config" );
782 		// #i41033# OASIS format needs the "office:class" set.
783 		if(getDocumentMimeType() == SXC_MIME_TYPE)
784 		    rootElement.setAttribute("office:class","spreadsheet" );
785 		else if(getDocumentMimeType() == SXW_MIME_TYPE)
786 		    rootElement.setAttribute("office:class","text" );
787 		rootElement.setAttribute("office:version","1.0");
788 
789 
790 		NodeList nodeList;
791 		Node tmpNode;
792 		Node rootNode = (Node)rootElement;
793 		if (metaDoc !=null){
794 		    nodeList= metaDoc.getElementsByTagName(TAG_OFFICE_META);
795 		    if (nodeList.getLength()>0){
796 			tmpNode = newDoc.importNode(nodeList.item(0),true);
797 			rootNode.appendChild(tmpNode);
798 		    }
799 		}if (styleDoc !=null){
800 		    nodeList= styleDoc.getElementsByTagName(TAG_OFFICE_STYLES);
801 		    if (nodeList.getLength()>0){
802 			tmpNode = newDoc.importNode(nodeList.item(0),true);
803 			rootNode.appendChild(tmpNode);
804                 }
805 
806 		}if (settingsDoc !=null){
807 		    nodeList= settingsDoc.getElementsByTagName(TAG_OFFICE_SETTINGS);
808 		    if (nodeList.getLength()>0){
809 			tmpNode = newDoc.importNode(nodeList.item(0),true);
810 			rootNode.appendChild(tmpNode);
811 		    }
812 		}
813 		if (contentDoc !=null){
814 		    nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
815 		    if (nodeList.getLength()>0){
816 			tmpNode = newDoc.importNode(nodeList.item(0),true);
817 	             rootNode.appendChild(tmpNode);
818 		    }
819 
820 		    nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_BODY);
821 		    if (nodeList.getLength()>0){
822 			tmpNode = newDoc.importNode(nodeList.item(0),true);
823 			rootNode.appendChild(tmpNode);
824 		    }
825 		}
826 
827 		byte contentBytes[] = docToBytes(newDoc);
828 		//System.out.println(new String(contentBytes));
829 		os.write(contentBytes);
830             }
831             catch(Exception exc){
832 		System.out.println("\nException in OfficeDocument.write():" +exc);
833             }
834 	    //byte contentBytes[] = docToBytes(contentDoc);
835 	}
836     }
837 
838 
839     /**
840      *  <p>Write out a <code>org.w3c.dom.Document</code> object into a
841      *  <code>byte</code> array.</p>
842      *
843      *  <p>TODO: remove dependency on com.sun.xml.tree.XmlDocument
844      *  package!</p>
845      *
846      *  @param  doc  DOM <code>Document</code> object.
847      *
848      *  @return  <code>byte</code> array of DOM <code>Document</code>
849      *           object.
850      *
851      *  @throws  IOException  If any I/O error occurs.
852      */
docToBytes(Document doc)853     static byte[] docToBytes(Document doc)
854         throws IOException {
855 
856         ByteArrayOutputStream baos = new ByteArrayOutputStream();
857 
858         java.lang.reflect.Constructor con;
859         java.lang.reflect.Method meth;
860 
861         String domImpl = doc.getClass().getName();
862 
863         /*
864          * We may have multiple XML parsers in the Classpath.
865          * Depending on which one is first, the actual type of
866          * doc may vary.  Need a way to find out which API is being
867          * used and use an appropriate serialization method.
868          */
869 
870         try {
871             // First of all try for JAXP 1.0
872             if (domImpl.equals("com.sun.xml.tree.XmlDocument")) {
873 
874                 Debug.log(Debug.INFO, "Using JAXP");
875 
876                 Class jaxpDoc = Class.forName("com.sun.xml.tree.XmlDocument");
877 
878                 // The method is in the XMLDocument class itself, not a helper
879                 meth = jaxpDoc.getMethod("write",
880                             new Class[] { Class.forName("java.io.OutputStream") } );
881 
882                 meth.invoke(doc, new Object [] { baos } );
883             }
884 	     else if (domImpl.equals("org.apache.crimson.tree.XmlDocument"))
885 	    {
886                 Debug.log(Debug.INFO, "Using Crimson");
887 
888 		 Class crimsonDoc = Class.forName("org.apache.crimson.tree.XmlDocument");
889 		 // The method is in the XMLDocument class itself, not a helper
890                 meth = crimsonDoc.getMethod("write",
891                             new Class[] { Class.forName("java.io.OutputStream") } );
892 
893                 meth.invoke(doc, new Object [] { baos } );
894 	    }
895             else if (domImpl.equals("org.apache.xerces.dom.DocumentImpl")
896             || domImpl.equals("org.apache.xerces.dom.DeferredDocumentImpl")) {
897 
898                 Debug.log(Debug.INFO, "Using Xerces");
899 
900                 // Try for Xerces
901                 Class xercesSer =
902                         Class.forName("org.apache.xml.serialize.XMLSerializer");
903 
904                 // Get the OutputStream constructor
905                 // May want to use the OutputFormat parameter at some stage too
906                 con = xercesSer.getConstructor(new Class []
907                         { Class.forName("java.io.OutputStream"),
908                           Class.forName("org.apache.xml.serialize.OutputFormat") } );
909 
910 
911                 // Get the serialize method
912                 meth = xercesSer.getMethod("serialize",
913                             new Class [] { Class.forName("org.w3c.dom.Document") } );
914 
915 
916                 // Get an instance
917                 Object serializer = con.newInstance(new Object [] { baos, null } );
918 
919 
920                 // Now call serialize to write the document
921                 meth.invoke(serializer, new Object [] { doc } );
922             }
923             else if (domImpl.equals("gnu.xml.dom.DomDocument")) {
924                 Debug.log(Debug.INFO, "Using GNU");
925 
926                 Class gnuSer = Class.forName("gnu.xml.dom.ls.DomLSSerializer");
927 
928                 // Get the serialize method
929                 meth = gnuSer.getMethod("serialize",
930                             new Class [] { Class.forName("org.w3c.dom.Node"),
931                             Class.forName("java.io.OutputStream") } );
932 
933                 // Get an instance
934                 Object serializer = gnuSer.newInstance();
935 
936                 // Now call serialize to write the document
937                 meth.invoke(serializer, new Object [] { doc, baos } );
938             }
939             else {
940 		try {
941 			DOMSource domSource = new DOMSource(doc);
942 			StringWriter writer = new StringWriter();
943 			StreamResult result = new StreamResult(writer);
944 			TransformerFactory tf = TransformerFactory.newInstance();
945 			Transformer transformer = tf.newTransformer();
946 			transformer.transform(domSource, result);
947 			return writer.toString().getBytes();
948 		    }
949                 catch (Exception e) {
950                     // We don't have another parser
951                     throw new IOException("No appropriate API (JAXP/Xerces) to serialize XML document: " + domImpl);
952                 }
953             }
954         }
955         catch (ClassNotFoundException cnfe) {
956             throw new IOException(cnfe.toString());
957         }
958         catch (Exception e) {
959             // We may get some other errors, but the bottom line is that
960             // the steps being executed no longer work
961             throw new IOException(e.toString());
962         }
963 
964         byte bytes[] = baos.toByteArray();
965 
966         return bytes;
967     }
968 
969 
970     /**
971      *  Initializes a new DOM <code>Document</code> with the content
972      *  containing minimum OpenOffice XML tags.
973      *
974      *  @throws  IOException  If any I/O error occurs.
975      */
initContentDOM()976     public final void initContentDOM() throws IOException {
977 
978         contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
979 
980         // this is a work-around for a bug in Office6.0 - not really
981         // needed but StarCalc 6.0 will crash without this tag.
982         Element root = contentDoc.getDocumentElement();
983 
984         Element child = contentDoc.createElement(TAG_OFFICE_FONT_DECLS);
985         root.appendChild(child);
986 
987         child = contentDoc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
988         root.appendChild(child);
989 
990         child = contentDoc.createElement(TAG_OFFICE_BODY);
991         root.appendChild(child);
992     }
993 
    /**
     *  Initializes a new settings DOM <code>Document</code> containing
     *  the minimum OpenOffice XML tags: the settings root element with
     *  a single, empty settings child.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
initSettingsDOM()1000     public final void initSettingsDOM() throws IOException {
1001 
1002         settingsDoc = createSettingsDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
1003 
1004         // this is a work-around for a bug in Office6.0 - not really
1005         // needed but StarCalc 6.0 will crash without this tag.
1006         Element root = settingsDoc.getDocumentElement();
1007 
1008         Element child = settingsDoc.createElement(TAG_OFFICE_SETTINGS);
1009         root.appendChild(child);
1010     }
1011 
1012     /**
1013      *  Initializes a new DOM Document with styles
1014      *  containing minimum OpenOffice XML tags.
1015      *
1016      *  @throws  IOException  If any I/O error occurs.
1017      */
initStyleDOM()1018     public final void initStyleDOM() throws IOException {
1019 
1020         styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
1021     }
1022 
    /**
     *  <p>Creates a new DOM <code>Document</code> for document settings,
     *  containing only a root element.</p>
     *
     *  <p>The root element declares the <i>office</i>, <i>xlink</i> and
     *  <i>config</i> namespace prefixes and the <i>office:version</i>
     *  attribute.  Unlike <code>createDOM</code>, this method does not
     *  set an <i>office:class</i> attribute.</p>
     *
     *  @param  rootName  root name of <code>Document</code>.
     *
     *  @return  The newly created <code>Document</code>.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
createSettingsDOM(String rootName)1035     private final Document createSettingsDOM(String rootName) throws IOException {
1036 
1037         Document doc = null;
1038 
1039         try {
1040 
1041             DocumentBuilder builder = factory.newDocumentBuilder();
1042             doc = builder.newDocument();
1043 
1044         } catch (ParserConfigurationException ex) {
1045 
1046             throw new OfficeDocumentException(ex);
1047 
1048         }
1049 
1050         Element root = (Element) doc.createElement(rootName);
1051         doc.appendChild(root);
1052 
1053         root.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
1054         root.setAttribute("xmlns:xlink", "http://openoffice.org/1999/xlink");
1055         root.setAttribute("xmlns:config", "http://openoffice.org/2001/config");
1056         root.setAttribute("office:version", "1.0");
1057 
1058         return doc;
1059     }
1060 
1061 
1062     /**
1063      *  <p>Creates a new DOM <code>Document</code> containing minimum
1064      *  OpenOffice.org XML tags.</p>
1065      *
1066      *  <p>This method uses the subclass
1067      *  <code>getOfficeClassAttribute</code> method to get the
1068      *  attribute for <i>office:class</i>.</p>
1069      *
1070      *  @param  rootName  root name of <code>Document</code>.
1071      *
1072      *  @throws  IOException  If any I/O error occurs.
1073      */
createDOM(String rootName)1074     private final Document createDOM(String rootName) throws IOException {
1075 
1076         Document doc = null;
1077 
1078         try {
1079 
1080             DocumentBuilder builder = factory.newDocumentBuilder();
1081             doc = builder.newDocument();
1082 
1083         } catch (ParserConfigurationException ex) {
1084 
1085             throw new OfficeDocumentException(ex);
1086 
1087         }
1088 
1089         Element root = (Element) doc.createElement(rootName);
1090         doc.appendChild(root);
1091 
1092         root.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
1093         root.setAttribute("xmlns:style", "http://openoffice.org/2000/style");
1094         root.setAttribute("xmlns:text", "http://openoffice.org/2000/text");
1095         root.setAttribute("xmlns:table", "http://openoffice.org/2000/table");
1096         root.setAttribute("xmlns:draw", "http://openoffice.org/2000/drawing");
1097         root.setAttribute("xmlns:fo", "http://www.w3.org/1999/XSL/Format");
1098         root.setAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink");
1099         root.setAttribute("xmlns:number", "http://openoffice.org/2000/datastyle");
1100         root.setAttribute("xmlns:svg", "http://www.w3.org/2000/svg");
1101         root.setAttribute("xmlns:chart", "http://openoffice.org/2000/chart");
1102         root.setAttribute("xmlns:dr3d", "http://openoffice.org/2000/dr3d");
1103         root.setAttribute("xmlns:math", "http://www.w3.org/1998/Math/MathML");
1104         root.setAttribute("xmlns:form", "http://openoffice.org/2000/form");
1105         root.setAttribute("xmlns:script", "http://openoffice.org/2000/script");
1106         root.setAttribute("office:class", getOfficeClassAttribute());
1107         root.setAttribute("office:version", "1.0");
1108 
1109         return doc;
1110     }
1111 
1112 
1113     /**
1114      *  Return the <i>office:class</i> attribute value.
1115      *
1116      *  @return  The attribute value.
1117      */
    protected abstract String getOfficeClassAttribute(); // createDOM stores this value in the root's office:class attribute
1119 
1120 
1121     /**
1122      *  <p>Hacked code to filter &lt;!DOCTYPE&gt; tag before
1123      *  sending stream to parser.</p>
1124      *
1125      *  <p>This hacked code needs to be changed later on.</p>
1126      *
1127      *  <p>Issue: using current jaxp1.0 parser, there is no way
1128      *  to turn off processing of dtds.  Current set of dtds
1129      *  have bugs, processing them will throw exceptions.</p>
1130      *
     *  <p>This is a simple hack that originally assumed the whole
     *  &lt;!DOCTYPE&gt; tag was on a single line.  This is sufficient
     *  for current StarOffice 6.0 generated XML files.  Since this
     *  hack really needs to go away, I don't want to spend
     *  too much time in making it a perfect hack.</p>
     *
     *  <p>FIX (HJ): Removed the requirement for DOCTYPE to be on one
     *  line; the tag is now taken to end at the first '&gt;' found on
     *  the same or a following line.<br>
     *  FIX (HJ): No longer removes newlines.</p>
1138      *
1139      *  @param  is  <code>InputStream</code> to be filtered.
1140      *
1141      *  @return  Reader value without the &lt;!DOCTYPE&gt; tag.
1142      *
1143      *  @throws  IOException  If any I/O error occurs.
1144      */
hack(InputStream is)1145     private static Reader hack(InputStream is) throws IOException {
1146 
1147         BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
1148         StringBuffer buffer = new StringBuffer();
1149 
1150         String str = null;
1151 
1152         while ((str = br.readLine()) != null) {
1153 
1154             int sIndex = str.indexOf("<!DOCTYPE");
1155 
1156             if (sIndex > -1) {
1157 
1158                 buffer.append(str.substring(0, sIndex));
1159 
1160                 int eIndex = str.indexOf('>', sIndex + 8 );
1161 
1162                 if (eIndex > -1) {
1163 
1164                     buffer.append(str.substring(eIndex + 1, str.length()));
1165                     // FIX (HJ): Preserve the newline
1166                     buffer.append("\n");
1167 
1168                 } else {
1169 
1170                     // FIX (HJ): More than one line. Search for '>' in following lines
1171                     boolean bOK = false;
1172                     while ((str = br.readLine())!=null) {
1173                         eIndex = str.indexOf('>');
1174                         if (eIndex>-1) {
1175                             buffer.append(str.substring(eIndex+1));
1176                             // FIX (HJ): Preserve the newline
1177                             buffer.append("\n");
1178                             bOK = true;
1179                             break;
1180                         }
1181                     }
1182 
1183                     if (!bOK) { throw new IOException("Invalid XML"); }
1184                 }
1185 
1186             } else {
1187 
1188                 buffer.append(str);
1189                 // FIX (HJ): Preserve the newline
1190                 buffer.append("\n");
1191             }
1192         }
1193 
1194         StringReader r = new StringReader(buffer.toString());
1195         return r;
1196     }
1197 
1198     /**
1199      *  <p>Transform the InputStream to a Reader Stream.</p>
1200      *
1201      *  <p>This hacked code needs to be changed later on.</p>
1202      *
1203      *  <p>Issue: the new oasis input file stream means
1204      *  that the old input stream fails. see #i33702# </p>
1205      *
1206      *  @param  is  <code>InputStream</code> to be filtered.
1207      *
1208      *  @return  Reader value of the InputStream().
1209      *
1210      *  @throws  IOException  If any I/O error occurs.
1211      */
secondHack(InputStream is)1212     private static Reader secondHack(InputStream is) throws IOException {
1213 
1214         BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
1215         char[] charArray = new char[4096];
1216         StringBuffer sBuf = new StringBuffer();
1217         int n = 0;
1218         while ((n=br.read(charArray, 0, charArray.length)) > 0)
1219             sBuf.append(charArray, 0, n);
1220 
1221         // ensure there is no trailing garbage after the end of the stream.
1222         int sIndex = sBuf.lastIndexOf("</office:document>");
1223         sBuf.delete(sIndex, sBuf.length());
1224         sBuf.append("</office:document>");
1225         StringReader r = new StringReader(sBuf.toString());
1226         return r;
1227     }
1228 
1229 
1230     /**
1231      * Method to create the initial entries in the manifest.xml file stored
1232      * in an SX? file.
1233      */
initManifestDOM()1234     private void initManifestDOM() throws IOException {
1235 
1236         try {
1237             DocumentBuilder builder = factory.newDocumentBuilder();
1238             DOMImplementation domImpl = builder.getDOMImplementation();
1239 
1240             DocumentType docType = domImpl.createDocumentType(TAG_MANIFEST_ROOT,
1241                                         "-//OpenOffice.org//DTD Manifest 1.0//EN",
1242                                         "Manifest.dtd");
1243 	    manifestDoc = domImpl.createDocument("manifest", TAG_MANIFEST_ROOT, docType);
1244         } catch (ParserConfigurationException ex) {
1245             throw new OfficeDocumentException(ex);
1246         }
1247 
1248         // Add the <manifest:manifest> entry
1249         Element manifestRoot = manifestDoc.getDocumentElement();
1250 
1251         manifestRoot.setAttribute("xmlns:manifest", "http://openoffice.org/2001/manifest");
1252 
1253         Element docRoot = manifestDoc.createElement(TAG_MANIFEST_FILE);
1254 
1255         docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "/");
1256         docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, getDocumentMimeType());
1257 
1258         manifestRoot.appendChild(docRoot);
1259     }
1260 }
1261 
1262