1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.converter.xml; 25 26 import java.io.InputStream; 27 import java.io.OutputStream; 28 import java.io.Reader; 29 import java.io.BufferedReader; 30 import java.io.StringReader; 31 import java.io.StringWriter; 32 import java.io.InputStreamReader; 33 import java.io.ByteArrayOutputStream; 34 import java.io.ByteArrayInputStream; 35 import java.io.IOException; 36 import java.util.Iterator; 37 import java.util.Map; 38 import java.util.HashMap; 39 40 import javax.xml.parsers.DocumentBuilderFactory; 41 import javax.xml.parsers.DocumentBuilder; 42 import javax.xml.parsers.ParserConfigurationException; 43 44 import org.w3c.dom.Node; 45 import org.w3c.dom.Element; 46 import org.w3c.dom.Document; 47 import org.w3c.dom.DOMImplementation; 48 import org.w3c.dom.DocumentType; 49 import org.w3c.dom.NodeList; 50 import org.xml.sax.InputSource; 51 import org.w3c.dom.NamedNodeMap; 52 import org.xml.sax.SAXException; 53 54 import javax.xml.transform.*; 55 import javax.xml.transform.dom.*; 56 import javax.xml.transform.stream.*; 57 58 import org.openoffice.xmerge.util.Resources; 59 import org.openoffice.xmerge.util.Debug; 60 61 /** 62 * An implementation of <code>Document</code> for 63 * StarOffice documents. 64 */ 65 public abstract class OfficeDocument 66 implements org.openoffice.xmerge.Document, 67 OfficeConstants { 68 69 /** Factory for <code>DocumentBuilder</code> objects. */ 70 private static DocumentBuilderFactory factory = 71 DocumentBuilderFactory.newInstance(); 72 73 /** DOM <code>Document</code> of content.xml. */ 74 private Document contentDoc = null; 75 76 /** DOM <code>Document</code> of meta.xml. */ 77 private Document metaDoc = null; 78 79 /** DOM <code>Document</code> of settings.xml. */ 80 private Document settingsDoc = null; 81 82 /** DOM <code>Document</code> of content.xml. */ 83 private Document styleDoc = null; 84 85 /** DOM <code>Docuemtn</code> of META-INF/manifest.xml. */ 86 private Document manifestDoc = null; 87 88 private String documentName = null; 89 private String fileName = null; 90 91 /** Resources object. */ 92 private Resources res = null; 93 94 /** 95 * <code>OfficeZip</code> object to store zip contents from 96 * read <code>InputStream</code>. Note that this member 97 * will still be null if it was initialized using a template 98 * file instead of reading from a StarOffice zipped 99 * XML file. 100 */ 101 private OfficeZip zip = null; 102 103 /** Collection to keep track of the embedded objects in the document. */ 104 private Map embeddedObjects = null; 105 106 /** 107 * Default constructor. 108 * 109 * @param name <code>Document</code> name. 110 */ OfficeDocument(String name)111 public OfficeDocument(String name) 112 { 113 this(name, true, false); 114 } 115 116 117 /** 118 * Constructor with arguments to set <code>namespaceAware</code> 119 * and <code>validating</code> flags. 120 * 121 * @param name <code>Document</code> name (may or may not 122 * contain extension). 123 * @param namespaceAware Value for <code>namespaceAware</code> flag. 124 * @param validating Value for <code>validating</code> flag. 125 */ OfficeDocument(String name, boolean namespaceAware, boolean validating)126 public OfficeDocument(String name, boolean namespaceAware, boolean validating) { 127 128 res = Resources.getInstance(); 129 factory.setValidating(validating); 130 factory.setNamespaceAware(namespaceAware); 131 this.documentName = trimDocumentName(name); 132 this.fileName = documentName + getFileExtension(); 133 } 134 135 136 /** 137 * Removes the file extension from the <code>Document</code> 138 * name. 139 * 140 * @param name Full <code>Document</code> name with extension. 141 * 142 * @return Name of <code>Document</code> without the extension. 143 */ trimDocumentName(String name)144 private String trimDocumentName(String name) { 145 String temp = name.toLowerCase(); 146 String ext = getFileExtension(); 147 148 if (temp.endsWith(ext)) { 149 // strip the extension 150 int nlen = name.length(); 151 int endIndex = nlen - ext.length(); 152 name = name.substring(0,endIndex); 153 } 154 155 return name; 156 } 157 158 159 /** 160 * Return a DOM <code>Document</code> object of the content.xml 161 * file. Note that a content DOM is not created when the constructor 162 * is called. So, either the <code>read</code> method or the 163 * <code>initContentDOM</code> method will need to be called ahead 164 * on this object before calling this method. 165 * 166 * @return DOM <code>Document</code> object. 167 */ getContentDOM()168 public Document getContentDOM() { 169 170 return contentDoc; 171 } 172 173 /** 174 * Return a DOM <code>Document</code> object of the meta.xml 175 * file. Note that a content DOM is not created when the constructor 176 * is called. So, either the <code>read</code> method or the 177 * <code>initContentDOM</code> method will need to be called ahead 178 * on this object before calling this method. 179 * 180 * @return DOM <code>Document</code> object. 181 */ getMetaDOM()182 public Document getMetaDOM() { 183 184 return metaDoc; 185 } 186 187 188 /** 189 * Return a DOM <code>Document</code> object of the settings.xml 190 * file. Note that a content DOM is not created when the constructor 191 * is called. So, either the <code>read</code> method or the 192 * <code>initContentDOM</code> method will need to be called ahead 193 * on this object before calling this method. 194 * 195 * @return DOM <code>Document</code> object. 196 */ getSettingsDOM()197 public Document getSettingsDOM() { 198 199 return settingsDoc; 200 } 201 202 203 /** 204 * Sets the content tree of the document. 205 * 206 * @param newDom <code>Node</code> containing the new content tree. 207 */ setContentDOM( Node newDom)208 public void setContentDOM( Node newDom) { 209 contentDoc = (Document)newDom; 210 } 211 212 213 /** 214 * Sets the meta tree of the document. 215 * 216 * @param newDom <code>Node</code> containing the new meta tree. 217 */ setMetaDOM(Node newDom)218 public void setMetaDOM (Node newDom) { 219 metaDoc = (Document)newDom; 220 } 221 222 223 /** 224 * Sets the settings tree of the document. 225 * 226 * @param newDom <code>Node</code> containing the new settings tree. 227 */ setSettingsDOM(Node newDom)228 public void setSettingsDOM (Node newDom) { 229 settingsDoc = (Document)newDom; 230 } 231 232 233 /** 234 * Sets the style tree of the document. 235 * 236 * @param newDom <code>Node</code> containing the new style tree. 237 */ setStyleDOM(Node newDom)238 public void setStyleDOM (Node newDom) { 239 styleDoc = (Document)newDom; 240 } 241 242 243 /** 244 * Return a DOM <code>Document</code> object of the style.xml file. 245 * Note that this may return null if there is no style DOM. 246 * Note that a style DOM is not created when the constructor 247 * is called. Depending on the <code>InputStream</code>, a 248 * <code>read</code> method may or may not build a style DOM. When 249 * creating a new style DOM, call the <code>initStyleDOM</code> method 250 * first. 251 * 252 * @return DOM <code>Document</code> object. 253 */ getStyleDOM()254 public Document getStyleDOM() { 255 256 return styleDoc; 257 } 258 259 260 /** 261 * Return the name of the <code>Document</code>. 262 * 263 * @return The name of <code>Document</code>. 264 */ getName()265 public String getName() { 266 267 return documentName; 268 } 269 270 271 /** 272 * Return the file name of the <code>Document</code>, possibly 273 * with the standard extension. 274 * 275 * @return The file name of <code>Document</code>. 276 */ getFileName()277 public String getFileName() { 278 279 return fileName; 280 } 281 282 283 /** 284 * Returns the file extension for this type of 285 * <code>Document</code>. 286 * 287 * @return The file extension of <code>Document</code>. 288 */ getFileExtension()289 protected abstract String getFileExtension(); 290 291 292 /** 293 * Returns all the embedded objects (graphics, formulae, etc.) present in 294 * this document. 295 * 296 * @return An <code>Iterator</code> of <code>EmbeddedObject</code> objects. 297 */ getEmbeddedObjects()298 public Iterator getEmbeddedObjects() { 299 300 if (embeddedObjects == null && manifestDoc != null) { 301 embeddedObjects = new HashMap(); 302 303 // Need to read the manifest file and construct a list of objects 304 NodeList nl = manifestDoc.getElementsByTagName(TAG_MANIFEST_FILE); 305 306 // Dont create the HashMap if there are no embedded objects 307 int len = nl.getLength(); 308 for (int i = 0; i < len; i++) { 309 Node n = nl.item(i); 310 311 NamedNodeMap attrs = n.getAttributes(); 312 313 String type = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_TYPE).getNodeValue(); 314 String path = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_PATH).getNodeValue(); 315 316 317 /* 318 * According to OpenOffice.org XML File Format document (ver. 1) 319 * there are only two types of embedded object: 320 * 321 * Objects with an XML representation. 322 * Objects without an XML representation. 323 * 324 * The former are represented by one or more XML files. 325 * The latter are in binary form. 326 */ 327 if (type.startsWith("application/vnd.sun.xml")) 328 { 329 if (path.equals("/")) { 330 // Exclude the main document entries 331 continue; 332 } 333 // Take off the trailing '/' 334 String name = path.substring(0, path.length() - 1); 335 embeddedObjects.put(name, new EmbeddedXMLObject(name, type, zip)); 336 } 337 else if (type.equals("text/xml")) { 338 // XML entries are either embedded StarOffice doc entries or main 339 // document entries 340 continue; 341 } 342 else { // FIX (HJ): allows empty MIME type 343 embeddedObjects.put(path, new EmbeddedBinaryObject(path, type, zip)); 344 } 345 } 346 } 347 348 return embeddedObjects.values().iterator(); 349 } 350 351 /** 352 * Returns the embedded object corresponding to the name provided. 353 * The name should be stripped of any preceding path characters, such as 354 * '/', '.' or '#'. 355 * 356 * @param name The name of the embedded object to retrieve. 357 * 358 * @return An <code>EmbeddedObject</code> instance representing the named 359 * object. 360 */ getEmbeddedObject(String name)361 public EmbeddedObject getEmbeddedObject(String name) { 362 if (name == null) { 363 return null; 364 } 365 366 if (embeddedObjects == null) { 367 getEmbeddedObjects(); 368 } 369 370 if (embeddedObjects.containsKey(name)) { 371 return (EmbeddedObject)embeddedObjects.get(name); 372 } 373 else { 374 return null; 375 } 376 } 377 378 379 /** 380 * Adds a new embedded object to the document. 381 * 382 * @param embObj An instance of <code>EmbeddedObject</code>. 383 */ addEmbeddedObject(EmbeddedObject embObj)384 public void addEmbeddedObject(EmbeddedObject embObj) { 385 if (embObj == null) { 386 return; 387 } 388 389 if (embeddedObjects == null) { 390 embeddedObjects = new HashMap(); 391 } 392 393 embeddedObjects.put(embObj.getName(), embObj); 394 } 395 396 397 /** 398 * Read the Office <code>Document</code> from the given 399 * <code>InputStream</code>. 400 * 401 * @param is Office document <code>InputStream</code>. 402 * 403 * @throws IOException If any I/O error occurs. 404 */ read(InputStream is)405 public void read(InputStream is) throws IOException { 406 407 Debug.log(Debug.INFO, "reading Office file"); 408 409 DocumentBuilder builder = null; 410 411 try { 412 builder = factory.newDocumentBuilder(); 413 } catch (ParserConfigurationException ex) { 414 throw new OfficeDocumentException(ex); 415 } 416 417 // read in Office zip file format 418 419 zip = new OfficeZip(); 420 zip.read(is); 421 422 // grab the content.xml and 423 // parse it into contentDoc. 424 425 byte contentBytes[] = zip.getContentXMLBytes(); 426 427 if (contentBytes == null) { 428 429 throw new OfficeDocumentException("Entry content.xml not found in file"); 430 } 431 432 try { 433 434 contentDoc = parse(builder, contentBytes); 435 436 } catch (SAXException ex) { 437 438 throw new OfficeDocumentException(ex); 439 } 440 441 // if style.xml exists, grab the style.xml 442 // parse it into styleDoc. 443 444 byte styleBytes[] = zip.getStyleXMLBytes(); 445 446 if (styleBytes != null) { 447 448 try { 449 450 styleDoc = parse(builder, styleBytes); 451 452 } catch (SAXException ex) { 453 454 throw new OfficeDocumentException(ex); 455 } 456 } 457 458 byte metaBytes[] = zip.getMetaXMLBytes(); 459 460 if (metaBytes != null) { 461 462 try { 463 464 metaDoc = parse(builder, metaBytes); 465 466 } catch (SAXException ex) { 467 468 throw new OfficeDocumentException(ex); 469 } 470 } 471 472 byte settingsBytes[] = zip.getSettingsXMLBytes(); 473 474 if (settingsBytes != null) { 475 476 try { 477 478 settingsDoc = parse(builder, settingsBytes); 479 480 } catch (SAXException ex) { 481 482 throw new OfficeDocumentException(ex); 483 } 484 } 485 486 487 // Read in the META-INF/manifest.xml file 488 byte manifestBytes[] = zip.getManifestXMLBytes(); 489 490 if (manifestBytes != null) { 491 492 try { 493 manifestDoc = parse(builder, manifestBytes); 494 } catch (SAXException ex) { 495 throw new OfficeDocumentException(ex); 496 } 497 } 498 499 } 500 501 502 /** 503 * Read the Office <code>Document</code> from the given 504 * <code>InputStream</code>. 505 * 506 * @param is Office document <code>InputStream</code>. 507 * @param isZip <code>boolean</code> Identifies whether 508 * a file is zipped or not 509 * 510 * @throws IOException If any I/O error occurs. 511 */ read(InputStream is, boolean isZip)512 public void read(InputStream is, boolean isZip) throws IOException { 513 514 Debug.log(Debug.INFO, "reading Office file"); 515 516 DocumentBuilder builder = null; 517 518 try { 519 builder = factory.newDocumentBuilder(); 520 } catch (ParserConfigurationException ex) { 521 throw new OfficeDocumentException(ex); 522 } 523 524 if (isZip) 525 { 526 read(is); 527 } 528 else{ 529 try{ 530 //System.out.println("\nParsing Input stream, validating?: "+builder.isValidating()); 531 //contentDoc= builder.parse((InputStream)is); 532 533 Reader r = secondHack(is); 534 InputSource ins = new InputSource(r); 535 org.w3c.dom.Document newDoc = builder.parse(ins); 536 //org.w3c.dom.Document newDoc = builder.parse((InputStream)is); 537 Element rootElement=newDoc.getDocumentElement(); 538 539 NodeList nodeList; 540 Node tmpNode; 541 Node rootNode = (Node)rootElement; 542 if (newDoc !=null){ 543 /*content*/ 544 contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT); 545 rootElement=contentDoc.getDocumentElement(); 546 rootNode = (Node)rootElement; 547 548 // FIX (HJ): Include office:font-decls in content DOM 549 nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS); 550 if (nodeList.getLength()>0){ 551 tmpNode = contentDoc.importNode(nodeList.item(0),true); 552 rootNode.appendChild(tmpNode); 553 } 554 555 nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); 556 if (nodeList.getLength()>0){ 557 tmpNode = contentDoc.importNode(nodeList.item(0),true); 558 rootNode.appendChild(tmpNode); 559 } 560 561 nodeList= newDoc.getElementsByTagName(TAG_OFFICE_BODY); 562 if (nodeList.getLength()>0){ 563 tmpNode = contentDoc.importNode(nodeList.item(0),true); 564 rootNode.appendChild(tmpNode); 565 } 566 567 /*Styles*/ 568 styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES); 569 rootElement=styleDoc.getDocumentElement(); 570 rootNode = (Node)rootElement; 571 572 // FIX (HJ): Include office:font-decls in styles DOM 573 nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS); 574 if (nodeList.getLength()>0){ 575 tmpNode = styleDoc.importNode(nodeList.item(0),true); 576 rootNode.appendChild(tmpNode); 577 } 578 579 nodeList= newDoc.getElementsByTagName(TAG_OFFICE_STYLES); 580 if (nodeList.getLength()>0){ 581 tmpNode = styleDoc.importNode(nodeList.item(0),true); 582 rootNode.appendChild(tmpNode); 583 } 584 585 // FIX (HJ): Include office:automatic-styles in styles DOM 586 nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); 587 if (nodeList.getLength()>0){ 588 tmpNode = styleDoc.importNode(nodeList.item(0),true); 589 rootNode.appendChild(tmpNode); 590 } 591 592 // FIX (HJ): Include office:master-styles in styles DOM 593 nodeList= newDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES); 594 if (nodeList.getLength()>0){ 595 tmpNode = styleDoc.importNode(nodeList.item(0),true); 596 rootNode.appendChild(tmpNode); 597 } 598 599 /*Settings*/ 600 settingsDoc = createDOM(TAG_OFFICE_DOCUMENT_SETTINGS); 601 rootElement=settingsDoc.getDocumentElement(); 602 rootNode = (Node)rootElement; 603 nodeList= newDoc.getElementsByTagName(TAG_OFFICE_SETTINGS); 604 if (nodeList.getLength()>0){ 605 tmpNode = settingsDoc.importNode(nodeList.item(0),true); 606 rootNode.appendChild(tmpNode); 607 } 608 /*Meta*/ 609 metaDoc = createDOM(TAG_OFFICE_DOCUMENT_META); 610 rootElement=metaDoc.getDocumentElement(); 611 rootNode = (Node)rootElement; 612 nodeList= newDoc.getElementsByTagName(TAG_OFFICE_META); 613 if (nodeList.getLength()>0){ 614 tmpNode = metaDoc.importNode(nodeList.item(0),true); 615 rootNode.appendChild(tmpNode); 616 } 617 } 618 } 619 catch (SAXException ex) { 620 throw new OfficeDocumentException(ex); 621 } 622 } 623 624 } 625 626 627 628 /** 629 * Parse given <code>byte</code> array into a DOM 630 * <code>Document</code> object using the 631 * <code>DocumentBuilder</code> object. 632 * 633 * @param builder <code>DocumentBuilder</code> object for parsing. 634 * @param bytes <code>byte</code> array for parsing. 635 * 636 * @return Resulting DOM <code>Document</code> object. 637 * 638 * @throws SAXException If any parsing error occurs. 639 */ parse(DocumentBuilder builder, byte bytes[])640 static Document parse(DocumentBuilder builder, byte bytes[]) 641 throws SAXException, IOException { 642 643 Document doc = null; 644 645 ByteArrayInputStream is = new ByteArrayInputStream(bytes); 646 647 // TODO: replace hack with a more appropriate fix. 648 649 Reader r = hack(is); 650 InputSource ins = new InputSource(r); 651 doc = builder.parse(ins); 652 653 return doc; 654 } 655 656 657 /** 658 * Method to return the MIME type of the document. 659 * 660 * @return String The document's MIME type. 661 */ getDocumentMimeType()662 protected abstract String getDocumentMimeType(); 663 664 665 /** 666 * Write out Office ZIP file format. 667 * 668 * @param os XML <code>OutputStream</code>. 669 * 670 * @throws IOException If any I/O error occurs. 671 */ write(OutputStream os)672 public void write(OutputStream os) throws IOException { 673 if (zip == null) { 674 zip = new OfficeZip(); 675 } 676 677 initManifestDOM(); 678 679 Element domEntry; 680 Element manifestRoot = manifestDoc.getDocumentElement(); 681 682 // The EmbeddedObjects come first. 683 Iterator embObjs = getEmbeddedObjects(); 684 while (embObjs.hasNext()) { 685 EmbeddedObject obj = (EmbeddedObject)embObjs.next(); 686 obj.writeManifestData(manifestDoc); 687 688 obj.write(zip); 689 } 690 691 // Add in the entry for the Pictures directory. Always present. 692 domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE); 693 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "Pictures/"); 694 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, ""); 695 manifestRoot.appendChild(domEntry); 696 697 // Write content to the Zip file and then write any of the optional 698 // data, if it exists. 699 zip.setContentXMLBytes(docToBytes(contentDoc)); 700 701 domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE); 702 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "content.xml"); 703 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml"); 704 705 manifestRoot.appendChild(domEntry); 706 707 if (styleDoc != null) { 708 zip.setStyleXMLBytes(docToBytes(styleDoc)); 709 710 domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE); 711 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "styles.xml"); 712 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml"); 713 manifestRoot.appendChild(domEntry); 714 } 715 716 if (metaDoc != null) { 717 zip.setMetaXMLBytes(docToBytes(metaDoc)); 718 719 domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE); 720 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "meta.xml"); 721 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml"); 722 manifestRoot.appendChild(domEntry); 723 } 724 725 if (settingsDoc != null) { 726 zip.setSettingsXMLBytes(docToBytes(settingsDoc)); 727 728 domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE); 729 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "settings.xml"); 730 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml"); 731 manifestRoot.appendChild(domEntry); 732 } 733 734 zip.setManifestXMLBytes(docToBytes(manifestDoc)); 735 736 zip.write(os); 737 } 738 739 740 /** 741 * Write out Office ZIP file format. 742 * 743 * @param os XML <code>OutputStream</code>. 744 * @param isZip <code>boolean</code> 745 * 746 * @throws IOException If any I/O error occurs. 747 */ write(OutputStream os, boolean isZip)748 public void write(OutputStream os, boolean isZip) throws IOException { 749 750 // Create an OfficeZip object if one does not exist. 751 if (isZip){ 752 write(os); 753 } 754 else{ 755 try{ 756 DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); 757 DocumentBuilder builder= builderFactory.newDocumentBuilder(); 758 DOMImplementation domImpl = builder.getDOMImplementation(); 759 DocumentType docType =domImpl.createDocumentType("office:document","-//OpenOffice.org//DTD OfficeDocument 1.0//EN",null); 760 org.w3c.dom.Document newDoc = domImpl.createDocument("http://openoffice.org/2000/office","office:document",null); 761 762 763 Element rootElement=newDoc.getDocumentElement(); 764 rootElement.setAttribute("xmlns:office","http://openoffice.org/2000/office"); 765 rootElement.setAttribute("xmlns:style","http://openoffice.org/2000/style" ); 766 rootElement.setAttribute("xmlns:text","http://openoffice.org/2000/text"); 767 rootElement.setAttribute("xmlns:table","http://openoffice.org/2000/table"); 768 769 rootElement.setAttribute("xmlns:draw","http://openoffice.org/2000/drawing"); 770 rootElement.setAttribute("xmlns:fo","http://www.w3.org/1999/XSL/Format" ); 771 rootElement.setAttribute("xmlns:xlink","http://www.w3.org/1999/xlink" ); 772 rootElement.setAttribute("xmlns:dc","http://purl.org/dc/elements/1.1/" ); 773 rootElement.setAttribute("xmlns:meta","http://openoffice.org/2000/meta" ); 774 rootElement.setAttribute("xmlns:number","http://openoffice.org/2000/datastyle" ); 775 rootElement.setAttribute("xmlns:svg","http://www.w3.org/2000/svg" ); 776 rootElement.setAttribute("xmlns:chart","http://openoffice.org/2000/chart" ); 777 rootElement.setAttribute("xmlns:dr3d","http://openoffice.org/2000/dr3d" ); 778 rootElement.setAttribute("xmlns:math","http://www.w3.org/1998/Math/MathML" ); 779 rootElement.setAttribute("xmlns:form","http://openoffice.org/2000/form" ); 780 rootElement.setAttribute("xmlns:script","http://openoffice.org/2000/script" ); 781 rootElement.setAttribute("xmlns:config","http://openoffice.org/2001/config" ); 782 // #i41033# OASIS format needs the "office:class" set. 783 if(getDocumentMimeType() == SXC_MIME_TYPE) 784 rootElement.setAttribute("office:class","spreadsheet" ); 785 else if(getDocumentMimeType() == SXW_MIME_TYPE) 786 rootElement.setAttribute("office:class","text" ); 787 rootElement.setAttribute("office:version","1.0"); 788 789 790 NodeList nodeList; 791 Node tmpNode; 792 Node rootNode = (Node)rootElement; 793 if (metaDoc !=null){ 794 nodeList= metaDoc.getElementsByTagName(TAG_OFFICE_META); 795 if (nodeList.getLength()>0){ 796 tmpNode = newDoc.importNode(nodeList.item(0),true); 797 rootNode.appendChild(tmpNode); 798 } 799 }if (styleDoc !=null){ 800 nodeList= styleDoc.getElementsByTagName(TAG_OFFICE_STYLES); 801 if (nodeList.getLength()>0){ 802 tmpNode = newDoc.importNode(nodeList.item(0),true); 803 rootNode.appendChild(tmpNode); 804 } 805 806 }if (settingsDoc !=null){ 807 nodeList= settingsDoc.getElementsByTagName(TAG_OFFICE_SETTINGS); 808 if (nodeList.getLength()>0){ 809 tmpNode = newDoc.importNode(nodeList.item(0),true); 810 rootNode.appendChild(tmpNode); 811 } 812 } 813 if (contentDoc !=null){ 814 nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); 815 if (nodeList.getLength()>0){ 816 tmpNode = newDoc.importNode(nodeList.item(0),true); 817 rootNode.appendChild(tmpNode); 818 } 819 820 nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_BODY); 821 if (nodeList.getLength()>0){ 822 tmpNode = newDoc.importNode(nodeList.item(0),true); 823 rootNode.appendChild(tmpNode); 824 } 825 } 826 827 byte contentBytes[] = docToBytes(newDoc); 828 //System.out.println(new String(contentBytes)); 829 os.write(contentBytes); 830 } 831 catch(Exception exc){ 832 System.out.println("\nException in OfficeDocument.write():" +exc); 833 } 834 //byte contentBytes[] = docToBytes(contentDoc); 835 } 836 } 837 838 839 /** 840 * <p>Write out a <code>org.w3c.dom.Document</code> object into a 841 * <code>byte</code> array.</p> 842 * 843 * <p>TODO: remove dependency on com.sun.xml.tree.XmlDocument 844 * package!</p> 845 * 846 * @param doc DOM <code>Document</code> object. 847 * 848 * @return <code>byte</code> array of DOM <code>Document</code> 849 * object. 850 * 851 * @throws IOException If any I/O error occurs. 852 */ docToBytes(Document doc)853 static byte[] docToBytes(Document doc) 854 throws IOException { 855 856 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 857 858 java.lang.reflect.Constructor con; 859 java.lang.reflect.Method meth; 860 861 String domImpl = doc.getClass().getName(); 862 863 /* 864 * We may have multiple XML parsers in the Classpath. 865 * Depending on which one is first, the actual type of 866 * doc may vary. Need a way to find out which API is being 867 * used and use an appropriate serialization method. 868 */ 869 870 try { 871 // First of all try for JAXP 1.0 872 if (domImpl.equals("com.sun.xml.tree.XmlDocument")) { 873 874 Debug.log(Debug.INFO, "Using JAXP"); 875 876 Class jaxpDoc = Class.forName("com.sun.xml.tree.XmlDocument"); 877 878 // The method is in the XMLDocument class itself, not a helper 879 meth = jaxpDoc.getMethod("write", 880 new Class[] { Class.forName("java.io.OutputStream") } ); 881 882 meth.invoke(doc, new Object [] { baos } ); 883 } 884 else if (domImpl.equals("org.apache.crimson.tree.XmlDocument")) 885 { 886 Debug.log(Debug.INFO, "Using Crimson"); 887 888 Class crimsonDoc = Class.forName("org.apache.crimson.tree.XmlDocument"); 889 // The method is in the XMLDocument class itself, not a helper 890 meth = crimsonDoc.getMethod("write", 891 new Class[] { Class.forName("java.io.OutputStream") } ); 892 893 meth.invoke(doc, new Object [] { baos } ); 894 } 895 else if (domImpl.equals("org.apache.xerces.dom.DocumentImpl") 896 || domImpl.equals("org.apache.xerces.dom.DeferredDocumentImpl")) { 897 898 Debug.log(Debug.INFO, "Using Xerces"); 899 900 // Try for Xerces 901 Class xercesSer = 902 Class.forName("org.apache.xml.serialize.XMLSerializer"); 903 904 // Get the OutputStream constructor 905 // May want to use the OutputFormat parameter at some stage too 906 con = xercesSer.getConstructor(new Class [] 907 { Class.forName("java.io.OutputStream"), 908 Class.forName("org.apache.xml.serialize.OutputFormat") } ); 909 910 911 // Get the serialize method 912 meth = xercesSer.getMethod("serialize", 913 new Class [] { Class.forName("org.w3c.dom.Document") } ); 914 915 916 // Get an instance 917 Object serializer = con.newInstance(new Object [] { baos, null } ); 918 919 920 // Now call serialize to write the document 921 meth.invoke(serializer, new Object [] { doc } ); 922 } 923 else if (domImpl.equals("gnu.xml.dom.DomDocument")) { 924 Debug.log(Debug.INFO, "Using GNU"); 925 926 Class gnuSer = Class.forName("gnu.xml.dom.ls.DomLSSerializer"); 927 928 // Get the serialize method 929 meth = gnuSer.getMethod("serialize", 930 new Class [] { Class.forName("org.w3c.dom.Node"), 931 Class.forName("java.io.OutputStream") } ); 932 933 // Get an instance 934 Object serializer = gnuSer.newInstance(); 935 936 // Now call serialize to write the document 937 meth.invoke(serializer, new Object [] { doc, baos } ); 938 } 939 else { 940 try { 941 DOMSource domSource = new DOMSource(doc); 942 StringWriter writer = new StringWriter(); 943 StreamResult result = new StreamResult(writer); 944 TransformerFactory tf = TransformerFactory.newInstance(); 945 Transformer transformer = tf.newTransformer(); 946 transformer.transform(domSource, result); 947 return writer.toString().getBytes(); 948 } 949 catch (Exception e) { 950 // We don't have another parser 951 throw new IOException("No appropriate API (JAXP/Xerces) to serialize XML document: " + domImpl); 952 } 953 } 954 } 955 catch (ClassNotFoundException cnfe) { 956 throw new IOException(cnfe.toString()); 957 } 958 catch (Exception e) { 959 // We may get some other errors, but the bottom line is that 960 // the steps being executed no longer work 961 throw new IOException(e.toString()); 962 } 963 964 byte bytes[] = baos.toByteArray(); 965 966 return bytes; 967 } 968 969 970 /** 971 * Initializes a new DOM <code>Document</code> with the content 972 * containing minimum OpenOffice XML tags. 973 * 974 * @throws IOException If any I/O error occurs. 975 */ initContentDOM()976 public final void initContentDOM() throws IOException { 977 978 contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT); 979 980 // this is a work-around for a bug in Office6.0 - not really 981 // needed but StarCalc 6.0 will crash without this tag. 982 Element root = contentDoc.getDocumentElement(); 983 984 Element child = contentDoc.createElement(TAG_OFFICE_FONT_DECLS); 985 root.appendChild(child); 986 987 child = contentDoc.createElement(TAG_OFFICE_AUTOMATIC_STYLES); 988 root.appendChild(child); 989 990 child = contentDoc.createElement(TAG_OFFICE_BODY); 991 root.appendChild(child); 992 } 993 994 /** 995 * Initializes a new DOM <code>Document</code> with the content 996 * containing minimum OpenOffice XML tags. 997 * 998 * @throws IOException If any I/O error occurs. 999 */ initSettingsDOM()1000 public final void initSettingsDOM() throws IOException { 1001 1002 settingsDoc = createSettingsDOM(TAG_OFFICE_DOCUMENT_SETTINGS); 1003 1004 // this is a work-around for a bug in Office6.0 - not really 1005 // needed but StarCalc 6.0 will crash without this tag. 1006 Element root = settingsDoc.getDocumentElement(); 1007 1008 Element child = settingsDoc.createElement(TAG_OFFICE_SETTINGS); 1009 root.appendChild(child); 1010 } 1011 1012 /** 1013 * Initializes a new DOM Document with styles 1014 * containing minimum OpenOffice XML tags. 1015 * 1016 * @throws IOException If any I/O error occurs. 1017 */ initStyleDOM()1018 public final void initStyleDOM() throws IOException { 1019 1020 styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES); 1021 } 1022 1023 /** 1024 * <p>Creates a new DOM <code>Document</code> containing minimum 1025 * OpenOffice.org XML tags.</p> 1026 * 1027 * <p>This method uses the subclass 1028 * <code>getOfficeClassAttribute</code> method to get the 1029 * attribute for <i>office:class</i>.</p> 1030 * 1031 * @param rootName root name of <code>Document</code>. 1032 * 1033 * @throws IOException If any I/O error occurs. 1034 */ createSettingsDOM(String rootName)1035 private final Document createSettingsDOM(String rootName) throws IOException { 1036 1037 Document doc = null; 1038 1039 try { 1040 1041 DocumentBuilder builder = factory.newDocumentBuilder(); 1042 doc = builder.newDocument(); 1043 1044 } catch (ParserConfigurationException ex) { 1045 1046 throw new OfficeDocumentException(ex); 1047 1048 } 1049 1050 Element root = (Element) doc.createElement(rootName); 1051 doc.appendChild(root); 1052 1053 root.setAttribute("xmlns:office", "http://openoffice.org/2000/office"); 1054 root.setAttribute("xmlns:xlink", "http://openoffice.org/1999/xlink"); 1055 root.setAttribute("xmlns:config", "http://openoffice.org/2001/config"); 1056 root.setAttribute("office:version", "1.0"); 1057 1058 return doc; 1059 } 1060 1061 1062 /** 1063 * <p>Creates a new DOM <code>Document</code> containing minimum 1064 * OpenOffice.org XML tags.</p> 1065 * 1066 * <p>This method uses the subclass 1067 * <code>getOfficeClassAttribute</code> method to get the 1068 * attribute for <i>office:class</i>.</p> 1069 * 1070 * @param rootName root name of <code>Document</code>. 1071 * 1072 * @throws IOException If any I/O error occurs. 1073 */ createDOM(String rootName)1074 private final Document createDOM(String rootName) throws IOException { 1075 1076 Document doc = null; 1077 1078 try { 1079 1080 DocumentBuilder builder = factory.newDocumentBuilder(); 1081 doc = builder.newDocument(); 1082 1083 } catch (ParserConfigurationException ex) { 1084 1085 throw new OfficeDocumentException(ex); 1086 1087 } 1088 1089 Element root = (Element) doc.createElement(rootName); 1090 doc.appendChild(root); 1091 1092 root.setAttribute("xmlns:office", "http://openoffice.org/2000/office"); 1093 root.setAttribute("xmlns:style", "http://openoffice.org/2000/style"); 1094 root.setAttribute("xmlns:text", "http://openoffice.org/2000/text"); 1095 root.setAttribute("xmlns:table", "http://openoffice.org/2000/table"); 1096 root.setAttribute("xmlns:draw", "http://openoffice.org/2000/drawing"); 1097 root.setAttribute("xmlns:fo", "http://www.w3.org/1999/XSL/Format"); 1098 root.setAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink"); 1099 root.setAttribute("xmlns:number", "http://openoffice.org/2000/datastyle"); 1100 root.setAttribute("xmlns:svg", "http://www.w3.org/2000/svg"); 1101 root.setAttribute("xmlns:chart", "http://openoffice.org/2000/chart"); 1102 root.setAttribute("xmlns:dr3d", "http://openoffice.org/2000/dr3d"); 1103 root.setAttribute("xmlns:math", "http://www.w3.org/1998/Math/MathML"); 1104 root.setAttribute("xmlns:form", "http://openoffice.org/2000/form"); 1105 root.setAttribute("xmlns:script", "http://openoffice.org/2000/script"); 1106 root.setAttribute("office:class", getOfficeClassAttribute()); 1107 root.setAttribute("office:version", "1.0"); 1108 1109 return doc; 1110 } 1111 1112 1113 /** 1114 * Return the <i>office:class</i> attribute value. 1115 * 1116 * @return The attribute value. 1117 */ getOfficeClassAttribute()1118 protected abstract String getOfficeClassAttribute(); 1119 1120 1121 /** 1122 * <p>Hacked code to filter <!DOCTYPE> tag before 1123 * sending stream to parser.</p> 1124 * 1125 * <p>This hacked code needs to be changed later on.</p> 1126 * 1127 * <p>Issue: using current jaxp1.0 parser, there is no way 1128 * to turn off processing of dtds. Current set of dtds 1129 * have bugs, processing them will throw exceptions.</p> 1130 * 1131 * <p>This is a simple hack that assumes the whole <!DOCTYPE> 1132 * tag are all in the same line. This is sufficient for 1133 * current StarOffice 6.0 generated XML files. Since this 1134 * hack really needs to go away, I don't want to spend 1135 * too much time in making it a perfect hack.</p> 1136 * FIX (HJ): Removed requirement for DOCTYPE to be in one line 1137 * FIX (HJ): No longer removes newlines 1138 * 1139 * @param is <code>InputStream</code> to be filtered. 1140 * 1141 * @return Reader value without the <!DOCTYPE> tag. 1142 * 1143 * @throws IOException If any I/O error occurs. 1144 */ hack(InputStream is)1145 private static Reader hack(InputStream is) throws IOException { 1146 1147 BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8")); 1148 StringBuffer buffer = new StringBuffer(); 1149 1150 String str = null; 1151 1152 while ((str = br.readLine()) != null) { 1153 1154 int sIndex = str.indexOf("<!DOCTYPE"); 1155 1156 if (sIndex > -1) { 1157 1158 buffer.append(str.substring(0, sIndex)); 1159 1160 int eIndex = str.indexOf('>', sIndex + 8 ); 1161 1162 if (eIndex > -1) { 1163 1164 buffer.append(str.substring(eIndex + 1, str.length())); 1165 // FIX (HJ): Preserve the newline 1166 buffer.append("\n"); 1167 1168 } else { 1169 1170 // FIX (HJ): More than one line. Search for '>' in following lines 1171 boolean bOK = false; 1172 while ((str = br.readLine())!=null) { 1173 eIndex = str.indexOf('>'); 1174 if (eIndex>-1) { 1175 buffer.append(str.substring(eIndex+1)); 1176 // FIX (HJ): Preserve the newline 1177 buffer.append("\n"); 1178 bOK = true; 1179 break; 1180 } 1181 } 1182 1183 if (!bOK) { throw new IOException("Invalid XML"); } 1184 } 1185 1186 } else { 1187 1188 buffer.append(str); 1189 // FIX (HJ): Preserve the newline 1190 buffer.append("\n"); 1191 } 1192 } 1193 1194 StringReader r = new StringReader(buffer.toString()); 1195 return r; 1196 } 1197 1198 /** 1199 * <p>Transform the InputStream to a Reader Stream.</p> 1200 * 1201 * <p>This hacked code needs to be changed later on.</p> 1202 * 1203 * <p>Issue: the new oasis input file stream means 1204 * that the old input stream fails. see #i33702# </p> 1205 * 1206 * @param is <code>InputStream</code> to be filtered. 1207 * 1208 * @return Reader value of the InputStream(). 1209 * 1210 * @throws IOException If any I/O error occurs. 1211 */ secondHack(InputStream is)1212 private static Reader secondHack(InputStream is) throws IOException { 1213 1214 BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8")); 1215 char[] charArray = new char[4096]; 1216 StringBuffer sBuf = new StringBuffer(); 1217 int n = 0; 1218 while ((n=br.read(charArray, 0, charArray.length)) > 0) 1219 sBuf.append(charArray, 0, n); 1220 1221 // ensure there is no trailing garbage after the end of the stream. 1222 int sIndex = sBuf.lastIndexOf("</office:document>"); 1223 sBuf.delete(sIndex, sBuf.length()); 1224 sBuf.append("</office:document>"); 1225 StringReader r = new StringReader(sBuf.toString()); 1226 return r; 1227 } 1228 1229 1230 /** 1231 * Method to create the initial entries in the manifest.xml file stored 1232 * in an SX? file. 1233 */ initManifestDOM()1234 private void initManifestDOM() throws IOException { 1235 1236 try { 1237 DocumentBuilder builder = factory.newDocumentBuilder(); 1238 DOMImplementation domImpl = builder.getDOMImplementation(); 1239 1240 DocumentType docType = domImpl.createDocumentType(TAG_MANIFEST_ROOT, 1241 "-//OpenOffice.org//DTD Manifest 1.0//EN", 1242 "Manifest.dtd"); 1243 manifestDoc = domImpl.createDocument("manifest", TAG_MANIFEST_ROOT, docType); 1244 } catch (ParserConfigurationException ex) { 1245 throw new OfficeDocumentException(ex); 1246 } 1247 1248 // Add the <manifest:manifest> entry 1249 Element manifestRoot = manifestDoc.getDocumentElement(); 1250 1251 manifestRoot.setAttribute("xmlns:manifest", "http://openoffice.org/2001/manifest"); 1252 1253 Element docRoot = manifestDoc.createElement(TAG_MANIFEST_FILE); 1254 1255 docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "/"); 1256 docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, getDocumentMimeType()); 1257 1258 manifestRoot.appendChild(docRoot); 1259 } 1260 } 1261 1262