1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.converter.xml.sxw.wordsmith; 25 26 import org.w3c.dom.NodeList; 27 import org.w3c.dom.Node; 28 import org.w3c.dom.NamedNodeMap; 29 import org.w3c.dom.Element; 30 31 import java.io.IOException; 32 33 import org.openoffice.xmerge.Document; 34 import org.openoffice.xmerge.ConvertData; 35 import org.openoffice.xmerge.ConvertException; 36 import org.openoffice.xmerge.DocumentSerializer; 37 import org.openoffice.xmerge.ConverterCapabilities; 38 import org.openoffice.xmerge.converter.xml.OfficeConstants; 39 import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; 40 import org.openoffice.xmerge.converter.palm.PalmDB; 41 import org.openoffice.xmerge.converter.palm.PdbEncoder; 42 import org.openoffice.xmerge.converter.palm.Record; 43 import org.openoffice.xmerge.converter.palm.PdbUtil; 44 import org.openoffice.xmerge.converter.palm.PalmDocument; 45 import org.openoffice.xmerge.converter.xml.OfficeDocument; 46 import org.openoffice.xmerge.util.*; 47 import org.openoffice.xmerge.converter.xml.*; 48 49 /** 50 * <p>WordSmith implementation of 51 * org.openoffice.xmerge.DocumentSerializer 52 * for the {@link 53 * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl 54 * PluginFactoryImpl}.</p> 55 * 56 * <p>The <code>serialize</code> method traverses the DOM 57 * document from the given <code>Document</code> object. It uses a 58 * <code>DocEncoder</code> object for the actual conversion of 59 * contents to the WordSmith format.</p> 60 * 61 * @author Herbie Ong, David Proulx 62 */ 63 64 // DJP: take out "implements OfficeConstants" 65 public final class DocumentSerializerImpl 66 implements OfficeConstants, DocumentSerializer { 67 68 /** A WSEncoder object for encoding to WordSmith. */ 69 private WSEncoder encoder = null; 70 71 /** The <code>StyleCatalog</code>. */ 72 private StyleCatalog styleCat = null; 73 74 private WseFontTable fontTable = new WseFontTable(); 75 private WseColorTable colorTable = new WseColorTable(); 76 77 /** 78 * The <code>SxwDocument</code> object that this converter 79 * processes. 80 */ 81 private SxwDocument sxwDoc = null; 82 83 /** 84 * Constructor. 85 * 86 * @param doc The <code>Document</code> to convert. 87 */ DocumentSerializerImpl(Document doc)88 public DocumentSerializerImpl(Document doc) { 89 sxwDoc = (SxwDocument) doc; 90 } 91 92 93 /** 94 * <p>Method to convert a <code>Document</code> into a 95 * <code>PalmDocument</code>.</p> 96 * 97 * <p>This method is not thread safe for performance reasons. 98 * This method should not be called from within two threads. 99 * It would be best to call this method only once per object 100 * instance.</p> 101 * 102 * <p>Note that the doc parameter needs to be an XML 103 * <code>Document</code>, else this method will throw a 104 * <code>ClassCastException</code>. I think this is a hack, 105 * but this is the only way to not modify most of the existing 106 * code right now.</p> 107 * 108 * @param doc Input should be an XML <code>Document</code> 109 * object 110 * @param os Output of <code>PalmDB</code> object 111 * 112 * @throws ConvertException If any conversion error occurs. 113 * @throws IOException If any I/O error occurs. 114 */ serialize()115 public ConvertData serialize() 116 throws IOException { 117 118 119 // get the server document name 120 String docName = sxwDoc.getName(); 121 122 // get DOM document 123 org.w3c.dom.Document domDoc = sxwDoc.getContentDOM(); 124 125 // Create WordSmith encoder object. Add WordSmith header, 126 // empty font table to it. 127 encoder = new WSEncoder(); 128 encoder.addElement(fontTable); 129 encoder.addElement(colorTable); 130 131 // Read the styles into the style catalog 132 String families[] = new String[3]; 133 families[0] = "text"; 134 families[1] = "paragraph"; 135 families[2] = "paragraph"; 136 Class classes[] = new Class[3]; 137 classes[0] = TextStyle.class; 138 classes[1] = ParaStyle.class; 139 classes[2] = TextStyle.class; 140 styleCat = new StyleCatalog(25); 141 142 // Parse the input document 143 // DJP todo: eliminate multiple calls to add() when it can 144 // recurse properly. 145 NodeList nl = domDoc.getElementsByTagName(TAG_OFFICE_STYLES); 146 styleCat.add(nl.item(0), families, classes, null, false); 147 nl = domDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); 148 styleCat.add(nl.item(0), families, classes, null, false); 149 nl = domDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES); 150 styleCat.add(nl.item(0), families, classes, null, false); 151 152 // Traverse to the office:body element. 153 // There should only be one. 154 NodeList list = domDoc.getElementsByTagName(TAG_OFFICE_BODY); 155 int len = list.getLength(); 156 if (len > 0) { 157 Node node = list.item(0); 158 traverseBody(node); 159 } 160 161 // create a PalmDB object and ConvertData object. 162 // 163 Record records[] = encoder.getRecords(); 164 165 ConvertData cd = new ConvertData(); 166 PalmDocument palmDoc = new PalmDocument(docName, 167 PdbUtil.intID("WrdS"), PdbUtil.intID("BDOC"), 0, 168 PalmDB.PDB_HEADER_ATTR_BACKUP, records); 169 cd.addDocument(palmDoc); 170 return cd; 171 } 172 173 174 /** 175 * This method traverses <i>office:body</i> element. 176 * 177 * @param node <i>office:body</i> <code>Node</code>. 178 * 179 * @throws IOException If any I/O error occurs. 180 */ traverseBody(Node node)181 private void traverseBody(Node node) throws IOException { 182 183 if (node.hasChildNodes()) { 184 185 NodeList nodeList = node.getChildNodes(); 186 int len = nodeList.getLength(); 187 188 for (int i = 0; i < len; i++) { 189 190 Node child = nodeList.item(i); 191 192 if (child.getNodeType() == Node.ELEMENT_NODE) { 193 String nodeName = child.getNodeName(); 194 195 if (nodeName.equals(TAG_PARAGRAPH) || 196 nodeName.equals(TAG_HEADING)) { 197 198 traverseParagraph(child); 199 200 } else if (nodeName.equals(TAG_UNORDERED_LIST)) { 201 202 traverseList(child); 203 204 } else if (nodeName.equals(TAG_ORDERED_LIST)) { 205 206 traverseList(child); 207 208 } else { 209 210 Debug.log(Debug.INFO, "<OTHERS " /* + XmlDebug.nodeInfo(child) */ + " />"); 211 } 212 } 213 } 214 } 215 216 } 217 218 219 /** 220 * This method traverses the <i>text:p</i> and <i>text:h</i> 221 * element <code>Node</code> objects. 222 * 223 * @param node A <i>text:p</i> or <i>text:h</i> <code>Node</code>. 224 * 225 * @throws IOException If any I/O error occurs. 226 */ traverseParagraph(Node node)227 private void traverseParagraph(Node node) throws IOException { 228 229 String styleName = findAttribute(node, "text:style-name"); 230 ParaStyle pstyle = (ParaStyle)styleCat.lookup(styleName, "paragraph", 231 null, ParaStyle.class); 232 233 // If the style does not exist in the style catalog for some reason, 234 // make up a default style and use it. We'll have to add this default 235 // style to the style catalog the first time it is used. 236 if (pstyle == null) { 237 styleName = "CONVERTER-DEFAULT"; 238 pstyle = (ParaStyle)styleCat.lookup(styleName, "paragraph", null, 239 ParaStyle.class); 240 if (pstyle == null) { 241 pstyle = new ParaStyle(styleName, "paragraph", null, 242 (String [])null, null, styleCat); 243 styleCat.add(pstyle); 244 styleCat.add(new TextStyle(styleName, "paragraph", null, 245 0, 0, 12, "Times-Roman", styleCat)); 246 } 247 } 248 249 pstyle = (ParaStyle)pstyle.getResolved(); 250 encoder.addElement(new WsePara(pstyle, styleCat)); 251 TextStyle defParaTextStyle = (TextStyle) 252 styleCat.lookup(styleName, "paragraph", null, TextStyle.class); 253 254 traverseParaContents(node, defParaTextStyle); 255 } 256 257 258 /** 259 * This method traverses a paragraph content. Note that this 260 * method may recurse to call itself. 261 * 262 * @param node A paragraph or content <code>Node</code> 263 */ traverseParaContents(Node node, TextStyle defTextStyle)264 private void traverseParaContents(Node node, TextStyle defTextStyle) { 265 266 String styleName = findAttribute(node, "text:style-name"); 267 TextStyle style = (TextStyle) 268 styleCat.lookup(styleName, "text", null, TextStyle.class); 269 270 if (node.hasChildNodes()) { 271 NodeList nodeList = node.getChildNodes(); 272 int nChildren = nodeList.getLength(); 273 274 for (int i = 0; i < nChildren; i++) { 275 Node child = nodeList.item(i); 276 277 if (child.getNodeType() == Node.TEXT_NODE) { 278 279 // this is for grabbing text nodes. 280 String s = child.getNodeValue(); 281 282 if (s.length() > 0) { 283 if (style != null) 284 encoder.addElement(new WseTextRun(s, style, styleCat, 285 fontTable, colorTable)); 286 else 287 encoder.addElement(new WseTextRun(s, defTextStyle, 288 styleCat, fontTable, colorTable)); 289 } 290 291 } else if (child.getNodeType() == Node.ELEMENT_NODE) { 292 293 String childNodeName = child.getNodeName(); 294 295 if (childNodeName.equals(TAG_SPACE)) { 296 297 // this is for text:s tags. 298 NamedNodeMap map = child.getAttributes(); 299 Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT); 300 StringBuffer space = new StringBuffer(" "); 301 int count = 1; 302 303 if (attr != null) { 304 try { 305 String countStr = attr.getNodeValue(); 306 count = Integer.parseInt(countStr.trim()); 307 } catch (NumberFormatException e) { 308 Debug.log(Debug.ERROR, "Problem parsing space tag", e); 309 } 310 } 311 312 for (int j = 1; j < count; j++) 313 space.append(" "); 314 315 encoder.addElement(new WseTextRun(space.toString(), 316 defTextStyle, 317 styleCat, fontTable, colorTable)); 318 Debug.log(Debug.INFO, "<SPACE count=\"" + count + "\" />"); 319 320 } else if (childNodeName.equals(TAG_TAB_STOP)) { 321 322 // this is for text:tab-stop 323 encoder.addElement(new WseTextRun("\t", defTextStyle, styleCat, 324 fontTable, colorTable)); 325 326 Debug.log(Debug.INFO, "<TAB/>"); 327 328 } else if (childNodeName.equals(TAG_LINE_BREAK)) { 329 330 // this is for text:line-break 331 encoder.addElement(new WseTextRun("\n", defTextStyle, 332 styleCat, fontTable, colorTable)); 333 334 Debug.log(Debug.INFO, "<LINE-BREAK/>"); 335 336 } else if (childNodeName.equals(TAG_SPAN)) { 337 338 // this is for text:span 339 Debug.log(Debug.INFO, "<SPAN>"); 340 traverseParaContents(child, defTextStyle); 341 Debug.log(Debug.INFO, "</SPAN>"); 342 343 } else if (childNodeName.equals(TAG_HYPERLINK)) { 344 345 // this is for text:a 346 Debug.log(Debug.INFO, "<HYPERLINK>"); 347 traverseParaContents(child, defTextStyle); 348 Debug.log(Debug.INFO, "<HYPERLINK/>"); 349 350 } else if (childNodeName.equals(TAG_BOOKMARK) || 351 childNodeName.equals(TAG_BOOKMARK_START)) { 352 353 Debug.log(Debug.INFO, "<BOOKMARK/>"); 354 355 } else { 356 357 Debug.log(Debug.INFO, "<OTHERS " /* + XmlDebug.nodeInfo(child) */ + " />"); 358 } 359 360 } 361 362 } 363 } 364 } 365 366 367 /** 368 * This method traverses list tags <i>text:unordered-list</i> and 369 * <i>text:ordered-list</i>. A list can only contain one optional 370 * <i>text:list-header</i> and one or more <i>text:list-item</i> 371 * elements. 372 * 373 * @param node A list <code>Node</code>. 374 * 375 * @throws IOException If any I/O error occurs. 376 */ traverseList(Node node)377 private void traverseList(Node node) throws IOException { 378 379 Debug.log(Debug.TRACE, "<LIST>"); 380 381 if (node.hasChildNodes()) { 382 383 NodeList nodeList = node.getChildNodes(); 384 int len = nodeList.getLength(); 385 386 for (int i = 0; i < len; i++) { 387 388 Node child = nodeList.item(i); 389 390 if (child.getNodeType() == Node.ELEMENT_NODE) { 391 392 String nodeName = child.getNodeName(); 393 394 if (nodeName.equals(TAG_LIST_ITEM)) { 395 396 traverseListItem(child); 397 398 } else if (nodeName.equals(TAG_LIST_HEADER)) { 399 400 traverseListHeader(child); 401 402 } else { 403 404 Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />"); 405 } 406 } 407 } 408 } 409 410 Debug.log(Debug.TRACE, "</LIST>"); 411 } 412 413 414 /** 415 * This method traverses a <i>text:list-header</i> element. 416 * It contains one or more <i>text:p</i> elements. 417 * 418 * @param node A list header <code>Node</code>. 419 * 420 * @throws IOException If any I/O error occurs. 421 */ traverseListHeader(Node node)422 private void traverseListHeader(Node node) throws IOException { 423 424 Debug.log(Debug.TRACE, "<LIST-HEADER>"); 425 426 if (node.hasChildNodes()) { 427 428 NodeList nodeList = node.getChildNodes(); 429 int len = nodeList.getLength(); 430 431 for (int i = 0; i < len; i++) { 432 433 Node child = nodeList.item(i); 434 435 if (child.getNodeType() == Node.ELEMENT_NODE) { 436 437 String nodeName = child.getNodeName(); 438 439 if (nodeName.equals(TAG_PARAGRAPH)) { 440 441 traverseParagraph(child); 442 443 } else { 444 445 Debug.log(Debug.TRACE, "<INVALID-XML-BUG " + " />"); 446 } 447 } 448 } 449 } 450 451 Debug.log(Debug.TRACE, "</LIST-HEADER>"); 452 } 453 454 455 /** 456 * This method will traverse a <i>text:list-item</i>. 457 * A list item may contain one or more of <i>text:p</i>, 458 * <i>text:h</i>, <i>text:section</i>, 459 * <i>text:ordered-list</i> and <i>text:unordered-list</i>. 460 * 461 * This method currently only implements grabbing <i>text:p</i>, 462 * <i>text:h</i>, <i>text:unordered-list</i> and 463 * <i>text:ordered-list</i>. 464 * 465 * @param node <code>Node</code> to traverse. 466 * 467 * @throws IOException If any I/O error occurs. 468 */ traverseListItem(Node node)469 private void traverseListItem(Node node) throws IOException { 470 471 Debug.log(Debug.TRACE, "<LIST-ITEM>"); 472 473 if (node.hasChildNodes()) { 474 475 NodeList nodeList = node.getChildNodes(); 476 int len = nodeList.getLength(); 477 478 for (int i = 0; i < len; i++) { 479 480 Node child = nodeList.item(i); 481 482 if (child.getNodeType() == Node.ELEMENT_NODE) { 483 484 String nodeName = child.getNodeName(); 485 486 if (nodeName.equals(TAG_PARAGRAPH)) { 487 488 traverseParagraph(child); 489 490 } else if (nodeName.equals(TAG_UNORDERED_LIST)) { 491 492 traverseList(child); 493 494 } else if (nodeName.equals(TAG_ORDERED_LIST)) { 495 496 traverseList(child); 497 498 } else { 499 500 Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />"); 501 } 502 } 503 } 504 } 505 506 Debug.log(Debug.TRACE, "</LIST-ITEM>"); 507 } 508 509 510 /** 511 * Look up a <code>Node</code> object's named attribute and return 512 * its value 513 * 514 * @param node The <code>Node</code>. 515 * @param name The attribute name. 516 * 517 * @return The value of the named attribute 518 */ findAttribute(Node node, String name)519 private String findAttribute(Node node, String name) { 520 NamedNodeMap attrNodes = node.getAttributes(); 521 if (attrNodes != null) { 522 int len = attrNodes.getLength(); 523 for (int i = 0; i < len; i++) { 524 Node attr = attrNodes.item(i); 525 if (attr.getNodeName().equals(name)) 526 return attr.getNodeValue(); 527 } 528 } 529 return null; 530 } 531 } 532 533