/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/



package org.openoffice.xmerge.merger.diff;

import org.w3c.dom.Node;

import org.openoffice.xmerge.converter.xml.OfficeConstants;

import java.util.Vector;
import java.util.List;


/**
 *  <p>Parser that flattens the text found under a given <code>Node</code>
 *  (most likely a paragraph <code>Node</code>) into a single character
 *  array, for use by a difference algorithm.  The nodes are visited in the
 *  order supplied by a <code>TextNodeIterator</code>.</p>
 *
 *  <p>Note: the character array and the accompanying list of node entries
 *  are built once, in the constructor, and are never refreshed.  If the
 *  XML tree is modified afterwards, this parser keeps reporting the state
 *  it saw at construction time.  No insert, remove or replace operations
 *  are offered; the class is meant to be used for computing differences,
 *  not for merging.</p>
 *
 *  @author smak
 */
public class CharacterParser {

    private TextNodeIterator textNodes;
    private int currentPosition = 0;
    private List nodeList_ = null;
    private char[] charArray;


    /**
     *  Builds the parser and immediately scans the text under the
     *  given <code>Node</code>.
     *
     *  @param  node  The initial root <code>Node</code>.
     */
    public CharacterParser(Node node) {
        textNodes = new TextNodeIterator(node);
        nodeList_ = new Vector();

        parseNodes();
    }


    /**
     *  Returns the list of <code>TextNodeEntry</code> objects recorded
     *  while scanning.  There is one entry per <code>Node</code> that
     *  contributed text, in the order the nodes were visited.  The
     *  internal list itself is returned, not a copy.
     *
     *  @return  The <code>List</code> of <code>TextNodeEntry</code>
     *           objects.
     */
    public List getNodeList() {
        return nodeList_;
    }


    /**
     *  Returns the character array representation of the text.
     *
     *  @return  The character array representation of the text.
     */
    public char[] getCharArray() {
        return charArray;
    }


    /**
     *  Walks every <code>Node</code> supplied by the text node iterator,
     *  concatenates the text each one stands for, and records a
     *  <code>TextNodeEntry</code> for each contributing
     *  <code>Node</code>.  The result is stored in the character array.
     */
    private void parseNodes() {

        StringBuffer collected = new StringBuffer();

        Node visited = (Node) textNodes.start();
        while (visited != null) {

            String text = textFor(visited);

            // nodes that stand for no text are skipped entirely
            if (text != null) {
                collected.append(text);
                addNewNodeEntry(text.length(), visited);
            }

            visited = (Node) textNodes.next();
        }

        charArray = collected.toString().toCharArray();
    }


    /**
     *  Maps a <code>Node</code> to the text it stands for.
     *
     *  @param  node  The <code>Node</code> to inspect.
     *
     *  @return  The node value for a text <code>Node</code>, a single
     *           blank for a space tag, a tab character for a tab stop
     *           tag, or <code>null</code> for any other
     *           <code>Node</code>.
     */
    private static String textFor(Node node) {

        String name = node.getNodeName();

        if (node.getNodeType() == Node.TEXT_NODE) {
            return node.getNodeValue();
        }

        // TODO: Space node have a count attribute which is not handled!
        if (name.equals(OfficeConstants.TAG_SPACE)) {
            return " ";
        }

        if (name.equals(OfficeConstants.TAG_TAB_STOP)) {
            return "\t";
        }

        return null;
    }


    /**
     *  Appends a new <code>TextNodeEntry</code> for the given
     *  <code>Node</code> and advances the running character position
     *  by the length of its text.
     *
     *  @param  textLen  The text length.
     *  @param  node     The <code>Node</code>.
     */
    private void addNewNodeEntry(int textLen, Node node) {

        int firstOffset = currentPosition;
        int lastOffset = firstOffset + textLen - 1;   // inclusive

        nodeList_.add(new TextNodeEntry(firstOffset, lastOffset, node));

        currentPosition = firstOffset + textLen;
    }
}