1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.merger.diff;
25 
26 import org.w3c.dom.Node;
27 
28 import org.openoffice.xmerge.converter.xml.OfficeConstants;
29 
30 import java.util.Vector;
31 import java.util.List;
32 
33 
34 /**
35  *  <p>This is a parser to return a character array for difference purpose.
36  *  It will use depth first search to traverse all the characters inside the
37  *  text <code>Node</code> under a given <code>Node</code> (most likely to be
38  *  a paragraph <code>Node</code>).</p>
39  *
40  *  <p>Note: Once the XML Tree is parsed, then the <code>Iterator</code> will be
41  *  a snap shot of  that tree.  That means even the tree is modified later, than
42  *  the cached paragraph <code>Node</code> list will not be updated accordingly.
43  *  For this reason and for performance reasons this <code>Iterator</code> does
44  *  not support any operation methods such as insert, remove or replace.  The
45  *  main purpose of this <code>Iterator</code> is to be used with difference,
46  *  not with merge.</p>
47  *
48  *  @author smak
49  */
50 public class CharacterParser {
51 
52     private TextNodeIterator textNodes;
53     private int currentPosition = 0;
54     private List nodeList_ = null;
55     private char[] charArray;
56 
57 
58     /**
59      *  Standard constructor.
60      *
61      *  @param  node  The initial root <code>Node</code>.
62      */
CharacterParser(Node node)63     public CharacterParser(Node node) {
64         textNodes = new TextNodeIterator(node);
65         nodeList_ = new Vector();
66 
67         parseNodes();
68     }
69 
70 
71     /**
72      *  Returns the <code>Node</code> pointer with the given character position.
73      *
74      *  @return  The <code>Node</code> pointer with the given character position.
75      */
getNodeList()76     public List getNodeList() {
77         // will go through the nodeList to find the corresponding node
78         return  nodeList_;
79     }
80 
81     /**
82      *  Returns the character array representation of the text.
83      *
84      *  @return  The character array representation of the text.
85      */
getCharArray()86     public char[] getCharArray() {
87         return charArray;
88     }
89 
parseNodes()90     private void parseNodes() {
91 
92         StringBuffer strBuf = new StringBuffer();
93 
94         /* create the character array by iterate the textnode iterator */
95         Node currentNode = (Node)(textNodes.start());
96         for (;
97              currentNode != null;
98              currentNode = (Node)(textNodes.next())) {
99 
100             // add the text value into the array
101             String textValue  = null;
102             String nodeName = currentNode.getNodeName();
103 
104             // TODO: Space node have a count attribute which is not handled!
105             if (currentNode.getNodeType() == Node.TEXT_NODE) {
106                 textValue = currentNode.getNodeValue();
107             } else if (nodeName.equals(OfficeConstants.TAG_SPACE)) {
108                 textValue = " ";
109             } else if (nodeName.equals(OfficeConstants.TAG_TAB_STOP)) {
110                 textValue = "\t";
111             }
112 
113             if (textValue != null) {
114                 strBuf.append(textValue);
115                 addNewNodeEntry(textValue.length(), currentNode);
116             }
117         }
118 
119         charArray = strBuf.toString().toCharArray();
120     }
121 
122 
123     /**
124      *  Adds a new <code>Node</code> entry.
125      *
126      *  @param  textLen  The text length.
127      *  @param  node     The <code>Node</code>.
128      */
addNewNodeEntry(int textLen, Node node)129     private void addNewNodeEntry(int textLen, Node node) {
130 
131         TextNodeEntry nodeEntry = new TextNodeEntry(currentPosition,
132                                       currentPosition + textLen - 1, node);
133         currentPosition     = currentPosition + textLen;
134 
135         nodeList_.add(nodeEntry);
136     }
137 }
138 
139