1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;
25 
26 import java.io.ByteArrayInputStream;
27 import java.io.DataInputStream;
28 import java.io.IOException;
29 
30 import org.openoffice.xmerge.converter.palm.Record;
31 import org.openoffice.xmerge.util.Resources;
32 import org.openoffice.xmerge.util.Debug;
33 
34 /**
35  *  This class is used by {@link DocumentDeserializerImpl}
36  *  to decode the AportisDoc format.  It currently decodes
37  *  the text content into a single <code>String</code> object.
38  *
39  *  @author   Herbie Ong
40  */
41 final class DocDecoder implements DocConstants {
42 
43     /** For decoding purposes. */
44     private final static int COUNT_BITS = 3;
45 
46     /** Resources object for I18N. */
47     private Resources res = null;
48 
49 
50     /**
51      *  Default constructor creates a header and a text buffer
52      *  for holding all the text in the AportisDoc database.
53      */
DocDecoder()54     DocDecoder() {
55         res = Resources.getInstance();
56     }
57 
58 
59     /**
60      *  Decode the text records into a single <code>String</code>
61      *  of text content.
62      *
63      *  @param  recs  <code>Record</code> array holding AportisDoc
64      *                  contents.
65      *
66      *  @throws  IOException  If any I/O error occurs.
67      */
parseRecords(Record[] recs)68     String parseRecords(Record[] recs) throws IOException {
69 
70         // read the header record
71         HeaderInfo header = readHeader(recs[0].getBytes());
72 
73         dumpHeader(header);
74 
75         // store all the characters in textBuffer
76         StringBuffer textBuffer = new StringBuffer(header.textLen);
77 
78         switch (header.version) {
79 
80             case COMPRESSED:
81                 for (int i = 1; i <= header.textRecordCount; i++) {
82 
83                     byte[] bytes = decompress(recs[i].getBytes(),
84                                               header.textRecordSize);
85                     log("processing " + bytes.length + " bytes");
86                     String str = new String(bytes, ENCODING);
87                     textBuffer.append(str);
88                 }
89 
90                 break;
91 
92             case UNCOMPRESSED:
93                 for (int i = 1; i <= header.textRecordCount; i++) {
94 
95                     byte[] bytes = recs[i].getBytes();
96                     log("processing " + bytes.length + " bytes");
97                     String str = new String(bytes, ENCODING);
98                     textBuffer.append(str);
99                 }
100 
101                 break;
102 
103             default:
104                 throw new IOException(res.getString("UNKNOWN_DOC_VERSION"));
105 
106         }
107 
108         return textBuffer.toString();
109     }
110 
111 
112     /**
113      *  <p>Decompress the <code>byte</code> array.</p>
114      *
115      *  <p>The resulting uncompressed <code>byte</code> array should
116      *  be within <code>textRecordSize</code> length, definitely
117      *  within twice the size it claims, else treat it as a problem
118      *  with the encoding of that PDB and throw
119      *  <code>IOException</code>.</p>
120      *
121      *  @param  cBytes           Compressed <code>byte</code> array.
122      *  @param  textRecordSize  Size of uncompressed
123      *                          <code>byte</code> array.
124      *
125      *  @throws  IOException  If <code>textRecordSize</code> &lt;
126      *                        <code>cBytes.length</code>.
127      */
decompress(byte[] cBytes, int textRecordSize)128     private byte[] decompress(byte[] cBytes, int textRecordSize)
129         throws IOException {
130 
131         // create byte array for storing uncompressed bytes
132         // it should be within textRecordSize range, definitely
133         // within twice of textRecordSize!  if not, then
134         // an ArrayIndexOutOfBoundsException will get thrown,
135         // and it should be converted into an IOException, and
136         // treat it as a conversion error.
137         byte[] uBytes = new byte[textRecordSize*2];
138 
139         int up = 0;
140         int cp = 0;
141 
142         try {
143 
144             while (cp < cBytes.length) {
145 
146                 int c = cBytes[cp++] & 0xff;
147 
148                 // codes 1...8 mean copy that many bytes
149                 if (c > 0 && c < 9) {
150 
151                     while (c-- > 0)
152                         uBytes[up++] = cBytes[cp++];
153                 }
154 
155                 // codes 0, 9...0x7F represent themselves
156                 else if (c < 0x80) {
157                     uBytes[up++] = (byte) c;
158                 }
159 
160                 // codes 0xC0...0xFF represent "space + ascii char"
161                 else if (c >= 0xC0) {
162                     uBytes[up++] = (byte) ' ';
163                     uBytes[up++] = (byte) (c ^ 0x80);
164                 }
165 
166                 // codes 0x80...0xBf represent sequences
167                 else {
168                     c <<= 8;
169                     c += cBytes[cp++] & 0xff;
170                     int m = (c & 0x3fff) >> COUNT_BITS;
171                     int n = c & ((1 << COUNT_BITS) - 1);
172                     n += COUNT_BITS;
173                     while (n-- > 0) {
174                         uBytes[up] = uBytes[up - m];
175                         up++;
176                     }
177                 }
178             }
179 
180         } catch (ArrayIndexOutOfBoundsException e) {
181 
182             throw new IOException(
183                 res.getString("DOC_TEXT_RECORD_SIZE_EXCEEDED"));
184         }
185 
186         // note that ubytes may be larger that the amount of
187         // uncompressed bytes, so trim it to another byte array
188         // with the exact size.
189         byte[] textBytes = new byte[up];
190         System.arraycopy(uBytes, 0, textBytes, 0, up);
191 
192         return textBytes;
193     }
194 
195 
196     /**
197      *  Read the header <code>byte</code> array.
198      *
199      *  @param  bytes  <code>byte</code> array containing header
200      *                 record data.
201      *
202      *  @return  <code>HeaderInfo</code> object.
203      *
204      *  @throws  IOException  If any I/O error occurs.
205      */
readHeader(byte[] bytes)206     private HeaderInfo readHeader(byte[] bytes) throws IOException {
207 
208         HeaderInfo header = new HeaderInfo();
209 
210         ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
211         DataInputStream dis = new DataInputStream(bis);
212 
213         // Normally the first 2 bytes comprised of the version
214         // which should either be COMPRESSED or UNCOMPRESSED
215         // SmartDoc/Quickword would add a 0x01 to the first
216         // byte, thus their version would be 0x0101 for UNCOMPRESSED
217         // instead of 0x0001 and 0x0102 for UNCOMPRESSED instead of
218         // 0x0002.
219 
220         dis.readByte();
221         header.version = dis.readByte();
222 
223         // read extra 2 unused bytes
224         dis.readShort();
225 
226         // Read the text length, this should be unsigned 4 bytes.
227         // We could store the read value into a long, but then
228         // our current buffer limit is the max positive of an int.
229         // That is a large enough limit, thus we shall stay with
230         // storing the value in an int.  If it exceeds, then
231         // an IOException should be thrown.
232         header.textLen = dis.readInt();
233         if (header.textLen < 0) {
234             throw new IOException(res.getString("DOC_TEXT_LENGTH_EXCEEDED"));
235         }
236 
237         // read the number of records - unsigned 2 bytes
238         header.textRecordCount = ((int) dis.readShort()) & 0x0000ffff;
239 
240         // read the record size - unsigned 2 bytes
241         header.textRecordSize = ((int) dis.readShort()) & 0x0000ffff;
242 
243         // read extra 4 unused bytes
244         dis.readInt();
245 
246         return header;
247     }
248 
249 
250     /**
251      *  Prints out header info into log. Used for debugging purposes only.
252      *
253      *  @param  header  <code>HeaderInfo</code> structure.
254      */
dumpHeader(HeaderInfo header)255     private void dumpHeader(HeaderInfo header) {
256 
257         log("<DOC_INFO ");
258         log("version=\"" + header.version + "\" ");
259         log("text-length=\"" + header.textLen + "\" ");
260         log("number-of-records=\"" + header.textRecordCount + "\" ");
261         log("record-size=\"" + header.textRecordSize  + "\" />");
262     }
263 
264 
265     /**
266      *  Sends message to the log object.
267      *
268      *  @param  str  Debug string message.
269      */
log(String str)270     private void log(String str) {
271         Debug.log(Debug.TRACE, str);
272     }
273 
274 
275     /**
276      *  Inner class to store AportisDoc header information.
277      */
278     private class HeaderInfo {
279 
280         /** length of text section */
281         int textLen = 0;
282 
283         /** number of text records */
284         int textRecordCount = 0;
285 
286         /**
287          *  size of a text record.  This is normally the same as
288          *  TEXT_RECORD_SIZE, but some applications may modify this.
289          */
290         int textRecordSize = 0;
291 
292         /** compression type */
293         int version = 0;
294     }
295 }
296 
297