1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package org.openoffice.xmerge.converter.xml.sxw.pocketword;
25 
26 import org.openoffice.xmerge.Document;
27 import org.openoffice.xmerge.converter.xml.ParaStyle;
28 import org.openoffice.xmerge.converter.xml.TextStyle;
29 
30 import java.io.InputStream;
31 import java.io.IOException;
32 import java.io.OutputStream;
33 import java.io.ByteArrayOutputStream;
34 import java.io.DataOutputStream;
35 
36 import java.util.Enumeration;
37 import java.util.Vector;
38 
39 
40 /**
41  * <p>Class representing a Pocket Word Document.</p>
42  *
43  * <p><code>PocketWordDocument</code> is used to create new Pocket Word documents
44  *    and to read existing data to allow for conversion to OpenOffice.org XML Writer
45  *    format.</p>
46  *
47  * @author  Mark Murnane
48  * @version 1.1
49  */
50 public class PocketWordDocument implements Document, PocketWordConstants {
51     private String      docName;
52 
53     private byte[] preamble;
54     private Vector fonts;
55     private DocumentDescriptor descriptor;
56     private Vector paragraphs;
57 
58     private ParaStyle   pStyle;
59     private Paragraph   currentPara;
60 
61     /*
62      * The trailer currently appears to be constant, but if its found to
63      * have a variable component, then this initialisation should be moved
64      * to an initTrailer() method.
65      *
66      * Padding is sometimes needed before the trailer to ensure the file
67      * ends on a 4-byte boundary, but this is handled in write().
68      */
69     private static final byte[] trailer = new byte[] { (byte)0x82, 0x00,
70                                                              0x09, 0x00,
71                                                              0x03, 0x00,
72                                                              (byte)0x82, 0x00,
73                                                              0x00, 0x00,
74                                                              0x00, 0x00,
75                                                              0x00, 0x00,
76                                                              0x00, 0x00,
77                                                              0x00, 0x00 };
78 
79 
80     /**
81      * <p>Constructs a new Pocket Word Document.</p>
82      *
83      * <p>This new document does notcontain any information.  Document data must
84      *    either be added using appropriate methods, or an existing file can be
85      *    {@link #read(InputStream) read} from an <code>InputStream</code>.</p>
86      *
87      * @param   name    The name of the <code>PocketWordDocument</code>.
88      */
PocketWordDocument(String name)89     public PocketWordDocument(String name) {
90 
91         docName = trimDocumentName(name);
92 
93         preamble   = new byte[52];
94         fonts      = new Vector(0, 1);
95         descriptor = new DocumentDescriptor();
96         paragraphs = new Vector(0, 1);
97     }
98 
99 
100     /**
101      * <p>This method reads <code>byte</code> data from the InputStream and
102      *    extracts font and paragraph data from the file.</p>
103      *
104      * @param   docData      InputStream containing a Pocket Word data file.
105      *
106      * @throws  IOException     In case of any I/O errors.
107      */
read(InputStream docData)108     public void read(InputStream docData) throws IOException {
109 
110         if (docData == null) {
111             throw new IOException ("No input stream to convert");
112         }
113 
114         // The preamble may become important for font declarations.
115         int readValue = docData.read(preamble);
116         // #i33702# check for an empty InputStream.
117         if(readValue == -1) {
118             System.err.println("Error:invalid input stream");
119             return;
120         }
121 
122         byte[] font = new byte[80];
123         int numfonts = 0;
124         do {
125             docData.read(font);
126 
127             String name = new String(font, 0, 64, "UTF-16LE");
128             fonts.add(name.trim());
129 
130         } while (!(font[76] == 5 && font[77] == 0
131                             && font[78] == 1 && font[79] == 0));
132 
133         /*
134          * TODO:  The document descriptor data that follows the fonts ends with
135          *        a variable section containing data for each of the paragraphs.
136          *        It may be possible to use this information to calculate staring
137          *        positions for each paragraph rather than iterating through the
138          *        entire byte stream.
139          */
140 
141         int value;
142         ByteArrayOutputStream bos = new ByteArrayOutputStream();
143         while ((value = docData.read()) != -1) {
144             bos.write(value);
145         }
146 
147 
148         byte[] contentData = bos.toByteArray();
149         int start = 0, end = 0;
150         boolean sawMarker = false;
151 
152         for (int i = 0; i < contentData.length; i += 4) {
153             if (contentData[i  + 2] == (byte)0xFF
154                         && contentData[i + 3] == (byte)0xFF && !sawMarker)  {
155                 start = i - 8;
156                 sawMarker = true;
157                 continue;
158             }
159 
160             if (contentData[i + 2] == (byte)0xFF
161                         && contentData[i + 3] == (byte)0xFF && sawMarker) {
162                 end = i - 8;
163                 ByteArrayOutputStream paragraph = new ByteArrayOutputStream();
164                 paragraph.write(contentData, start, end - start);
165                 paragraphs.add(new Paragraph(paragraph.toByteArray()));
166 
167                 // Reset the markers
168                 sawMarker = false;
169                 i -= 4;  // Skip back
170             }
171 
172         }
173 
174         /*
175          * Special case, the last paragraph
176          * If we got here, and the marker is set then we saw the start of the
177          * last paragraph, but no following paragraph
178          */
179         ByteArrayOutputStream paragraph = new ByteArrayOutputStream();
180         if (contentData[contentData.length - 19] == 0) {
181             paragraph.write(contentData, start, contentData.length - start - 20);
182         }
183         else {
184             paragraph.write(contentData, start, contentData.length - start - 18);
185         }
186         paragraphs.add(new Paragraph(paragraph.toByteArray()));
187     }
188 
189 
190     /*
191      * Utility method to make sure the document name is stripped of any file
192      * extensions before use.
193      */
trimDocumentName(String name)194     private String trimDocumentName(String name) {
195         String temp = name.toLowerCase();
196 
197         if (temp.endsWith(FILE_EXTENSION)) {
198             // strip the extension
199             int nlen = name.length();
200             int endIndex = nlen - FILE_EXTENSION.length();
201             name = name.substring(0,endIndex);
202         }
203 
204         return name;
205     }
206 
207 
208     /**
209      * <p>Method to provide access to all of the <code>Paragraph</code> objects
210      *    in the <code>Document</code>.</p>
211      *
212      * @return <code>Enumeration</code> over the paragraphs in the document.
213      */
getParagraphEnumeration()214     public Enumeration getParagraphEnumeration() {
215         return paragraphs.elements();
216     }
217 
218 
219     /**
220      * <p>Returns the <code>Document</code> name with no file extension.</p>
221      *
222      * @return  The <code>Document</code> name with no file extension.
223      */
getName()224     public String getName() {
225         return docName;
226     }
227 
228 
229     /**
230      * <p>Returns the <code>Document</code> name with file extension.</p>
231      *
232      * @return  The <code>Document</code> name with file extension.
233      */
getFileName()234     public String getFileName() {
235         return new String(docName + FILE_EXTENSION);
236     }
237 
238 
239     /**
240      * <p>Writes out the <code>Document</code> content to the specified
241      * <code>OutputStream</code>.</p>
242      *
243      * <p>This method may not be thread-safe.
244      * Implementations may or may not synchronize this
245      * method.  User code (i.e. caller) must make sure that
246      * calls to this method are thread-safe.</p>
247      *
248      * @param  os  <code>OutputStream</code> to write out the
249      *             <code>Document</code> content.
250      *
251      * @throws  IOException  If any I/O error occurs.
252      */
write(OutputStream os)253     public void write(OutputStream os) throws IOException {
254         DataOutputStream dos = new DataOutputStream(os);
255 
256         initPreamble();
257         dos.write(preamble);
258 
259         loadFonts();
260         for (int i = 0; i < fonts.size(); i++ ) {
261             ByteArrayOutputStream fontData = (ByteArrayOutputStream)fonts.elementAt(i);
262             dos.write(fontData.toByteArray());
263         }
264 
265 
266         for (int i = 0; i < paragraphs.size(); i++) {
267             Paragraph para = (Paragraph)paragraphs.elementAt(i);
268             descriptor.addParagraph((short)para.getTextLength(), para.getLines());
269         }
270         dos.write(descriptor.getDescriptor());
271 
272         for (int i = 0; i < paragraphs.size(); i++ ) {
273             Paragraph para = (Paragraph)paragraphs.elementAt(i);
274 
275             // Last paragraph has some extra data
276             if (i + 1 == paragraphs.size()) {
277                 para.setLastParagraph(true);
278             }
279             dos.write(para.getParagraphData());
280         }
281 
282 
283         /*
284          * Before we write out the trailer, we need to make sure that it will
285          * lead to the file ending on a 4 byte boundary.
286          */
287         if (dos.size() % 4 == 0) {
288             dos.write((byte)0x00);
289             dos.write((byte)0x00);
290         }
291 
292         dos.write(trailer);
293 
294         dos.flush();
295         dos.close();
296     }
297 
298 
299     /**
300      * <p>This method adds a new paragraph element to the document.  No string
301      *    data is added to the paragraph.</p>
302      *
303      * <p><b>N.B.</b> The newly added paragraph becomes the current paragraph and
304      *    is used as the target for all subsequent calls to addParagraphData().</p>
305      *
306      * @param   style       Paragraph Style object describing the formatting for
307      *                      the new paragraph.  Can be null.
308      * @param   listElement true if this paragraph is to be bulleted;
309      *                      false otherwise.
310      */
addParagraph(ParaStyle style, boolean listElement)311     public void addParagraph(ParaStyle style, boolean listElement)  {
312         /* For the moment, only support basic text entry in a single paragraph */
313         Paragraph para = new Paragraph(style);
314 
315         paragraphs.add(para);
316 
317         pStyle = style;
318         currentPara = para;
319 
320         if (listElement) {
321             para.setBullets(true);
322         }
323     }
324 
325 
326     /**
327      * <p>This method adds text to the current paragraph.</p>
328      *
329      * <p>If no paragraphs exist within the document, it creates one.</p>
330      *
331      * @param   data        The string data for this segment.
332      * @param   style       Text Style object describing the formatting of this
333      *                      segment.  Can be null.
334      */
addParagraphData(String data, TextStyle style)335     public void addParagraphData(String data, TextStyle style) {
336         if (currentPara == null) {
337             addParagraph(null, false);
338         }
339         currentPara.addTextSegment(data, style);
340     }
341 
342 
343     /*
344      * Preamble is the portion before font specification which never
345      * seems to change from one file, or one saved version, to the next.
346      *
347      * Bytes 18h and 19h seem to contain the number of fonts and should
348      * be modified when all of the fonts have been specified.
349      * These bytes are the first two on the fourth line below.
350      */
initPreamble()351     private void initPreamble() {
352          preamble = new byte[] { 0x7B, 0x5C, 0x70, 0x77, 0x69, 0x15, 0x00, 0x00,
353                                  0x01, 0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
354                                  0x02, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x0A, 0x00,  // Bytes 3-4 Font??
355                                  0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // Bytes 1-2  # Fonts
356                                  0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00,
357                                  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
358                                  0x00, 0x00, 0x00, 0x00 };
359     }
360 
361 
362     /*
363      * This method writes the minimum font data that is used by the converter.
364      * Currently, all documents convert to 10 point Courier New.  Tahoma is
365      * always mentioned in Pocket Word files, however, even if it is not used.
366      *
367      * TODO:    Rewrite to allow for multiple fonts once font support issues
368      *          have been resolved.
369      */
loadFonts()370     private void loadFonts() {
371         ByteArrayOutputStream fontData = new ByteArrayOutputStream();
372 
373         try {
374             fontData.write(new String("Tahoma").getBytes("UTF-16LE"));
375             fontData.write(new byte[52]);       // Rest of font name?
376             fontData.write(new byte[] { 0x02, 0x00, 0x01, 0x00 } );
377             fontData.write(new byte[] { 0x00, 0x00, 0x01, 0x00 } );
378             fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } );
379             fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } );
380 
381             fonts.add(fontData);
382 
383             fontData = new ByteArrayOutputStream();
384 
385             fontData.write(new String("Courier New").getBytes("UTF-16LE"));
386             fontData.write(new byte[42]);
387             fontData.write(new byte[] { 0x14, 0x00, 0x04, 0x00 } );
388             fontData.write(new byte[] { 0x01, 0x00, 0x00, 0x00 } );
389             fontData.write(new byte[] { 0x00, 0x00, 0x15, 0x00 } );
390 
391             // Next part indicates that this is the last font
392             fontData.write(new byte[] { 0x05, 0x00, 0x01, 0x00 } );
393 
394             fonts.add(fontData);
395         }
396         catch (IOException ioe) {
397             // Shouldn't happen as this is a memory based stream
398         }
399     }
400 }
401