1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.converter.xml.sxw.pocketword; 25 26 import org.openoffice.xmerge.Document; 27 import org.openoffice.xmerge.converter.xml.ParaStyle; 28 import org.openoffice.xmerge.converter.xml.TextStyle; 29 30 import java.io.InputStream; 31 import java.io.IOException; 32 import java.io.OutputStream; 33 import java.io.ByteArrayOutputStream; 34 import java.io.DataOutputStream; 35 36 import java.util.Enumeration; 37 import java.util.Vector; 38 39 40 /** 41 * <p>Class representing a Pocket Word Document.</p> 42 * 43 * <p><code>PocketWordDocument</code> is used to create new Pocket Word documents 44 * and to read existing data to allow for conversion to OpenOffice.org XML Writer 45 * format.</p> 46 * 47 * @author Mark Murnane 48 * @version 1.1 49 */ 50 public class PocketWordDocument implements Document, PocketWordConstants { 51 private String docName; 52 53 private byte[] preamble; 54 private Vector fonts; 55 private DocumentDescriptor descriptor; 56 private Vector paragraphs; 57 58 private ParaStyle pStyle; 59 private Paragraph currentPara; 60 61 /* 62 * The trailer currently appears to be constant, but if its found to 63 * have a variable component, then this initialisation should be moved 64 * to an initTrailer() method. 65 * 66 * Padding is sometimes needed before the trailer to ensure the file 67 * ends on a 4-byte boundary, but this is handled in write(). 68 */ 69 private static final byte[] trailer = new byte[] { (byte)0x82, 0x00, 70 0x09, 0x00, 71 0x03, 0x00, 72 (byte)0x82, 0x00, 73 0x00, 0x00, 74 0x00, 0x00, 75 0x00, 0x00, 76 0x00, 0x00, 77 0x00, 0x00 }; 78 79 80 /** 81 * <p>Constructs a new Pocket Word Document.</p> 82 * 83 * <p>This new document does notcontain any information. Document data must 84 * either be added using appropriate methods, or an existing file can be 85 * {@link #read(InputStream) read} from an <code>InputStream</code>.</p> 86 * 87 * @param name The name of the <code>PocketWordDocument</code>. 88 */ PocketWordDocument(String name)89 public PocketWordDocument(String name) { 90 91 docName = trimDocumentName(name); 92 93 preamble = new byte[52]; 94 fonts = new Vector(0, 1); 95 descriptor = new DocumentDescriptor(); 96 paragraphs = new Vector(0, 1); 97 } 98 99 100 /** 101 * <p>This method reads <code>byte</code> data from the InputStream and 102 * extracts font and paragraph data from the file.</p> 103 * 104 * @param docData InputStream containing a Pocket Word data file. 105 * 106 * @throws IOException In case of any I/O errors. 107 */ read(InputStream docData)108 public void read(InputStream docData) throws IOException { 109 110 if (docData == null) { 111 throw new IOException ("No input stream to convert"); 112 } 113 114 // The preamble may become important for font declarations. 115 int readValue = docData.read(preamble); 116 // #i33702# check for an empty InputStream. 117 if(readValue == -1) { 118 System.err.println("Error:invalid input stream"); 119 return; 120 } 121 122 byte[] font = new byte[80]; 123 int numfonts = 0; 124 do { 125 docData.read(font); 126 127 String name = new String(font, 0, 64, "UTF-16LE"); 128 fonts.add(name.trim()); 129 130 } while (!(font[76] == 5 && font[77] == 0 131 && font[78] == 1 && font[79] == 0)); 132 133 /* 134 * TODO: The document descriptor data that follows the fonts ends with 135 * a variable section containing data for each of the paragraphs. 136 * It may be possible to use this information to calculate staring 137 * positions for each paragraph rather than iterating through the 138 * entire byte stream. 139 */ 140 141 int value; 142 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 143 while ((value = docData.read()) != -1) { 144 bos.write(value); 145 } 146 147 148 byte[] contentData = bos.toByteArray(); 149 int start = 0, end = 0; 150 boolean sawMarker = false; 151 152 for (int i = 0; i < contentData.length; i += 4) { 153 if (contentData[i + 2] == (byte)0xFF 154 && contentData[i + 3] == (byte)0xFF && !sawMarker) { 155 start = i - 8; 156 sawMarker = true; 157 continue; 158 } 159 160 if (contentData[i + 2] == (byte)0xFF 161 && contentData[i + 3] == (byte)0xFF && sawMarker) { 162 end = i - 8; 163 ByteArrayOutputStream paragraph = new ByteArrayOutputStream(); 164 paragraph.write(contentData, start, end - start); 165 paragraphs.add(new Paragraph(paragraph.toByteArray())); 166 167 // Reset the markers 168 sawMarker = false; 169 i -= 4; // Skip back 170 } 171 172 } 173 174 /* 175 * Special case, the last paragraph 176 * If we got here, and the marker is set then we saw the start of the 177 * last paragraph, but no following paragraph 178 */ 179 ByteArrayOutputStream paragraph = new ByteArrayOutputStream(); 180 if (contentData[contentData.length - 19] == 0) { 181 paragraph.write(contentData, start, contentData.length - start - 20); 182 } 183 else { 184 paragraph.write(contentData, start, contentData.length - start - 18); 185 } 186 paragraphs.add(new Paragraph(paragraph.toByteArray())); 187 } 188 189 190 /* 191 * Utility method to make sure the document name is stripped of any file 192 * extensions before use. 193 */ trimDocumentName(String name)194 private String trimDocumentName(String name) { 195 String temp = name.toLowerCase(); 196 197 if (temp.endsWith(FILE_EXTENSION)) { 198 // strip the extension 199 int nlen = name.length(); 200 int endIndex = nlen - FILE_EXTENSION.length(); 201 name = name.substring(0,endIndex); 202 } 203 204 return name; 205 } 206 207 208 /** 209 * <p>Method to provide access to all of the <code>Paragraph</code> objects 210 * in the <code>Document</code>.</p> 211 * 212 * @return <code>Enumeration</code> over the paragraphs in the document. 213 */ getParagraphEnumeration()214 public Enumeration getParagraphEnumeration() { 215 return paragraphs.elements(); 216 } 217 218 219 /** 220 * <p>Returns the <code>Document</code> name with no file extension.</p> 221 * 222 * @return The <code>Document</code> name with no file extension. 223 */ getName()224 public String getName() { 225 return docName; 226 } 227 228 229 /** 230 * <p>Returns the <code>Document</code> name with file extension.</p> 231 * 232 * @return The <code>Document</code> name with file extension. 233 */ getFileName()234 public String getFileName() { 235 return new String(docName + FILE_EXTENSION); 236 } 237 238 239 /** 240 * <p>Writes out the <code>Document</code> content to the specified 241 * <code>OutputStream</code>.</p> 242 * 243 * <p>This method may not be thread-safe. 244 * Implementations may or may not synchronize this 245 * method. User code (i.e. caller) must make sure that 246 * calls to this method are thread-safe.</p> 247 * 248 * @param os <code>OutputStream</code> to write out the 249 * <code>Document</code> content. 250 * 251 * @throws IOException If any I/O error occurs. 252 */ write(OutputStream os)253 public void write(OutputStream os) throws IOException { 254 DataOutputStream dos = new DataOutputStream(os); 255 256 initPreamble(); 257 dos.write(preamble); 258 259 loadFonts(); 260 for (int i = 0; i < fonts.size(); i++ ) { 261 ByteArrayOutputStream fontData = (ByteArrayOutputStream)fonts.elementAt(i); 262 dos.write(fontData.toByteArray()); 263 } 264 265 266 for (int i = 0; i < paragraphs.size(); i++) { 267 Paragraph para = (Paragraph)paragraphs.elementAt(i); 268 descriptor.addParagraph((short)para.getTextLength(), para.getLines()); 269 } 270 dos.write(descriptor.getDescriptor()); 271 272 for (int i = 0; i < paragraphs.size(); i++ ) { 273 Paragraph para = (Paragraph)paragraphs.elementAt(i); 274 275 // Last paragraph has some extra data 276 if (i + 1 == paragraphs.size()) { 277 para.setLastParagraph(true); 278 } 279 dos.write(para.getParagraphData()); 280 } 281 282 283 /* 284 * Before we write out the trailer, we need to make sure that it will 285 * lead to the file ending on a 4 byte boundary. 286 */ 287 if (dos.size() % 4 == 0) { 288 dos.write((byte)0x00); 289 dos.write((byte)0x00); 290 } 291 292 dos.write(trailer); 293 294 dos.flush(); 295 dos.close(); 296 } 297 298 299 /** 300 * <p>This method adds a new paragraph element to the document. No string 301 * data is added to the paragraph.</p> 302 * 303 * <p><b>N.B.</b> The newly added paragraph becomes the current paragraph and 304 * is used as the target for all subsequent calls to addParagraphData().</p> 305 * 306 * @param style Paragraph Style object describing the formatting for 307 * the new paragraph. Can be null. 308 * @param listElement true if this paragraph is to be bulleted; 309 * false otherwise. 310 */ addParagraph(ParaStyle style, boolean listElement)311 public void addParagraph(ParaStyle style, boolean listElement) { 312 /* For the moment, only support basic text entry in a single paragraph */ 313 Paragraph para = new Paragraph(style); 314 315 paragraphs.add(para); 316 317 pStyle = style; 318 currentPara = para; 319 320 if (listElement) { 321 para.setBullets(true); 322 } 323 } 324 325 326 /** 327 * <p>This method adds text to the current paragraph.</p> 328 * 329 * <p>If no paragraphs exist within the document, it creates one.</p> 330 * 331 * @param data The string data for this segment. 332 * @param style Text Style object describing the formatting of this 333 * segment. Can be null. 334 */ addParagraphData(String data, TextStyle style)335 public void addParagraphData(String data, TextStyle style) { 336 if (currentPara == null) { 337 addParagraph(null, false); 338 } 339 currentPara.addTextSegment(data, style); 340 } 341 342 343 /* 344 * Preamble is the portion before font specification which never 345 * seems to change from one file, or one saved version, to the next. 346 * 347 * Bytes 18h and 19h seem to contain the number of fonts and should 348 * be modified when all of the fonts have been specified. 349 * These bytes are the first two on the fourth line below. 350 */ initPreamble()351 private void initPreamble() { 352 preamble = new byte[] { 0x7B, 0x5C, 0x70, 0x77, 0x69, 0x15, 0x00, 0x00, 353 0x01, 0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 354 0x02, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x0A, 0x00, // Bytes 3-4 Font?? 355 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bytes 1-2 # Fonts 356 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 357 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 358 0x00, 0x00, 0x00, 0x00 }; 359 } 360 361 362 /* 363 * This method writes the minimum font data that is used by the converter. 364 * Currently, all documents convert to 10 point Courier New. Tahoma is 365 * always mentioned in Pocket Word files, however, even if it is not used. 366 * 367 * TODO: Rewrite to allow for multiple fonts once font support issues 368 * have been resolved. 369 */ loadFonts()370 private void loadFonts() { 371 ByteArrayOutputStream fontData = new ByteArrayOutputStream(); 372 373 try { 374 fontData.write(new String("Tahoma").getBytes("UTF-16LE")); 375 fontData.write(new byte[52]); // Rest of font name? 376 fontData.write(new byte[] { 0x02, 0x00, 0x01, 0x00 } ); 377 fontData.write(new byte[] { 0x00, 0x00, 0x01, 0x00 } ); 378 fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } ); 379 fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } ); 380 381 fonts.add(fontData); 382 383 fontData = new ByteArrayOutputStream(); 384 385 fontData.write(new String("Courier New").getBytes("UTF-16LE")); 386 fontData.write(new byte[42]); 387 fontData.write(new byte[] { 0x14, 0x00, 0x04, 0x00 } ); 388 fontData.write(new byte[] { 0x01, 0x00, 0x00, 0x00 } ); 389 fontData.write(new byte[] { 0x00, 0x00, 0x15, 0x00 } ); 390 391 // Next part indicates that this is the last font 392 fontData.write(new byte[] { 0x05, 0x00, 0x01, 0x00 } ); 393 394 fonts.add(fontData); 395 } 396 catch (IOException ioe) { 397 // Shouldn't happen as this is a memory based stream 398 } 399 } 400 } 401