1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 package org.openoffice.xmerge.converter.xml.sxc.pexcel.records.formula; 25 26 import java.io.*; 27 import java.util.Vector; 28 import java.util.Enumeration; 29 30 import org.openoffice.xmerge.util.Debug; 31 import org.openoffice.xmerge.util.EndianConverter; 32 import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.DefinedName; 33 import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.Workbook; 34 35 /** 36 * The TokenDecoder decodes a byte[] to an equivalent <code>String</code>. The only 37 * public method apart from the default constructor is the getTokenVector method. 38 * This method takes an entire formula as a pexcel byte[] and decodes it into 39 * a series of <code>Token</code>s. It adds these to a <code>Vector</code> which 40 * is returned once all the tokens have been decoded. The Decoder supports 41 * the following tokens.<br><br> 42 * 43 * Operands Floating point's, Cell references (absolute and relative), 44 * cell ranges<br> 45 * Operators +,-,*,/,<,>.<=,>=,<><br> 46 * Functions All pexcel fixed and varaible argument functions 47 * 48 */ 49 public class TokenDecoder { 50 51 private TokenFactory tf; 52 private FunctionLookup fl; 53 private OperatorLookup operatorLookup; 54 private OperandLookup operandLookup; 55 private Workbook wb; 56 57 /** 58 * Default Constructor initializes the <code>TokenFactory</code> for generating 59 * <code>Token</code> and the <code>SymbolLookup</code> for generating 60 * Strings from hex values. 61 */ TokenDecoder()62 public TokenDecoder() { 63 tf = new TokenFactory(); 64 fl = new FunctionLookup(); 65 operatorLookup = new OperatorLookup(); 66 operandLookup = new OperandLookup(); 67 } 68 69 /** 70 * Sets global workbook data needed for defined names 71 */ setWorkbook(Workbook wb)72 public void setWorkbook(Workbook wb) { 73 74 Debug.log(Debug.TRACE, "TokenDecoder : setWorkbook"); 75 this.wb = wb; 76 } 77 78 /** 79 * Returns a <code>Vector</code> of <code>Token</code> decoded from a 80 * byte[]. The byte[] is first converted to a 81 * <code>ByteArrayInputStream</code> as this is the easiest way of reading 82 * bytes. 83 * 84 * @param formula A Pocket Excel Formula byte[] 85 * @return A <code>Vector</code> of deoded <code>Token</code> 86 */ getTokenVector(byte[] formula)87 public Vector getTokenVector(byte[] formula) { 88 89 Vector v = new Vector(); 90 91 ByteArrayInputStream bis = new ByteArrayInputStream(formula); 92 int b = 0 ; 93 Token t; 94 95 while ((b = bis.read())!=-1) 96 { 97 98 99 switch (b) { 100 101 case TokenConstants.TAREA3D: 102 Debug.log(Debug.TRACE, "Decoded 3D Area Cell Reference: "); 103 v.add(read3DCellAreaRefToken(bis)); 104 Debug.log(Debug.TRACE, "Decoded 3D Area Cell Reference: " + v.lastElement()); 105 break; 106 case TokenConstants.TREF3D: 107 Debug.log(Debug.TRACE, "Decoded 3D Cell Reference: "); 108 v.add(read3DCellRefToken(bis)); 109 Debug.log(Debug.TRACE, "Decoded 3D Cell Reference: " + v.lastElement()); 110 break; 111 case TokenConstants.TREF : 112 v.add(readCellRefToken(bis)); 113 Debug.log(Debug.TRACE, "Decoded Cell Reference: " + v.lastElement()); 114 break; 115 case TokenConstants.TAREA : 116 v.add(readCellAreaRefToken(bis)); 117 Debug.log(Debug.TRACE, "Decoded Cell Area Reference: " + v.lastElement()); 118 break; 119 case TokenConstants.TNUM : 120 v.add(readNumToken(bis)); 121 Debug.log(Debug.TRACE, "Decoded number : " + v.lastElement()); 122 break; 123 case TokenConstants.TFUNCVAR : 124 v.add(readFunctionVarToken(bis)); 125 Debug.log(Debug.TRACE, "Decoded variable argument function: " + v.lastElement()); 126 break; 127 case TokenConstants.TFUNC : 128 v.add(readFunctionToken(bis)); 129 Debug.log(Debug.TRACE, "Decoded function: " + v.lastElement()); 130 break; 131 case TokenConstants.TSTRING : 132 v.add(readStringToken(bis)); 133 Debug.log(Debug.TRACE, "Decoded string: " + v.lastElement()); 134 break; 135 case TokenConstants.TNAME : 136 v.add(readNameToken(bis)); 137 Debug.log(Debug.TRACE, "Decoded defined name: " + v.lastElement()); 138 break; 139 case TokenConstants.TUPLUS: 140 case TokenConstants.TUMINUS: 141 case TokenConstants.TPERCENT: 142 v.add(readOperatorToken(b, 1)); 143 Debug.log(Debug.TRACE, "Decoded Unary operator : " + v.lastElement()); 144 break; 145 case TokenConstants.TADD : 146 case TokenConstants.TSUB : 147 case TokenConstants.TMUL : 148 case TokenConstants.TDIV : 149 case TokenConstants.TLESS : 150 case TokenConstants.TLESSEQUALS : 151 case TokenConstants.TEQUALS : 152 case TokenConstants.TGTEQUALS : 153 case TokenConstants.TGREATER : 154 case TokenConstants.TNEQUALS : 155 v.add(readOperatorToken(b, 2)); 156 Debug.log(Debug.TRACE, "Decoded Binary operator : " + v.lastElement()); 157 break; 158 159 default : 160 Debug.log(Debug.TRACE, "Unrecognized byte : " + b); 161 } 162 } 163 return v; 164 } 165 166 /** 167 * Converts a zero based integer to a char (eg. a=0, b=1). 168 * It assumes the integer is less than 26. 169 * 170 * @param i A 0 based index 171 * @return The equivalent character 172 */ int2Char(int i)173 private char int2Char(int i) { 174 return (char) ('A' + i); 175 } 176 177 /** 178 * Reads a Cell Reference token from the <code>ByteArrayInputStream</code> 179 * 180 * @param bis The <code>ByteArrayInputStream</code> from which we read the 181 * bytes. 182 * @return The decoded String <code>Token</code> 183 */ readStringToken(ByteArrayInputStream bis)184 private Token readStringToken(ByteArrayInputStream bis) { 185 186 int len = ((int)bis.read())*2; 187 int options = (int)bis.read(); 188 Debug.log(Debug.TRACE,"String length is " + len + " and Options Flag is " + options); 189 byte [] stringBytes = new byte[len]; 190 int numRead =0; 191 if ((numRead = bis.read(stringBytes, 0, len)) != len) { 192 Debug.log(Debug.TRACE,"Expected " + len + " bytes. Could only read " + numRead + " bytes."); 193 //throw new IOException("Expected " + len + " bytes. Could only read " + numRead + " bytes."); 194 } 195 StringBuffer outputString = new StringBuffer(); 196 outputString.append('"'); 197 try { 198 Debug.log(Debug.TRACE,"Using LE encoding"); 199 outputString.append(new String(stringBytes, "UTF-16LE")); 200 } catch (IOException eIO) { 201 outputString.append(new String(stringBytes)); //fall back to default encoding 202 } 203 outputString.append('"'); 204 205 return (tf.getOperandToken(outputString.toString(), "STRING")); 206 } 207 208 /** 209 * Reads a Defined Name token from the <code>ByteArrayInputStream</code> 210 * 211 * @param bis The <code>ByteArrayInputStream</code> from which we read the 212 * bytes. 213 * @return The decoded Name <code>Token</code> 214 */ readNameToken(ByteArrayInputStream bis)215 private Token readNameToken(ByteArrayInputStream bis) { 216 byte buffer[] = new byte[2]; 217 buffer[0] = (byte) bis.read(); 218 buffer[1] = (byte) bis.read(); 219 int nameIndex = EndianConverter.readShort(buffer); 220 bis.skip(12); // the next 12 bytes are unused 221 Enumeration e = wb.getDefinedNames(); 222 int i = 1; 223 while(i<nameIndex) { 224 e.nextElement(); 225 i++; 226 } 227 Debug.log(Debug.TRACE,"Name index is " + nameIndex); 228 DefinedName dn = (DefinedName)e.nextElement(); 229 Debug.log(Debug.TRACE,"DefinedName is " + dn.getName()); 230 return (tf.getOperandToken(dn.getName(), "NAME")); 231 } 232 233 /** 234 * Reads a Cell Reference token from the <code>ByteArrayInputStream</code> 235 * 236 * @param bis The <code>ByteArrayInputStream</code> from which we read the 237 * bytes. 238 * @return The decoded Cell Reference <code>Token</code> 239 */ readCellRefToken(ByteArrayInputStream bis)240 private Token readCellRefToken(ByteArrayInputStream bis) { 241 242 byte buffer[] = new byte[2]; 243 String outputString = new String(); 244 245 buffer[0] = (byte) bis.read(); 246 buffer[1] = (byte) bis.read(); 247 int formulaRow = EndianConverter.readShort(buffer); 248 int relativeFlags = (formulaRow & 0xC000)>>14; 249 formulaRow &= 0x3FFF; 250 int formulaCol = (byte) bis.read(); 251 252 outputString = int2CellStr(formulaRow, formulaCol, relativeFlags); 253 254 return (tf.getOperandToken(outputString,"CELL_REFERENCE")); 255 } 256 257 /** 258 * Reads a Cell Reference token from the <code>ByteArrayInputStream</code> 259 * 260 * @param bis The <code>ByteArrayInputStream</code> from which we read the 261 * bytes. 262 * @return The decoded Cell Reference <code>Token</code> 263 */ read3DCellRefToken(ByteArrayInputStream bis)264 private Token read3DCellRefToken(ByteArrayInputStream bis) { 265 266 byte buffer[] = new byte[2]; 267 String outputString = new String(); 268 269 bis.skip(10); 270 271 buffer[0] = (byte) bis.read(); 272 buffer[1] = (byte) bis.read(); 273 int Sheet1 = EndianConverter.readShort(buffer); 274 buffer[0] = (byte) bis.read(); 275 buffer[1] = (byte) bis.read(); 276 int Sheet2 = EndianConverter.readShort(buffer); 277 278 buffer[0] = (byte) bis.read(); 279 buffer[1] = (byte) bis.read(); 280 int formulaRow = EndianConverter.readShort(buffer); 281 int relativeFlags = (formulaRow & 0xC000)>>14; 282 formulaRow &= 0x3FFF; 283 int formulaCol = (byte) bis.read(); 284 String cellRef = "." + int2CellStr(formulaRow, formulaCol, relativeFlags); 285 if(Sheet1 == Sheet2) { 286 outputString = "$" + wb.getSheetName(Sheet1) + cellRef; 287 } else { 288 outputString = "$" + wb.getSheetName(Sheet1) + cellRef + ":$" + wb.getSheetName(Sheet2) + cellRef; 289 } 290 291 return (tf.getOperandToken(outputString,"3D_CELL_REFERENCE")); 292 } 293 294 /** 295 * Reads a Cell Reference token from the <code>ByteArrayInputStream</code> 296 * 297 * @param bis The <code>ByteArrayInputStream</code> from which we read the 298 * bytes. 299 * @return The decoded Cell Reference <code>Token</code> 300 */ read3DCellAreaRefToken(ByteArrayInputStream bis)301 private Token read3DCellAreaRefToken(ByteArrayInputStream bis) { 302 303 byte buffer[] = new byte[2]; 304 String outputString = new String(); 305 306 bis.skip(10); 307 308 buffer[0] = (byte) bis.read(); 309 buffer[1] = (byte) bis.read(); 310 int Sheet1 = EndianConverter.readShort(buffer); 311 buffer[0] = (byte) bis.read(); 312 buffer[1] = (byte) bis.read(); 313 int Sheet2 = EndianConverter.readShort(buffer); 314 315 buffer[0] = (byte) bis.read(); 316 buffer[1] = (byte) bis.read(); 317 int formulaRow1 = EndianConverter.readShort(buffer); 318 int relativeFlags1 = (formulaRow1 & 0xC000)>>14; 319 formulaRow1 &= 0x3FFF; 320 321 buffer[0] = (byte) bis.read(); 322 buffer[1] = (byte) bis.read(); 323 int formulaRow2 = EndianConverter.readShort(buffer); 324 int relativeFlags2 = (formulaRow2 & 0xC000)>>14; 325 formulaRow2 &= 0x3FFF; 326 327 int formulaCol1 = (byte) bis.read(); 328 int formulaCol2 = (byte) bis.read(); 329 330 String cellRef1 = "." + int2CellStr(formulaRow1, formulaCol1, relativeFlags1); 331 String cellRef2 = int2CellStr(formulaRow2, formulaCol2, relativeFlags2); 332 333 if(Sheet1 == Sheet2) { 334 outputString = "$" + wb.getSheetName(Sheet1) + cellRef1 + ":" + cellRef2; 335 } else { 336 outputString = "$" + wb.getSheetName(Sheet1) + cellRef1 + ":$" + wb.getSheetName(Sheet2) + "." + cellRef2; 337 } 338 339 return (tf.getOperandToken(outputString,"3D_CELL_AREA_REFERENCE")); 340 } 341 342 /** 343 * Converts a row and col 0 based index to a spreadsheet cell reference. 344 * It also has a relativeFlags which indicates whether or not the 345 * Cell Reference is relative or absolute (Absolute is denoted with '$') 346 * 347 * 00 = absolute row, absolute col 348 * 01 = absolute row, relative col 349 * 10 = relative row, absolute col 350 * 11 = relative row, relative col 351 * 352 * @param row The cell reference 0 based index to the row 353 * @param col The cell reference 0 based index to the row 354 * @param relativeFlags Flags indicating addressing of row and column 355 * @return A <code>String</code> representing a cell reference 356 */ int2CellStr(int row, int col, int relativeFlags)357 private String int2CellStr(int row, int col, int relativeFlags) { 358 String outputString = ""; 359 int firstChar = (col + 1) / 26; 360 361 if((relativeFlags & 1) == 0) { 362 outputString += "$"; 363 } 364 365 if(firstChar>0) { 366 int secondChar = (col + 1) % 26; 367 outputString += Character.toString(int2Char(firstChar - 1)) + Character.toString(int2Char(secondChar - 1)); 368 } else { 369 outputString += Character.toString(int2Char(col)); 370 } 371 if((relativeFlags & 2) == 0) { 372 outputString += "$"; 373 } 374 outputString += Integer.toString(row+1); 375 return outputString; 376 } 377 378 /** 379 * Reads a Cell Area Reference (cell range) <code>Token</code> from 380 * the <code>ByteArrayInputStream</code> 381 * 382 * @param bis The <code>ByteArrayInputStream</code> from which we read the 383 * bytes. 384 * @return The equivalent Cell Area Reference (cell range) 385 * <code>Token</code> 386 */ readCellAreaRefToken(ByteArrayInputStream bis)387 private Token readCellAreaRefToken(ByteArrayInputStream bis) { 388 byte buffer[] = new byte[2]; 389 int formulaRow1, formulaRow2; 390 int formulaCol1, formulaCol2; 391 392 String outputString = new String(); 393 394 buffer[0] = (byte) bis.read(); 395 buffer[1] = (byte) bis.read(); 396 formulaRow1 = EndianConverter.readShort(buffer); 397 int relativeFlags1 = (formulaRow1 & 0xC000)>>14; 398 formulaRow1 &= 0x3FFF; 399 buffer[0] = (byte) bis.read(); 400 buffer[1] = (byte) bis.read(); 401 formulaRow2 = EndianConverter.readShort(buffer); 402 int relativeFlags2 = (formulaRow2 & 0xC000)>>14; 403 formulaRow2 &= 0x3FFF; 404 405 formulaCol1 = (byte) bis.read(); 406 formulaCol2 = (byte) bis.read(); 407 408 outputString = int2CellStr(formulaRow1, formulaCol1, relativeFlags1); 409 outputString += (":" + int2CellStr(formulaRow2, formulaCol2, relativeFlags2)); 410 411 return (tf.getOperandToken(outputString,"CELL_AREA_REFERENCE")); 412 } 413 414 415 /** 416 * Reads a Number (floating point) token from the <code>ByteArrayInputStream</code> 417 * 418 * @param bis The <code>ByteArrayInputStream</code> from which we read the 419 * bytes. 420 * @return The decoded Integer <code>Token</code> 421 */ readNumToken(ByteArrayInputStream bis)422 private Token readNumToken(ByteArrayInputStream bis) { 423 424 byte numBuffer[] = new byte[8]; 425 426 for(int j=0;j<8;j++) { 427 numBuffer[j]=(byte) bis.read(); 428 } 429 430 return (tf.getOperandToken(Double.toString(EndianConverter.readDouble(numBuffer)),"NUMBER")); 431 } 432 433 /** 434 * Read an Operator token from the <code>ByteArrayInputStream</code> 435 * 436 * @param b A Pocket Excel number representing an operator. 437 * @param args The number of arguments this operator takes. 438 * @return The decoded Operator <code>Token</code> 439 */ readOperatorToken(int b, int args)440 private Token readOperatorToken(int b, int args) { 441 442 Token t; 443 444 if(b==TokenConstants.TUPLUS) { 445 t = tf.getOperatorToken("+", args); 446 } else if(b==TokenConstants.TUMINUS) { 447 t = tf.getOperatorToken("-", args); 448 } else { 449 t = tf.getOperatorToken(operatorLookup.getStringFromID(b), args); 450 } 451 return t; 452 } 453 454 /** 455 * Read a Function token from the <code>ByteArrayInputStream</code> 456 * This function can have any number of arguments and this number is read 457 * in with the record 458 * 459 * @param bis The <code>ByteArrayInputStream</code> from which we read the 460 * bytes. 461 * @return The decoded variable argument Function <code>Token</code> 462 */ readFunctionVarToken(ByteArrayInputStream bis)463 private Token readFunctionVarToken(ByteArrayInputStream bis) { 464 465 int numArgs = 0; 466 numArgs = bis.read(); 467 byte buffer[] = new byte[2]; 468 buffer[0] = (byte) bis.read(); 469 buffer[1] = (byte) bis.read(); 470 int functionID = EndianConverter.readShort(buffer); 471 return (tf.getFunctionToken(fl.getStringFromID(functionID),numArgs)); 472 } 473 474 /** 475 * Read a Function token from the <code>ByteArrayInputStream</code> 476 * This function has a fixed number of arguments which it will get 477 * from <code>FunctionLookup</code>. 478 * 479 * @param bis The <code>ByteArrayInputStream</code> from which we read the 480 * bytes. 481 * @return The decoded fixed argument Function <code>Token</code> 482 */ readFunctionToken(ByteArrayInputStream bis)483 private Token readFunctionToken(ByteArrayInputStream bis) { 484 485 byte buffer[] = new byte[2]; 486 buffer[0] = (byte) bis.read(); 487 buffer[1] = (byte) bis.read(); 488 int functionID = EndianConverter.readShort(buffer); 489 String functionName = fl.getStringFromID(functionID); 490 return (tf.getFunctionToken(functionName,fl.getArgCountFromString(functionName))); 491 } 492 493 } 494