1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 /* 24 * XLIFFReader.java 25 * 26 * 27 */ 28 package com.sun.star.tooling.converter; 29 30 import java.io.IOException; 31 import java.util.Hashtable; 32 import java.util.Map; 33 34 import com.sun.star.tooling.languageResolver.LanguageResolver; 35 36 import org.xml.sax.Attributes; 37 import org.xml.sax.SAXException; 38 import org.xml.sax.SAXParseException; 39 import org.xml.sax.helpers.AttributesImpl; 40 import org.xml.sax.helpers.DefaultHandler; 41 42 43 /** 44 * Parse the given file and extract the content needed. 45 * <br/> 46 * This Reader understands the parts of the 47 * <a href="http://www.oasis-open.org/committees/xliff/documents/cs-xliff-core-1.1-20031031.htm">xliff</a> spezification used to translate 48 * the strings in Star-Office and Open-Office. 49 * <br/> 50 * The given file is parsed and the content is stored in a HashMap with those keys: 51 * <br/> 52 * "BlockNr" originally coming from reading the sdf file, contains 'block nr in sdf file'+'-'+'hash value of the sdf id fields'.<br/> 53 * "Project" first column in sdf file format.<br/> 54 * "SourceFile" second column in sdf file format.<br/> 55 * "Dummy" third column in sdf file format.<br/> 56 * "ResType" 4. column in sdf file format.<br/> 57 * "GID" 5. column in sdf file format. <br/> 58 * "LID" 6. column in sdf file format.<br/> 59 * "HID" 7. column in sdf file format.<br/> 60 * "Platform" 8. column in sdf file format. <br/> 61 * "Width", 9. column in sdf file format.<br/> 62 * "SourceLanguageID" 10. column in sdf file format(in the line with the source language).<br/> 63 * "SourceText" 11. column in sdf file format(in the line with the source language).<br/> 64 * "SourceHText" 12. column in sdf file format(in the line with the source language).<br/> 65 * "SourceQText" 13. column in sdf file format(in the line with the source language).<br/> 66 * "SourceTitle" 14. column in sdf file format(in the line with the source language).<br/> 67 * "TargetLanguageID" 10. column in sdf file format (in the line with the target language).<br/> 68 * "TargetText" 11. column in sdf file format (in the line with the target language).<br/> 69 * "TargetHText" 12. column in sdf file format (in the line with the target language).<br/> 70 * "TargetQText" 13. column in sdf file format (in the line with the target language).<br/> 71 * "TargetTitle", 14. column in sdf file format (in the line with the target language).<br/> 72 * "TimeStamp" 15. column in sdf file format.<br/> 73 * @ 74 * @author Christian Schmidt 2005 75 * 76 */ 77 public class XLIFFReader extends DefaultHandler { 78 79 /** 80 * A String array holding the keys used by the HashMap holding the Data 81 */ 82 private final String[] dataNames = { "BlockNr", "Project", 83 "SourceFile", "Dummy", "ResType", "GID", "LID", "HID", "Platform", 84 "Width", "SourceLanguageID", "SourceText", "SourceHText", 85 "SourceQText", "SourceTitle", "TargetLanguageID", "TargetText", 86 "TargetHText", "TargetQText", "TargetTitle", "TimeStamp" }; 87 88 /** 89 * Used to index in the data array 90 */ 91 static int index = 0; 92 93 /** 94 * The Map that holds the data returned by this class 95 */ 96 private Map moveData = new ExtMap(); 97 98 /** 99 * A Map that holds yet incomplete data 100 * until all depending transunits are found 101 */ 102 private Hashtable DataStore = new Hashtable(); 103 104 /** 105 * An Elements name 106 */ 107 private String name = new String(""); 108 109 /** 110 *List of Attributes used by an Element 111 */ 112 private Attributes attrs; 113 114 // private String tagElement = new String(""); 115 116 /** 117 * Indicates whether the next found content string should be printed 118 */ 119 private boolean printThis = false; 120 /** 121 * Indicates whether the next found content string should be stored 122 */ 123 private boolean storeIt = false; 124 125 /** 126 * data holds the information created while parsing 127 * 128 */ 129 private String[] data = new String[26]; 130 131 /** 132 * The handler used by this class 133 */ 134 private final DataHandler handler; 135 /** 136 * The target used by this class 137 */ 138 private final DataWriter target; 139 140 // private boolean searchForText = false; 141 142 /** 143 * counts how many dots are made 144 */ 145 private int dotCount; 146 147 /** 148 * Counts how many Trans Units are read 149 */ 150 private int transUnitCounter; 151 152 /** 153 * used source Language 154 */ 155 private String sourceLanguage; 156 157 /** 158 * used target language 159 */ 160 private String targetLanguage; 161 162 /** 163 * indicates whether this is the first Transunit 164 */ 165 private boolean isFirst = true; 166 167 private static final String EMPTY = new String(""); 168 169 /** 170 * the last index in data where something is written 171 */ 172 private int oldindex; 173 174 // private boolean isBptEptTag; 175 176 // private String innerString; 177 // 178 // private String key; 179 180 /** 181 * Index for the BlockNr in the data array 182 */ 183 private static final int BLOCKNR_IDX = 0; 184 /** 185 * Index for the Project in the data array 186 */ 187 private static final int PROJECT_IDX = 1; 188 /** 189 * Index for the Sourcefile name in the data array 190 */ 191 private static final int SOURCEFILE_IDX = 2; 192 /** 193 * Index for the 'dummy' in the data array 194 */ 195 private static final int DUMMY_IDX = 3; 196 /** 197 * Index for the Group Id in the data array 198 */ 199 private static final int GID_IDX = 4; 200 /** 201 * Index for the Local Id in the data array 202 */ 203 private static final int LID_IDX = 5; 204 /** 205 * Index for the Help Id in the data array 206 */ 207 private static final int HID_IDX = 6; 208 /** 209 * Index for the Platform in the data array 210 */ 211 private static final int PLATFORM_IDX = 7; 212 /** 213 * Index for the 'Width' in the data array 214 */ 215 private static final int WIDTH_IDX = 8; 216 /** 217 * Index for the Sourcelanguage Id in the data array 218 */ 219 private static final int SOURCE_LANGUAGE_ID_IDX = 10; 220 /** 221 * Index for the Source Text in the data array 222 */ 223 private static final int SOURCE_TEXT_IDX = 11; 224 /** 225 * Index for the Source Helptext in the data array 226 */ 227 private static final int SOURCE_HELPTEXT_IDX = 12; 228 /** 229 * Index for the Source Quickhelp Text in the data array 230 */ 231 private static final int SOURCE_QUICK_HELPTEXT_IDX = 13; 232 /** 233 * Index for the Source Titletext in the data array 234 */ 235 private static final int SOURCE_TITLETEXT_IDX = 14; 236 /** 237 * Index for the Timestamp in the data array 238 */ 239 private static final int TIMESTAMP_IDX = 15; 240 /** 241 * Index for the res type in the data array 242 */ 243 private static final int RESTYPE_IDX = 16; 244 /** 245 * Index for the Target Language Id in the data array 246 */ 247 private static final int TARGET_LANGUAGE_ID_IDX = 20; 248 /** 249 * Index for the Target Text in the data array 250 */ 251 private static final int TARGET_TEXT_IDX = 21; 252 /** 253 * Index for the Target Helptext in the data array 254 */ 255 private static final int TARGET_HELP_TEXT_IDX = 22; 256 /** 257 * Index for the Target Quickhelp Text in the data array 258 */ 259 private static final int TARGET_QUICKHELP_TEXT_IDX = 23; 260 /** 261 * Index for the Target Titletext in the data array 262 */ 263 private static final int TARGET_TITLE_TEXT_IDX = 24; 264 /** 265 * Index for the Found Parts Counter in the data array 266 */ 267 private static final int FOUND_PARTS_COUNTER_IDX = 18; 268 269 /** 270 * used to find the matching ISO or RFC3066 language code 271 */ 272 LanguageResolver languageResolver; 273 274 private boolean doBlockCompleteCheck=true; 275 276 277 278 /** 279 * Create a new Instance of XLIFFReader 280 * 281 * @param handler the DataHandler to use 282 * @param target the target used 283 * @throws IOException 284 */ XLIFFReader(DataHandler handler, DataWriter target)285 public XLIFFReader(DataHandler handler, DataWriter target) throws IOException { 286 this.languageResolver = new LanguageResolver(); 287 this.handler = handler; 288 this.target = target; 289 } 290 291 /** 292 * Create a new Instance of XLIFFReader 293 * 294 * @param handler the DataHandler to use 295 * @param target the target used 296 * @param doBlockCompleteCheck indicates whether every single transunit should be returned or the whole block data is to be collected 297 * 298 * @throws IOException 299 */ XLIFFReader(DataHandler handler, DataWriter target,boolean doBlockCompleteCheck)300 public XLIFFReader(DataHandler handler, DataWriter target,boolean doBlockCompleteCheck) throws IOException { 301 this(handler, target); 302 this.languageResolver = new LanguageResolver(); 303 this.doBlockCompleteCheck=doBlockCompleteCheck; 304 305 } 306 307 /** 308 * delete and initialize the data content 309 */ initData()310 public void initData() { 311 for (int i = BLOCKNR_IDX; i < SOURCE_LANGUAGE_ID_IDX; i++) { 312 data[i] = ""; 313 } 314 for (int i = SOURCE_TEXT_IDX; i < TIMESTAMP_IDX; i++) { // skip Time Stamp 315 data[i] = ""; 316 } 317 for (int i = RESTYPE_IDX; i < TARGET_LANGUAGE_ID_IDX; i++) { // skip Source language ID 318 data[i] = ""; 319 } 320 for (int i = TARGET_TEXT_IDX; i < 26; i++) {// skip Target language ID, 321 data[i] = ""; 322 } 323 324 data[DUMMY_IDX] = "0";//dummy 325 data[FOUND_PARTS_COUNTER_IDX] = "1";//parts found 326 327 } 328 329 /** (non-Javadoc) 330 * @see org.xml.sax.ContentHandler#startDocument() 331 */ startDocument()332 public void startDocument() { 333 initData(); 334 //System.out.print("Start"); 335 336 } 337 338 /** (non-Javadoc) 339 * @see org.xml.sax.ContentHandler#endDocument() 340 */ endDocument()341 public void endDocument() { 342 343 try { 344 showStatistic(); 345 } catch (IOException e) { 346 347 OutputHandler.log(e.getMessage()); 348 349 } 350 } 351 352 /** (non-Javadoc) 353 * @throws SAXException 354 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) 355 */ startElement(String namespaceURI, String sName, String qName, Attributes attrs)356 public void startElement(String namespaceURI, String sName, String qName, 357 Attributes attrs) throws SAXException { 358 this.name = new String(qName); 359 this.attrs = new AttributesImpl(attrs); 360 String resType; 361 362 String attributeName = new String(""); 363 String attribute = new String(""); 364 String tagElement = new String(""); 365 int i; 366 367 if (qName.equals("bpt")||qName.equals("ept")||qName.equals("sub")||qName.equals("ex")) { 368 //ignore bpt, ept, ex and sub tags 369 // content of the tags will be stored 370 371 storeIt=true; 372 return; 373 374 } 375 if (qName.equals("target")) { 376 if ((resType = data[RESTYPE_IDX]) == null) { 377 378 } else { 379 if ("res".equals(resType)) { 380 index = TARGET_TEXT_IDX; 381 382 storeIt = true; 383 return; 384 } 385 // if("res-Help".equals(resType)){ 386 // index=TARGET_HELP_TEXT_IDX; 387 // storeIt=true; 388 // return; 389 // } 390 if ("res-QuickHelp".equals(resType)) { 391 index = TARGET_QUICKHELP_TEXT_IDX; 392 393 storeIt = true; 394 return; 395 } 396 if ("res-Title".equals(resType)) { 397 index = TARGET_TITLE_TEXT_IDX; 398 399 storeIt = true; 400 return; 401 } 402 } 403 404 } 405 if (qName.equals("source")) { 406 if ((resType = data[RESTYPE_IDX]) == null) { 407 //throw new SAXException("Ressource type not found"); 408 } else { 409 if ("res".equals(resType)) { 410 index = SOURCE_TEXT_IDX; 411 412 storeIt = true; 413 return; 414 } 415 // if("res-Help".equals(resType)){ 416 // index=SOURCEHELPTEXT_IDX; 417 // storeIt=true; 418 // return; 419 // } 420 if ("res-QuickHelp".equals(resType)) { 421 index = SOURCE_QUICK_HELPTEXT_IDX; 422 storeIt = true; 423 return; 424 } 425 if ("res-Title".equals(resType)) { 426 index = SOURCE_TITLETEXT_IDX; 427 storeIt = true; 428 return; 429 } 430 } 431 } 432 433 if (qName.equals("file")) { 434 data[TIMESTAMP_IDX] = attrs.getValue("date"); 435 //data[17]=(attrs.getValue("original")); 436 try{ 437 data[SOURCE_LANGUAGE_ID_IDX] = (languageResolver.getISOFromRFC((String)attrs.getValue("source-language"))); 438 if(languageResolver.getISOFromRFC((String)attrs.getValue("target-language"))!=null){ 439 data[TARGET_LANGUAGE_ID_IDX] = (languageResolver.getISOFromRFC((String)attrs.getValue("target-language"))); 440 } 441 }catch(Exception e){ 442 OutputHandler.log(e.getMessage()); 443 } 444 return; 445 } 446 if (qName.equals("trans-unit")) { 447 String id = attrs.getValue("id"); 448 if ((DataStore.get(id)) != null) { 449 //TODO arraycopy might not be nessessary 450 System.arraycopy((String[]) DataStore.get(id), 0, data, 0, 451 data.length); 452 int help = (new Integer(data[FOUND_PARTS_COUNTER_IDX])).intValue(); //found one more part 453 help++; // refresh the actual found parts 454 data[FOUND_PARTS_COUNTER_IDX] = (new Integer(help)).toString(); // belonging to this information 455 456 DataStore.remove(attrs.getValue("id")); // TODO this can be deleted? 457 } else { 458 459 data[BLOCKNR_IDX] = (attrs.getValue("id")); // a new part 460 } 461 data[RESTYPE_IDX] = (attrs.getValue("restype")); 462 463 return; 464 } 465 466 if (qName.equals("context")) { 467 468 String value = attrs.getValue("context-type"); 469 470 if ("SourceHelpText".equals(value)) { 471 index = SOURCE_HELPTEXT_IDX; 472 storeIt = true; 473 return; 474 }else if ("TargetHelpText".equals(value)) { 475 index = TARGET_HELP_TEXT_IDX; 476 storeIt = true; 477 return; 478 }else if ("DBType".equals(value)) { 479 //index=SOURCEFILE_IDX; 480 //storeIt=true; 481 return; 482 }else if ("Project".equals(value)) { 483 index = PROJECT_IDX; 484 storeIt = true; 485 return; 486 }else if ("Filename".equals(value)) { 487 index = SOURCEFILE_IDX; 488 storeIt = true; 489 return; 490 }else if ("Type".equals(value)) { 491 index = RESTYPE_IDX; 492 storeIt = true; 493 return; 494 }else if ("GID".equals(value)) { 495 index = GID_IDX; 496 storeIt = true; 497 return; 498 }else if ("LID".equals(value)) { 499 index = LID_IDX; 500 storeIt = true; 501 return; 502 }else if ("HID".equals(value)) { 503 index = HID_IDX; 504 storeIt = true; 505 return; 506 }else if ("Platform".equals(value)) { 507 index = PLATFORM_IDX; 508 storeIt = true; 509 return; 510 }else if ("Width".equals(value)) { 511 index = WIDTH_IDX; 512 storeIt = true; 513 return; 514 } 515 516 } 517 518 } 519 520 /** (non-Javadoc) 521 * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String) 522 */ endElement(String namespaceURI, String sName, String qName)523 public void endElement(String namespaceURI, String sName, String qName) 524 throws SAXException { 525 //we ignore bpt and ept tags 526 if(!(qName.equals("bpt")||qName.equals("ept")||qName.equals("sub")||qName.equals("ex"))){ 527 storeIt = false; 528 } 529 if (qName.equals("trans-unit")) { 530 showData(); 531 } 532 533 } 534 535 /** (non-Javadoc) 536 * @see org.xml.sax.ContentHandler#characters(char[], int, int) 537 */ characters(char[] ch, int start, int length)538 public void characters(char[] ch, int start, int length) { 539 540 // checkContent(); 541 String str2 = new String(ch, start, length); 542 543 if (storeIt) { 544 545 String str = new String(ch, start, length); 546 if (index == oldindex) { 547 data[index] += str; 548 } else { 549 data[index] = str; 550 } 551 552 } 553 oldindex = index; 554 555 } 556 557 /** (non-Javadoc) 558 * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException) 559 */ error(SAXParseException e)560 public void error(SAXParseException e) throws SAXParseException { 561 562 OutputHandler.log(e.getMessage()); 563 } 564 565 /** (non-Javadoc) 566 * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException) 567 */ fatalError(SAXParseException e)568 public void fatalError(SAXParseException e) throws SAXParseException { 569 570 OutputHandler.log("PARSE ERROR in line " + e.getLineNumber() + ", " 571 + e.getMessage() ); 572 573 } 574 575 /** (non-Javadoc) 576 * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException) 577 */ warning(SAXParseException e)578 public void warning(SAXParseException e) throws SAXParseException { 579 //throw e; 580 OutputHandler.log(e.getMessage()); 581 } 582 583 /** 584 * Put the Data to the DataHandler 585 * tell the Writer to write it 586 * 587 * @throws SAXException 588 */ showData()589 public void showData() throws SAXException { 590 transUnitCounter++; 591 makeDot(); 592 if (isComplete()) { 593 594 try { 595 moveData(); 596 if (isFirst == true) { 597 this.sourceLanguage = (String) this.moveData 598 .get("SourceLanguageID"); 599 this.targetLanguage = (String) this.moveData 600 .get("TargetLanguageID"); 601 OutputHandler.out(EMPTY); 602 OutputHandler.out("Source Language is: " 603 + this.sourceLanguage); 604 OutputHandler.out("Target Language is: " 605 + this.targetLanguage); 606 OutputHandler.out(EMPTY); 607 OutputHandler.out("Start"); 608 OutputHandler.out(EMPTY); 609 isFirst = false; 610 } 611 target.getDataFrom(handler); 612 target.writeData(); 613 614 } catch (java.io.IOException e) { 615 throw new SAXException(e); 616 } 617 618 } else { 619 DataStore.put(data[BLOCKNR_IDX], data.clone()); 620 initData(); 621 622 } 623 initData(); 624 } 625 626 627 /** 628 * put the data in an Map in the format that 629 * DataHandler can handle it 630 */ moveData()631 final public void moveData() { 632 633 moveData.put("BlockNr", data[BLOCKNR_IDX]); 634 635 moveData.put("Project", data[PROJECT_IDX]); 636 637 moveData.put("SourceFile", data[SOURCEFILE_IDX]); 638 639 moveData.put("Dummy", "0"); 640 641 moveData.put("ResType", data[RESTYPE_IDX]); 642 643 moveData.put("GID", data[GID_IDX]); 644 645 moveData.put("LID", data[LID_IDX]); 646 647 moveData.put("HID", data[HID_IDX]); 648 649 moveData.put("Platform", data[PLATFORM_IDX]); 650 651 if (EMPTY.equals(data[WIDTH_IDX])) 652 data[WIDTH_IDX] = "0"; 653 moveData.put("Width", data[WIDTH_IDX]); 654 655 moveData.put("SourceLanguageID", data[SOURCE_LANGUAGE_ID_IDX]); 656 657 moveData.put("SourceText", data[SOURCE_TEXT_IDX]); 658 659 moveData.put("SourceHText", data[SOURCE_HELPTEXT_IDX]); 660 661 moveData.put("SourceQText", data[SOURCE_QUICK_HELPTEXT_IDX]); 662 663 moveData.put("SourceTitle", data[SOURCE_TITLETEXT_IDX]); 664 665 moveData.put("TargetLanguageID", data[TARGET_LANGUAGE_ID_IDX]); 666 667 moveData.put("TargetText", data[TARGET_TEXT_IDX]); 668 669 moveData.put("TargetHText", data[TARGET_HELP_TEXT_IDX]); 670 671 moveData.put("TargetQText", data[TARGET_QUICKHELP_TEXT_IDX]); 672 673 moveData.put("TargetTitle", data[TARGET_TITLE_TEXT_IDX]); 674 675 moveData.put("TimeStamp", data[TIMESTAMP_IDX]); 676 677 //and give it to the data handler 678 this.handler.fillDataWith(moveData); 679 } 680 681 /** 682 * complete means all depending parts have been found esp. all res types 683 * that belong to the same SDF Line 684 * 685 * @return true if the data is complete 686 * 687 */ isComplete()688 final public boolean isComplete() { 689 690 if(!doBlockCompleteCheck){ 691 return true; 692 } 693 694 String sParts; 695 if (data[FOUND_PARTS_COUNTER_IDX] == EMPTY) 696 data[FOUND_PARTS_COUNTER_IDX] = "1"; //this is the first part 697 698 String sFoundParts = data[FOUND_PARTS_COUNTER_IDX]; 699 //create the new 'id' 700 sParts = data[BLOCKNR_IDX].substring(data[BLOCKNR_IDX].lastIndexOf(":") + 1); 701 702 if (sFoundParts.equals(sParts)) { 703 return true; 704 } 705 return false; 706 } 707 708 // TODO this belongs in OutputHandler 709 /** 710 * show the user that it is going 711 * on by printing dots on the screen 712 * 713 */ makeDot()714 private void makeDot() { 715 int count = 0; 716 if ((count = (int) this.transUnitCounter / 1000) > this.dotCount) { 717 this.dotCount = count; 718 OutputHandler.printDot(); 719 } 720 } 721 722 /** 723 * show the statistic data found while parse this file 724 * 725 * @throws IOException 726 */ showStatistic()727 final void showStatistic() throws IOException { 728 OutputHandler.out(EMPTY); 729 OutputHandler.out("TransUnits found: " + this.transUnitCounter); 730 // every data in DataStore is 731 // skipped 'cause its not complete 732 // TODO count really every transunit not only the data (might consist of 733 // more than one 734 OutputHandler.dbg("TransUnits skip : " + this.DataStore.size()); 735 //Converter.out(EMPTY); 736 } 737 } 738 739