1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 /*
24  * SDFReader.java
25  *
26  *
27  */
28 
29 package com.sun.star.tooling.converter;
30 
31 import java.io.File;
32 import java.io.FileInputStream;
33 import java.io.IOException;
34 import java.io.InputStreamReader;
35 import java.text.DecimalFormat;
36 import java.util.*;
37 
38 /**
39  *
40  * SDFReader is a FileReader that knows about
41  * the content of SDFFiles
42  *
43  * A SDFBlock is read of the given file. A SDFBlock
44  * consists of all SDFLines that are traanslations of the
45  * same String and the SDFLine containing the source string
46  * itself. SDFFile lines are read and checked whether they
47  * have the allowed column count and  don't contain illeagal
48  * characters (like most unprintable characters below 0x00df).
49  * If the given source language is not found in the first
50  * block of SDFLines a ConverterException is thrown at runtime.
51  * If the given target language is "" (that means not given)
52  * the first language that is not the given source language
53  * is taken for target language. The found values are returned in HashMaps that
54  * use the following keys:
55  * <br/>
56  *  "BlockNr" originally coming from reading the sdf file, contains 'block nr in sdf file'+'-'+'hash value of the sdf id fields'.<br/>
57  *  "Project"  first column in sdf file format.<br/>
58  *  "SourceFile" second column in sdf file format.<br/>
59  *  "Dummy" third column in sdf file format.<br/>
60  *  "ResType" 4. column in sdf file format.<br/>
61  *  "GID" 5. column in sdf file format. <br/>
62  *  "LID" 6. column in sdf file format.<br/>
63  *  "HID" 7. column in sdf file format.<br/>
64  *  "Platform" 8. column in sdf file format. <br/>
65  *  "Width", 9. column in sdf file format.<br/>
66  *  "SourceLanguageID" 10. column in sdf file format(in the line with the source language).<br/>
67  *  "SourceText"  11. column in sdf file format(in the line with the source language).<br/>
68  *  "SourceHText" 12. column in sdf file format(in the line with the source language).<br/>
69  *  "SourceQText" 13. column in sdf file format(in the line with the source language).<br/>
70  *  "SourceTitle" 14. column in sdf file format(in the line with the source language).<br/>
71  *  "TargetLanguageID" 10. column in sdf file format (in the line with the target language).<br/>
72  *  "TargetText" 11. column in sdf file format (in the line with the target language).<br/>
73  *  "TargetHText" 12. column in sdf file format (in the line with the target language).<br/>
74  *  "TargetQText"  13. column in sdf file format (in the line with the target language).<br/>
75  *  "TargetTitle", 14. column in sdf file format (in the line with the target language).<br/>
76  *  "TimeStamp" 15. column in sdf file format.<br/>
77  *
78  * @author Christian Schmidt 2005
79  *
80  */
81 
82 public class SDFReader extends DataReader {
83 
84     /**
85      * an array of the SDF files column names
86      */
87     final static String[] fieldnames      = { "Project", "SourceFile", "Dummy",
88             "ResType", "GID", "LID", "HID", "Platform", "Width", "LanguageID",
89             "Text", "HText", "QText", "Title", "TimeStamp" };
90 
91     /**
92      * an array of the SDF files column names if the source language is in
93      */
94     final static String[] sourceLineNames = { "Project", "SourceFile", "Dummy",
95             "ResType", "GID", "LID", "HID", "Platform", "Width",
96             "SourceLanguageID", "SourceText", "SourceHText", "SourceQText",
97             "SourceTitle", "TimeStamp"   };
98 
99     /**
100      * an array of the SDF files column names if the target language is in
101      */
102     final static String[] targetLineNames = { "Project", "SourceFile", "Dummy",
103             "ResType", "GID", "LID", "HID", "Platform", "Width",
104             "TargetLanguageID", "TargetText", "TargetHText", "TargetQText",
105             "TargetTitle", "TimeStamp"   };
106 
107     final static String   EMPTY           = new String("");
108 
109     private int           dotCount        = 0;
110 
111     /**
112      * a Map containing an SDF line with source language
113      */
114     private Map           sourceMap;
115 
116     /**
117      * a Map containing an SDF line with target language
118      */
119     private Map           targetMap;
120 
121     /**
122      * a Map containing an SDF
123      */
124     private Map           SDFMap;
125 
126     /**
127      * a Map Array containing one SDF source language line and one SDF target
128      * language line
129      */
130     private Map[]         data            = { sourceMap, targetMap };
131 
132     /**
133      * The Id of the current SDFBlock
134      */
135     private String        CurrentBlockId;
136 
137     /**
138      * The SDF file to read from
139      */
140     private File          sourceFile;
141 
142     /**
143      * The language in the source file that should be handelt as source language
144      */
145     protected String        sourceLanguage;
146 
147     /**
148      * The language in the source file that should be handelt as target language
149      */
150     protected String      targetLanguage;
151 
152     /**
153      * A counter holding the number of blocks just read
154      * from this file
155      */
156     private long           blockNr         = 0;// If we use Integer, more then numbers greater than 128k would be signed
157 
158     /**
159      * A counter holding the number of skipped lines that means
160      * lines that can not be worked with because they contain an error
161      */
162     private int           skippedLines    = 0;
163 
164     /**
165      * This switch is set for indicating that all source file lines
166      * are read and no lines remain buffered. Finding this like 'true'
167      * means the source file is finished
168      */
169     private boolean       endIt           = false;
170 
171     /**
172      * Indicates whether the targetLanguage is found in this source file so far
173      */
174     private boolean       foundTarget     = false;
175     /**
176      * Indicates whether the sourceLanguage is found in this source file so far
177      */
178     private boolean       foundSource     = false;
179 
180     /**
181      * Counts how many lines were skipped because the language is
182      * neither sourceLanguage nor targetLanguage
183      */
184     private int           langMiss;
185 
186     /**
187      *  Indicates whether there is a line in the read buffer or not
188      */
189     private boolean       useBuffer       = false;
190 
191     /**
192      * A buffer for SDFLines
193      */
194     private String        lineBuffer;
195 
196     /**
197      * The buffer for the already splitted SDFLines
198      */
199     private String[]      splittedLineBuffer;
200 
201     /**
202      * Counts how many Blocks were skipped
203      * f.e. because no sourceLanguage is found
204      * in it
205      */
206     private int           skippedBlocks;
207 
208     /**
209      * Counts the blocks without targetLanguage
210      */
211     private int           targetLangMiss;
212 
213     /**
214      * Counts the blocks without sourceLanguage
215      */
216     private int           sourceLangMiss;
217 
218     /**
219      * Counts the lines where no targetLanguage line was found
220      * and so empty lines were created
221      */
222     private int           targetLangCreate;
223 
224 
225     DecimalFormat blockNrFormatter = new DecimalFormat("000000");
226 
227     /**
228      * The hashcode of the current block
229      */
230     private int CurrentBlockHash;
231 
232     private boolean skip;
233 
234     /**
235      * Create a new Instance of SDFREader
236      *
237      * @param source                the file to read from
238      * @param sourceLanguage        the sourceLanguage (must not be empty)
239      * @param targetLanguage        the targetLanguage
240      * @param charset               the charset used to read source
241      * @throws java.io.IOException
242      * @throws Exception
243      */
SDFReader(File source, String sourceLanguage, String targetLanguage, String charset)244     public SDFReader(File source, String sourceLanguage, String targetLanguage,
245             String charset) throws java.io.IOException {
246         super(new InputStreamReader(new FileInputStream(source), charset));
247         sourceFile = source;
248         this.sourceLanguage = sourceLanguage;
249         this.targetLanguage = targetLanguage;
250         String line;
251         String[] splitLine;
252         //read first line to get the first
253         //SDF block id
254         mark(16000);
255         if ((line = readLine()) != null) {
256             if ((splitLine = split(line)) != null){
257                 this.CurrentBlockId = getSDFBlockId(splitLine);
258                 this.CurrentBlockHash=this.CurrentBlockId.hashCode();
259                 //found the first
260                 this.blockNr++;
261                 }
262             this.splittedLineBuffer = splitLine;
263         }
264         reset();
265 
266     }
267 
268     /* (non-Javadoc)
269      * @see com.sun.star.tooling.converter.DataReader#getData()
270      */
getData()271     public Map getData()throws IOException {
272 
273         Map map=new HashMap();
274 //        do {
275             this.skip=false;
276             Map[] help=readBlock();
277             if(help==null||help[1]==null||help[0]==null){
278                 return null;
279 //            }else if (help[1].get("TargetLanguageID")==null||help[0].get("SourceLanguageID")==null) {
280 //                OutputHandler.log("Missing Language Id in block "+blockNr+"\nthe block is skipped." );
281 //                this.skippedBlocks++;
282 //                this.skip=true;
283             }else{
284                 map.putAll(help[1]);
285                 map.putAll(help[0]);
286 
287             }
288 //        }while(this.skip=true);
289         return map;
290     }
291 
292 
293     /**
294      * Read a Block from the sdf file and return
295      * @return a Map[] where [0] holds the source and [1] the target language data.
296      *
297      * @throws java.io.IOException
298      */
readBlock()299     public Map[] readBlock() throws java.io.IOException {
300         String line = EMPTY;
301         String blockId = EMPTY;
302         String[] splittedLine = null;
303         data[0]=new ExtMap();
304         data[1]=new ExtMap();
305         String help;
306         String c = null;
307         //read next line or use buffered line
308         while (useBuffer || (line = readLine()) != null) { //works because '||' is shortcut
309 
310             try {
311                 //              buffer used?
312                 if (useBuffer) {
313                     line = this.lineBuffer;
314                     splittedLine = this.splittedLineBuffer;
315                     this.SDFMap = new ExtMap(SDFReader.fieldnames, splittedLine);
316 
317                     try {
318                         checkLanguage(splittedLine);
319                     } catch (ConverterException e) {
320                         throw e;
321                     }finally{
322 
323                         useBuffer = false;
324                     }
325                 } else {
326                     //...are there wrong characters?
327                     if ((check(line)).length() < line.length()) {
328                         throw new LineErrorException(getLineNumber()
329                                 + " : Line contains wrong character "
330                                 //+ Integer.toHexString(Integer.parseInt(c))
331                                 + " \n" + line);
332                     }
333                     //...is the number of columns ok?
334                     if ((splittedLine = split(line)) == null) {
335 
336                         throw new LineErrorException(super.getLineNumber()
337                                 + " : Line has wrong column number \n" + line);
338                         //continue;
339                     } else {
340                         // TODO makeDot is better in Data Handler
341                         makeDot();
342                         // ...is this line in a new SDF block ?
343                         if ((blockId = getSDFBlockId(splittedLine))
344                                 .equals(CurrentBlockId)) {
345 
346                             this.SDFMap = new ExtMap(SDFReader.fieldnames,
347                                     splittedLine);
348                             //what language is in it ?
349                             checkLanguage(splittedLine);
350 
351                         } else {
352 
353                             /*
354                              * we found the next block , but do we have the
355                              * target text?
356                              */
357 
358                             if (!foundTarget) {
359                                 createTargetLine();
360                             }
361 
362                             blockNr++;
363                             splittedLineBuffer = splittedLine;//read one line
364                             // too much so
365                             // buffer it
366                             lineBuffer = line;
367                             useBuffer = true;//reset();
368 
369                             this.CurrentBlockId = blockId;
370                             this.CurrentBlockHash=this.CurrentBlockId.hashCode();
371 
372                             /* ...and what about the source text ? */
373                             if (!foundSource) {
374 
375                                 OutputHandler
376                                         .log("Error in Line:"
377                                                 + getLineNumber()
378                                                 + "Source Language is missing maybe "
379                                                 + "previous block has an error.\nBlock "
380                                                 + (blockNr - 1)
381                                                 + " is skipped. before line: \n"
382                                                 + line);
383 
384                                 foundTarget = false;//no target without source
385                                 skippedBlocks++;
386                                 skippedLines++;
387                                 sourceLangMiss++;
388                                 continue;// skip output of this block if no
389                                          // source language is found
390 
391                             }
392 
393                             break;
394 
395                         }
396                     }
397 
398                 }
399 
400             } catch (LineErrorException e) {
401 
402                 OutputHandler.log(e.getMessage());
403                 this.skippedLines++;
404             } catch (ConverterException e) {
405                 OutputHandler.log(e.getMessage());
406             }
407 
408         }
409         // did we read the whole stuff?
410         if (null != line) {
411             // no
412             foundSource = false;
413             foundTarget = false;
414 
415             return this.data;
416 
417         } else {
418             // ok , its the end but is everything written now?
419             if (!endIt) {
420                 // there is something to write
421                 // but next time we can end it
422                 endIt = true;
423                 if(!foundTarget){
424                     createTargetLine();
425                 }
426                 // write
427                 return this.data;//last lines
428             } else {
429 
430                 showStat();
431                 return null;
432             }
433         }
434         //        }catch(ConverterException e) {
435         //            Converter.log(e.getMessage());
436         //            return null;
437         //        }
438     }
439 
440     /**
441      *
442      */
createTargetLine()443     private void createTargetLine() {
444         targetLangMiss++;
445         // if not, create one ...
446         data[1] = new ExtMap(SDFReader.targetLineNames,
447                 splittedLineBuffer);
448         data[1].put("TargetLanguageID",
449                 this.targetLanguage);
450         if ((String) data[1].get("TargetText") != EMPTY)
451             data[1].put("TargetText", EMPTY);
452         if ((String) data[1].get("TargetHText") != EMPTY)
453             data[1].put("TargetHText", EMPTY);
454         if ((String) data[1].get("TargetQText") != EMPTY)
455             data[1].put("TargetQText", EMPTY);
456         if ((String) data[1].get("TargetTitle") != EMPTY)
457             data[1].put("TargetTitle", EMPTY);
458         this.data[1].put("BlockNr", blockNrFormatter.format(blockNr)+'-'+Integer.toString(this.CurrentBlockHash));
459         targetLangCreate++;
460     }
461 
462     /**
463      * Show the statistic information got while
464      * reading the file
465      *
466      * @throws IOException
467      */
showStat()468     private void showStat() throws IOException {
469         OutputHandler.out(EMPTY);OutputHandler.out(EMPTY);
470  //       OutputHandler.out("Hashes:              " + (theHashes.size()) + " ");
471         OutputHandler.out("Blocks found:           " + blockNr + " ");
472         OutputHandler.out(EMPTY);
473         OutputHandler.out("Lines read:             " + (getLineNumber()) + " ");
474         OutputHandler
475                 .dbg("Lines created           " + (targetLangCreate) + " ");
476         OutputHandler.dbg("                        -------");
477         OutputHandler.dbg("Lines total:            "
478                 + (getLineNumber() + targetLangCreate) + " ");
479         OutputHandler.dbg("Lines skipped:          " + skippedLines + " ");
480 
481         OutputHandler.dbg("Source Language misses: " + sourceLangMiss + " ");
482         OutputHandler.dbg("Target Language misses: " + targetLangMiss + " ");
483         OutputHandler.dbg("Blocks found:           " + blockNr + " ");
484         OutputHandler.dbg("Blocks skipped:         " + skippedBlocks + " ");
485         if ((sourceLangMiss + skippedBlocks + skippedLines) > 0)
486             OutputHandler.out("\n---! Errors found !--- view Logfile.\n\n"
487                     + "To enable logfile use -l option at command line.\n"
488                     + "For help type 'convert -h {Enter}'.\n");
489     }
490 
491     /**
492      * Check the current line  whether the source language
493      * or target language is in it
494      *
495      * @throws ConverterException if a not needed language or no target language is found
496      *              in this block
497      * @throws IOException
498      *
499      */
checkLanguage(String[] splittedLine)500     final private void checkLanguage(String[] splittedLine)
501             throws ConverterException, IOException {
502         String langID = (String) SDFMap.get("LanguageID");
503 
504         //maybe the source language is in this line
505         if (!foundSource && this.sourceLanguage.equals(langID)) {
506 //          found the next source language line
507             this.data[0] = new ExtMap(SDFReader.sourceLineNames, splittedLine);
508 
509 //            this.data[0].put("BlockNr", Integer.toHexString(blockNr));
510 //            this.data[0].put("BlockHash", Integer.toHexString(this.CurrentBlockHash));
511             this.data[0].put("BlockNr", blockNrFormatter.format(blockNr)+'-'+Integer.toHexString(this.CurrentBlockHash));
512 //            this.data[0].put("BlockHash", blockHashFormatter.format(this.CurrentBlockHash));
513             foundSource = true;
514             return;
515 
516         } else {
517             // or the target language is in this line
518             if (!foundTarget) {
519                 //no target language is given at command line
520                 if (this.targetLanguage.equals(EMPTY)) {
521                     //try if we can use the current lines language for target
522                     // language
523                     if (!langID.equals(this.sourceLanguage)) {
524                         //yes , we can use this lines laanguage as target
525                         this.targetLanguage = langID;
526 
527                         //source and target language both are known: show it
528                         OutputHandler.out("Source Language is: "
529                                 + this.sourceLanguage + " ");
530                         OutputHandler.out("Target Language is: "
531                                 + this.targetLanguage + " ");
532                         OutputHandler.out(EMPTY);
533                         System.out.println("Start");
534 
535                     } else {
536                         throw new ConverterException("(" + getLineNumber()
537                                 + ") No target language found: "
538                                 + this.targetLanguage);
539                     }
540                 }
541                 if (this.targetLanguage.equals(langID)) {
542                     this.data[1] = new ExtMap(SDFReader.targetLineNames,
543                             splittedLine);// found the next target language line
544                     this.data[1].put("BlockNr", blockNrFormatter.format(blockNr)+'-'+Integer.toHexString(this.CurrentBlockHash));
545                     foundTarget = true;
546 
547                     return;
548                 }
549             }//end !foundTarget
550         }
551         //if we go here we dont need the found language...
552         throw new ConverterException("found not needed language '"
553                 + this.SDFMap.get("LanguageID") + "' in Line: "
554                 + getLineNumber());
555 
556     }
557 
558     /**
559      *  Make a dot on the screen to show the user that it is going on
560      */
makeDot()561     private void makeDot() {
562         int count = 0;
563         if ((count = (int) super.getLineNumber() / 1000) > this.dotCount) {
564             this.dotCount = count;
565             OutputHandler.printDot();
566         }
567 
568     }
569 
570     /**
571      * split the SDFLine in its columns
572      *
573      * @param   line the current SDFLine
574      * @return  the splitted SDFLine as array of String
575      *          or null if an error occours
576      * @throws  IOException
577      */
split(String line)578     private String[] split(String line) throws IOException {
579         check(line);
580         String[] splitLine;
581         if ((splitLine = line.split("\t")).length == 15)
582             return splitLine;
583         else
584             //an error occured
585             return null;
586     }
587 
588     /**
589      * create a block Id from a splitted SDFLine
590      * the blockId consists of the column one to eight of an SDF File
591      *
592      * @param splitLine    the line to create a block id from
593      * @return the blockId as String
594      */
getSDFBlockId(String[] splitLine)595     private String getSDFBlockId(String[] splitLine) {
596         StringBuffer BlockId = new StringBuffer("");
597         for (int i = 0; i < 8; i++) {
598             BlockId.append(splitLine[i]);
599         }
600         return BlockId.toString();
601     }
602 
603 //    public final boolean canRead() {
604 //        return this.sourceFile.canRead();
605 //    }
606 
607     /**
608      * Check if there are not allowed characters in this line
609      *
610      * @param line      the SDFLine to check
611      * @return          if everything, ok the original
612      *                   else the wrong character as String
613      *
614      * @throws java.io.IOException
615      */
check(String line)616     private String check(String line) throws java.io.IOException {
617         char c = ' ';
618         for (int i = 0; i < line.length(); i++) {
619             c = line.charAt(i);
620             if (c < 30 && c != 9) {
621                 return (new Character(c)).toString();
622             }
623 
624         }
625         return line;
626 
627     }
628 }