1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package com.sun.star.help;
25 
26 import java.io.FileInputStream;
27 import java.io.FileOutputStream;
28 import java.util.Arrays;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.zip.ZipEntry;
32 import java.util.zip.ZipOutputStream;
33 import java.util.zip.CRC32;
34 import org.apache.lucene.analysis.standard.StandardAnalyzer;
35 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
36 import org.apache.lucene.analysis.Analyzer;
37 import org.apache.lucene.index.IndexWriter;
38 import org.apache.lucene.util.Version;
39 import org.apache.lucene.store.NIOFSDirectory;
40 
41 import java.io.File;
42 import java.io.FileNotFoundException;
43 import java.io.IOException;
44 import java.util.Date;
45 
46 /**
47    When this tool is used with long path names on Windows, that is paths which start
48    with \\?\, then the caller must make sure that the path is unique. This is achieved
49    by removing '.' and '..' from the path. Paths which are created by
50    osl_getSystemPathFromFileURL fulfill this requirement. This is necessary because
51    lucene is patched to not use File.getCanonicalPath. See long_path.patch in the lucene
52    module.
53  */
54 
55 public class HelpIndexerTool
56 {
HelpIndexerTool()57     public HelpIndexerTool()
58 	{
59     }
60 
61 
62     /**
63      * @param args the command line arguments
64      */
main( String[] args )65     public static void main( String[] args )
66 	{
67 		boolean bExtensionMode = false;
68 		mainImpl( args, bExtensionMode );
69 	}
70 
mainImpl( String[] args, boolean bExtensionMode )71     public static void mainImpl( String[] args, boolean bExtensionMode )
72 	{
73         String aDirToZipStr = "";
74         String aSrcDirStr = "";
75         String aLanguageStr = "";
76         String aModule = "";
77         String aTargetZipFileStr = "";
78         String aCfsName = "";
79         String aSegmentName = "";
80 
81         // Scan arguments
82         //If this tool is invoked in the build process for extensions help,
83         //then -extension must be set.
84         boolean bExtension = false;
85         boolean bLang = false;
86         boolean bMod = false;
87         boolean bZipDir = false;
88         boolean bSrcDir = false;
89         boolean bOutput = false;
90         boolean bCfsName = false;
91         boolean bSegmentName = false;
92 
93         int nArgCount = args.length;
94         for( int i = 0 ; i < nArgCount ; i++ )
95 		{
96             if( "-extension".equals(args[i]) )
97 			{
98                 bExtension = true;
99             }
100             else if( "-lang".equals(args[i]) )
101 			{
102                 if( i + 1 < nArgCount )
103 				{
104                     aLanguageStr = args[i + 1];
105                     bLang = true;
106                 }
107                 i++;
108             }
109 			else if( "-mod".equals(args[i]) )
110 			{
111                 if( i + 1 < nArgCount )
112 				{
113                     aModule = args[i + 1];
114                     bMod = true;
115                 }
116                 i++;
117             }
118 			else if( "-zipdir".equals(args[i]) )
119 			{
120                 if( i + 1 < nArgCount )
121 				{
122                     aDirToZipStr = args[i + 1];
123                     bZipDir = true;
124                 }
125                 i++;
126             }
127 			else if( "-srcdir".equals(args[i]) )
128 			{
129                 if( i + 1 < nArgCount )
130 				{
131                     aSrcDirStr = args[i + 1];
132                     bSrcDir = true;
133                 }
134                 i++;
135             }
136 			else if( "-o".equals(args[i]) )
137 			{
138                 if( i + 1 < nArgCount )
139 				{
140                     aTargetZipFileStr = args[i + 1];
141                     bOutput = true;
142                 }
143                 i++;
144             }
145 			else if( "-checkcfsandsegname".equals(args[i]) )
146 			{
147                 if( i + 1 < nArgCount )
148 				{
149                     aCfsName = args[i + 1] + ".cfs";
150                     bCfsName = true;
151                 }
152                 i++;
153                 if( i + 1 < nArgCount )
154 				{
155                     aSegmentName = "segments" + args[i + 1];
156                     bSegmentName = true;
157                 }
158                 i++;
159                 if (!(bCfsName && bSegmentName))
160                 {
161                     System.out.println("Usage: HelpIndexer -checkcfsandsegname _0 _3 (2 arguments needed)");
162                     System.exit( -1 );
163                 }
164             }
165         }
166 
167         if( !bLang || !bMod || !bZipDir || (!bOutput && !bExtensionMode && !bExtension) )
168 		{
169 			if( bExtensionMode )
170 				return;
171 
172 			System.out.println("Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -zipdir TempZipDir -o OutputZipFile");
173             System.out.println("Usage: HelpIndexer -extension -lang ISOLangCode -mod HelpModule -zipdir PathToLangDir");
174 			System.exit( -1 );
175         }
176 
177         String aIndexDirName = aModule + ".idxl";
178         File aIndexDir = new File( aDirToZipStr + File.separator + aIndexDirName );
179 		if( !bSrcDir )
180 			aSrcDirStr = aDirToZipStr;
181         File aCaptionFilesDir = new File( aSrcDirStr + File.separator + "caption" );
182         File aContentFilesDir = new File( aSrcDirStr + File.separator + "content" );
183 
184         try
185 		{
186             Date start = new Date();
187 	    Analyzer analyzer = aLanguageStr.equals("ja") ? (Analyzer)new CJKAnalyzer(Version.LUCENE_29) : (Analyzer)new StandardAnalyzer(Version.LUCENE_29);
188 	    IndexWriter writer = new IndexWriter( NIOFSDirectory.open(aIndexDir), analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED );
189 			if( !bExtensionMode )
190 	            System.out.println( "Lucene: Indexing to directory '" + aIndexDir + "'..." );
191             int nRet = indexDocs( writer, aModule, bExtensionMode, aCaptionFilesDir, aContentFilesDir );
192             if( nRet != -1 )
193 			{
194 				if( !bExtensionMode )
195 				{
196 					System.out.println();
197 					System.out.println( "Optimizing ..." );
198 				}
199                 writer.optimize();
200             }
201             writer.close();
202 
203 			boolean bCfsFileOk = true;
204 			boolean bSegmentFileOk = true;
205 			if( bCfsName && bSegmentName && !bExtensionMode && nRet != -1 )
206 			{
207 				String aCompleteCfsFileName = aDirToZipStr + File.separator + aIndexDirName + File.separator + aCfsName;
208 				String aCompleteSegmentFileName = aDirToZipStr + File.separator + aIndexDirName + File.separator + aSegmentName;
209 				File aCfsFile = new File( aCompleteCfsFileName );
210 				File aSegmentFile = new File( aCompleteSegmentFileName );
211 				bCfsFileOk = aCfsFile.exists();
212 				bSegmentFileOk = aSegmentFile.exists();
213 				System.out.println( "Checking cfs file " + aCfsName+ ": " + (bCfsFileOk ? "Found" : "Not found") );
214 				System.out.println( "Checking segment file " + aSegmentName+ ": " + (bSegmentFileOk ? "Found" : "Not found") );
215 			}
216 
217 			if( bExtensionMode || bExtension)
218 			{
219 				if( !bSrcDir )
220 				{
221 					deleteRecursively( aCaptionFilesDir );
222 					deleteRecursively( aContentFilesDir );
223 				}
224 			}
225 			else
226 			{
227 				if( nRet == -1 )
228 					deleteRecursively( aIndexDir );
229 
230 				if( bCfsFileOk && bSegmentFileOk )
231 					System.out.println( "Zipping ..." );
232 				File aDirToZipFile = new File( aDirToZipStr );
233 				createZipFile( aDirToZipFile, aTargetZipFileStr );
234 				deleteRecursively( aDirToZipFile );
235 			}
236 
237 			if( !bCfsFileOk )
238 			{
239 				System.out.println( "cfs file check failed, terminating..." );
240 				System.exit( -1 );
241 			}
242 
243 			if( !bSegmentFileOk )
244 			{
245 				System.out.println( "segment file check failed, terminating..." );
246 				System.exit( -1 );
247 			}
248 
249 			Date end = new Date();
250 			if( !bExtensionMode )
251 				System.out.println(end.getTime() - start.getTime() + " total milliseconds");
252         }
253 		catch (IOException e)
254 		{
255 			if( bExtensionMode )
256 				return;
257 
258 			System.out.println(" caught a " + e.getClass() +
259 				"\n with message: " + e.getMessage());
260 			System.exit( -1 );
261         }
262     }
263 
indexDocs(IndexWriter writer, String aModule, boolean bExtensionMode, File aCaptionFilesDir, File aContentFilesDir)264 	private static int indexDocs(IndexWriter writer, String aModule, boolean bExtensionMode,
265 		File aCaptionFilesDir, File aContentFilesDir) throws IOException
266 	{
267         if( !aCaptionFilesDir.canRead() || !aCaptionFilesDir.isDirectory() )
268 		{
269 			if( !bExtensionMode )
270 	            System.out.println( "Not found: " + aCaptionFilesDir );
271             return -1;
272         }
273         if( !aContentFilesDir.canRead() || !aContentFilesDir.isDirectory() )
274 		{
275 			if( !bExtensionMode )
276 	            System.out.println( "Not found: " + aContentFilesDir );
277             return -1;
278         }
279 
280         String[] aCaptionFiles = aCaptionFilesDir.list();
281         List aCaptionFilesList = Arrays.asList( aCaptionFiles );
282         HashSet aCaptionFilesHashSet = new HashSet( aCaptionFilesList );
283 
284         String[] aContentFiles = aContentFilesDir.list();
285         List aContentFilesList = Arrays.asList( aContentFiles );
286         HashSet aContentFilesHashSet = new HashSet( aContentFilesList );
287 
288         // Loop over caption files and find corresponding content file
289 		if( !bExtensionMode )
290 	        System.out.println( "Indexing, adding files" );
291         int nCaptionFilesLen = aCaptionFiles.length;
292         for( int i = 0 ; i < nCaptionFilesLen ; i++ )
293 		{
294             String aCaptionFileStr = aCaptionFiles[i];
295             File aCaptionFile = new File( aCaptionFilesDir, aCaptionFileStr );
296             File aContentFile = null;
297             if( aContentFilesHashSet.contains( aCaptionFileStr ) )
298                 aContentFile = new File( aContentFilesDir, aCaptionFileStr );
299 
300 			if( !bExtensionMode )
301 				System.out.print( "." );
302             writer.addDocument( HelpFileDocument.Document( aModule, aCaptionFile, aContentFile ) );
303         }
304 
305         // Loop over content files to find remaining files not mapped to caption files
306         int nContentFilesLen = aContentFiles.length;
307         for( int i = 0 ; i < nContentFilesLen ; i++ )
308 		{
309             String aContentFileStr = aContentFiles[i];
310             if( !aCaptionFilesHashSet.contains( aContentFileStr ) )
311 			{
312                 // Not already handled in caption files loop
313                 File aCaptionFile = null;
314                 File aContentFile = new File( aContentFilesDir, aContentFileStr );
315 				if( !bExtensionMode )
316 					System.out.print( "." );
317                 writer.addDocument( HelpFileDocument.Document( aModule, aCaptionFile, aContentFile ) );
318             }
319         }
320         return 0;
321     }
322 
createZipFile( File aDirToZip, String aTargetZipFileStr )323     public static void createZipFile( File aDirToZip, String aTargetZipFileStr )
324             throws FileNotFoundException, IOException
325 	{
326         FileOutputStream fos = new FileOutputStream( aTargetZipFileStr );
327         ZipOutputStream zos = new ZipOutputStream( fos );
328 
329         File[] aChildrenFiles = aDirToZip.listFiles();
330         int nFileCount = aChildrenFiles.length;
331         for( int i = 0 ; i < nFileCount ; i++ )
332             addToZipRecursively( zos, aChildrenFiles[i], null );
333 
334         zos.close();
335     }
336 
addToZipRecursively( ZipOutputStream zos, File aFile, String aBasePath )337     public static void addToZipRecursively( ZipOutputStream zos, File aFile, String aBasePath )
338             throws FileNotFoundException, IOException
339 	{
340         if( aFile.isDirectory() )
341 		{
342             String aDirName = aFile.getName();
343             if( aDirName.equalsIgnoreCase( "caption" ) || aDirName.equalsIgnoreCase( "content" ) )
344                 return;
345 
346             File[] aChildrenFiles = aFile.listFiles();
347             String aNewBasePath = "";
348             if( aBasePath != null )
349                 aNewBasePath += aBasePath + File.separator;
350             aNewBasePath += aDirName;
351 
352             int nFileCount = aChildrenFiles.length;
353             for( int i = 0 ; i < nFileCount ; i++ )
354                 addToZipRecursively( zos, aChildrenFiles[i], aNewBasePath );
355 
356             return;
357         }
358 
359         // No directory
360         // read contents of file we are going to put in the zip
361         int fileLength = (int) aFile.length();
362         FileInputStream fis = new FileInputStream( aFile );
363         byte[] wholeFile = new byte[fileLength];
364         int bytesRead = fis.read( wholeFile, 0, fileLength );
365         fis.close();
366 
367         String aFileName = aFile.getName();
368         String aEntryName = "";
369         if( aBasePath != null )
370             aEntryName += aBasePath + "/";
371         aEntryName += aFileName;
372         ZipEntry aZipEntry = new ZipEntry( aEntryName );
373         aZipEntry.setTime( aFile.lastModified() );
374         aZipEntry.setSize( fileLength );
375 
376         int nMethod = ( aFileName.toLowerCase().endsWith( ".jar" ) )
377                 ? ZipEntry.STORED : ZipEntry.DEFLATED;
378         aZipEntry.setMethod( nMethod );
379 
380         CRC32 tempCRC = new CRC32();
381         tempCRC.update( wholeFile, 0, wholeFile.length );
382         aZipEntry.setCrc( tempCRC.getValue() );
383 
384         // write the contents into the zip element
385         zos.putNextEntry( aZipEntry );
386         zos.write( wholeFile, 0, fileLength );
387         zos.closeEntry();
388     }
389 
deleteRecursively( File aFile )390     static public boolean deleteRecursively( File aFile )
391 	{
392         if( aFile.isDirectory() )
393 		{
394             File[] aChildrenFiles = aFile.listFiles();
395             int nFileCount = aChildrenFiles.length;
396             for( int i = 0 ; i < nFileCount ; i++ )
397 			{
398                 File aChildrenFile = aChildrenFiles[i];
399                 boolean bSuccess = deleteRecursively( aChildrenFile );
400                 if( !bSuccess )
401                     return false;
402             }
403         }
404 
405         return aFile.delete();
406     }
407 }
408 
409