1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 package com.sun.star.help;
25 
26 import java.io.FileInputStream;
27 import java.io.FileOutputStream;
28 import java.util.Arrays;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.zip.ZipEntry;
32 import java.util.zip.ZipOutputStream;
33 import java.util.zip.CRC32;
34 import org.apache.lucene.analysis.standard.StandardAnalyzer;
35 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
36 import org.apache.lucene.analysis.Analyzer;
37 import org.apache.lucene.index.IndexWriter;
38 
39 import java.io.File;
40 import java.io.FileNotFoundException;
41 import java.io.IOException;
42 import java.util.Date;
43 
44 
45 /**
46    When this tool is used with long path names on Windows, that is paths which start
47    with \\?\, then the caller must make sure that the path is unique. This is achieved
48    by removing '.' and '..' from the path. Paths which are created by
49    osl_getSystemPathFromFileURL fulfill this requirement. This is necessary because
50    lucene is patched to not use File.getCanonicalPath. See long_path.patch in the lucene
51    module.
52  */
53 public class HelpIndexerTool
54 {
55     public HelpIndexerTool()
56 	{
57     }
58 
59 
60     /**
61      * @param args the command line arguments
62      */
63     public static void main( String[] args )
64 	{
65 		boolean bExtensionMode = false;
66 		mainImpl( args, bExtensionMode );
67 	}
68 
69     public static void mainImpl( String[] args, boolean bExtensionMode )
70 	{
71         String aDirToZipStr = "";
72         String aSrcDirStr = "";
73         String aLanguageStr = "";
74         String aModule = "";
75         String aTargetZipFileStr = "";
76         String aCfsName = "";
77         String aSegmentName = "";
78 
79         // Scan arguments
80         //If this tool is invoked in the build process for extensions help,
81         //then -extension must be set.
82         boolean bExtension = false;
83         boolean bLang = false;
84         boolean bMod = false;
85         boolean bZipDir = false;
86         boolean bSrcDir = false;
87         boolean bOutput = false;
88         boolean bCfsName = false;
89         boolean bSegmentName = false;
90 
91         int nArgCount = args.length;
92         for( int i = 0 ; i < nArgCount ; i++ )
93 		{
94             if( "-extension".equals(args[i]) )
95 			{
96                 bExtension = true;
97             }
98             else if( "-lang".equals(args[i]) )
99 			{
100                 if( i + 1 < nArgCount )
101 				{
102                     aLanguageStr = args[i + 1];
103                     bLang = true;
104                 }
105                 i++;
106             }
107 			else if( "-mod".equals(args[i]) )
108 			{
109                 if( i + 1 < nArgCount )
110 				{
111                     aModule = args[i + 1];
112                     bMod = true;
113                 }
114                 i++;
115             }
116 			else if( "-zipdir".equals(args[i]) )
117 			{
118                 if( i + 1 < nArgCount )
119 				{
120                     aDirToZipStr = args[i + 1];
121                     bZipDir = true;
122                 }
123                 i++;
124             }
125 			else if( "-srcdir".equals(args[i]) )
126 			{
127                 if( i + 1 < nArgCount )
128 				{
129                     aSrcDirStr = args[i + 1];
130                     bSrcDir = true;
131                 }
132                 i++;
133             }
134 			else if( "-o".equals(args[i]) )
135 			{
136                 if( i + 1 < nArgCount )
137 				{
138                     aTargetZipFileStr = args[i + 1];
139                     bOutput = true;
140                 }
141                 i++;
142             }
143 			else if( "-checkcfsandsegname".equals(args[i]) )
144 			{
145                 if( i + 1 < nArgCount )
146 				{
147                     aCfsName = args[i + 1] + ".cfs";
148                     bCfsName = true;
149                 }
150                 i++;
151                 if( i + 1 < nArgCount )
152 				{
153                     aSegmentName = "segments" + args[i + 1];
154                     bSegmentName = true;
155                 }
156                 i++;
157                 if (!(bCfsName && bSegmentName))
158                 {
159                     System.out.println("Usage: HelpIndexer -checkcfsandsegname _0 _3 (2 arguments needed)");
160                     System.exit( -1 );
161                 }
162             }
163         }
164 
165         if( !bLang || !bMod || !bZipDir || (!bOutput && !bExtensionMode && !bExtension) )
166 		{
167 			if( bExtensionMode )
168 				return;
169 
170 			System.out.println("Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -zipdir TempZipDir -o OutputZipFile");
171             System.out.println("Usage: HelpIndexer -extension -lang ISOLangCode -mod HelpModule -zipdir PathToLangDir");
172 			System.exit( -1 );
173         }
174 
175         String aIndexDirName = aModule + ".idxl";
176         File aIndexDir = new File( aDirToZipStr + File.separator + aIndexDirName );
177 		if( !bSrcDir )
178 			aSrcDirStr = aDirToZipStr;
179         File aCaptionFilesDir = new File( aSrcDirStr + File.separator + "caption" );
180         File aContentFilesDir = new File( aSrcDirStr + File.separator + "content" );
181 
182         try
183 		{
184             Date start = new Date();
185             Analyzer analyzer = aLanguageStr.equals("ja") ? (Analyzer)new CJKAnalyzer() : (Analyzer)new StandardAnalyzer();
186             IndexWriter writer = new IndexWriter( aIndexDir, analyzer, true );
187 			if( !bExtensionMode )
188 	            System.out.println( "Lucene: Indexing to directory '" + aIndexDir + "'..." );
189             int nRet = indexDocs( writer, aModule, bExtensionMode, aCaptionFilesDir, aContentFilesDir );
190             if( nRet != -1 )
191 			{
192 				if( !bExtensionMode )
193 				{
194 					System.out.println();
195 					System.out.println( "Optimizing ..." );
196 				}
197                 writer.optimize();
198             }
199             writer.close();
200 
201 			boolean bCfsFileOk = true;
202 			boolean bSegmentFileOk = true;
203 			if( bCfsName && bSegmentName && !bExtensionMode && nRet != -1 )
204 			{
205 				String aCompleteCfsFileName = aDirToZipStr + File.separator + aIndexDirName + File.separator + aCfsName;
206 				String aCompleteSegmentFileName = aDirToZipStr + File.separator + aIndexDirName + File.separator + aSegmentName;
207 				File aCfsFile = new File( aCompleteCfsFileName );
208 				File aSegmentFile = new File( aCompleteSegmentFileName );
209 				bCfsFileOk = aCfsFile.exists();
210 				bSegmentFileOk = aSegmentFile.exists();
211 				System.out.println( "Checking cfs file " + aCfsName+ ": " + (bCfsFileOk ? "Found" : "Not found") );
212 				System.out.println( "Checking segment file " + aSegmentName+ ": " + (bSegmentFileOk ? "Found" : "Not found") );
213 			}
214 
215 			if( bExtensionMode || bExtension)
216 			{
217 				if( !bSrcDir )
218 				{
219 					deleteRecursively( aCaptionFilesDir );
220 					deleteRecursively( aContentFilesDir );
221 				}
222 			}
223 			else
224 			{
225 				if( nRet == -1 )
226 					deleteRecursively( aIndexDir );
227 
228 				if( bCfsFileOk && bSegmentFileOk )
229 					System.out.println( "Zipping ..." );
230 				File aDirToZipFile = new File( aDirToZipStr );
231 				createZipFile( aDirToZipFile, aTargetZipFileStr );
232 				deleteRecursively( aDirToZipFile );
233 			}
234 
235 			if( !bCfsFileOk )
236 			{
237 				System.out.println( "cfs file check failed, terminating..." );
238 				System.exit( -1 );
239 			}
240 
241 			if( !bSegmentFileOk )
242 			{
243 				System.out.println( "segment file check failed, terminating..." );
244 				System.exit( -1 );
245 			}
246 
247 			Date end = new Date();
248 			if( !bExtensionMode )
249 				System.out.println(end.getTime() - start.getTime() + " total milliseconds");
250         }
251 		catch (IOException e)
252 		{
253 			if( bExtensionMode )
254 				return;
255 
256 			System.out.println(" caught a " + e.getClass() +
257 				"\n with message: " + e.getMessage());
258 			System.exit( -1 );
259         }
260     }
261 
262 	private static int indexDocs(IndexWriter writer, String aModule, boolean bExtensionMode,
263 		File aCaptionFilesDir, File aContentFilesDir) throws IOException
264 	{
265         if( !aCaptionFilesDir.canRead() || !aCaptionFilesDir.isDirectory() )
266 		{
267 			if( !bExtensionMode )
268 	            System.out.println( "Not found: " + aCaptionFilesDir );
269             return -1;
270         }
271         if( !aContentFilesDir.canRead() || !aContentFilesDir.isDirectory() )
272 		{
273 			if( !bExtensionMode )
274 	            System.out.println( "Not found: " + aContentFilesDir );
275             return -1;
276         }
277 
278         String[] aCaptionFiles = aCaptionFilesDir.list();
279         List aCaptionFilesList = Arrays.asList( aCaptionFiles );
280         HashSet aCaptionFilesHashSet = new HashSet( aCaptionFilesList );
281 
282         String[] aContentFiles = aContentFilesDir.list();
283         List aContentFilesList = Arrays.asList( aContentFiles );
284         HashSet aContentFilesHashSet = new HashSet( aContentFilesList );
285 
286         // Loop over caption files and find corresponding content file
287 		if( !bExtensionMode )
288 	        System.out.println( "Indexing, adding files" );
289         int nCaptionFilesLen = aCaptionFiles.length;
290         for( int i = 0 ; i < nCaptionFilesLen ; i++ )
291 		{
292             String aCaptionFileStr = aCaptionFiles[i];
293             File aCaptionFile = new File( aCaptionFilesDir, aCaptionFileStr );
294             File aContentFile = null;
295             if( aContentFilesHashSet.contains( aCaptionFileStr ) )
296                 aContentFile = new File( aContentFilesDir, aCaptionFileStr );
297 
298 			if( !bExtensionMode )
299 				System.out.print( "." );
300             writer.addDocument( HelpFileDocument.Document( aModule, aCaptionFile, aContentFile ) );
301         }
302 
303         // Loop over content files to find remaining files not mapped to caption files
304         int nContentFilesLen = aContentFiles.length;
305         for( int i = 0 ; i < nContentFilesLen ; i++ )
306 		{
307             String aContentFileStr = aContentFiles[i];
308             if( !aCaptionFilesHashSet.contains( aContentFileStr ) )
309 			{
310                 // Not already handled in caption files loop
311                 File aCaptionFile = null;
312                 File aContentFile = new File( aContentFilesDir, aContentFileStr );
313 				if( !bExtensionMode )
314 					System.out.print( "." );
315                 writer.addDocument( HelpFileDocument.Document( aModule, aCaptionFile, aContentFile ) );
316             }
317         }
318         return 0;
319     }
320 
321     public static void createZipFile( File aDirToZip, String aTargetZipFileStr )
322             throws FileNotFoundException, IOException
323 	{
324         FileOutputStream fos = new FileOutputStream( aTargetZipFileStr );
325         ZipOutputStream zos = new ZipOutputStream( fos );
326 
327         File[] aChildrenFiles = aDirToZip.listFiles();
328         int nFileCount = aChildrenFiles.length;
329         for( int i = 0 ; i < nFileCount ; i++ )
330             addToZipRecursively( zos, aChildrenFiles[i], null );
331 
332         zos.close();
333     }
334 
335     public static void addToZipRecursively( ZipOutputStream zos, File aFile, String aBasePath )
336             throws FileNotFoundException, IOException
337 	{
338         if( aFile.isDirectory() )
339 		{
340             String aDirName = aFile.getName();
341             if( aDirName.equalsIgnoreCase( "caption" ) || aDirName.equalsIgnoreCase( "content" ) )
342                 return;
343 
344             File[] aChildrenFiles = aFile.listFiles();
345             String aNewBasePath = "";
346             if( aBasePath != null )
347                 aNewBasePath += aBasePath + File.separator;
348             aNewBasePath += aDirName;
349 
350             int nFileCount = aChildrenFiles.length;
351             for( int i = 0 ; i < nFileCount ; i++ )
352                 addToZipRecursively( zos, aChildrenFiles[i], aNewBasePath );
353 
354             return;
355         }
356 
357         // No directory
358         // read contents of file we are going to put in the zip
359         int fileLength = (int) aFile.length();
360         FileInputStream fis = new FileInputStream( aFile );
361         byte[] wholeFile = new byte[fileLength];
362         int bytesRead = fis.read( wholeFile, 0, fileLength );
363         fis.close();
364 
365         String aFileName = aFile.getName();
366         String aEntryName = "";
367         if( aBasePath != null )
368             aEntryName += aBasePath + "/";
369         aEntryName += aFileName;
370         ZipEntry aZipEntry = new ZipEntry( aEntryName );
371         aZipEntry.setTime( aFile.lastModified() );
372         aZipEntry.setSize( fileLength );
373 
374         int nMethod = ( aFileName.toLowerCase().endsWith( ".jar" ) )
375                 ? ZipEntry.STORED : ZipEntry.DEFLATED;
376         aZipEntry.setMethod( nMethod );
377 
378         CRC32 tempCRC = new CRC32();
379         tempCRC.update( wholeFile, 0, wholeFile.length );
380         aZipEntry.setCrc( tempCRC.getValue() );
381 
382         // write the contents into the zip element
383         zos.putNextEntry( aZipEntry );
384         zos.write( wholeFile, 0, fileLength );
385         zos.closeEntry();
386     }
387 
388     static public boolean deleteRecursively( File aFile )
389 	{
390         if( aFile.isDirectory() )
391 		{
392             File[] aChildrenFiles = aFile.listFiles();
393             int nFileCount = aChildrenFiles.length;
394             for( int i = 0 ; i < nFileCount ; i++ )
395 			{
396                 File aChildrenFile = aChildrenFiles[i];
397                 boolean bSuccess = deleteRecursively( aChildrenFile );
398                 if( !bSuccess )
399                     return false;
400             }
401         }
402 
403         return aFile.delete();
404     }
405 }
406 
407