xref: /trunk/main/unotools/source/i18n/textsearch.cxx (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_unotools.hxx"
30 #include <i18npool/mslangid.hxx>
31 #include <tools/debug.hxx>
32 #ifndef _INTN_HXX //autogen
33 //#include <tools/intn.hxx>
34 #endif
35 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
36 #ifndef _COM_SUN_STAR_UTIL_SEARCHFLAGS_HDL_
37 #include <com/sun/star/util/SearchFlags.hdl>
38 #endif
39 #include <com/sun/star/i18n/TransliterationModules.hpp>
40 #include <unotools/charclass.hxx>
41 #include <comphelper/processfactory.hxx>
42 #include <unotools/textsearch.hxx>
43 #include <rtl/instance.hxx>
44 
45 using namespace ::com::sun::star::util;
46 using namespace ::com::sun::star::uno;
47 using namespace ::com::sun::star::lang;
48 
49 // ............................................................................
50 namespace utl
51 {
52 // ............................................................................
53 
54 SearchParam::SearchParam( const String &rText,
55 								SearchType eType,
56 								sal_Bool bCaseSensitive,
57 								sal_Bool bWrdOnly,
58 								sal_Bool bSearchInSel )
59 {
60 	sSrchStr        = rText;
61 	eSrchType       = eType;
62 
63 	bWordOnly       = bWrdOnly;
64 	bSrchInSel      = bSearchInSel;
65 	bCaseSense      = bCaseSensitive;
66 
67 	nTransliterationFlags = 0;
68 
69 	// Werte fuer "Gewichtete Levenshtein-Distanz"
70 	bLEV_Relaxed    = sal_True;
71 	nLEV_OtherX     = 2;
72 	nLEV_ShorterY   = 1;
73 	nLEV_LongerZ    = 3;
74 }
75 
76 SearchParam::SearchParam( const SearchParam& rParam )
77 {
78 	sSrchStr        = rParam.sSrchStr;
79 	sReplaceStr     = rParam.sReplaceStr;
80 	eSrchType       = rParam.eSrchType;
81 
82 	bWordOnly       = rParam.bWordOnly;
83 	bSrchInSel      = rParam.bSrchInSel;
84 	bCaseSense      = rParam.bCaseSense;
85 
86 	bLEV_Relaxed    = rParam.bLEV_Relaxed;
87 	nLEV_OtherX     = rParam.nLEV_OtherX;
88 	nLEV_ShorterY   = rParam.nLEV_ShorterY;
89 	nLEV_LongerZ    = rParam.nLEV_LongerZ;
90 
91 	nTransliterationFlags = rParam.nTransliterationFlags;
92 }
93 
94 static bool lcl_Equals( const SearchOptions& rSO1, const SearchOptions& rSO2 )
95 {
96     return rSO1.algorithmType == rSO2.algorithmType &&
97         rSO1.searchFlag == rSO2.searchFlag &&
98         rSO1.searchString.equals(rSO2.searchString) &&
99         rSO1.replaceString.equals(rSO2.replaceString) &&
100         rSO1.changedChars == rSO2.changedChars &&
101         rSO1.deletedChars == rSO2.deletedChars &&
102         rSO1.insertedChars == rSO2.insertedChars &&
103         rSO1.Locale.Language == rSO2.Locale.Language &&
104         rSO1.Locale.Country == rSO2.Locale.Country &&
105         rSO1.Locale.Variant == rSO2.Locale.Variant &&
106         rSO1.transliterateFlags == rSO2.transliterateFlags;
107 }
108 
109 namespace
110 {
111     struct CachedTextSearch
112     {
113         ::osl::Mutex mutex;
114         ::com::sun::star::util::SearchOptions Options;
115         ::com::sun::star::uno::Reference< ::com::sun::star::util::XTextSearch > xTextSearch;
116     };
117 
118     struct theCachedTextSearch
119         : public rtl::Static< CachedTextSearch, theCachedTextSearch > {};
120 }
121 
122 Reference<XTextSearch> TextSearch::getXTextSearch( const SearchOptions& rPara )
123 {
124     CachedTextSearch &rCache = theCachedTextSearch::get();
125 
126     osl::MutexGuard aGuard(rCache.mutex);
127 
128     if ( lcl_Equals(rCache.Options, rPara) )
129         return rCache.xTextSearch;
130 
131     try
132     {
133         Reference< XMultiServiceFactory > xMSF = ::comphelper::getProcessServiceFactory();
134         rCache.xTextSearch.set( xMSF->createInstance(
135             ::rtl::OUString( RTL_CONSTASCII_USTRINGPARAM(
136                         "com.sun.star.util.TextSearch" ) ) ), UNO_QUERY_THROW );
137         rCache.xTextSearch->setOptions( rPara );
138         rCache.Options = rPara;
139     }
140     catch ( Exception& )
141     {
142         DBG_ERRORFILE( "TextSearch ctor: Exception caught!" );
143     }
144     return rCache.xTextSearch;
145 }
146 
147 TextSearch::TextSearch(const SearchParam & rParam, LanguageType eLang )
148 {
149 	if( LANGUAGE_NONE == eLang )
150 		eLang = LANGUAGE_SYSTEM;
151     ::com::sun::star::lang::Locale aLocale(
152             MsLangId::convertLanguageToLocale( LanguageType(eLang)));
153 
154 	Init( rParam, aLocale);
155 }
156 
157 TextSearch::TextSearch(const SearchParam & rParam, const CharClass& rCClass )
158 {
159 	Init( rParam, rCClass.getLocale() );
160 }
161 
162 TextSearch::TextSearch( const SearchOptions& rPara )
163 {
164     xTextSearch = getXTextSearch( rPara );
165 }
166 
167 void TextSearch::Init( const SearchParam & rParam,
168 						const ::com::sun::star::lang::Locale& rLocale )
169 {
170 	// convert SearchParam to the UNO SearchOptions
171 	SearchOptions aSOpt;
172 
173 	switch( rParam.GetSrchType() )
174 	{
175 	case SearchParam::SRCH_REGEXP:
176 		aSOpt.algorithmType = SearchAlgorithms_REGEXP;
177 		if( rParam.IsSrchInSelection() )
178 			aSOpt.searchFlag |= SearchFlags::REG_NOT_BEGINOFLINE |
179 								SearchFlags::REG_NOT_ENDOFLINE;
180 		break;
181 
182 	case SearchParam::SRCH_LEVDIST:
183 		aSOpt.algorithmType = SearchAlgorithms_APPROXIMATE;
184 		aSOpt.changedChars = rParam.GetLEVOther();
185 		aSOpt.deletedChars = rParam.GetLEVLonger();
186 		aSOpt.insertedChars = rParam.GetLEVShorter();
187 		if( rParam.IsSrchRelaxed() )
188 			aSOpt.searchFlag |= SearchFlags::LEV_RELAXED;
189 		break;
190 
191 //	case SearchParam::SRCH_NORMAL:
192 	default:
193 		aSOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
194 		if( rParam.IsSrchWordOnly() )
195 			aSOpt.searchFlag |= SearchFlags::NORM_WORD_ONLY;
196 		break;
197 	}
198 	aSOpt.searchString = rParam.GetSrchStr();
199 	aSOpt.replaceString = rParam.GetReplaceStr();
200 	aSOpt.Locale = rLocale;
201 	aSOpt.transliterateFlags = rParam.GetTransliterationFlags();
202 	if( !rParam.IsCaseSensitive() )
203     {
204 		aSOpt.searchFlag |= SearchFlags::ALL_IGNORE_CASE;
205         aSOpt.transliterateFlags |= ::com::sun::star::i18n::TransliterationModules_IGNORE_CASE;
206     }
207 
208     xTextSearch = getXTextSearch( aSOpt );
209 }
210 
211 void TextSearch::SetLocale( const ::com::sun::star::util::SearchOptions& rOptions,
212                             const ::com::sun::star::lang::Locale& rLocale )
213 {
214 	// convert SearchParam to the UNO SearchOptions
215     SearchOptions aSOpt( rOptions );
216     aSOpt.Locale = rLocale;
217 
218     xTextSearch = getXTextSearch( aSOpt );
219 }
220 
221 
222 TextSearch::~TextSearch()
223 {
224 }
225 
/*
 * The general search methods. They in turn call the corresponding
 * methods for the normal search or for the search for regular expressions
 * via the method pointers.
 */
231 #if defined _MSC_VER
232 #pragma optimize("", off)
233 #pragma warning(push)
234 #pragma warning(disable: 4748)
235 #endif
236 int TextSearch::SearchFrwrd( const String & rStr, xub_StrLen* pStart,
237 							xub_StrLen* pEnde, SearchResult* pRes )
238 {
239 	int nRet = 0;
240 	try
241 	{
242 		if( xTextSearch.is() )
243 		{
244 			SearchResult aRet( xTextSearch->searchForward(
245 													rStr, *pStart, *pEnde ));
246 			if( aRet.subRegExpressions > 0 )
247 			{
248 				nRet = 1;
249 				// the XTextsearch returns in startOffset the higher position
250 				// and the endposition is allways exclusive.
251 				// The caller of this function will have in startPos the
252 				// lower pos. and end
253 				*pStart = (xub_StrLen)aRet.startOffset[ 0 ];
254 				*pEnde = (xub_StrLen)aRet.endOffset[ 0 ];
255 				if( pRes )
256 					*pRes = aRet;
257 			}
258 		}
259 	}
260 	catch ( Exception& )
261 	{
262 		DBG_ERRORFILE( "SearchForward: Exception caught!" );
263 	}
264 	return nRet;
265 }
266 
267 int TextSearch::SearchBkwrd( const String & rStr, xub_StrLen* pStart,
268 							xub_StrLen* pEnde, SearchResult* pRes )
269 {
270 	int nRet = 0;
271 	try
272 	{
273 		if( xTextSearch.is() )
274 		{
275 			SearchResult aRet( xTextSearch->searchBackward(
276 													rStr, *pStart, *pEnde ));
277 			if( aRet.subRegExpressions )
278 			{
279 				nRet = 1;
280 				// the XTextsearch returns in startOffset the higher position
281 				// and the endposition is allways exclusive.
282 				// The caller of this function will have in startPos the
283 				// lower pos. and end
284 				*pEnde = (xub_StrLen)aRet.startOffset[ 0 ];
285 				*pStart = (xub_StrLen)aRet.endOffset[ 0 ];
286 				if( pRes )
287 					*pRes = aRet;
288 			}
289 		}
290 	}
291 	catch ( Exception& )
292 	{
293 		DBG_ERRORFILE( "SearchBackward: Exception caught!" );
294 	}
295 	return nRet;
296 }
297 
298 void TextSearch::ReplaceBackReferences( String& rReplaceStr, const String &rStr, const SearchResult& rResult )
299 {
300     if( rResult.subRegExpressions > 0 )
301     {
302         String sTab( '\t' );
303         sal_Unicode sSrchChrs[] = {'\\', '&', '$', 0};
304         String sTmp;
305         xub_StrLen nPos = 0;
306         sal_Unicode sFndChar;
307         while( STRING_NOTFOUND != ( nPos = rReplaceStr.SearchChar( sSrchChrs, nPos )) )
308         {
309             if( rReplaceStr.GetChar( nPos ) == '&')
310             {
311                 sal_uInt16 nStart = (sal_uInt16)(rResult.startOffset[0]);
312                 sal_uInt16 nLength = (sal_uInt16)(rResult.endOffset[0] - rResult.startOffset[0]);
313                 rReplaceStr.Erase( nPos, 1 );	// delete ampersand
314                 // replace by found string
315                 rReplaceStr.Insert( rStr, nStart, nLength, nPos );
316                 // jump over
317                 nPos = nPos + nLength;
318             }
319             else if( rReplaceStr.GetChar( nPos ) == '$')
320             {
321                 if( nPos + 1 < rReplaceStr.Len())
322                 {
323                     sFndChar = rReplaceStr.GetChar( nPos + 1 );
324                     switch(sFndChar)
325                     {   // placeholder for a backward reference?
326                         case '0':
327                         case '1':
328                         case '2':
329                         case '3':
330                         case '4':
331                         case '5':
332                         case '6':
333                         case '7':
334                         case '8':
335                         case '9':
336                         {
337                             rReplaceStr.Erase( nPos, 2 );	// delete both
338                             int i = sFndChar - '0';	// index
339                             if(i < rResult.subRegExpressions)
340                             {
341                                 sal_uInt16 nSttReg = (sal_uInt16)(rResult.startOffset[i]);
342                                 sal_uInt16 nRegLen = (sal_uInt16)(rResult.endOffset[i]);
343                                 if( nRegLen > nSttReg )
344                                     nRegLen = nRegLen - nSttReg;
345                                 else
346                                 {
347                                     nRegLen = nSttReg - nRegLen;
348                                     nSttReg = (sal_uInt16)(rResult.endOffset[i]);
349                                 }
350                                 // Copy reference from found string
351                                 sTmp = rStr.Copy((sal_uInt16)nSttReg, (sal_uInt16)nRegLen);
352                                 // insert
353                                 rReplaceStr.Insert( sTmp, nPos );
354                                 // and step over
355                                 nPos = nPos + sTmp.Len();
356                             }
357                         }
358                         break;
359                         default:
360                             nPos += 2; // leave both chars unchanged
361                             break;
362                     }
363                 }
364                 else
365                     ++nPos;
366             }
367             else
368             {
369                 // at least another character?
370                 if( nPos + 1 < rReplaceStr.Len())
371                 {
372                     sFndChar = rReplaceStr.GetChar( nPos + 1 );
373                     switch(sFndChar)
374                     {
375                         case '\\':
376                         case '&':
377                         case '$':
378                             rReplaceStr.Erase( nPos, 1 );
379                             nPos++;
380                         break;
381                         case 't':
382                             rReplaceStr.Erase( nPos, 2 ); // delete both
383                             rReplaceStr.Insert( sTab, nPos ); // insert tabulator
384                             nPos++;	// step over
385                         break;
386                         default:
387                             nPos += 2; // ignore both characters
388                         break;
389                     }
390                 }
391                 else
392                     ++nPos;
393             }
394         }
395     }
396 }
397 
398 
399 #if defined _MSC_VER
400 #pragma optimize("", on)
401 #pragma warning(pop)
402 #endif
403 
404 // ............................................................................
405 }	// namespace utl
406 // ............................................................................
407 
408