1cdf0e10cSrcweir /*************************************************************************
2cdf0e10cSrcweir  *
3cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4cdf0e10cSrcweir  *
5cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6cdf0e10cSrcweir  *
7cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8cdf0e10cSrcweir  *
9cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10cdf0e10cSrcweir  *
11cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14cdf0e10cSrcweir  *
15cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20cdf0e10cSrcweir  *
21cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25cdf0e10cSrcweir  *
26cdf0e10cSrcweir  ************************************************************************/
27cdf0e10cSrcweir 
28cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
30cdf0e10cSrcweir 
31cdf0e10cSrcweir #include "textsearch.hxx"
32cdf0e10cSrcweir #include "levdis.hxx"
33cdf0e10cSrcweir #include <com/sun/star/lang/Locale.hpp>
34cdf0e10cSrcweir #include <com/sun/star/lang/XMultiServiceFactory.hpp>
35cdf0e10cSrcweir #include <comphelper/processfactory.hxx>
36cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp>
37cdf0e10cSrcweir #include <com/sun/star/util/SearchFlags.hpp>
38cdf0e10cSrcweir #include <com/sun/star/i18n/WordType.hpp>
39cdf0e10cSrcweir #include <com/sun/star/i18n/ScriptType.hpp>
40cdf0e10cSrcweir #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
41cdf0e10cSrcweir #include <com/sun/star/i18n/KCharacterType.hpp>
42cdf0e10cSrcweir #include <com/sun/star/registry/XRegistryKey.hpp>
43cdf0e10cSrcweir #include <cppuhelper/factory.hxx>
44cdf0e10cSrcweir #include <cppuhelper/weak.hxx>
45cdf0e10cSrcweir 
46cdf0e10cSrcweir #ifdef _MSC_VER
47cdf0e10cSrcweir // get rid of that dumb compiler warning
48cdf0e10cSrcweir // identifier was truncated to '255' characters in the debug information
49cdf0e10cSrcweir // for STL template usage, if .pdb files are to be created
50cdf0e10cSrcweir #pragma warning( disable: 4786 )
51cdf0e10cSrcweir #endif
52cdf0e10cSrcweir 
53cdf0e10cSrcweir #include <string.h>
54cdf0e10cSrcweir 
55cdf0e10cSrcweir using namespace ::com::sun::star::util;
56cdf0e10cSrcweir using namespace ::com::sun::star::uno;
57cdf0e10cSrcweir using namespace ::com::sun::star::lang;
58cdf0e10cSrcweir using namespace ::com::sun::star::i18n;
59cdf0e10cSrcweir using namespace ::rtl;
60cdf0e10cSrcweir 
61cdf0e10cSrcweir static sal_Int32 COMPLEX_TRANS_MASK_TMP =
62cdf0e10cSrcweir     TransliterationModules_ignoreBaFa_ja_JP |
63cdf0e10cSrcweir     TransliterationModules_ignoreIterationMark_ja_JP |
64cdf0e10cSrcweir     TransliterationModules_ignoreTiJi_ja_JP |
65cdf0e10cSrcweir     TransliterationModules_ignoreHyuByu_ja_JP |
66cdf0e10cSrcweir     TransliterationModules_ignoreSeZe_ja_JP |
67cdf0e10cSrcweir     TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
68cdf0e10cSrcweir     TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
69cdf0e10cSrcweir     TransliterationModules_ignoreProlongedSoundMark_ja_JP;
70*cc450e3aSHerbert Dürr static const sal_Int32 SIMPLE_TRANS_MASK = ~(COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_WIDTH) | TransliterationModules_FULLWIDTH_HALFWIDTH;
71*cc450e3aSHerbert Dürr static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH;
72cdf0e10cSrcweir     // Above 2 transliteration is simple but need to take effect in
73cdf0e10cSrcweir     // complex transliteration
74cdf0e10cSrcweir 
75cdf0e10cSrcweir TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF)
76cdf0e10cSrcweir         : xMSF( rxMSF )
77cdf0e10cSrcweir         , pJumpTable( 0 )
78cdf0e10cSrcweir         , pJumpTable2( 0 )
79*cc450e3aSHerbert Dürr         , pRegexMatcher( NULL )
80cdf0e10cSrcweir         , pWLD( 0 )
81cdf0e10cSrcweir {
82cdf0e10cSrcweir     SearchOptions aOpt;
83cdf0e10cSrcweir     aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
84cdf0e10cSrcweir     aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
85cdf0e10cSrcweir     //aOpt.Locale = ???;
86cdf0e10cSrcweir     setOptions( aOpt );
87cdf0e10cSrcweir }
88cdf0e10cSrcweir 
89cdf0e10cSrcweir TextSearch::~TextSearch()
90cdf0e10cSrcweir {
91*cc450e3aSHerbert Dürr     delete pRegexMatcher;
92cdf0e10cSrcweir     delete pWLD;
93cdf0e10cSrcweir     delete pJumpTable;
94cdf0e10cSrcweir     delete pJumpTable2;
95cdf0e10cSrcweir }
96cdf0e10cSrcweir 
97cdf0e10cSrcweir void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException )
98cdf0e10cSrcweir {
99cdf0e10cSrcweir     aSrchPara = rOptions;
100cdf0e10cSrcweir 
101*cc450e3aSHerbert Dürr     delete pRegexMatcher, pRegexMatcher = NULL;
102cdf0e10cSrcweir     delete pWLD, pWLD = 0;
103cdf0e10cSrcweir     delete pJumpTable, pJumpTable = 0;
104cdf0e10cSrcweir     delete pJumpTable2, pJumpTable2 = 0;
105cdf0e10cSrcweir 
106cdf0e10cSrcweir     // Create Transliteration class
107cdf0e10cSrcweir     if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
108cdf0e10cSrcweir     {
109cdf0e10cSrcweir         if( !xTranslit.is() )
110cdf0e10cSrcweir         {
111cdf0e10cSrcweir             Reference < XInterface > xI = xMSF->createInstance(
112cdf0e10cSrcweir                     OUString::createFromAscii(
113cdf0e10cSrcweir                         "com.sun.star.i18n.Transliteration"));
114cdf0e10cSrcweir             if ( xI.is() )
115cdf0e10cSrcweir                 xI->queryInterface( ::getCppuType(
116cdf0e10cSrcweir                             (const Reference< XExtendedTransliteration >*)0))
117cdf0e10cSrcweir                     >>= xTranslit;
118cdf0e10cSrcweir         }
119cdf0e10cSrcweir         // Load transliteration module
120cdf0e10cSrcweir         if( xTranslit.is() )
121cdf0e10cSrcweir             xTranslit->loadModule(
122cdf0e10cSrcweir                     (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
123cdf0e10cSrcweir                     aSrchPara.Locale);
124cdf0e10cSrcweir     }
125cdf0e10cSrcweir     else if( xTranslit.is() )
126cdf0e10cSrcweir         xTranslit = 0;
127cdf0e10cSrcweir 
128cdf0e10cSrcweir     // Create Transliteration for 2<->1, 2<->2 transliteration
129cdf0e10cSrcweir     if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
130cdf0e10cSrcweir     {
131cdf0e10cSrcweir         if( !xTranslit2.is() )
132cdf0e10cSrcweir         {
133cdf0e10cSrcweir             Reference < XInterface > xI = xMSF->createInstance(
134cdf0e10cSrcweir                     OUString::createFromAscii(
135cdf0e10cSrcweir                         "com.sun.star.i18n.Transliteration"));
136cdf0e10cSrcweir             if ( xI.is() )
137cdf0e10cSrcweir                 xI->queryInterface( ::getCppuType(
138cdf0e10cSrcweir                             (const Reference< XExtendedTransliteration >*)0))
139cdf0e10cSrcweir                     >>= xTranslit2;
140cdf0e10cSrcweir         }
141cdf0e10cSrcweir         // Load transliteration module
142cdf0e10cSrcweir         if( xTranslit2.is() )
143cdf0e10cSrcweir             xTranslit2->loadModule(
144cdf0e10cSrcweir                     (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
145cdf0e10cSrcweir                     aSrchPara.Locale);
146cdf0e10cSrcweir     }
147cdf0e10cSrcweir 
148cdf0e10cSrcweir     if ( !xBreak.is() )
149cdf0e10cSrcweir     {
150cdf0e10cSrcweir         Reference < XInterface > xI = xMSF->createInstance(
151cdf0e10cSrcweir                 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator"));
152cdf0e10cSrcweir         if( xI.is() )
153cdf0e10cSrcweir             xI->queryInterface( ::getCppuType(
154cdf0e10cSrcweir                         (const Reference< XBreakIterator >*)0))
155cdf0e10cSrcweir                 >>= xBreak;
156cdf0e10cSrcweir     }
157cdf0e10cSrcweir 
158cdf0e10cSrcweir     sSrchStr = aSrchPara.searchString;
159cdf0e10cSrcweir 
160*cc450e3aSHerbert Dürr     // use transliteration here
161*cc450e3aSHerbert Dürr     if ( xTranslit.is() &&
162cdf0e10cSrcweir 	 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
163cdf0e10cSrcweir         sSrchStr = xTranslit->transliterateString2String(
164cdf0e10cSrcweir                 aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
165cdf0e10cSrcweir 
166*cc450e3aSHerbert Dürr     if ( xTranslit2.is() &&
167cdf0e10cSrcweir 	 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
168cdf0e10cSrcweir 	sSrchStr2 = xTranslit2->transliterateString2String(
169cdf0e10cSrcweir 	        aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
170cdf0e10cSrcweir 
171cdf0e10cSrcweir     // When start or end of search string is a complex script type, we need to
172cdf0e10cSrcweir     // make sure the result boundary is not located in the middle of cell.
173cdf0e10cSrcweir     checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
174cdf0e10cSrcweir                 ScriptType::COMPLEX));
175cdf0e10cSrcweir     checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
176cdf0e10cSrcweir                     sSrchStr.getLength()-1) == ScriptType::COMPLEX));
177cdf0e10cSrcweir 
178*cc450e3aSHerbert Dürr     switch( aSrchPara.algorithmType)
179cdf0e10cSrcweir     {
180*cc450e3aSHerbert Dürr 		case SearchAlgorithms_REGEXP:
181*cc450e3aSHerbert Dürr 			fnForward = &TextSearch::RESrchFrwrd;
182*cc450e3aSHerbert Dürr 			fnBackward = &TextSearch::RESrchBkwrd;
183*cc450e3aSHerbert Dürr 
184*cc450e3aSHerbert Dürr 			{
185*cc450e3aSHerbert Dürr 			sal_uInt32 nIcuSearchFlags = 0;
186*cc450e3aSHerbert Dürr 			// map com::sun::star::util::SearchFlags to ICU uregex.h flags
187*cc450e3aSHerbert Dürr 			// TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
188*cc450e3aSHerbert Dürr 			// REG_NEWLINE is neither defined properly nor used anywhere => not implemented
189*cc450e3aSHerbert Dürr 			// REG_NOSUB is not used anywhere => not implemented
190*cc450e3aSHerbert Dürr 			// NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
191*cc450e3aSHerbert Dürr 			// LEV_RELAXED is only used for SearchAlgorithm==Approximate
192*cc450e3aSHerbert Dürr 			// why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it???
193*cc450e3aSHerbert Dürr 			if( (aSrchPara.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0)
194*cc450e3aSHerbert Dürr 				nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
195*cc450e3aSHerbert Dürr 			UErrorCode nIcuErr = U_ZERO_ERROR;
196*cc450e3aSHerbert Dürr 			// assumption: transliteration doesn't mangle regexp control chars
197*cc450e3aSHerbert Dürr 			OUString& rPatternStr = (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr
198*cc450e3aSHerbert Dürr 					: ((aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : aSrchPara.searchString);
199*cc450e3aSHerbert Dürr 			const IcuUniString aIcuSearchPatStr( rPatternStr.getStr(), rPatternStr.getLength());
200*cc450e3aSHerbert Dürr 			pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
201*cc450e3aSHerbert Dürr 			if( nIcuErr)
202*cc450e3aSHerbert Dürr 				{ delete pRegexMatcher; pRegexMatcher = NULL;}
203*cc450e3aSHerbert Dürr 			} break;
204*cc450e3aSHerbert Dürr 
205*cc450e3aSHerbert Dürr 		case SearchAlgorithms_APPROXIMATE:
206cdf0e10cSrcweir             fnForward = &TextSearch::ApproxSrchFrwrd;
207cdf0e10cSrcweir             fnBackward = &TextSearch::ApproxSrchBkwrd;
208cdf0e10cSrcweir 
209cdf0e10cSrcweir             pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
210cdf0e10cSrcweir                     aSrchPara.insertedChars, aSrchPara.deletedChars,
211cdf0e10cSrcweir                     0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
212cdf0e10cSrcweir 
213cdf0e10cSrcweir             nLimit = pWLD->GetLimit();
214*cc450e3aSHerbert Dürr 			break;
215*cc450e3aSHerbert Dürr 
216*cc450e3aSHerbert Dürr 		default:
217cdf0e10cSrcweir             fnForward = &TextSearch::NSrchFrwrd;
218cdf0e10cSrcweir             fnBackward = &TextSearch::NSrchBkwrd;
219*cc450e3aSHerbert Dürr 			break;
220cdf0e10cSrcweir     }
221cdf0e10cSrcweir }
222cdf0e10cSrcweir 
223cdf0e10cSrcweir sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
224cdf0e10cSrcweir {
225cdf0e10cSrcweir     sal_Int32 nRet = 0, nEnd = rOff.getLength();
226cdf0e10cSrcweir     while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
227cdf0e10cSrcweir     return nRet;
228cdf0e10cSrcweir }
229cdf0e10cSrcweir 
230cdf0e10cSrcweir sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
231cdf0e10cSrcweir         throw( RuntimeException )
232cdf0e10cSrcweir {
233cdf0e10cSrcweir     sal_Int32 nDone;
234cdf0e10cSrcweir     return nPos == xBreak->previousCharacters(searchStr, nPos+1,
235cdf0e10cSrcweir             aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
236cdf0e10cSrcweir }
237cdf0e10cSrcweir 
238cdf0e10cSrcweir SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
239cdf0e10cSrcweir         throw( RuntimeException )
240cdf0e10cSrcweir {
241cdf0e10cSrcweir     SearchResult sres;
242cdf0e10cSrcweir 
243cdf0e10cSrcweir     OUString in_str(searchStr);
244cdf0e10cSrcweir     sal_Int32 newStartPos = startPos;
245cdf0e10cSrcweir     sal_Int32 newEndPos = endPos;
246cdf0e10cSrcweir 
247cdf0e10cSrcweir     bUsePrimarySrchStr = true;
248cdf0e10cSrcweir 
249cdf0e10cSrcweir     if ( xTranslit.is() )
250cdf0e10cSrcweir     {
251cdf0e10cSrcweir         // apply normal transliteration (1<->1, 1<->0)
252cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
253cdf0e10cSrcweir         in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
254cdf0e10cSrcweir 
255cdf0e10cSrcweir         // JP 20.6.2001: also the start and end positions must be corrected!
256cdf0e10cSrcweir         if( startPos )
257cdf0e10cSrcweir             newStartPos = FindPosInSeq_Impl( offset, startPos );
258cdf0e10cSrcweir 
259cdf0e10cSrcweir         if( endPos < searchStr.getLength() )
260cdf0e10cSrcweir 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
261cdf0e10cSrcweir         else
262cdf0e10cSrcweir             newEndPos = in_str.getLength();
263cdf0e10cSrcweir 
264cdf0e10cSrcweir         sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
265cdf0e10cSrcweir 
266cdf0e10cSrcweir         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
267cdf0e10cSrcweir         {
268cdf0e10cSrcweir             if (sres.startOffset[k])
269cdf0e10cSrcweir 	      sres.startOffset[k] = offset[sres.startOffset[k]];
270cdf0e10cSrcweir             // JP 20.6.2001: end is ever exclusive and then don't return
271cdf0e10cSrcweir             //               the position of the next character - return the
272cdf0e10cSrcweir             //               next position behind the last found character!
273cdf0e10cSrcweir             //               "a b c" find "b" must return 2,3 and not 2,4!!!
274cdf0e10cSrcweir             if (sres.endOffset[k])
275cdf0e10cSrcweir 	      sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1;
276cdf0e10cSrcweir         }
277cdf0e10cSrcweir     }
278cdf0e10cSrcweir     else
279cdf0e10cSrcweir     {
280cdf0e10cSrcweir         sres = (this->*fnForward)( in_str, startPos, endPos );
281cdf0e10cSrcweir     }
282cdf0e10cSrcweir 
283cdf0e10cSrcweir     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
284cdf0e10cSrcweir     {
285cdf0e10cSrcweir         SearchResult sres2;
286cdf0e10cSrcweir 
287cdf0e10cSrcweir 	in_str = OUString(searchStr);
288cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
289cdf0e10cSrcweir 
290cdf0e10cSrcweir         in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
291cdf0e10cSrcweir 
292cdf0e10cSrcweir         if( startPos )
293cdf0e10cSrcweir             startPos = FindPosInSeq_Impl( offset, startPos );
294cdf0e10cSrcweir 
295cdf0e10cSrcweir         if( endPos < searchStr.getLength() )
296cdf0e10cSrcweir             endPos = FindPosInSeq_Impl( offset, endPos );
297cdf0e10cSrcweir         else
298cdf0e10cSrcweir             endPos = in_str.getLength();
299cdf0e10cSrcweir 
300cdf0e10cSrcweir 	bUsePrimarySrchStr = false;
301cdf0e10cSrcweir         sres2 = (this->*fnForward)( in_str, startPos, endPos );
302cdf0e10cSrcweir 
303cdf0e10cSrcweir         for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
304cdf0e10cSrcweir         {
305cdf0e10cSrcweir             if (sres2.startOffset[k])
306cdf0e10cSrcweir 	      sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
307cdf0e10cSrcweir             if (sres2.endOffset[k])
308cdf0e10cSrcweir 	      sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
309cdf0e10cSrcweir         }
310cdf0e10cSrcweir 
311cdf0e10cSrcweir 	// pick first and long one
312cdf0e10cSrcweir 	if ( sres.subRegExpressions == 0)
313cdf0e10cSrcweir 	    return sres2;
314cdf0e10cSrcweir 	if ( sres2.subRegExpressions == 1)
315cdf0e10cSrcweir 	{
316cdf0e10cSrcweir 	    if ( sres.startOffset[0] > sres2.startOffset[0])
317cdf0e10cSrcweir 	        return sres2;
318cdf0e10cSrcweir 	    else if ( sres.startOffset[0] == sres2.startOffset[0] &&
319cdf0e10cSrcweir 	        sres.endOffset[0] < sres2.endOffset[0])
320cdf0e10cSrcweir 	        return sres2;
321cdf0e10cSrcweir 	}
322cdf0e10cSrcweir     }
323cdf0e10cSrcweir 
324cdf0e10cSrcweir     return sres;
325cdf0e10cSrcweir }
326cdf0e10cSrcweir 
327cdf0e10cSrcweir SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
328cdf0e10cSrcweir         throw(RuntimeException)
329cdf0e10cSrcweir {
330cdf0e10cSrcweir     SearchResult sres;
331cdf0e10cSrcweir 
332cdf0e10cSrcweir     OUString in_str(searchStr);
333cdf0e10cSrcweir     sal_Int32 newStartPos = startPos;
334cdf0e10cSrcweir     sal_Int32 newEndPos = endPos;
335cdf0e10cSrcweir 
336cdf0e10cSrcweir     bUsePrimarySrchStr = true;
337cdf0e10cSrcweir 
338cdf0e10cSrcweir     if ( xTranslit.is() )
339cdf0e10cSrcweir     {
340cdf0e10cSrcweir         // apply only simple 1<->1 transliteration here
341cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
342cdf0e10cSrcweir 	in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
343cdf0e10cSrcweir 
344cdf0e10cSrcweir         // JP 20.6.2001: also the start and end positions must be corrected!
345cdf0e10cSrcweir         if( startPos < searchStr.getLength() )
346cdf0e10cSrcweir             newStartPos = FindPosInSeq_Impl( offset, startPos );
347cdf0e10cSrcweir 	else
348cdf0e10cSrcweir 	    newStartPos = in_str.getLength();
349cdf0e10cSrcweir 
350cdf0e10cSrcweir         if( endPos )
351cdf0e10cSrcweir 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
352cdf0e10cSrcweir 
353cdf0e10cSrcweir         sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
354cdf0e10cSrcweir 
355cdf0e10cSrcweir         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
356cdf0e10cSrcweir         {
357cdf0e10cSrcweir             if (sres.startOffset[k])
358cdf0e10cSrcweir 	      sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1;
359cdf0e10cSrcweir             // JP 20.6.2001: end is ever exclusive and then don't return
360cdf0e10cSrcweir             //               the position of the next character - return the
361cdf0e10cSrcweir             //               next position behind the last found character!
362cdf0e10cSrcweir             //               "a b c" find "b" must return 2,3 and not 2,4!!!
363cdf0e10cSrcweir             if (sres.endOffset[k])
364cdf0e10cSrcweir 	      sres.endOffset[k] = offset[sres.endOffset[k]];
365cdf0e10cSrcweir         }
366cdf0e10cSrcweir     }
367cdf0e10cSrcweir     else
368cdf0e10cSrcweir     {
369cdf0e10cSrcweir         sres = (this->*fnBackward)( in_str, startPos, endPos );
370cdf0e10cSrcweir     }
371cdf0e10cSrcweir 
372cdf0e10cSrcweir     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
373cdf0e10cSrcweir     {
374cdf0e10cSrcweir 	SearchResult sres2;
375cdf0e10cSrcweir 
376cdf0e10cSrcweir 	in_str = OUString(searchStr);
377cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
378cdf0e10cSrcweir 
379cdf0e10cSrcweir         in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
380cdf0e10cSrcweir 
381cdf0e10cSrcweir         if( startPos < searchStr.getLength() )
382cdf0e10cSrcweir             startPos = FindPosInSeq_Impl( offset, startPos );
383cdf0e10cSrcweir         else
384cdf0e10cSrcweir             startPos = in_str.getLength();
385cdf0e10cSrcweir 
386cdf0e10cSrcweir         if( endPos )
387cdf0e10cSrcweir             endPos = FindPosInSeq_Impl( offset, endPos );
388cdf0e10cSrcweir 
389cdf0e10cSrcweir 	bUsePrimarySrchStr = false;
390cdf0e10cSrcweir 	sres2 = (this->*fnBackward)( in_str, startPos, endPos );
391cdf0e10cSrcweir 
392cdf0e10cSrcweir         for( int k = 0; k < sres2.startOffset.getLength(); k++ )
393cdf0e10cSrcweir         {
394cdf0e10cSrcweir             if (sres2.startOffset[k])
395cdf0e10cSrcweir                 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
396cdf0e10cSrcweir             if (sres2.endOffset[k])
397cdf0e10cSrcweir                 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
398cdf0e10cSrcweir         }
399cdf0e10cSrcweir 
400cdf0e10cSrcweir 	// pick last and long one
401cdf0e10cSrcweir 	if ( sres.subRegExpressions == 0 )
402cdf0e10cSrcweir 	    return sres2;
403cdf0e10cSrcweir 	if ( sres2.subRegExpressions == 1 )
404cdf0e10cSrcweir 	{
405cdf0e10cSrcweir 	    if ( sres.startOffset[0] < sres2.startOffset[0] )
406cdf0e10cSrcweir 	        return sres2;
407cdf0e10cSrcweir 	    if ( sres.startOffset[0] == sres2.startOffset[0] &&
408cdf0e10cSrcweir 		sres.endOffset[0] > sres2.endOffset[0] )
409cdf0e10cSrcweir 	        return sres2;
410cdf0e10cSrcweir 	}
411cdf0e10cSrcweir     }
412cdf0e10cSrcweir 
413cdf0e10cSrcweir     return sres;
414cdf0e10cSrcweir }
415cdf0e10cSrcweir 
416*cc450e3aSHerbert Dürr //---------------------------------------------------------------------
417cdf0e10cSrcweir 
418cdf0e10cSrcweir bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
419cdf0e10cSrcweir {
420cdf0e10cSrcweir     bool bRet = 1;
421cdf0e10cSrcweir     if( '\x7f' != rStr[nPos])
422cdf0e10cSrcweir     {
423cdf0e10cSrcweir         if ( !xCharClass.is() )
424cdf0e10cSrcweir         {
425cdf0e10cSrcweir             Reference < XInterface > xI = xMSF->createInstance(
426cdf0e10cSrcweir                     OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification"));
427cdf0e10cSrcweir             if( xI.is() )
428cdf0e10cSrcweir                 xI->queryInterface( ::getCppuType(
429cdf0e10cSrcweir                             (const Reference< XCharacterClassification >*)0))
430cdf0e10cSrcweir                     >>= xCharClass;
431cdf0e10cSrcweir         }
432cdf0e10cSrcweir         if ( xCharClass.is() )
433cdf0e10cSrcweir         {
434cdf0e10cSrcweir             sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
435cdf0e10cSrcweir                     aSrchPara.Locale );
436cdf0e10cSrcweir             if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
437cdf0e10cSrcweir                             KCharacterType::LETTER ) & nCType ) )
438cdf0e10cSrcweir                 bRet = 0;
439cdf0e10cSrcweir         }
440cdf0e10cSrcweir     }
441cdf0e10cSrcweir     return bRet;
442cdf0e10cSrcweir }
443cdf0e10cSrcweir 
444*cc450e3aSHerbert Dürr // --------- helper methods for Boyer-Moore like text searching ----------
445*cc450e3aSHerbert Dürr // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available
446cdf0e10cSrcweir 
447cdf0e10cSrcweir void TextSearch::MakeForwardTab()
448cdf0e10cSrcweir {
449cdf0e10cSrcweir     // create the jumptable for the search text
450cdf0e10cSrcweir     if( pJumpTable )
451cdf0e10cSrcweir     {
452cdf0e10cSrcweir         if( bIsForwardTab )
453cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
454cdf0e10cSrcweir         delete pJumpTable;
455cdf0e10cSrcweir     }
456cdf0e10cSrcweir     bIsForwardTab = true;
457cdf0e10cSrcweir 
458cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr.getLength();
459cdf0e10cSrcweir     pJumpTable = new TextSearchJumpTable;
460cdf0e10cSrcweir 
461cdf0e10cSrcweir     for( n = 0; n < nLen - 1; ++n )
462cdf0e10cSrcweir     {
463cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr[n];
464cdf0e10cSrcweir         sal_Int32 nDiff = nLen - n - 1;
465cdf0e10cSrcweir 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
466cdf0e10cSrcweir 
467cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
468cdf0e10cSrcweir             pJumpTable->insert( aEntry );
469cdf0e10cSrcweir         if ( !aPair.second )
470cdf0e10cSrcweir             (*(aPair.first)).second = nDiff;
471cdf0e10cSrcweir     }
472cdf0e10cSrcweir }
473cdf0e10cSrcweir 
474cdf0e10cSrcweir void TextSearch::MakeForwardTab2()
475cdf0e10cSrcweir {
476cdf0e10cSrcweir     // create the jumptable for the search text
477cdf0e10cSrcweir     if( pJumpTable2 )
478cdf0e10cSrcweir     {
479cdf0e10cSrcweir         if( bIsForwardTab )
480cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
481cdf0e10cSrcweir         delete pJumpTable2;
482cdf0e10cSrcweir     }
483cdf0e10cSrcweir     bIsForwardTab = true;
484cdf0e10cSrcweir 
485cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr2.getLength();
486cdf0e10cSrcweir     pJumpTable2 = new TextSearchJumpTable;
487cdf0e10cSrcweir 
488cdf0e10cSrcweir     for( n = 0; n < nLen - 1; ++n )
489cdf0e10cSrcweir     {
490cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr2[n];
491cdf0e10cSrcweir         sal_Int32 nDiff = nLen - n - 1;
492cdf0e10cSrcweir 
493cdf0e10cSrcweir 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
494cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
495cdf0e10cSrcweir             pJumpTable2->insert( aEntry );
496cdf0e10cSrcweir         if ( !aPair.second )
497cdf0e10cSrcweir             (*(aPair.first)).second = nDiff;
498cdf0e10cSrcweir     }
499cdf0e10cSrcweir }
500cdf0e10cSrcweir 
501cdf0e10cSrcweir void TextSearch::MakeBackwardTab()
502cdf0e10cSrcweir {
503cdf0e10cSrcweir     // create the jumptable for the search text
504cdf0e10cSrcweir     if( pJumpTable )
505cdf0e10cSrcweir     {
506cdf0e10cSrcweir         if( !bIsForwardTab )
507cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
508cdf0e10cSrcweir         delete pJumpTable;
509cdf0e10cSrcweir     }
510cdf0e10cSrcweir     bIsForwardTab = false;
511cdf0e10cSrcweir 
512cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr.getLength();
513cdf0e10cSrcweir     pJumpTable = new TextSearchJumpTable;
514cdf0e10cSrcweir 
515cdf0e10cSrcweir     for( n = nLen-1; n > 0; --n )
516cdf0e10cSrcweir     {
517cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr[n];
518cdf0e10cSrcweir         TextSearchJumpTable::value_type aEntry( cCh, n );
519cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
520cdf0e10cSrcweir             pJumpTable->insert( aEntry );
521cdf0e10cSrcweir         if ( !aPair.second )
522cdf0e10cSrcweir             (*(aPair.first)).second = n;
523cdf0e10cSrcweir     }
524cdf0e10cSrcweir }
525cdf0e10cSrcweir 
526cdf0e10cSrcweir void TextSearch::MakeBackwardTab2()
527cdf0e10cSrcweir {
528cdf0e10cSrcweir     // create the jumptable for the search text
529cdf0e10cSrcweir     if( pJumpTable2 )
530cdf0e10cSrcweir     {
531cdf0e10cSrcweir         if( !bIsForwardTab )
532cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
533cdf0e10cSrcweir         delete pJumpTable2;
534cdf0e10cSrcweir     }
535cdf0e10cSrcweir     bIsForwardTab = false;
536cdf0e10cSrcweir 
537cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr2.getLength();
538cdf0e10cSrcweir     pJumpTable2 = new TextSearchJumpTable;
539cdf0e10cSrcweir 
540cdf0e10cSrcweir     for( n = nLen-1; n > 0; --n )
541cdf0e10cSrcweir     {
542cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr2[n];
543cdf0e10cSrcweir         TextSearchJumpTable::value_type aEntry( cCh, n );
544cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
545cdf0e10cSrcweir             pJumpTable2->insert( aEntry );
546cdf0e10cSrcweir         if ( !aPair.second )
547cdf0e10cSrcweir             (*(aPair.first)).second = n;
548cdf0e10cSrcweir     }
549cdf0e10cSrcweir }
550cdf0e10cSrcweir 
551cdf0e10cSrcweir sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
552cdf0e10cSrcweir {
553cdf0e10cSrcweir     TextSearchJumpTable *pJump;
554cdf0e10cSrcweir     OUString sSearchKey;
555cdf0e10cSrcweir 
556cdf0e10cSrcweir     if ( bUsePrimarySrchStr ) {
557cdf0e10cSrcweir       pJump = pJumpTable;
558cdf0e10cSrcweir       sSearchKey = sSrchStr;
559cdf0e10cSrcweir     } else {
560cdf0e10cSrcweir       pJump = pJumpTable2;
561cdf0e10cSrcweir       sSearchKey = sSrchStr2;
562cdf0e10cSrcweir     }
563cdf0e10cSrcweir 
564cdf0e10cSrcweir     TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
565cdf0e10cSrcweir     if ( iLook == pJump->end() )
566cdf0e10cSrcweir         return sSearchKey.getLength();
567cdf0e10cSrcweir     return (*iLook).second;
568cdf0e10cSrcweir }
569cdf0e10cSrcweir 
570cdf0e10cSrcweir 
571cdf0e10cSrcweir // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#)
572cdf0e10cSrcweir SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
573cdf0e10cSrcweir         throw(RuntimeException)
574cdf0e10cSrcweir {
575cdf0e10cSrcweir     SearchResult aRet;
576cdf0e10cSrcweir     aRet.subRegExpressions = 0;
577cdf0e10cSrcweir 
578cdf0e10cSrcweir     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
579cdf0e10cSrcweir 
580cdf0e10cSrcweir     OUString aStr( searchStr );
581cdf0e10cSrcweir     sal_Int32 nSuchIdx = aStr.getLength();
582cdf0e10cSrcweir     sal_Int32 nEnde = endPos;
583cdf0e10cSrcweir     if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
584cdf0e10cSrcweir         return aRet;
585cdf0e10cSrcweir 
586cdf0e10cSrcweir 
587cdf0e10cSrcweir     if( nEnde < sSearchKey.getLength() )  // position inside the search region ?
588cdf0e10cSrcweir         return aRet;
589cdf0e10cSrcweir 
590cdf0e10cSrcweir     nEnde -= sSearchKey.getLength();
591cdf0e10cSrcweir 
592cdf0e10cSrcweir     if (bUsePrimarySrchStr)
593cdf0e10cSrcweir       MakeForwardTab();                   // create the jumptable
594cdf0e10cSrcweir     else
595cdf0e10cSrcweir       MakeForwardTab2();
596cdf0e10cSrcweir 
597cdf0e10cSrcweir     for (sal_Int32 nCmpIdx = startPos; // start position for the search
598cdf0e10cSrcweir             nCmpIdx <= nEnde;
599cdf0e10cSrcweir             nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
600cdf0e10cSrcweir     {
601cdf0e10cSrcweir         // if the match would be the completed cells, skip it.
602cdf0e10cSrcweir         if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
603cdf0e10cSrcweir                     && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
604cdf0e10cSrcweir             continue;
605cdf0e10cSrcweir 
606cdf0e10cSrcweir         nSuchIdx = sSearchKey.getLength() - 1;
607cdf0e10cSrcweir         while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
608cdf0e10cSrcweir         {
609cdf0e10cSrcweir             if( nSuchIdx == 0 )
610cdf0e10cSrcweir             {
611cdf0e10cSrcweir                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
612cdf0e10cSrcweir                 {
613cdf0e10cSrcweir                     sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
614cdf0e10cSrcweir                     bool bAtStart = !nCmpIdx;
615cdf0e10cSrcweir                     bool bAtEnd = nFndEnd == endPos;
616cdf0e10cSrcweir                     bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
617cdf0e10cSrcweir                     bool bDelimBehind = IsDelimiter(  aStr, nFndEnd );
618cdf0e10cSrcweir                     //  *       1 -> only one word in the paragraph
619cdf0e10cSrcweir                     //  *       2 -> at begin of paragraph
620cdf0e10cSrcweir                     //  *       3 -> at end of paragraph
621cdf0e10cSrcweir                     //  *       4 -> inside the paragraph
622cdf0e10cSrcweir                     if( !(  ( bAtStart && bAtEnd ) ||           // 1
623cdf0e10cSrcweir                                 ( bAtStart && bDelimBehind ) ||     // 2
624cdf0e10cSrcweir                                 ( bAtEnd && bDelimBefore ) ||       // 3
625cdf0e10cSrcweir                                 ( bDelimBefore && bDelimBehind )))  // 4
626cdf0e10cSrcweir                         break;
627cdf0e10cSrcweir                 }
628cdf0e10cSrcweir 
629cdf0e10cSrcweir                 aRet.subRegExpressions = 1;
630cdf0e10cSrcweir                 aRet.startOffset.realloc( 1 );
631cdf0e10cSrcweir                 aRet.startOffset[ 0 ] = nCmpIdx;
632cdf0e10cSrcweir                 aRet.endOffset.realloc( 1 );
633cdf0e10cSrcweir                 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
634cdf0e10cSrcweir 
635cdf0e10cSrcweir                 return aRet;
636cdf0e10cSrcweir             }
637cdf0e10cSrcweir             else
638cdf0e10cSrcweir                 nSuchIdx--;
639cdf0e10cSrcweir         }
640cdf0e10cSrcweir     }
641cdf0e10cSrcweir     return aRet;
642cdf0e10cSrcweir }
643cdf0e10cSrcweir 
644cdf0e10cSrcweir SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
645cdf0e10cSrcweir         throw(RuntimeException)
646cdf0e10cSrcweir {
647cdf0e10cSrcweir     SearchResult aRet;
648cdf0e10cSrcweir     aRet.subRegExpressions = 0;
649cdf0e10cSrcweir 
650cdf0e10cSrcweir     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
651cdf0e10cSrcweir 
652cdf0e10cSrcweir     OUString aStr( searchStr );
653cdf0e10cSrcweir     sal_Int32 nSuchIdx = aStr.getLength();
654cdf0e10cSrcweir     sal_Int32 nEnde = endPos;
655cdf0e10cSrcweir     if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx)
656cdf0e10cSrcweir         return aRet;
657cdf0e10cSrcweir 
658cdf0e10cSrcweir     if (bUsePrimarySrchStr)
659cdf0e10cSrcweir       MakeBackwardTab();                      // create the jumptable
660cdf0e10cSrcweir     else
661cdf0e10cSrcweir       MakeBackwardTab2();
662cdf0e10cSrcweir 
663cdf0e10cSrcweir     if( nEnde == nSuchIdx )                 // end position for the search
664cdf0e10cSrcweir         nEnde = sSearchKey.getLength();
665cdf0e10cSrcweir     else
666cdf0e10cSrcweir         nEnde += sSearchKey.getLength();
667cdf0e10cSrcweir 
668cdf0e10cSrcweir     sal_Int32 nCmpIdx = startPos;          // start position for the search
669cdf0e10cSrcweir 
670cdf0e10cSrcweir     while (nCmpIdx >= nEnde)
671cdf0e10cSrcweir     {
672cdf0e10cSrcweir         // if the match would be the completed cells, skip it.
673cdf0e10cSrcweir         if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
674cdf0e10cSrcweir                         sSearchKey.getLength() )) && (!checkCTLEnd ||
675cdf0e10cSrcweir                     isCellStart( aStr, nCmpIdx)))
676cdf0e10cSrcweir         {
677cdf0e10cSrcweir             nSuchIdx = 0;
678cdf0e10cSrcweir             while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
679cdf0e10cSrcweir                     aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
680cdf0e10cSrcweir                 nSuchIdx++;
681cdf0e10cSrcweir             if( nSuchIdx >= sSearchKey.getLength() )
682cdf0e10cSrcweir             {
683cdf0e10cSrcweir                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
684cdf0e10cSrcweir                 {
685cdf0e10cSrcweir                     sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
686cdf0e10cSrcweir                     bool bAtStart = !nFndStt;
687cdf0e10cSrcweir                     bool bAtEnd = nCmpIdx == startPos;
688cdf0e10cSrcweir                     bool bDelimBehind = IsDelimiter( aStr, nCmpIdx );
689cdf0e10cSrcweir                     bool bDelimBefore = bAtStart || // begin of paragraph
690cdf0e10cSrcweir                         IsDelimiter( aStr, nFndStt-1 );
691cdf0e10cSrcweir                     //  *       1 -> only one word in the paragraph
692cdf0e10cSrcweir                     //  *       2 -> at begin of paragraph
693cdf0e10cSrcweir                     //  *       3 -> at end of paragraph
694cdf0e10cSrcweir                     //  *       4 -> inside the paragraph
695cdf0e10cSrcweir                     if( ( bAtStart && bAtEnd ) ||           // 1
696cdf0e10cSrcweir                             ( bAtStart && bDelimBehind ) ||     // 2
697cdf0e10cSrcweir                             ( bAtEnd && bDelimBefore ) ||       // 3
698cdf0e10cSrcweir                             ( bDelimBefore && bDelimBehind ))   // 4
699cdf0e10cSrcweir                     {
700cdf0e10cSrcweir                         aRet.subRegExpressions = 1;
701cdf0e10cSrcweir                         aRet.startOffset.realloc( 1 );
702cdf0e10cSrcweir                         aRet.startOffset[ 0 ] = nCmpIdx;
703cdf0e10cSrcweir                         aRet.endOffset.realloc( 1 );
704cdf0e10cSrcweir                         aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
705cdf0e10cSrcweir                         return aRet;
706cdf0e10cSrcweir                     }
707cdf0e10cSrcweir                 }
708cdf0e10cSrcweir                 else
709cdf0e10cSrcweir                 {
710cdf0e10cSrcweir                     aRet.subRegExpressions = 1;
711cdf0e10cSrcweir                     aRet.startOffset.realloc( 1 );
712cdf0e10cSrcweir                     aRet.startOffset[ 0 ] = nCmpIdx;
713cdf0e10cSrcweir                     aRet.endOffset.realloc( 1 );
714cdf0e10cSrcweir                     aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
715cdf0e10cSrcweir                     return aRet;
716cdf0e10cSrcweir                 }
717cdf0e10cSrcweir             }
718cdf0e10cSrcweir         }
719cdf0e10cSrcweir         nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
720cdf0e10cSrcweir         if( nCmpIdx < nSuchIdx )
721cdf0e10cSrcweir             return aRet;
722cdf0e10cSrcweir         nCmpIdx -= nSuchIdx;
723cdf0e10cSrcweir     }
724cdf0e10cSrcweir     return aRet;
725cdf0e10cSrcweir }
726cdf0e10cSrcweir 
727cdf0e10cSrcweir //---------------------------------------------------------------------------
728cdf0e10cSrcweir 
729cdf0e10cSrcweir SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
730cdf0e10cSrcweir                                       sal_Int32 startPos, sal_Int32 endPos )
731cdf0e10cSrcweir             throw(RuntimeException)
732cdf0e10cSrcweir {
733*cc450e3aSHerbert Dürr 	SearchResult aRet;
734*cc450e3aSHerbert Dürr 	aRet.subRegExpressions = 0;
735*cc450e3aSHerbert Dürr 	if( !pRegexMatcher)
736*cc450e3aSHerbert Dürr 		return aRet;
737*cc450e3aSHerbert Dürr 
738*cc450e3aSHerbert Dürr 	if( endPos > searchStr.getLength())
739*cc450e3aSHerbert Dürr 		endPos = searchStr.getLength();
740*cc450e3aSHerbert Dürr 
741*cc450e3aSHerbert Dürr 	// use the ICU RegexMatcher to find the matches
742*cc450e3aSHerbert Dürr 	UErrorCode nIcuErr = U_ZERO_ERROR;
743*cc450e3aSHerbert Dürr 	const IcuUniString aSearchTargetStr( searchStr.getStr(), endPos);
744*cc450e3aSHerbert Dürr 	pRegexMatcher->reset( aSearchTargetStr);
745*cc450e3aSHerbert Dürr 	if( !pRegexMatcher->find( startPos, nIcuErr))
746*cc450e3aSHerbert Dürr 		return aRet;
747*cc450e3aSHerbert Dürr 
748*cc450e3aSHerbert Dürr 	aRet.subRegExpressions = 1;
749*cc450e3aSHerbert Dürr 	aRet.startOffset.realloc( aRet.subRegExpressions);
750*cc450e3aSHerbert Dürr 	aRet.endOffset.realloc( aRet.subRegExpressions);
751*cc450e3aSHerbert Dürr 	aRet.startOffset[0] = pRegexMatcher->start( nIcuErr);
752*cc450e3aSHerbert Dürr 	aRet.endOffset[0]   = pRegexMatcher->end( nIcuErr);
753*cc450e3aSHerbert Dürr 
754*cc450e3aSHerbert Dürr 	return aRet;
755cdf0e10cSrcweir }
756cdf0e10cSrcweir 
757cdf0e10cSrcweir SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
758cdf0e10cSrcweir                                       sal_Int32 startPos, sal_Int32 endPos )
759cdf0e10cSrcweir             throw(RuntimeException)
760cdf0e10cSrcweir {
761*cc450e3aSHerbert Dürr 	// NOTE: for backwards search callers provide startPos/endPos inverted!
762*cc450e3aSHerbert Dürr 	SearchResult aRet;
763*cc450e3aSHerbert Dürr 	aRet.subRegExpressions = 0;
764*cc450e3aSHerbert Dürr 	if( !pRegexMatcher)
765*cc450e3aSHerbert Dürr 		return aRet;
766*cc450e3aSHerbert Dürr 
767*cc450e3aSHerbert Dürr 	if( startPos > searchStr.getLength())
768*cc450e3aSHerbert Dürr 		startPos = searchStr.getLength();
769*cc450e3aSHerbert Dürr 
770*cc450e3aSHerbert Dürr 	// use the ICU RegexMatcher to find the matches
771*cc450e3aSHerbert Dürr 	// TODO: use ICU's backward searching once it becomes available
772*cc450e3aSHerbert Dürr 	UErrorCode nIcuErr = U_ZERO_ERROR;
773*cc450e3aSHerbert Dürr 	const IcuUniString aSearchTargetStr( searchStr.getStr(), startPos);
774*cc450e3aSHerbert Dürr 	pRegexMatcher->reset( aSearchTargetStr);
775*cc450e3aSHerbert Dürr 	if( !pRegexMatcher->find( endPos, nIcuErr))
776*cc450e3aSHerbert Dürr 		return aRet;
777*cc450e3aSHerbert Dürr 
778*cc450e3aSHerbert Dürr 	aRet.subRegExpressions = 1;
779*cc450e3aSHerbert Dürr 	aRet.startOffset.realloc( aRet.subRegExpressions);
780*cc450e3aSHerbert Dürr 	aRet.endOffset.realloc( aRet.subRegExpressions);
781*cc450e3aSHerbert Dürr 
782*cc450e3aSHerbert Dürr 	do {
783*cc450e3aSHerbert Dürr 		// NOTE: backward search seems to be expected to have startOfs/endOfs inverted!
784*cc450e3aSHerbert Dürr 		aRet.startOffset[0] = pRegexMatcher->end( nIcuErr);
785*cc450e3aSHerbert Dürr 		aRet.endOffset[0]   = pRegexMatcher->start( nIcuErr);
786*cc450e3aSHerbert Dürr 	} while( pRegexMatcher->find( aRet.endOffset[0]+1, nIcuErr));
787*cc450e3aSHerbert Dürr 
788*cc450e3aSHerbert Dürr 	return aRet;
789cdf0e10cSrcweir }
790cdf0e10cSrcweir 
791*cc450e3aSHerbert Dürr //---------------------------------------------------------------------------
792*cc450e3aSHerbert Dürr 
793*cc450e3aSHerbert Dürr // search for words phonetically
794cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
795cdf0e10cSrcweir                                           sal_Int32 startPos, sal_Int32 endPos )
796cdf0e10cSrcweir             throw(RuntimeException)
797cdf0e10cSrcweir {
798cdf0e10cSrcweir     SearchResult aRet;
799cdf0e10cSrcweir     aRet.subRegExpressions = 0;
800cdf0e10cSrcweir 
801cdf0e10cSrcweir     if( !xBreak.is() )
802cdf0e10cSrcweir         return aRet;
803cdf0e10cSrcweir 
804cdf0e10cSrcweir     OUString aWTemp( searchStr );
805cdf0e10cSrcweir 
806cdf0e10cSrcweir     register sal_Int32 nStt, nEnd;
807cdf0e10cSrcweir 
808cdf0e10cSrcweir     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
809cdf0e10cSrcweir             aSrchPara.Locale,
810cdf0e10cSrcweir             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
811cdf0e10cSrcweir 
812cdf0e10cSrcweir     do
813cdf0e10cSrcweir     {
814cdf0e10cSrcweir         if( aWBnd.startPos >= endPos )
815cdf0e10cSrcweir             break;
816cdf0e10cSrcweir         nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
817cdf0e10cSrcweir         nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
818cdf0e10cSrcweir 
819cdf0e10cSrcweir         if( nStt < nEnd &&
820cdf0e10cSrcweir                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
821cdf0e10cSrcweir         {
822cdf0e10cSrcweir             aRet.subRegExpressions = 1;
823cdf0e10cSrcweir             aRet.startOffset.realloc( 1 );
824cdf0e10cSrcweir             aRet.startOffset[ 0 ] = nStt;
825cdf0e10cSrcweir             aRet.endOffset.realloc( 1 );
826cdf0e10cSrcweir             aRet.endOffset[ 0 ] = nEnd;
827cdf0e10cSrcweir             break;
828cdf0e10cSrcweir         }
829cdf0e10cSrcweir 
830cdf0e10cSrcweir         nStt = nEnd - 1;
831cdf0e10cSrcweir         aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
832cdf0e10cSrcweir                 WordType::ANYWORD_IGNOREWHITESPACES);
833cdf0e10cSrcweir     } while( aWBnd.startPos != aWBnd.endPos ||
834cdf0e10cSrcweir             (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
835cdf0e10cSrcweir     // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
836cdf0e10cSrcweir     // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
837cdf0e10cSrcweir     // and nextWord() does also => don't loop forever.
838cdf0e10cSrcweir     return aRet;
839cdf0e10cSrcweir }
840cdf0e10cSrcweir 
841cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
842cdf0e10cSrcweir                                           sal_Int32 startPos, sal_Int32 endPos )
843cdf0e10cSrcweir             throw(RuntimeException)
844cdf0e10cSrcweir {
845cdf0e10cSrcweir     SearchResult aRet;
846cdf0e10cSrcweir     aRet.subRegExpressions = 0;
847cdf0e10cSrcweir 
848cdf0e10cSrcweir     if( !xBreak.is() )
849cdf0e10cSrcweir         return aRet;
850cdf0e10cSrcweir 
851cdf0e10cSrcweir     OUString aWTemp( searchStr );
852cdf0e10cSrcweir 
853cdf0e10cSrcweir     register sal_Int32 nStt, nEnd;
854cdf0e10cSrcweir 
855cdf0e10cSrcweir     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
856cdf0e10cSrcweir             aSrchPara.Locale,
857cdf0e10cSrcweir             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
858cdf0e10cSrcweir 
859cdf0e10cSrcweir     do
860cdf0e10cSrcweir     {
861cdf0e10cSrcweir         if( aWBnd.endPos <= endPos )
862cdf0e10cSrcweir             break;
863cdf0e10cSrcweir         nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
864cdf0e10cSrcweir         nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
865cdf0e10cSrcweir 
866cdf0e10cSrcweir         if( nStt < nEnd &&
867cdf0e10cSrcweir                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
868cdf0e10cSrcweir         {
869cdf0e10cSrcweir             aRet.subRegExpressions = 1;
870cdf0e10cSrcweir             aRet.startOffset.realloc( 1 );
871cdf0e10cSrcweir             aRet.startOffset[ 0 ] = nEnd;
872cdf0e10cSrcweir             aRet.endOffset.realloc( 1 );
873cdf0e10cSrcweir             aRet.endOffset[ 0 ] = nStt;
874cdf0e10cSrcweir             break;
875cdf0e10cSrcweir         }
876cdf0e10cSrcweir         if( !nStt )
877cdf0e10cSrcweir             break;
878cdf0e10cSrcweir 
879cdf0e10cSrcweir         aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
880cdf0e10cSrcweir                 WordType::ANYWORD_IGNOREWHITESPACES);
881cdf0e10cSrcweir     } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
882cdf0e10cSrcweir     return aRet;
883cdf0e10cSrcweir }
884cdf0e10cSrcweir 
885cdf0e10cSrcweir 
886cdf0e10cSrcweir static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch";
887cdf0e10cSrcweir static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n";
888cdf0e10cSrcweir 
889cdf0e10cSrcweir static OUString getServiceName_Static()
890cdf0e10cSrcweir {
891cdf0e10cSrcweir     return OUString::createFromAscii( cSearchName );
892cdf0e10cSrcweir }
893cdf0e10cSrcweir 
894cdf0e10cSrcweir static OUString getImplementationName_Static()
895cdf0e10cSrcweir {
896cdf0e10cSrcweir     return OUString::createFromAscii( cSearchImpl );
897cdf0e10cSrcweir }
898cdf0e10cSrcweir 
899cdf0e10cSrcweir OUString SAL_CALL
900cdf0e10cSrcweir TextSearch::getImplementationName()
901cdf0e10cSrcweir                 throw( RuntimeException )
902cdf0e10cSrcweir {
903cdf0e10cSrcweir     return getImplementationName_Static();
904cdf0e10cSrcweir }
905cdf0e10cSrcweir 
906cdf0e10cSrcweir sal_Bool SAL_CALL
907cdf0e10cSrcweir TextSearch::supportsService(const OUString& rServiceName)
908cdf0e10cSrcweir                 throw( RuntimeException )
909cdf0e10cSrcweir {
910cdf0e10cSrcweir     return !rServiceName.compareToAscii( cSearchName );
911cdf0e10cSrcweir }
912cdf0e10cSrcweir 
913cdf0e10cSrcweir Sequence< OUString > SAL_CALL
914cdf0e10cSrcweir TextSearch::getSupportedServiceNames(void) throw( RuntimeException )
915cdf0e10cSrcweir {
916cdf0e10cSrcweir     Sequence< OUString > aRet(1);
917cdf0e10cSrcweir     aRet[0] = getServiceName_Static();
918cdf0e10cSrcweir     return aRet;
919cdf0e10cSrcweir }
920cdf0e10cSrcweir 
921cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
922cdf0e10cSrcweir SAL_CALL TextSearch_CreateInstance(
923cdf0e10cSrcweir         const ::com::sun::star::uno::Reference<
924cdf0e10cSrcweir         ::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
925cdf0e10cSrcweir {
926cdf0e10cSrcweir     return ::com::sun::star::uno::Reference<
927cdf0e10cSrcweir         ::com::sun::star::uno::XInterface >(
928cdf0e10cSrcweir                 (::cppu::OWeakObject*) new TextSearch( rxMSF ) );
929cdf0e10cSrcweir }
930cdf0e10cSrcweir 
931cdf0e10cSrcweir extern "C"
932cdf0e10cSrcweir {
933cdf0e10cSrcweir 
934cdf0e10cSrcweir void SAL_CALL component_getImplementationEnvironment(
935cdf0e10cSrcweir         const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ )
936cdf0e10cSrcweir {
937cdf0e10cSrcweir     *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
938cdf0e10cSrcweir }
939cdf0e10cSrcweir 
940cdf0e10cSrcweir void* SAL_CALL component_getFactory( const sal_Char* sImplementationName,
941cdf0e10cSrcweir         void* _pServiceManager, void* /*_pRegistryKey*/ )
942cdf0e10cSrcweir {
943cdf0e10cSrcweir     void* pRet = NULL;
944cdf0e10cSrcweir 
945cdf0e10cSrcweir     ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
946cdf0e10cSrcweir         reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* >
947cdf0e10cSrcweir             ( _pServiceManager );
948cdf0e10cSrcweir     ::com::sun::star::uno::Reference<
949cdf0e10cSrcweir             ::com::sun::star::lang::XSingleServiceFactory > xFactory;
950cdf0e10cSrcweir 
951cdf0e10cSrcweir     if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
952cdf0e10cSrcweir     {
953cdf0e10cSrcweir         ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1);
954cdf0e10cSrcweir         aServiceNames[0] = getServiceName_Static();
955cdf0e10cSrcweir         xFactory = ::cppu::createSingleFactory(
956cdf0e10cSrcweir                 pServiceManager, getImplementationName_Static(),
957cdf0e10cSrcweir                 &TextSearch_CreateInstance, aServiceNames );
958cdf0e10cSrcweir     }
959cdf0e10cSrcweir 
960cdf0e10cSrcweir     if ( xFactory.is() )
961cdf0e10cSrcweir     {
962cdf0e10cSrcweir         xFactory->acquire();
963cdf0e10cSrcweir         pRet = xFactory.get();
964cdf0e10cSrcweir     }
965cdf0e10cSrcweir 
966cdf0e10cSrcweir     return pRet;
967cdf0e10cSrcweir }
968cdf0e10cSrcweir 
969cdf0e10cSrcweir } // extern "C"
970