1449ab281SAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3449ab281SAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4449ab281SAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5449ab281SAndrew Rist  * distributed with this work for additional information
6449ab281SAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7449ab281SAndrew Rist  * to you under the Apache License, Version 2.0 (the
8449ab281SAndrew Rist  * "License"); you may not use this file except in compliance
9449ab281SAndrew Rist  * with the License.  You may obtain a copy of the License at
10449ab281SAndrew Rist  *
11449ab281SAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12449ab281SAndrew Rist  *
13449ab281SAndrew Rist  * Unless required by applicable law or agreed to in writing,
14449ab281SAndrew Rist  * software distributed under the License is distributed on an
15449ab281SAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16449ab281SAndrew Rist  * KIND, either express or implied.  See the License for the
17449ab281SAndrew Rist  * specific language governing permissions and limitations
18449ab281SAndrew Rist  * under the License.
19449ab281SAndrew Rist  *
20449ab281SAndrew Rist  *************************************************************/
21449ab281SAndrew Rist 
22449ab281SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include "textsearch.hxx"
28cdf0e10cSrcweir #include "levdis.hxx"
29cdf0e10cSrcweir #include <com/sun/star/lang/Locale.hpp>
30cdf0e10cSrcweir #include <com/sun/star/lang/XMultiServiceFactory.hpp>
31cdf0e10cSrcweir #include <comphelper/processfactory.hxx>
32cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp>
33cdf0e10cSrcweir #include <com/sun/star/util/SearchFlags.hpp>
34cdf0e10cSrcweir #include <com/sun/star/i18n/WordType.hpp>
35cdf0e10cSrcweir #include <com/sun/star/i18n/ScriptType.hpp>
36cdf0e10cSrcweir #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
37cdf0e10cSrcweir #include <com/sun/star/i18n/KCharacterType.hpp>
38cdf0e10cSrcweir #include <com/sun/star/registry/XRegistryKey.hpp>
39cdf0e10cSrcweir #include <cppuhelper/factory.hxx>
40cdf0e10cSrcweir #include <cppuhelper/weak.hxx>
41cdf0e10cSrcweir 
42cdf0e10cSrcweir #ifdef _MSC_VER
43cdf0e10cSrcweir // get rid of that dumb compiler warning
44cdf0e10cSrcweir // identifier was truncated to '255' characters in the debug information
45cdf0e10cSrcweir // for STL template usage, if .pdb files are to be created
46cdf0e10cSrcweir #pragma warning( disable: 4786 )
47cdf0e10cSrcweir #endif
48cdf0e10cSrcweir 
49cdf0e10cSrcweir #include <string.h>
50cdf0e10cSrcweir 
51cdf0e10cSrcweir using namespace ::com::sun::star::util;
52cdf0e10cSrcweir using namespace ::com::sun::star::uno;
53cdf0e10cSrcweir using namespace ::com::sun::star::lang;
54cdf0e10cSrcweir using namespace ::com::sun::star::i18n;
55cdf0e10cSrcweir using namespace ::rtl;
56cdf0e10cSrcweir 
57cdf0e10cSrcweir static sal_Int32 COMPLEX_TRANS_MASK_TMP =
58cdf0e10cSrcweir     TransliterationModules_ignoreBaFa_ja_JP |
59cdf0e10cSrcweir     TransliterationModules_ignoreIterationMark_ja_JP |
60cdf0e10cSrcweir     TransliterationModules_ignoreTiJi_ja_JP |
61cdf0e10cSrcweir     TransliterationModules_ignoreHyuByu_ja_JP |
62cdf0e10cSrcweir     TransliterationModules_ignoreSeZe_ja_JP |
63cdf0e10cSrcweir     TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
64cdf0e10cSrcweir     TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
65cdf0e10cSrcweir     TransliterationModules_ignoreProlongedSoundMark_ja_JP;
66cc450e3aSHerbert Dürr static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH;
67*e2630f2cSHerbert Dürr static const sal_Int32 SIMPLE_TRANS_MASK = ~COMPLEX_TRANS_MASK;
68*e2630f2cSHerbert Dürr static const sal_Int32 REGEX_TRANS_MASK = ~(COMPLEX_TRANS_MASK | TransliterationModules_IGNORE_CASE | TransliterationModules_UPPERCASE_LOWERCASE | TransliterationModules_LOWERCASE_UPPERCASE);
69cdf0e10cSrcweir     // Above 2 transliteration is simple but need to take effect in
70cdf0e10cSrcweir     // complex transliteration
71cdf0e10cSrcweir 
72cdf0e10cSrcweir TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF)
73cdf0e10cSrcweir         : xMSF( rxMSF )
74cdf0e10cSrcweir         , pJumpTable( 0 )
75cdf0e10cSrcweir         , pJumpTable2( 0 )
76cc450e3aSHerbert Dürr         , pRegexMatcher( NULL )
77cdf0e10cSrcweir         , pWLD( 0 )
78cdf0e10cSrcweir {
79cdf0e10cSrcweir     SearchOptions aOpt;
80cdf0e10cSrcweir     aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
81cdf0e10cSrcweir     aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
82cdf0e10cSrcweir     //aOpt.Locale = ???;
83cdf0e10cSrcweir     setOptions( aOpt );
84cdf0e10cSrcweir }
85cdf0e10cSrcweir 
86cdf0e10cSrcweir TextSearch::~TextSearch()
87cdf0e10cSrcweir {
88cc450e3aSHerbert Dürr     delete pRegexMatcher;
89cdf0e10cSrcweir     delete pWLD;
90cdf0e10cSrcweir     delete pJumpTable;
91cdf0e10cSrcweir     delete pJumpTable2;
92cdf0e10cSrcweir }
93cdf0e10cSrcweir 
94cdf0e10cSrcweir void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException )
95cdf0e10cSrcweir {
96cdf0e10cSrcweir     aSrchPara = rOptions;
97cdf0e10cSrcweir 
98cc450e3aSHerbert Dürr     delete pRegexMatcher, pRegexMatcher = NULL;
99cdf0e10cSrcweir     delete pWLD, pWLD = 0;
100cdf0e10cSrcweir     delete pJumpTable, pJumpTable = 0;
101cdf0e10cSrcweir     delete pJumpTable2, pJumpTable2 = 0;
102cdf0e10cSrcweir 
103cdf0e10cSrcweir     // Create Transliteration class
104cdf0e10cSrcweir     if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
105cdf0e10cSrcweir     {
106cdf0e10cSrcweir         if( !xTranslit.is() )
107cdf0e10cSrcweir         {
108cdf0e10cSrcweir             Reference < XInterface > xI = xMSF->createInstance(
109cdf0e10cSrcweir                     OUString::createFromAscii(
110cdf0e10cSrcweir                         "com.sun.star.i18n.Transliteration"));
111cdf0e10cSrcweir             if ( xI.is() )
112cdf0e10cSrcweir                 xI->queryInterface( ::getCppuType(
113cdf0e10cSrcweir                             (const Reference< XExtendedTransliteration >*)0))
114cdf0e10cSrcweir                     >>= xTranslit;
115cdf0e10cSrcweir         }
116cdf0e10cSrcweir         // Load transliteration module
117cdf0e10cSrcweir         if( xTranslit.is() )
118cdf0e10cSrcweir             xTranslit->loadModule(
119cdf0e10cSrcweir                     (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
120cdf0e10cSrcweir                     aSrchPara.Locale);
121cdf0e10cSrcweir     }
122cdf0e10cSrcweir     else if( xTranslit.is() )
123cdf0e10cSrcweir         xTranslit = 0;
124cdf0e10cSrcweir 
125cdf0e10cSrcweir     // Create Transliteration for 2<->1, 2<->2 transliteration
126cdf0e10cSrcweir     if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
127cdf0e10cSrcweir     {
128cdf0e10cSrcweir         if( !xTranslit2.is() )
129cdf0e10cSrcweir         {
130cdf0e10cSrcweir             Reference < XInterface > xI = xMSF->createInstance(
131cdf0e10cSrcweir                     OUString::createFromAscii(
132cdf0e10cSrcweir                         "com.sun.star.i18n.Transliteration"));
133cdf0e10cSrcweir             if ( xI.is() )
134cdf0e10cSrcweir                 xI->queryInterface( ::getCppuType(
135cdf0e10cSrcweir                             (const Reference< XExtendedTransliteration >*)0))
136cdf0e10cSrcweir                     >>= xTranslit2;
137cdf0e10cSrcweir         }
138cdf0e10cSrcweir         // Load transliteration module
139cdf0e10cSrcweir         if( xTranslit2.is() )
140cdf0e10cSrcweir             xTranslit2->loadModule(
141cdf0e10cSrcweir                     (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
142cdf0e10cSrcweir                     aSrchPara.Locale);
143cdf0e10cSrcweir     }
144cdf0e10cSrcweir 
145cdf0e10cSrcweir     if ( !xBreak.is() )
146cdf0e10cSrcweir     {
147cdf0e10cSrcweir         Reference < XInterface > xI = xMSF->createInstance(
148cdf0e10cSrcweir                 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator"));
149cdf0e10cSrcweir         if( xI.is() )
150cdf0e10cSrcweir             xI->queryInterface( ::getCppuType(
151cdf0e10cSrcweir                         (const Reference< XBreakIterator >*)0))
152cdf0e10cSrcweir                 >>= xBreak;
153cdf0e10cSrcweir     }
154cdf0e10cSrcweir 
155cdf0e10cSrcweir     sSrchStr = aSrchPara.searchString;
156cdf0e10cSrcweir 
157cc450e3aSHerbert Dürr     // use transliteration here
158cc450e3aSHerbert Dürr     if ( xTranslit.is() &&
159cdf0e10cSrcweir 	 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
160cdf0e10cSrcweir         sSrchStr = xTranslit->transliterateString2String(
161cdf0e10cSrcweir                 aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
162cdf0e10cSrcweir 
163cc450e3aSHerbert Dürr     if ( xTranslit2.is() &&
164cdf0e10cSrcweir 	 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
165cdf0e10cSrcweir 	sSrchStr2 = xTranslit2->transliterateString2String(
166cdf0e10cSrcweir 	        aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
167cdf0e10cSrcweir 
168cdf0e10cSrcweir     // When start or end of search string is a complex script type, we need to
169cdf0e10cSrcweir     // make sure the result boundary is not located in the middle of cell.
170cdf0e10cSrcweir     checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
171cdf0e10cSrcweir                 ScriptType::COMPLEX));
172cdf0e10cSrcweir     checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
173cdf0e10cSrcweir                     sSrchStr.getLength()-1) == ScriptType::COMPLEX));
174cdf0e10cSrcweir 
175cc450e3aSHerbert Dürr     switch( aSrchPara.algorithmType)
176cdf0e10cSrcweir     {
177cc450e3aSHerbert Dürr 		case SearchAlgorithms_REGEXP:
178cc450e3aSHerbert Dürr 			fnForward = &TextSearch::RESrchFrwrd;
179cc450e3aSHerbert Dürr 			fnBackward = &TextSearch::RESrchBkwrd;
1807f9f793fSHerbert Dürr 			RESrchPrepare( aSrchPara);
1817f9f793fSHerbert Dürr 			break;
182cc450e3aSHerbert Dürr 
183cc450e3aSHerbert Dürr 		case SearchAlgorithms_APPROXIMATE:
184cdf0e10cSrcweir             fnForward = &TextSearch::ApproxSrchFrwrd;
185cdf0e10cSrcweir             fnBackward = &TextSearch::ApproxSrchBkwrd;
186cdf0e10cSrcweir 
187cdf0e10cSrcweir             pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
188cdf0e10cSrcweir                     aSrchPara.insertedChars, aSrchPara.deletedChars,
189cdf0e10cSrcweir                     0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
190cdf0e10cSrcweir 
191cdf0e10cSrcweir             nLimit = pWLD->GetLimit();
192cc450e3aSHerbert Dürr 			break;
193cc450e3aSHerbert Dürr 
194cc450e3aSHerbert Dürr 		default:
195cdf0e10cSrcweir             fnForward = &TextSearch::NSrchFrwrd;
196cdf0e10cSrcweir             fnBackward = &TextSearch::NSrchBkwrd;
197cc450e3aSHerbert Dürr 			break;
198cdf0e10cSrcweir     }
199cdf0e10cSrcweir }
200cdf0e10cSrcweir 
201cdf0e10cSrcweir sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
202cdf0e10cSrcweir {
203cdf0e10cSrcweir     sal_Int32 nRet = 0, nEnd = rOff.getLength();
204cdf0e10cSrcweir     while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
205cdf0e10cSrcweir     return nRet;
206cdf0e10cSrcweir }
207cdf0e10cSrcweir 
208cdf0e10cSrcweir sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
209cdf0e10cSrcweir         throw( RuntimeException )
210cdf0e10cSrcweir {
211cdf0e10cSrcweir     sal_Int32 nDone;
212cdf0e10cSrcweir     return nPos == xBreak->previousCharacters(searchStr, nPos+1,
213cdf0e10cSrcweir             aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
214cdf0e10cSrcweir }
215cdf0e10cSrcweir 
216cdf0e10cSrcweir SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
217cdf0e10cSrcweir         throw( RuntimeException )
218cdf0e10cSrcweir {
219cdf0e10cSrcweir     SearchResult sres;
220cdf0e10cSrcweir 
221cdf0e10cSrcweir     OUString in_str(searchStr);
222cdf0e10cSrcweir     sal_Int32 newStartPos = startPos;
223cdf0e10cSrcweir     sal_Int32 newEndPos = endPos;
224cdf0e10cSrcweir 
225cdf0e10cSrcweir     bUsePrimarySrchStr = true;
226cdf0e10cSrcweir 
227cdf0e10cSrcweir     if ( xTranslit.is() )
228cdf0e10cSrcweir     {
229cdf0e10cSrcweir         // apply normal transliteration (1<->1, 1<->0)
230cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
231cdf0e10cSrcweir         in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
232cdf0e10cSrcweir 
233cdf0e10cSrcweir         // JP 20.6.2001: also the start and end positions must be corrected!
234cdf0e10cSrcweir         if( startPos )
235cdf0e10cSrcweir             newStartPos = FindPosInSeq_Impl( offset, startPos );
236cdf0e10cSrcweir 
237cdf0e10cSrcweir         if( endPos < searchStr.getLength() )
238cdf0e10cSrcweir 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
239cdf0e10cSrcweir         else
240cdf0e10cSrcweir             newEndPos = in_str.getLength();
241cdf0e10cSrcweir 
242cdf0e10cSrcweir         sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
243cdf0e10cSrcweir 
244cdf0e10cSrcweir         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
245cdf0e10cSrcweir         {
246cdf0e10cSrcweir             if (sres.startOffset[k])
247cdf0e10cSrcweir 	      sres.startOffset[k] = offset[sres.startOffset[k]];
248cdf0e10cSrcweir             // JP 20.6.2001: end is ever exclusive and then don't return
249cdf0e10cSrcweir             //               the position of the next character - return the
250cdf0e10cSrcweir             //               next position behind the last found character!
251cdf0e10cSrcweir             //               "a b c" find "b" must return 2,3 and not 2,4!!!
252cdf0e10cSrcweir             if (sres.endOffset[k])
253cdf0e10cSrcweir 	      sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1;
254cdf0e10cSrcweir         }
255cdf0e10cSrcweir     }
256cdf0e10cSrcweir     else
257cdf0e10cSrcweir     {
258cdf0e10cSrcweir         sres = (this->*fnForward)( in_str, startPos, endPos );
259cdf0e10cSrcweir     }
260cdf0e10cSrcweir 
261cdf0e10cSrcweir     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
262cdf0e10cSrcweir     {
263cdf0e10cSrcweir         SearchResult sres2;
264cdf0e10cSrcweir 
265cdf0e10cSrcweir 	in_str = OUString(searchStr);
266cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
267cdf0e10cSrcweir 
268cdf0e10cSrcweir         in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
269cdf0e10cSrcweir 
270cdf0e10cSrcweir         if( startPos )
271cdf0e10cSrcweir             startPos = FindPosInSeq_Impl( offset, startPos );
272cdf0e10cSrcweir 
273cdf0e10cSrcweir         if( endPos < searchStr.getLength() )
274cdf0e10cSrcweir             endPos = FindPosInSeq_Impl( offset, endPos );
275cdf0e10cSrcweir         else
276cdf0e10cSrcweir             endPos = in_str.getLength();
277cdf0e10cSrcweir 
278cdf0e10cSrcweir 	bUsePrimarySrchStr = false;
279cdf0e10cSrcweir         sres2 = (this->*fnForward)( in_str, startPos, endPos );
280cdf0e10cSrcweir 
281cdf0e10cSrcweir         for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
282cdf0e10cSrcweir         {
283cdf0e10cSrcweir             if (sres2.startOffset[k])
284cdf0e10cSrcweir 	      sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
285cdf0e10cSrcweir             if (sres2.endOffset[k])
286cdf0e10cSrcweir 	      sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
287cdf0e10cSrcweir         }
288cdf0e10cSrcweir 
289cdf0e10cSrcweir 	// pick first and long one
290cdf0e10cSrcweir 	if ( sres.subRegExpressions == 0)
291cdf0e10cSrcweir 	    return sres2;
292cdf0e10cSrcweir 	if ( sres2.subRegExpressions == 1)
293cdf0e10cSrcweir 	{
294cdf0e10cSrcweir 	    if ( sres.startOffset[0] > sres2.startOffset[0])
295cdf0e10cSrcweir 	        return sres2;
296cdf0e10cSrcweir 	    else if ( sres.startOffset[0] == sres2.startOffset[0] &&
297cdf0e10cSrcweir 	        sres.endOffset[0] < sres2.endOffset[0])
298cdf0e10cSrcweir 	        return sres2;
299cdf0e10cSrcweir 	}
300cdf0e10cSrcweir     }
301cdf0e10cSrcweir 
302cdf0e10cSrcweir     return sres;
303cdf0e10cSrcweir }
304cdf0e10cSrcweir 
305cdf0e10cSrcweir SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
306cdf0e10cSrcweir         throw(RuntimeException)
307cdf0e10cSrcweir {
308cdf0e10cSrcweir     SearchResult sres;
309cdf0e10cSrcweir 
310cdf0e10cSrcweir     OUString in_str(searchStr);
311cdf0e10cSrcweir     sal_Int32 newStartPos = startPos;
312cdf0e10cSrcweir     sal_Int32 newEndPos = endPos;
313cdf0e10cSrcweir 
314cdf0e10cSrcweir     bUsePrimarySrchStr = true;
315cdf0e10cSrcweir 
316cdf0e10cSrcweir     if ( xTranslit.is() )
317cdf0e10cSrcweir     {
318cdf0e10cSrcweir         // apply only simple 1<->1 transliteration here
319cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
320cdf0e10cSrcweir 	in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
321cdf0e10cSrcweir 
322cdf0e10cSrcweir         // JP 20.6.2001: also the start and end positions must be corrected!
323cdf0e10cSrcweir         if( startPos < searchStr.getLength() )
324cdf0e10cSrcweir             newStartPos = FindPosInSeq_Impl( offset, startPos );
325cdf0e10cSrcweir 	else
326cdf0e10cSrcweir 	    newStartPos = in_str.getLength();
327cdf0e10cSrcweir 
328cdf0e10cSrcweir         if( endPos )
329cdf0e10cSrcweir 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
330cdf0e10cSrcweir 
331cdf0e10cSrcweir         sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
332cdf0e10cSrcweir 
333cdf0e10cSrcweir         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
334cdf0e10cSrcweir         {
335cdf0e10cSrcweir             if (sres.startOffset[k])
336cdf0e10cSrcweir 	      sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1;
337cdf0e10cSrcweir             // JP 20.6.2001: end is ever exclusive and then don't return
338cdf0e10cSrcweir             //               the position of the next character - return the
339cdf0e10cSrcweir             //               next position behind the last found character!
340cdf0e10cSrcweir             //               "a b c" find "b" must return 2,3 and not 2,4!!!
341cdf0e10cSrcweir             if (sres.endOffset[k])
342cdf0e10cSrcweir 	      sres.endOffset[k] = offset[sres.endOffset[k]];
343cdf0e10cSrcweir         }
344cdf0e10cSrcweir     }
345cdf0e10cSrcweir     else
346cdf0e10cSrcweir     {
347cdf0e10cSrcweir         sres = (this->*fnBackward)( in_str, startPos, endPos );
348cdf0e10cSrcweir     }
349cdf0e10cSrcweir 
350cdf0e10cSrcweir     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
351cdf0e10cSrcweir     {
352cdf0e10cSrcweir 	SearchResult sres2;
353cdf0e10cSrcweir 
354cdf0e10cSrcweir 	in_str = OUString(searchStr);
355cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
356cdf0e10cSrcweir 
357cdf0e10cSrcweir         in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
358cdf0e10cSrcweir 
359cdf0e10cSrcweir         if( startPos < searchStr.getLength() )
360cdf0e10cSrcweir             startPos = FindPosInSeq_Impl( offset, startPos );
361cdf0e10cSrcweir         else
362cdf0e10cSrcweir             startPos = in_str.getLength();
363cdf0e10cSrcweir 
364cdf0e10cSrcweir         if( endPos )
365cdf0e10cSrcweir             endPos = FindPosInSeq_Impl( offset, endPos );
366cdf0e10cSrcweir 
367cdf0e10cSrcweir 	bUsePrimarySrchStr = false;
368cdf0e10cSrcweir 	sres2 = (this->*fnBackward)( in_str, startPos, endPos );
369cdf0e10cSrcweir 
370cdf0e10cSrcweir         for( int k = 0; k < sres2.startOffset.getLength(); k++ )
371cdf0e10cSrcweir         {
372cdf0e10cSrcweir             if (sres2.startOffset[k])
373cdf0e10cSrcweir                 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
374cdf0e10cSrcweir             if (sres2.endOffset[k])
375cdf0e10cSrcweir                 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
376cdf0e10cSrcweir         }
377cdf0e10cSrcweir 
378cdf0e10cSrcweir 	// pick last and long one
379cdf0e10cSrcweir 	if ( sres.subRegExpressions == 0 )
380cdf0e10cSrcweir 	    return sres2;
381cdf0e10cSrcweir 	if ( sres2.subRegExpressions == 1 )
382cdf0e10cSrcweir 	{
383cdf0e10cSrcweir 	    if ( sres.startOffset[0] < sres2.startOffset[0] )
384cdf0e10cSrcweir 	        return sres2;
385cdf0e10cSrcweir 	    if ( sres.startOffset[0] == sres2.startOffset[0] &&
386cdf0e10cSrcweir 		sres.endOffset[0] > sres2.endOffset[0] )
387cdf0e10cSrcweir 	        return sres2;
388cdf0e10cSrcweir 	}
389cdf0e10cSrcweir     }
390cdf0e10cSrcweir 
391cdf0e10cSrcweir     return sres;
392cdf0e10cSrcweir }
393cdf0e10cSrcweir 
394cc450e3aSHerbert Dürr //---------------------------------------------------------------------
395cdf0e10cSrcweir 
396cdf0e10cSrcweir bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
397cdf0e10cSrcweir {
398cdf0e10cSrcweir     bool bRet = 1;
399cdf0e10cSrcweir     if( '\x7f' != rStr[nPos])
400cdf0e10cSrcweir     {
401cdf0e10cSrcweir         if ( !xCharClass.is() )
402cdf0e10cSrcweir         {
403cdf0e10cSrcweir             Reference < XInterface > xI = xMSF->createInstance(
404cdf0e10cSrcweir                     OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification"));
405cdf0e10cSrcweir             if( xI.is() )
406cdf0e10cSrcweir                 xI->queryInterface( ::getCppuType(
407cdf0e10cSrcweir                             (const Reference< XCharacterClassification >*)0))
408cdf0e10cSrcweir                     >>= xCharClass;
409cdf0e10cSrcweir         }
410cdf0e10cSrcweir         if ( xCharClass.is() )
411cdf0e10cSrcweir         {
412cdf0e10cSrcweir             sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
413cdf0e10cSrcweir                     aSrchPara.Locale );
414cdf0e10cSrcweir             if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
415cdf0e10cSrcweir                             KCharacterType::LETTER ) & nCType ) )
416cdf0e10cSrcweir                 bRet = 0;
417cdf0e10cSrcweir         }
418cdf0e10cSrcweir     }
419cdf0e10cSrcweir     return bRet;
420cdf0e10cSrcweir }
421cdf0e10cSrcweir 
422cc450e3aSHerbert Dürr // --------- helper methods for Boyer-Moore like text searching ----------
423cc450e3aSHerbert Dürr // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available
424cdf0e10cSrcweir 
425cdf0e10cSrcweir void TextSearch::MakeForwardTab()
426cdf0e10cSrcweir {
427cdf0e10cSrcweir     // create the jumptable for the search text
428cdf0e10cSrcweir     if( pJumpTable )
429cdf0e10cSrcweir     {
430cdf0e10cSrcweir         if( bIsForwardTab )
431cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
432cdf0e10cSrcweir         delete pJumpTable;
433cdf0e10cSrcweir     }
434cdf0e10cSrcweir     bIsForwardTab = true;
435cdf0e10cSrcweir 
436cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr.getLength();
437cdf0e10cSrcweir     pJumpTable = new TextSearchJumpTable;
438cdf0e10cSrcweir 
439cdf0e10cSrcweir     for( n = 0; n < nLen - 1; ++n )
440cdf0e10cSrcweir     {
441cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr[n];
442cdf0e10cSrcweir         sal_Int32 nDiff = nLen - n - 1;
443cdf0e10cSrcweir 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
444cdf0e10cSrcweir 
445cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
446cdf0e10cSrcweir             pJumpTable->insert( aEntry );
447cdf0e10cSrcweir         if ( !aPair.second )
448cdf0e10cSrcweir             (*(aPair.first)).second = nDiff;
449cdf0e10cSrcweir     }
450cdf0e10cSrcweir }
451cdf0e10cSrcweir 
452cdf0e10cSrcweir void TextSearch::MakeForwardTab2()
453cdf0e10cSrcweir {
454cdf0e10cSrcweir     // create the jumptable for the search text
455cdf0e10cSrcweir     if( pJumpTable2 )
456cdf0e10cSrcweir     {
457cdf0e10cSrcweir         if( bIsForwardTab )
458cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
459cdf0e10cSrcweir         delete pJumpTable2;
460cdf0e10cSrcweir     }
461cdf0e10cSrcweir     bIsForwardTab = true;
462cdf0e10cSrcweir 
463cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr2.getLength();
464cdf0e10cSrcweir     pJumpTable2 = new TextSearchJumpTable;
465cdf0e10cSrcweir 
466cdf0e10cSrcweir     for( n = 0; n < nLen - 1; ++n )
467cdf0e10cSrcweir     {
468cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr2[n];
469cdf0e10cSrcweir         sal_Int32 nDiff = nLen - n - 1;
470cdf0e10cSrcweir 
471cdf0e10cSrcweir 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
472cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
473cdf0e10cSrcweir             pJumpTable2->insert( aEntry );
474cdf0e10cSrcweir         if ( !aPair.second )
475cdf0e10cSrcweir             (*(aPair.first)).second = nDiff;
476cdf0e10cSrcweir     }
477cdf0e10cSrcweir }
478cdf0e10cSrcweir 
479cdf0e10cSrcweir void TextSearch::MakeBackwardTab()
480cdf0e10cSrcweir {
481cdf0e10cSrcweir     // create the jumptable for the search text
482cdf0e10cSrcweir     if( pJumpTable )
483cdf0e10cSrcweir     {
484cdf0e10cSrcweir         if( !bIsForwardTab )
485cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
486cdf0e10cSrcweir         delete pJumpTable;
487cdf0e10cSrcweir     }
488cdf0e10cSrcweir     bIsForwardTab = false;
489cdf0e10cSrcweir 
490cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr.getLength();
491cdf0e10cSrcweir     pJumpTable = new TextSearchJumpTable;
492cdf0e10cSrcweir 
493cdf0e10cSrcweir     for( n = nLen-1; n > 0; --n )
494cdf0e10cSrcweir     {
495cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr[n];
496cdf0e10cSrcweir         TextSearchJumpTable::value_type aEntry( cCh, n );
497cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
498cdf0e10cSrcweir             pJumpTable->insert( aEntry );
499cdf0e10cSrcweir         if ( !aPair.second )
500cdf0e10cSrcweir             (*(aPair.first)).second = n;
501cdf0e10cSrcweir     }
502cdf0e10cSrcweir }
503cdf0e10cSrcweir 
504cdf0e10cSrcweir void TextSearch::MakeBackwardTab2()
505cdf0e10cSrcweir {
506cdf0e10cSrcweir     // create the jumptable for the search text
507cdf0e10cSrcweir     if( pJumpTable2 )
508cdf0e10cSrcweir     {
509cdf0e10cSrcweir         if( !bIsForwardTab )
510cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
511cdf0e10cSrcweir         delete pJumpTable2;
512cdf0e10cSrcweir     }
513cdf0e10cSrcweir     bIsForwardTab = false;
514cdf0e10cSrcweir 
515cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr2.getLength();
516cdf0e10cSrcweir     pJumpTable2 = new TextSearchJumpTable;
517cdf0e10cSrcweir 
518cdf0e10cSrcweir     for( n = nLen-1; n > 0; --n )
519cdf0e10cSrcweir     {
520cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr2[n];
521cdf0e10cSrcweir         TextSearchJumpTable::value_type aEntry( cCh, n );
522cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
523cdf0e10cSrcweir             pJumpTable2->insert( aEntry );
524cdf0e10cSrcweir         if ( !aPair.second )
525cdf0e10cSrcweir             (*(aPair.first)).second = n;
526cdf0e10cSrcweir     }
527cdf0e10cSrcweir }
528cdf0e10cSrcweir 
529cdf0e10cSrcweir sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
530cdf0e10cSrcweir {
531cdf0e10cSrcweir     TextSearchJumpTable *pJump;
532cdf0e10cSrcweir     OUString sSearchKey;
533cdf0e10cSrcweir 
534cdf0e10cSrcweir     if ( bUsePrimarySrchStr ) {
535cdf0e10cSrcweir       pJump = pJumpTable;
536cdf0e10cSrcweir       sSearchKey = sSrchStr;
537cdf0e10cSrcweir     } else {
538cdf0e10cSrcweir       pJump = pJumpTable2;
539cdf0e10cSrcweir       sSearchKey = sSrchStr2;
540cdf0e10cSrcweir     }
541cdf0e10cSrcweir 
542cdf0e10cSrcweir     TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
543cdf0e10cSrcweir     if ( iLook == pJump->end() )
544cdf0e10cSrcweir         return sSearchKey.getLength();
545cdf0e10cSrcweir     return (*iLook).second;
546cdf0e10cSrcweir }
547cdf0e10cSrcweir 
548cdf0e10cSrcweir 
549cdf0e10cSrcweir // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#)
550cdf0e10cSrcweir SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
551cdf0e10cSrcweir         throw(RuntimeException)
552cdf0e10cSrcweir {
553cdf0e10cSrcweir     SearchResult aRet;
554cdf0e10cSrcweir     aRet.subRegExpressions = 0;
555cdf0e10cSrcweir 
556cdf0e10cSrcweir     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
557cdf0e10cSrcweir 
558cdf0e10cSrcweir     OUString aStr( searchStr );
559cdf0e10cSrcweir     sal_Int32 nSuchIdx = aStr.getLength();
560cdf0e10cSrcweir     sal_Int32 nEnde = endPos;
561cdf0e10cSrcweir     if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
562cdf0e10cSrcweir         return aRet;
563cdf0e10cSrcweir 
564cdf0e10cSrcweir 
565cdf0e10cSrcweir     if( nEnde < sSearchKey.getLength() )  // position inside the search region ?
566cdf0e10cSrcweir         return aRet;
567cdf0e10cSrcweir 
568cdf0e10cSrcweir     nEnde -= sSearchKey.getLength();
569cdf0e10cSrcweir 
570cdf0e10cSrcweir     if (bUsePrimarySrchStr)
571cdf0e10cSrcweir       MakeForwardTab();                   // create the jumptable
572cdf0e10cSrcweir     else
573cdf0e10cSrcweir       MakeForwardTab2();
574cdf0e10cSrcweir 
575cdf0e10cSrcweir     for (sal_Int32 nCmpIdx = startPos; // start position for the search
576cdf0e10cSrcweir             nCmpIdx <= nEnde;
577cdf0e10cSrcweir             nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
578cdf0e10cSrcweir     {
579cdf0e10cSrcweir         // if the match would be the completed cells, skip it.
580cdf0e10cSrcweir         if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
581cdf0e10cSrcweir                     && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
582cdf0e10cSrcweir             continue;
583cdf0e10cSrcweir 
584cdf0e10cSrcweir         nSuchIdx = sSearchKey.getLength() - 1;
585cdf0e10cSrcweir         while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
586cdf0e10cSrcweir         {
587cdf0e10cSrcweir             if( nSuchIdx == 0 )
588cdf0e10cSrcweir             {
589cdf0e10cSrcweir                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
590cdf0e10cSrcweir                 {
591cdf0e10cSrcweir                     sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
592cdf0e10cSrcweir                     bool bAtStart = !nCmpIdx;
593cdf0e10cSrcweir                     bool bAtEnd = nFndEnd == endPos;
594cdf0e10cSrcweir                     bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
595cdf0e10cSrcweir                     bool bDelimBehind = IsDelimiter(  aStr, nFndEnd );
596cdf0e10cSrcweir                     //  *       1 -> only one word in the paragraph
597cdf0e10cSrcweir                     //  *       2 -> at begin of paragraph
598cdf0e10cSrcweir                     //  *       3 -> at end of paragraph
599cdf0e10cSrcweir                     //  *       4 -> inside the paragraph
600cdf0e10cSrcweir                     if( !(  ( bAtStart && bAtEnd ) ||           // 1
601cdf0e10cSrcweir                                 ( bAtStart && bDelimBehind ) ||     // 2
602cdf0e10cSrcweir                                 ( bAtEnd && bDelimBefore ) ||       // 3
603cdf0e10cSrcweir                                 ( bDelimBefore && bDelimBehind )))  // 4
604cdf0e10cSrcweir                         break;
605cdf0e10cSrcweir                 }
606cdf0e10cSrcweir 
607cdf0e10cSrcweir                 aRet.subRegExpressions = 1;
608cdf0e10cSrcweir                 aRet.startOffset.realloc( 1 );
609cdf0e10cSrcweir                 aRet.startOffset[ 0 ] = nCmpIdx;
610cdf0e10cSrcweir                 aRet.endOffset.realloc( 1 );
611cdf0e10cSrcweir                 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
612cdf0e10cSrcweir 
613cdf0e10cSrcweir                 return aRet;
614cdf0e10cSrcweir             }
615cdf0e10cSrcweir             else
616cdf0e10cSrcweir                 nSuchIdx--;
617cdf0e10cSrcweir         }
618cdf0e10cSrcweir     }
619cdf0e10cSrcweir     return aRet;
620cdf0e10cSrcweir }
621cdf0e10cSrcweir 
622cdf0e10cSrcweir SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
623cdf0e10cSrcweir         throw(RuntimeException)
624cdf0e10cSrcweir {
625cdf0e10cSrcweir     SearchResult aRet;
626cdf0e10cSrcweir     aRet.subRegExpressions = 0;
627cdf0e10cSrcweir 
628cdf0e10cSrcweir     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
629cdf0e10cSrcweir 
630cdf0e10cSrcweir     OUString aStr( searchStr );
631cdf0e10cSrcweir     sal_Int32 nSuchIdx = aStr.getLength();
632cdf0e10cSrcweir     sal_Int32 nEnde = endPos;
633cdf0e10cSrcweir     if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx)
634cdf0e10cSrcweir         return aRet;
635cdf0e10cSrcweir 
636cdf0e10cSrcweir     if (bUsePrimarySrchStr)
637cdf0e10cSrcweir       MakeBackwardTab();                      // create the jumptable
638cdf0e10cSrcweir     else
639cdf0e10cSrcweir       MakeBackwardTab2();
640cdf0e10cSrcweir 
641cdf0e10cSrcweir     if( nEnde == nSuchIdx )                 // end position for the search
642cdf0e10cSrcweir         nEnde = sSearchKey.getLength();
643cdf0e10cSrcweir     else
644cdf0e10cSrcweir         nEnde += sSearchKey.getLength();
645cdf0e10cSrcweir 
646cdf0e10cSrcweir     sal_Int32 nCmpIdx = startPos;          // start position for the search
647cdf0e10cSrcweir 
648cdf0e10cSrcweir     while (nCmpIdx >= nEnde)
649cdf0e10cSrcweir     {
650cdf0e10cSrcweir         // if the match would be the completed cells, skip it.
651cdf0e10cSrcweir         if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
652cdf0e10cSrcweir                         sSearchKey.getLength() )) && (!checkCTLEnd ||
653cdf0e10cSrcweir                     isCellStart( aStr, nCmpIdx)))
654cdf0e10cSrcweir         {
655cdf0e10cSrcweir             nSuchIdx = 0;
656cdf0e10cSrcweir             while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
657cdf0e10cSrcweir                     aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
658cdf0e10cSrcweir                 nSuchIdx++;
659cdf0e10cSrcweir             if( nSuchIdx >= sSearchKey.getLength() )
660cdf0e10cSrcweir             {
661cdf0e10cSrcweir                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
662cdf0e10cSrcweir                 {
663cdf0e10cSrcweir                     sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
664cdf0e10cSrcweir                     bool bAtStart = !nFndStt;
665cdf0e10cSrcweir                     bool bAtEnd = nCmpIdx == startPos;
666cdf0e10cSrcweir                     bool bDelimBehind = IsDelimiter( aStr, nCmpIdx );
667cdf0e10cSrcweir                     bool bDelimBefore = bAtStart || // begin of paragraph
668cdf0e10cSrcweir                         IsDelimiter( aStr, nFndStt-1 );
669cdf0e10cSrcweir                     //  *       1 -> only one word in the paragraph
670cdf0e10cSrcweir                     //  *       2 -> at begin of paragraph
671cdf0e10cSrcweir                     //  *       3 -> at end of paragraph
672cdf0e10cSrcweir                     //  *       4 -> inside the paragraph
673cdf0e10cSrcweir                     if( ( bAtStart && bAtEnd ) ||           // 1
674cdf0e10cSrcweir                             ( bAtStart && bDelimBehind ) ||     // 2
675cdf0e10cSrcweir                             ( bAtEnd && bDelimBefore ) ||       // 3
676cdf0e10cSrcweir                             ( bDelimBefore && bDelimBehind ))   // 4
677cdf0e10cSrcweir                     {
678cdf0e10cSrcweir                         aRet.subRegExpressions = 1;
679cdf0e10cSrcweir                         aRet.startOffset.realloc( 1 );
680cdf0e10cSrcweir                         aRet.startOffset[ 0 ] = nCmpIdx;
681cdf0e10cSrcweir                         aRet.endOffset.realloc( 1 );
682cdf0e10cSrcweir                         aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
683cdf0e10cSrcweir                         return aRet;
684cdf0e10cSrcweir                     }
685cdf0e10cSrcweir                 }
686cdf0e10cSrcweir                 else
687cdf0e10cSrcweir                 {
688cdf0e10cSrcweir                     aRet.subRegExpressions = 1;
689cdf0e10cSrcweir                     aRet.startOffset.realloc( 1 );
690cdf0e10cSrcweir                     aRet.startOffset[ 0 ] = nCmpIdx;
691cdf0e10cSrcweir                     aRet.endOffset.realloc( 1 );
692cdf0e10cSrcweir                     aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
693cdf0e10cSrcweir                     return aRet;
694cdf0e10cSrcweir                 }
695cdf0e10cSrcweir             }
696cdf0e10cSrcweir         }
697cdf0e10cSrcweir         nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
698cdf0e10cSrcweir         if( nCmpIdx < nSuchIdx )
699cdf0e10cSrcweir             return aRet;
700cdf0e10cSrcweir         nCmpIdx -= nSuchIdx;
701cdf0e10cSrcweir     }
702cdf0e10cSrcweir     return aRet;
703cdf0e10cSrcweir }
704cdf0e10cSrcweir 
7057f9f793fSHerbert Dürr void TextSearch::RESrchPrepare( const ::com::sun::star::util::SearchOptions& rOptions)
7067f9f793fSHerbert Dürr {
7077f9f793fSHerbert Dürr 	// select the transliterated pattern string
7087f9f793fSHerbert Dürr 	const OUString& rPatternStr =
709*e2630f2cSHerbert Dürr 		(rOptions.transliterateFlags & REGEX_TRANS_MASK) ? sSrchStr
7107f9f793fSHerbert Dürr 		: ((rOptions.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : rOptions.searchString);
7117f9f793fSHerbert Dürr 
7127c5e76a7SHerbert Dürr 	sal_uInt32 nIcuSearchFlags = UREGEX_UWORD; // request UAX#29 unicode capability
7137f9f793fSHerbert Dürr 	// map com::sun::star::util::SearchFlags to ICU uregex.h flags
7147f9f793fSHerbert Dürr 	// TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
7157f9f793fSHerbert Dürr 	// REG_NEWLINE is neither properly defined nor used anywhere => not implemented
7167f9f793fSHerbert Dürr 	// REG_NOSUB is not used anywhere => not implemented
7177f9f793fSHerbert Dürr 	// NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
7187f9f793fSHerbert Dürr 	// LEV_RELAXED is only used for SearchAlgorithm==Approximate
71922c9c6f7SHerbert Dürr 	// Note that the search flag ALL_IGNORE_CASE is deprecated in UNO
72022c9c6f7SHerbert Dürr 	// probably because the transliteration flag IGNORE_CASE handles it as well.
72122c9c6f7SHerbert Dürr 	if( (rOptions.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0
72222c9c6f7SHerbert Dürr 	||  (rOptions.transliterateFlags & TransliterationModules_IGNORE_CASE) != 0)
7237f9f793fSHerbert Dürr 		nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
7247f9f793fSHerbert Dürr 	UErrorCode nIcuErr = U_ZERO_ERROR;
7257f9f793fSHerbert Dürr 	// assumption: transliteration didn't mangle regexp control chars
72603c97e34SYuri Dario 	IcuUniString aIcuSearchPatStr( (const UChar*)rPatternStr.getStr(), rPatternStr.getLength());
727ee131020SHerbert Dürr #ifndef DISABLE_WORDBOUND_EMULATION
7287f9f793fSHerbert Dürr 	// for conveniance specific syntax elements of the old regex engine are emulated
7296a7366bcSHerbert Dürr 	// - by replacing \< with "word-break followed by a look-ahead word-char"
7306a7366bcSHerbert Dürr 	static const IcuUniString aChevronPatternB( "\\\\<", -1, IcuUniString::kInvariant);
7316a7366bcSHerbert Dürr 	static const IcuUniString aChevronReplaceB( "\\\\b(?=\\\\w)", -1, IcuUniString::kInvariant);
7326a7366bcSHerbert Dürr 	static RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr);
7336a7366bcSHerbert Dürr 	aChevronMatcherB.reset( aIcuSearchPatStr);
7346a7366bcSHerbert Dürr 	aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, nIcuErr);
7356a7366bcSHerbert Dürr 	aChevronMatcherB.reset();
7366a7366bcSHerbert Dürr 	// - by replacing \> with "look-behind word-char followed by a word-break"
7376a7366bcSHerbert Dürr 	static const IcuUniString aChevronPatternE( "\\\\>", -1, IcuUniString::kInvariant);
7386a7366bcSHerbert Dürr 	static const IcuUniString aChevronReplaceE( "(?<=\\\\w)\\\\b", -1, IcuUniString::kInvariant);
7396a7366bcSHerbert Dürr 	static RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr);
7406a7366bcSHerbert Dürr 	aChevronMatcherE.reset( aIcuSearchPatStr);
7416a7366bcSHerbert Dürr 	aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, nIcuErr);
7426a7366bcSHerbert Dürr 	aChevronMatcherE.reset();
7437f9f793fSHerbert Dürr #endif
7447f9f793fSHerbert Dürr 	pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
7457f9f793fSHerbert Dürr 	if( nIcuErr)
7467f9f793fSHerbert Dürr 		{ delete pRegexMatcher; pRegexMatcher = NULL;}
7477f9f793fSHerbert Dürr }
7487f9f793fSHerbert Dürr 
749cdf0e10cSrcweir //---------------------------------------------------------------------------
750cdf0e10cSrcweir 
751cdf0e10cSrcweir SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
752cdf0e10cSrcweir                                       sal_Int32 startPos, sal_Int32 endPos )
753cdf0e10cSrcweir             throw(RuntimeException)
754cdf0e10cSrcweir {
755cc450e3aSHerbert Dürr 	SearchResult aRet;
756cc450e3aSHerbert Dürr 	aRet.subRegExpressions = 0;
757cc450e3aSHerbert Dürr 	if( !pRegexMatcher)
758cc450e3aSHerbert Dürr 		return aRet;
75919ee98b9SHerbert Dürr 
760cc450e3aSHerbert Dürr 	if( endPos > searchStr.getLength())
761cc450e3aSHerbert Dürr 		endPos = searchStr.getLength();
762cc450e3aSHerbert Dürr 
763cc450e3aSHerbert Dürr 	// use the ICU RegexMatcher to find the matches
764cc450e3aSHerbert Dürr 	UErrorCode nIcuErr = U_ZERO_ERROR;
76519716b0aSHerbert Dürr 	const IcuUniString aSearchTargetStr( (const UChar*)searchStr.getStr(), endPos);
766cc450e3aSHerbert Dürr 	pRegexMatcher->reset( aSearchTargetStr);
76716b8677bSHerbert Dürr 	// search until there is a valid match
76816b8677bSHerbert Dürr 	for(;;)
76916b8677bSHerbert Dürr 	{
77016b8677bSHerbert Dürr 		if( !pRegexMatcher->find( startPos, nIcuErr))
77116b8677bSHerbert Dürr 			return aRet;
77216b8677bSHerbert Dürr 
77316b8677bSHerbert Dürr 		// #i118887# ignore zero-length matches e.g. "a*" in "bc"
77416b8677bSHerbert Dürr 		int nStartOfs = pRegexMatcher->start( nIcuErr);
77516b8677bSHerbert Dürr 		int nEndOfs = pRegexMatcher->end( nIcuErr);
77616b8677bSHerbert Dürr 		if( nStartOfs < nEndOfs)
77716b8677bSHerbert Dürr 			break;
77816b8677bSHerbert Dürr 		// try at next position if there was a zero-length match
77916b8677bSHerbert Dürr 		if( ++startPos >= endPos)
78016b8677bSHerbert Dürr 			return aRet;
78116b8677bSHerbert Dürr 	}
782cc450e3aSHerbert Dürr 
78316b8677bSHerbert Dürr 	// extract the result of the search
7840c7ce76dSHerbert Dürr 	const int nGroupCount = pRegexMatcher->groupCount();
7850c7ce76dSHerbert Dürr 	aRet.subRegExpressions = nGroupCount + 1;
786cc450e3aSHerbert Dürr 	aRet.startOffset.realloc( aRet.subRegExpressions);
787cc450e3aSHerbert Dürr 	aRet.endOffset.realloc( aRet.subRegExpressions);
788cc450e3aSHerbert Dürr 	aRet.startOffset[0] = pRegexMatcher->start( nIcuErr);
789cc450e3aSHerbert Dürr 	aRet.endOffset[0]   = pRegexMatcher->end( nIcuErr);
7900c7ce76dSHerbert Dürr 	for( int i = 1; i <= nGroupCount; ++i) {
7910c7ce76dSHerbert Dürr 		aRet.startOffset[i] = pRegexMatcher->start( i, nIcuErr);
7920c7ce76dSHerbert Dürr 		aRet.endOffset[i]   = pRegexMatcher->end( i, nIcuErr);
7930c7ce76dSHerbert Dürr 	}
794cc450e3aSHerbert Dürr 
795cc450e3aSHerbert Dürr 	return aRet;
796cdf0e10cSrcweir }
797cdf0e10cSrcweir 
798cdf0e10cSrcweir SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
799cdf0e10cSrcweir                                       sal_Int32 startPos, sal_Int32 endPos )
800cdf0e10cSrcweir             throw(RuntimeException)
801cdf0e10cSrcweir {
802cc450e3aSHerbert Dürr 	// NOTE: for backwards search callers provide startPos/endPos inverted!
803cc450e3aSHerbert Dürr 	SearchResult aRet;
804cc450e3aSHerbert Dürr 	aRet.subRegExpressions = 0;
805cc450e3aSHerbert Dürr 	if( !pRegexMatcher)
806cc450e3aSHerbert Dürr 		return aRet;
80719ee98b9SHerbert Dürr 
808cc450e3aSHerbert Dürr 	if( startPos > searchStr.getLength())
809cc450e3aSHerbert Dürr 		startPos = searchStr.getLength();
810cc450e3aSHerbert Dürr 
811cc450e3aSHerbert Dürr 	// use the ICU RegexMatcher to find the matches
812cc450e3aSHerbert Dürr 	// TODO: use ICU's backward searching once it becomes available
8130c7ce76dSHerbert Dürr 	//       as its replacement using forward search is not as good as the real thing
814cc450e3aSHerbert Dürr 	UErrorCode nIcuErr = U_ZERO_ERROR;
81503c97e34SYuri Dario 	const IcuUniString aSearchTargetStr( (const UChar*)searchStr.getStr(), startPos);
816cc450e3aSHerbert Dürr 	pRegexMatcher->reset( aSearchTargetStr);
817cc450e3aSHerbert Dürr 	if( !pRegexMatcher->find( endPos, nIcuErr))
818cc450e3aSHerbert Dürr 		return aRet;
819cc450e3aSHerbert Dürr 
8200c7ce76dSHerbert Dürr 	// find the last match
8210c7ce76dSHerbert Dürr 	int nLastPos = 0;
8222c1e93e7SHerbert Dürr 	int nFoundEnd = 0;
8230c7ce76dSHerbert Dürr 	do {
8240c7ce76dSHerbert Dürr 		nLastPos = pRegexMatcher->start( nIcuErr);
8252c1e93e7SHerbert Dürr 		nFoundEnd = pRegexMatcher->end( nIcuErr);
8262c1e93e7SHerbert Dürr 		if( nFoundEnd >= startPos)
8272c1e93e7SHerbert Dürr 			break;
8282c1e93e7SHerbert Dürr 		if( nFoundEnd == nLastPos)
8292c1e93e7SHerbert Dürr 			++nFoundEnd;
8302c1e93e7SHerbert Dürr 	} while( pRegexMatcher->find( nFoundEnd, nIcuErr));
8310c7ce76dSHerbert Dürr 
8320c7ce76dSHerbert Dürr 	// find last match again to get its details
8330c7ce76dSHerbert Dürr 	pRegexMatcher->find( nLastPos, nIcuErr);
8340c7ce76dSHerbert Dürr 
8350c7ce76dSHerbert Dürr 	// fill in the details of the last match
8360c7ce76dSHerbert Dürr 	const int nGroupCount = pRegexMatcher->groupCount();
8370c7ce76dSHerbert Dürr 	aRet.subRegExpressions = nGroupCount + 1;
838cc450e3aSHerbert Dürr 	aRet.startOffset.realloc( aRet.subRegExpressions);
839cc450e3aSHerbert Dürr 	aRet.endOffset.realloc( aRet.subRegExpressions);
8400c7ce76dSHerbert Dürr 	// NOTE: existing users of backward search seem to expect startOfs/endOfs being inverted!
8410c7ce76dSHerbert Dürr 	aRet.startOffset[0] = pRegexMatcher->end( nIcuErr);
8420c7ce76dSHerbert Dürr 	aRet.endOffset[0]   = pRegexMatcher->start( nIcuErr);
8430c7ce76dSHerbert Dürr 	for( int i = 1; i <= nGroupCount; ++i) {
8440c7ce76dSHerbert Dürr 		aRet.startOffset[i] = pRegexMatcher->end( i, nIcuErr);
8450c7ce76dSHerbert Dürr 		aRet.endOffset[i]   = pRegexMatcher->start( i, nIcuErr);
8460c7ce76dSHerbert Dürr 	}
847cc450e3aSHerbert Dürr 
848cc450e3aSHerbert Dürr 	return aRet;
849cdf0e10cSrcweir }
850cdf0e10cSrcweir 
851cc450e3aSHerbert Dürr //---------------------------------------------------------------------------
852cc450e3aSHerbert Dürr 
853cc450e3aSHerbert Dürr // search for words phonetically
854cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
855cdf0e10cSrcweir                                           sal_Int32 startPos, sal_Int32 endPos )
856cdf0e10cSrcweir             throw(RuntimeException)
857cdf0e10cSrcweir {
858cdf0e10cSrcweir     SearchResult aRet;
859cdf0e10cSrcweir     aRet.subRegExpressions = 0;
860cdf0e10cSrcweir 
861cdf0e10cSrcweir     if( !xBreak.is() )
862cdf0e10cSrcweir         return aRet;
863cdf0e10cSrcweir 
864cdf0e10cSrcweir     OUString aWTemp( searchStr );
865cdf0e10cSrcweir 
866cdf0e10cSrcweir     register sal_Int32 nStt, nEnd;
867cdf0e10cSrcweir 
868cdf0e10cSrcweir     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
869cdf0e10cSrcweir             aSrchPara.Locale,
870cdf0e10cSrcweir             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
871cdf0e10cSrcweir 
872cdf0e10cSrcweir     do
873cdf0e10cSrcweir     {
874cdf0e10cSrcweir         if( aWBnd.startPos >= endPos )
875cdf0e10cSrcweir             break;
876cdf0e10cSrcweir         nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
877cdf0e10cSrcweir         nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
878cdf0e10cSrcweir 
879cdf0e10cSrcweir         if( nStt < nEnd &&
880cdf0e10cSrcweir                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
881cdf0e10cSrcweir         {
882cdf0e10cSrcweir             aRet.subRegExpressions = 1;
883cdf0e10cSrcweir             aRet.startOffset.realloc( 1 );
884cdf0e10cSrcweir             aRet.startOffset[ 0 ] = nStt;
885cdf0e10cSrcweir             aRet.endOffset.realloc( 1 );
886cdf0e10cSrcweir             aRet.endOffset[ 0 ] = nEnd;
887cdf0e10cSrcweir             break;
888cdf0e10cSrcweir         }
889cdf0e10cSrcweir 
890cdf0e10cSrcweir         nStt = nEnd - 1;
891cdf0e10cSrcweir         aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
892cdf0e10cSrcweir                 WordType::ANYWORD_IGNOREWHITESPACES);
893cdf0e10cSrcweir     } while( aWBnd.startPos != aWBnd.endPos ||
894cdf0e10cSrcweir             (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
895cdf0e10cSrcweir     // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
896cdf0e10cSrcweir     // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
897cdf0e10cSrcweir     // and nextWord() does also => don't loop forever.
898cdf0e10cSrcweir     return aRet;
899cdf0e10cSrcweir }
900cdf0e10cSrcweir 
901cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
902cdf0e10cSrcweir                                           sal_Int32 startPos, sal_Int32 endPos )
903cdf0e10cSrcweir             throw(RuntimeException)
904cdf0e10cSrcweir {
905cdf0e10cSrcweir     SearchResult aRet;
906cdf0e10cSrcweir     aRet.subRegExpressions = 0;
907cdf0e10cSrcweir 
908cdf0e10cSrcweir     if( !xBreak.is() )
909cdf0e10cSrcweir         return aRet;
910cdf0e10cSrcweir 
911cdf0e10cSrcweir     OUString aWTemp( searchStr );
912cdf0e10cSrcweir 
913cdf0e10cSrcweir     register sal_Int32 nStt, nEnd;
914cdf0e10cSrcweir 
915cdf0e10cSrcweir     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
916cdf0e10cSrcweir             aSrchPara.Locale,
917cdf0e10cSrcweir             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
918cdf0e10cSrcweir 
919cdf0e10cSrcweir     do
920cdf0e10cSrcweir     {
921cdf0e10cSrcweir         if( aWBnd.endPos <= endPos )
922cdf0e10cSrcweir             break;
923cdf0e10cSrcweir         nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
924cdf0e10cSrcweir         nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
925cdf0e10cSrcweir 
926cdf0e10cSrcweir         if( nStt < nEnd &&
927cdf0e10cSrcweir                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
928cdf0e10cSrcweir         {
929cdf0e10cSrcweir             aRet.subRegExpressions = 1;
930cdf0e10cSrcweir             aRet.startOffset.realloc( 1 );
931cdf0e10cSrcweir             aRet.startOffset[ 0 ] = nEnd;
932cdf0e10cSrcweir             aRet.endOffset.realloc( 1 );
933cdf0e10cSrcweir             aRet.endOffset[ 0 ] = nStt;
934cdf0e10cSrcweir             break;
935cdf0e10cSrcweir         }
936cdf0e10cSrcweir         if( !nStt )
937cdf0e10cSrcweir             break;
938cdf0e10cSrcweir 
939cdf0e10cSrcweir         aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
940cdf0e10cSrcweir                 WordType::ANYWORD_IGNOREWHITESPACES);
941cdf0e10cSrcweir     } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
942cdf0e10cSrcweir     return aRet;
943cdf0e10cSrcweir }
944cdf0e10cSrcweir 
945cdf0e10cSrcweir 
946cdf0e10cSrcweir static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch";
947cdf0e10cSrcweir static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n";
948cdf0e10cSrcweir 
949cdf0e10cSrcweir static OUString getServiceName_Static()
950cdf0e10cSrcweir {
951cdf0e10cSrcweir     return OUString::createFromAscii( cSearchName );
952cdf0e10cSrcweir }
953cdf0e10cSrcweir 
954cdf0e10cSrcweir static OUString getImplementationName_Static()
955cdf0e10cSrcweir {
956cdf0e10cSrcweir     return OUString::createFromAscii( cSearchImpl );
957cdf0e10cSrcweir }
958cdf0e10cSrcweir 
959cdf0e10cSrcweir OUString SAL_CALL
960cdf0e10cSrcweir TextSearch::getImplementationName()
961cdf0e10cSrcweir                 throw( RuntimeException )
962cdf0e10cSrcweir {
963cdf0e10cSrcweir     return getImplementationName_Static();
964cdf0e10cSrcweir }
965cdf0e10cSrcweir 
966cdf0e10cSrcweir sal_Bool SAL_CALL
967cdf0e10cSrcweir TextSearch::supportsService(const OUString& rServiceName)
968cdf0e10cSrcweir                 throw( RuntimeException )
969cdf0e10cSrcweir {
970cdf0e10cSrcweir     return !rServiceName.compareToAscii( cSearchName );
971cdf0e10cSrcweir }
972cdf0e10cSrcweir 
973cdf0e10cSrcweir Sequence< OUString > SAL_CALL
974cdf0e10cSrcweir TextSearch::getSupportedServiceNames(void) throw( RuntimeException )
975cdf0e10cSrcweir {
976cdf0e10cSrcweir     Sequence< OUString > aRet(1);
977cdf0e10cSrcweir     aRet[0] = getServiceName_Static();
978cdf0e10cSrcweir     return aRet;
979cdf0e10cSrcweir }
980cdf0e10cSrcweir 
981cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
982cdf0e10cSrcweir SAL_CALL TextSearch_CreateInstance(
983cdf0e10cSrcweir         const ::com::sun::star::uno::Reference<
984cdf0e10cSrcweir         ::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
985cdf0e10cSrcweir {
986cdf0e10cSrcweir     return ::com::sun::star::uno::Reference<
987cdf0e10cSrcweir         ::com::sun::star::uno::XInterface >(
988cdf0e10cSrcweir                 (::cppu::OWeakObject*) new TextSearch( rxMSF ) );
989cdf0e10cSrcweir }
990cdf0e10cSrcweir 
991cdf0e10cSrcweir extern "C"
992cdf0e10cSrcweir {
993cdf0e10cSrcweir 
994cdf0e10cSrcweir void SAL_CALL component_getImplementationEnvironment(
995cdf0e10cSrcweir         const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ )
996cdf0e10cSrcweir {
997cdf0e10cSrcweir     *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
998cdf0e10cSrcweir }
999cdf0e10cSrcweir 
1000cdf0e10cSrcweir void* SAL_CALL component_getFactory( const sal_Char* sImplementationName,
1001cdf0e10cSrcweir         void* _pServiceManager, void* /*_pRegistryKey*/ )
1002cdf0e10cSrcweir {
1003cdf0e10cSrcweir     void* pRet = NULL;
1004cdf0e10cSrcweir 
1005cdf0e10cSrcweir     ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
1006cdf0e10cSrcweir         reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* >
1007cdf0e10cSrcweir             ( _pServiceManager );
1008cdf0e10cSrcweir     ::com::sun::star::uno::Reference<
1009cdf0e10cSrcweir             ::com::sun::star::lang::XSingleServiceFactory > xFactory;
1010cdf0e10cSrcweir 
1011cdf0e10cSrcweir     if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
1012cdf0e10cSrcweir     {
1013cdf0e10cSrcweir         ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1);
1014cdf0e10cSrcweir         aServiceNames[0] = getServiceName_Static();
1015cdf0e10cSrcweir         xFactory = ::cppu::createSingleFactory(
1016cdf0e10cSrcweir                 pServiceManager, getImplementationName_Static(),
1017cdf0e10cSrcweir                 &TextSearch_CreateInstance, aServiceNames );
1018cdf0e10cSrcweir     }
1019cdf0e10cSrcweir 
1020cdf0e10cSrcweir     if ( xFactory.is() )
1021cdf0e10cSrcweir     {
1022cdf0e10cSrcweir         xFactory->acquire();
1023cdf0e10cSrcweir         pRet = xFactory.get();
1024cdf0e10cSrcweir     }
1025cdf0e10cSrcweir 
1026cdf0e10cSrcweir     return pRet;
1027cdf0e10cSrcweir }
1028cdf0e10cSrcweir 
1029cdf0e10cSrcweir } // extern "C"
1030