1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
30*cdf0e10cSrcweir 
31*cdf0e10cSrcweir #include "textsearch.hxx"
32*cdf0e10cSrcweir #include "levdis.hxx"
33*cdf0e10cSrcweir #include <regexp/reclass.hxx>
34*cdf0e10cSrcweir #include <com/sun/star/lang/Locale.hpp>
35*cdf0e10cSrcweir #include <com/sun/star/lang/XMultiServiceFactory.hpp>
36*cdf0e10cSrcweir #include <comphelper/processfactory.hxx>
37*cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp>
38*cdf0e10cSrcweir #include <com/sun/star/util/SearchFlags.hpp>
39*cdf0e10cSrcweir #include <com/sun/star/i18n/WordType.hpp>
40*cdf0e10cSrcweir #include <com/sun/star/i18n/ScriptType.hpp>
41*cdf0e10cSrcweir #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
42*cdf0e10cSrcweir #include <com/sun/star/i18n/KCharacterType.hpp>
43*cdf0e10cSrcweir #include <com/sun/star/registry/XRegistryKey.hpp>
44*cdf0e10cSrcweir #include <cppuhelper/factory.hxx>
45*cdf0e10cSrcweir #include <cppuhelper/weak.hxx>
46*cdf0e10cSrcweir 
47*cdf0e10cSrcweir #ifdef _MSC_VER
48*cdf0e10cSrcweir // get rid of that dumb compiler warning
49*cdf0e10cSrcweir // identifier was truncated to '255' characters in the debug information
50*cdf0e10cSrcweir // for STL template usage, if .pdb files are to be created
51*cdf0e10cSrcweir #pragma warning( disable: 4786 )
52*cdf0e10cSrcweir #endif
53*cdf0e10cSrcweir 
54*cdf0e10cSrcweir #include <string.h>
55*cdf0e10cSrcweir 
56*cdf0e10cSrcweir using namespace ::com::sun::star::util;
57*cdf0e10cSrcweir using namespace ::com::sun::star::uno;
58*cdf0e10cSrcweir using namespace ::com::sun::star::lang;
59*cdf0e10cSrcweir using namespace ::com::sun::star::i18n;
60*cdf0e10cSrcweir using namespace ::rtl;
61*cdf0e10cSrcweir 
62*cdf0e10cSrcweir static sal_Int32 COMPLEX_TRANS_MASK_TMP =
63*cdf0e10cSrcweir     TransliterationModules_ignoreBaFa_ja_JP |
64*cdf0e10cSrcweir     TransliterationModules_ignoreIterationMark_ja_JP |
65*cdf0e10cSrcweir     TransliterationModules_ignoreTiJi_ja_JP |
66*cdf0e10cSrcweir     TransliterationModules_ignoreHyuByu_ja_JP |
67*cdf0e10cSrcweir     TransliterationModules_ignoreSeZe_ja_JP |
68*cdf0e10cSrcweir     TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
69*cdf0e10cSrcweir     TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
70*cdf0e10cSrcweir     TransliterationModules_ignoreProlongedSoundMark_ja_JP;
71*cdf0e10cSrcweir static const sal_Int32 SIMPLE_TRANS_MASK = 0xffffffff ^ COMPLEX_TRANS_MASK_TMP;
72*cdf0e10cSrcweir static const sal_Int32 COMPLEX_TRANS_MASK =
73*cdf0e10cSrcweir     COMPLEX_TRANS_MASK_TMP |
74*cdf0e10cSrcweir     TransliterationModules_IGNORE_KANA |
75*cdf0e10cSrcweir     TransliterationModules_IGNORE_WIDTH;
76*cdf0e10cSrcweir     // Above 2 transliteration is simple but need to take effect in
77*cdf0e10cSrcweir     // complex transliteration
78*cdf0e10cSrcweir 
79*cdf0e10cSrcweir TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF)
80*cdf0e10cSrcweir         : xMSF( rxMSF )
81*cdf0e10cSrcweir         , pJumpTable( 0 )
82*cdf0e10cSrcweir         , pJumpTable2( 0 )
83*cdf0e10cSrcweir         , pRegExp( 0 )
84*cdf0e10cSrcweir         , pWLD( 0 )
85*cdf0e10cSrcweir {
86*cdf0e10cSrcweir     SearchOptions aOpt;
87*cdf0e10cSrcweir     aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
88*cdf0e10cSrcweir     aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
89*cdf0e10cSrcweir     //aOpt.Locale = ???;
90*cdf0e10cSrcweir     setOptions( aOpt );
91*cdf0e10cSrcweir }
92*cdf0e10cSrcweir 
93*cdf0e10cSrcweir TextSearch::~TextSearch()
94*cdf0e10cSrcweir {
95*cdf0e10cSrcweir     delete pRegExp;
96*cdf0e10cSrcweir     delete pWLD;
97*cdf0e10cSrcweir     delete pJumpTable;
98*cdf0e10cSrcweir     delete pJumpTable2;
99*cdf0e10cSrcweir }
100*cdf0e10cSrcweir 
101*cdf0e10cSrcweir void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException )
102*cdf0e10cSrcweir {
103*cdf0e10cSrcweir     aSrchPara = rOptions;
104*cdf0e10cSrcweir 
105*cdf0e10cSrcweir     delete pRegExp, pRegExp = 0;
106*cdf0e10cSrcweir     delete pWLD, pWLD = 0;
107*cdf0e10cSrcweir     delete pJumpTable, pJumpTable = 0;
108*cdf0e10cSrcweir     delete pJumpTable2, pJumpTable2 = 0;
109*cdf0e10cSrcweir 
110*cdf0e10cSrcweir     // Create Transliteration class
111*cdf0e10cSrcweir     if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
112*cdf0e10cSrcweir     {
113*cdf0e10cSrcweir         if( !xTranslit.is() )
114*cdf0e10cSrcweir         {
115*cdf0e10cSrcweir             Reference < XInterface > xI = xMSF->createInstance(
116*cdf0e10cSrcweir                     OUString::createFromAscii(
117*cdf0e10cSrcweir                         "com.sun.star.i18n.Transliteration"));
118*cdf0e10cSrcweir             if ( xI.is() )
119*cdf0e10cSrcweir                 xI->queryInterface( ::getCppuType(
120*cdf0e10cSrcweir                             (const Reference< XExtendedTransliteration >*)0))
121*cdf0e10cSrcweir                     >>= xTranslit;
122*cdf0e10cSrcweir         }
123*cdf0e10cSrcweir         // Load transliteration module
124*cdf0e10cSrcweir         if( xTranslit.is() )
125*cdf0e10cSrcweir             xTranslit->loadModule(
126*cdf0e10cSrcweir                     (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
127*cdf0e10cSrcweir                     aSrchPara.Locale);
128*cdf0e10cSrcweir     }
129*cdf0e10cSrcweir     else if( xTranslit.is() )
130*cdf0e10cSrcweir         xTranslit = 0;
131*cdf0e10cSrcweir 
132*cdf0e10cSrcweir     // Create Transliteration for 2<->1, 2<->2 transliteration
133*cdf0e10cSrcweir     if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
134*cdf0e10cSrcweir     {
135*cdf0e10cSrcweir         if( !xTranslit2.is() )
136*cdf0e10cSrcweir         {
137*cdf0e10cSrcweir             Reference < XInterface > xI = xMSF->createInstance(
138*cdf0e10cSrcweir                     OUString::createFromAscii(
139*cdf0e10cSrcweir                         "com.sun.star.i18n.Transliteration"));
140*cdf0e10cSrcweir             if ( xI.is() )
141*cdf0e10cSrcweir                 xI->queryInterface( ::getCppuType(
142*cdf0e10cSrcweir                             (const Reference< XExtendedTransliteration >*)0))
143*cdf0e10cSrcweir                     >>= xTranslit2;
144*cdf0e10cSrcweir         }
145*cdf0e10cSrcweir         // Load transliteration module
146*cdf0e10cSrcweir         if( xTranslit2.is() )
147*cdf0e10cSrcweir             xTranslit2->loadModule(
148*cdf0e10cSrcweir                     (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
149*cdf0e10cSrcweir                     aSrchPara.Locale);
150*cdf0e10cSrcweir     }
151*cdf0e10cSrcweir 
152*cdf0e10cSrcweir     if ( !xBreak.is() )
153*cdf0e10cSrcweir     {
154*cdf0e10cSrcweir         Reference < XInterface > xI = xMSF->createInstance(
155*cdf0e10cSrcweir                 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator"));
156*cdf0e10cSrcweir         if( xI.is() )
157*cdf0e10cSrcweir             xI->queryInterface( ::getCppuType(
158*cdf0e10cSrcweir                         (const Reference< XBreakIterator >*)0))
159*cdf0e10cSrcweir                 >>= xBreak;
160*cdf0e10cSrcweir     }
161*cdf0e10cSrcweir 
162*cdf0e10cSrcweir     sSrchStr = aSrchPara.searchString;
163*cdf0e10cSrcweir 
164*cdf0e10cSrcweir     // use transliteration here, but only if not RegEx, which does it different
165*cdf0e10cSrcweir     if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit.is() &&
166*cdf0e10cSrcweir 	 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
167*cdf0e10cSrcweir         sSrchStr = xTranslit->transliterateString2String(
168*cdf0e10cSrcweir                 aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
169*cdf0e10cSrcweir 
170*cdf0e10cSrcweir     if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit2.is() &&
171*cdf0e10cSrcweir 	 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
172*cdf0e10cSrcweir 	sSrchStr2 = xTranslit2->transliterateString2String(
173*cdf0e10cSrcweir 	        aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
174*cdf0e10cSrcweir 
175*cdf0e10cSrcweir     // When start or end of search string is a complex script type, we need to
176*cdf0e10cSrcweir     // make sure the result boundary is not located in the middle of cell.
177*cdf0e10cSrcweir     checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
178*cdf0e10cSrcweir                 ScriptType::COMPLEX));
179*cdf0e10cSrcweir     checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
180*cdf0e10cSrcweir                     sSrchStr.getLength()-1) == ScriptType::COMPLEX));
181*cdf0e10cSrcweir 
182*cdf0e10cSrcweir     if ( aSrchPara.algorithmType == SearchAlgorithms_REGEXP )
183*cdf0e10cSrcweir     {
184*cdf0e10cSrcweir         fnForward = &TextSearch::RESrchFrwrd;
185*cdf0e10cSrcweir         fnBackward = &TextSearch::RESrchBkwrd;
186*cdf0e10cSrcweir 
187*cdf0e10cSrcweir         pRegExp = new Regexpr( aSrchPara, xTranslit );
188*cdf0e10cSrcweir     }
189*cdf0e10cSrcweir     else
190*cdf0e10cSrcweir     {
191*cdf0e10cSrcweir         if ( aSrchPara.algorithmType == SearchAlgorithms_APPROXIMATE )
192*cdf0e10cSrcweir         {
193*cdf0e10cSrcweir             fnForward = &TextSearch::ApproxSrchFrwrd;
194*cdf0e10cSrcweir             fnBackward = &TextSearch::ApproxSrchBkwrd;
195*cdf0e10cSrcweir 
196*cdf0e10cSrcweir             pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
197*cdf0e10cSrcweir                     aSrchPara.insertedChars, aSrchPara.deletedChars,
198*cdf0e10cSrcweir                     0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
199*cdf0e10cSrcweir 
200*cdf0e10cSrcweir             nLimit = pWLD->GetLimit();
201*cdf0e10cSrcweir         }
202*cdf0e10cSrcweir         else
203*cdf0e10cSrcweir         {
204*cdf0e10cSrcweir             fnForward = &TextSearch::NSrchFrwrd;
205*cdf0e10cSrcweir             fnBackward = &TextSearch::NSrchBkwrd;
206*cdf0e10cSrcweir         }
207*cdf0e10cSrcweir     }
208*cdf0e10cSrcweir }
209*cdf0e10cSrcweir 
210*cdf0e10cSrcweir sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
211*cdf0e10cSrcweir {
212*cdf0e10cSrcweir     sal_Int32 nRet = 0, nEnd = rOff.getLength();
213*cdf0e10cSrcweir     while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
214*cdf0e10cSrcweir     return nRet;
215*cdf0e10cSrcweir }
216*cdf0e10cSrcweir 
217*cdf0e10cSrcweir sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
218*cdf0e10cSrcweir         throw( RuntimeException )
219*cdf0e10cSrcweir {
220*cdf0e10cSrcweir     sal_Int32 nDone;
221*cdf0e10cSrcweir     return nPos == xBreak->previousCharacters(searchStr, nPos+1,
222*cdf0e10cSrcweir             aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
223*cdf0e10cSrcweir }
224*cdf0e10cSrcweir 
225*cdf0e10cSrcweir SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
226*cdf0e10cSrcweir         throw( RuntimeException )
227*cdf0e10cSrcweir {
228*cdf0e10cSrcweir     SearchResult sres;
229*cdf0e10cSrcweir 
230*cdf0e10cSrcweir     OUString in_str(searchStr);
231*cdf0e10cSrcweir     sal_Int32 newStartPos = startPos;
232*cdf0e10cSrcweir     sal_Int32 newEndPos = endPos;
233*cdf0e10cSrcweir 
234*cdf0e10cSrcweir     bUsePrimarySrchStr = true;
235*cdf0e10cSrcweir 
236*cdf0e10cSrcweir     if ( xTranslit.is() )
237*cdf0e10cSrcweir     {
238*cdf0e10cSrcweir         // apply normal transliteration (1<->1, 1<->0)
239*cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
240*cdf0e10cSrcweir         in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
241*cdf0e10cSrcweir 
242*cdf0e10cSrcweir         // JP 20.6.2001: also the start and end positions must be corrected!
243*cdf0e10cSrcweir         if( startPos )
244*cdf0e10cSrcweir             newStartPos = FindPosInSeq_Impl( offset, startPos );
245*cdf0e10cSrcweir 
246*cdf0e10cSrcweir         if( endPos < searchStr.getLength() )
247*cdf0e10cSrcweir 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
248*cdf0e10cSrcweir         else
249*cdf0e10cSrcweir             newEndPos = in_str.getLength();
250*cdf0e10cSrcweir 
251*cdf0e10cSrcweir         sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
252*cdf0e10cSrcweir 
253*cdf0e10cSrcweir         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
254*cdf0e10cSrcweir         {
255*cdf0e10cSrcweir             if (sres.startOffset[k])
256*cdf0e10cSrcweir 	      sres.startOffset[k] = offset[sres.startOffset[k]];
257*cdf0e10cSrcweir             // JP 20.6.2001: end is ever exclusive and then don't return
258*cdf0e10cSrcweir             //               the position of the next character - return the
259*cdf0e10cSrcweir             //               next position behind the last found character!
260*cdf0e10cSrcweir             //               "a b c" find "b" must return 2,3 and not 2,4!!!
261*cdf0e10cSrcweir             if (sres.endOffset[k])
262*cdf0e10cSrcweir 	      sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1;
263*cdf0e10cSrcweir         }
264*cdf0e10cSrcweir     }
265*cdf0e10cSrcweir     else
266*cdf0e10cSrcweir     {
267*cdf0e10cSrcweir         sres = (this->*fnForward)( in_str, startPos, endPos );
268*cdf0e10cSrcweir     }
269*cdf0e10cSrcweir 
270*cdf0e10cSrcweir     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
271*cdf0e10cSrcweir     {
272*cdf0e10cSrcweir         SearchResult sres2;
273*cdf0e10cSrcweir 
274*cdf0e10cSrcweir 	in_str = OUString(searchStr);
275*cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
276*cdf0e10cSrcweir 
277*cdf0e10cSrcweir         in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
278*cdf0e10cSrcweir 
279*cdf0e10cSrcweir         if( startPos )
280*cdf0e10cSrcweir             startPos = FindPosInSeq_Impl( offset, startPos );
281*cdf0e10cSrcweir 
282*cdf0e10cSrcweir         if( endPos < searchStr.getLength() )
283*cdf0e10cSrcweir             endPos = FindPosInSeq_Impl( offset, endPos );
284*cdf0e10cSrcweir         else
285*cdf0e10cSrcweir             endPos = in_str.getLength();
286*cdf0e10cSrcweir 
287*cdf0e10cSrcweir 	bUsePrimarySrchStr = false;
288*cdf0e10cSrcweir         sres2 = (this->*fnForward)( in_str, startPos, endPos );
289*cdf0e10cSrcweir 
290*cdf0e10cSrcweir         for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
291*cdf0e10cSrcweir         {
292*cdf0e10cSrcweir             if (sres2.startOffset[k])
293*cdf0e10cSrcweir 	      sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
294*cdf0e10cSrcweir             if (sres2.endOffset[k])
295*cdf0e10cSrcweir 	      sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
296*cdf0e10cSrcweir         }
297*cdf0e10cSrcweir 
298*cdf0e10cSrcweir 	// pick first and long one
299*cdf0e10cSrcweir 	if ( sres.subRegExpressions == 0)
300*cdf0e10cSrcweir 	    return sres2;
301*cdf0e10cSrcweir 	if ( sres2.subRegExpressions == 1)
302*cdf0e10cSrcweir 	{
303*cdf0e10cSrcweir 	    if ( sres.startOffset[0] > sres2.startOffset[0])
304*cdf0e10cSrcweir 	        return sres2;
305*cdf0e10cSrcweir 	    else if ( sres.startOffset[0] == sres2.startOffset[0] &&
306*cdf0e10cSrcweir 	        sres.endOffset[0] < sres2.endOffset[0])
307*cdf0e10cSrcweir 	        return sres2;
308*cdf0e10cSrcweir 	}
309*cdf0e10cSrcweir     }
310*cdf0e10cSrcweir 
311*cdf0e10cSrcweir     return sres;
312*cdf0e10cSrcweir }
313*cdf0e10cSrcweir 
314*cdf0e10cSrcweir SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
315*cdf0e10cSrcweir         throw(RuntimeException)
316*cdf0e10cSrcweir {
317*cdf0e10cSrcweir     SearchResult sres;
318*cdf0e10cSrcweir 
319*cdf0e10cSrcweir     OUString in_str(searchStr);
320*cdf0e10cSrcweir     sal_Int32 newStartPos = startPos;
321*cdf0e10cSrcweir     sal_Int32 newEndPos = endPos;
322*cdf0e10cSrcweir 
323*cdf0e10cSrcweir     bUsePrimarySrchStr = true;
324*cdf0e10cSrcweir 
325*cdf0e10cSrcweir     if ( xTranslit.is() )
326*cdf0e10cSrcweir     {
327*cdf0e10cSrcweir         // apply only simple 1<->1 transliteration here
328*cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
329*cdf0e10cSrcweir 	in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
330*cdf0e10cSrcweir 
331*cdf0e10cSrcweir         // JP 20.6.2001: also the start and end positions must be corrected!
332*cdf0e10cSrcweir         if( startPos < searchStr.getLength() )
333*cdf0e10cSrcweir             newStartPos = FindPosInSeq_Impl( offset, startPos );
334*cdf0e10cSrcweir 	else
335*cdf0e10cSrcweir 	    newStartPos = in_str.getLength();
336*cdf0e10cSrcweir 
337*cdf0e10cSrcweir         if( endPos )
338*cdf0e10cSrcweir 	    newEndPos = FindPosInSeq_Impl( offset, endPos );
339*cdf0e10cSrcweir 
340*cdf0e10cSrcweir         sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
341*cdf0e10cSrcweir 
342*cdf0e10cSrcweir         for ( int k = 0; k < sres.startOffset.getLength(); k++ )
343*cdf0e10cSrcweir         {
344*cdf0e10cSrcweir             if (sres.startOffset[k])
345*cdf0e10cSrcweir 	      sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1;
346*cdf0e10cSrcweir             // JP 20.6.2001: end is ever exclusive and then don't return
347*cdf0e10cSrcweir             //               the position of the next character - return the
348*cdf0e10cSrcweir             //               next position behind the last found character!
349*cdf0e10cSrcweir             //               "a b c" find "b" must return 2,3 and not 2,4!!!
350*cdf0e10cSrcweir             if (sres.endOffset[k])
351*cdf0e10cSrcweir 	      sres.endOffset[k] = offset[sres.endOffset[k]];
352*cdf0e10cSrcweir         }
353*cdf0e10cSrcweir     }
354*cdf0e10cSrcweir     else
355*cdf0e10cSrcweir     {
356*cdf0e10cSrcweir         sres = (this->*fnBackward)( in_str, startPos, endPos );
357*cdf0e10cSrcweir     }
358*cdf0e10cSrcweir 
359*cdf0e10cSrcweir     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
360*cdf0e10cSrcweir     {
361*cdf0e10cSrcweir 	SearchResult sres2;
362*cdf0e10cSrcweir 
363*cdf0e10cSrcweir 	in_str = OUString(searchStr);
364*cdf0e10cSrcweir         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
365*cdf0e10cSrcweir 
366*cdf0e10cSrcweir         in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
367*cdf0e10cSrcweir 
368*cdf0e10cSrcweir         if( startPos < searchStr.getLength() )
369*cdf0e10cSrcweir             startPos = FindPosInSeq_Impl( offset, startPos );
370*cdf0e10cSrcweir         else
371*cdf0e10cSrcweir             startPos = in_str.getLength();
372*cdf0e10cSrcweir 
373*cdf0e10cSrcweir         if( endPos )
374*cdf0e10cSrcweir             endPos = FindPosInSeq_Impl( offset, endPos );
375*cdf0e10cSrcweir 
376*cdf0e10cSrcweir 	bUsePrimarySrchStr = false;
377*cdf0e10cSrcweir 	sres2 = (this->*fnBackward)( in_str, startPos, endPos );
378*cdf0e10cSrcweir 
379*cdf0e10cSrcweir         for( int k = 0; k < sres2.startOffset.getLength(); k++ )
380*cdf0e10cSrcweir         {
381*cdf0e10cSrcweir             if (sres2.startOffset[k])
382*cdf0e10cSrcweir                 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
383*cdf0e10cSrcweir             if (sres2.endOffset[k])
384*cdf0e10cSrcweir                 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
385*cdf0e10cSrcweir         }
386*cdf0e10cSrcweir 
387*cdf0e10cSrcweir 	// pick last and long one
388*cdf0e10cSrcweir 	if ( sres.subRegExpressions == 0 )
389*cdf0e10cSrcweir 	    return sres2;
390*cdf0e10cSrcweir 	if ( sres2.subRegExpressions == 1 )
391*cdf0e10cSrcweir 	{
392*cdf0e10cSrcweir 	    if ( sres.startOffset[0] < sres2.startOffset[0] )
393*cdf0e10cSrcweir 	        return sres2;
394*cdf0e10cSrcweir 	    if ( sres.startOffset[0] == sres2.startOffset[0] &&
395*cdf0e10cSrcweir 		sres.endOffset[0] > sres2.endOffset[0] )
396*cdf0e10cSrcweir 	        return sres2;
397*cdf0e10cSrcweir 	}
398*cdf0e10cSrcweir     }
399*cdf0e10cSrcweir 
400*cdf0e10cSrcweir     return sres;
401*cdf0e10cSrcweir }
402*cdf0e10cSrcweir 
403*cdf0e10cSrcweir 
404*cdf0e10cSrcweir 
//--------------- word delimiters -------------------------------------
406*cdf0e10cSrcweir 
407*cdf0e10cSrcweir bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
408*cdf0e10cSrcweir {
409*cdf0e10cSrcweir     bool bRet = 1;
410*cdf0e10cSrcweir     if( '\x7f' != rStr[nPos])
411*cdf0e10cSrcweir     {
412*cdf0e10cSrcweir         if ( !xCharClass.is() )
413*cdf0e10cSrcweir         {
414*cdf0e10cSrcweir             Reference < XInterface > xI = xMSF->createInstance(
415*cdf0e10cSrcweir                     OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification"));
416*cdf0e10cSrcweir             if( xI.is() )
417*cdf0e10cSrcweir                 xI->queryInterface( ::getCppuType(
418*cdf0e10cSrcweir                             (const Reference< XCharacterClassification >*)0))
419*cdf0e10cSrcweir                     >>= xCharClass;
420*cdf0e10cSrcweir         }
421*cdf0e10cSrcweir         if ( xCharClass.is() )
422*cdf0e10cSrcweir         {
423*cdf0e10cSrcweir             sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
424*cdf0e10cSrcweir                     aSrchPara.Locale );
425*cdf0e10cSrcweir             if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
426*cdf0e10cSrcweir                             KCharacterType::LETTER ) & nCType ) )
427*cdf0e10cSrcweir                 bRet = 0;
428*cdf0e10cSrcweir         }
429*cdf0e10cSrcweir     }
430*cdf0e10cSrcweir     return bRet;
431*cdf0e10cSrcweir }
432*cdf0e10cSrcweir 
433*cdf0e10cSrcweir 
434*cdf0e10cSrcweir 
// --------- methods for the Boyer-Moore-like search ------------------
436*cdf0e10cSrcweir 
437*cdf0e10cSrcweir 
438*cdf0e10cSrcweir void TextSearch::MakeForwardTab()
439*cdf0e10cSrcweir {
440*cdf0e10cSrcweir     // create the jumptable for the search text
441*cdf0e10cSrcweir     if( pJumpTable )
442*cdf0e10cSrcweir     {
443*cdf0e10cSrcweir         if( bIsForwardTab )
444*cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
445*cdf0e10cSrcweir         delete pJumpTable;
446*cdf0e10cSrcweir     }
447*cdf0e10cSrcweir     bIsForwardTab = true;
448*cdf0e10cSrcweir 
449*cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr.getLength();
450*cdf0e10cSrcweir     pJumpTable = new TextSearchJumpTable;
451*cdf0e10cSrcweir 
452*cdf0e10cSrcweir     for( n = 0; n < nLen - 1; ++n )
453*cdf0e10cSrcweir     {
454*cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr[n];
455*cdf0e10cSrcweir         sal_Int32 nDiff = nLen - n - 1;
456*cdf0e10cSrcweir 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
457*cdf0e10cSrcweir 
458*cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
459*cdf0e10cSrcweir             pJumpTable->insert( aEntry );
460*cdf0e10cSrcweir         if ( !aPair.second )
461*cdf0e10cSrcweir             (*(aPair.first)).second = nDiff;
462*cdf0e10cSrcweir     }
463*cdf0e10cSrcweir }
464*cdf0e10cSrcweir 
465*cdf0e10cSrcweir void TextSearch::MakeForwardTab2()
466*cdf0e10cSrcweir {
467*cdf0e10cSrcweir     // create the jumptable for the search text
468*cdf0e10cSrcweir     if( pJumpTable2 )
469*cdf0e10cSrcweir     {
470*cdf0e10cSrcweir         if( bIsForwardTab )
471*cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
472*cdf0e10cSrcweir         delete pJumpTable2;
473*cdf0e10cSrcweir     }
474*cdf0e10cSrcweir     bIsForwardTab = true;
475*cdf0e10cSrcweir 
476*cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr2.getLength();
477*cdf0e10cSrcweir     pJumpTable2 = new TextSearchJumpTable;
478*cdf0e10cSrcweir 
479*cdf0e10cSrcweir     for( n = 0; n < nLen - 1; ++n )
480*cdf0e10cSrcweir     {
481*cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr2[n];
482*cdf0e10cSrcweir         sal_Int32 nDiff = nLen - n - 1;
483*cdf0e10cSrcweir 
484*cdf0e10cSrcweir 	TextSearchJumpTable::value_type aEntry( cCh, nDiff );
485*cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
486*cdf0e10cSrcweir             pJumpTable2->insert( aEntry );
487*cdf0e10cSrcweir         if ( !aPair.second )
488*cdf0e10cSrcweir             (*(aPair.first)).second = nDiff;
489*cdf0e10cSrcweir     }
490*cdf0e10cSrcweir }
491*cdf0e10cSrcweir 
492*cdf0e10cSrcweir void TextSearch::MakeBackwardTab()
493*cdf0e10cSrcweir {
494*cdf0e10cSrcweir     // create the jumptable for the search text
495*cdf0e10cSrcweir     if( pJumpTable )
496*cdf0e10cSrcweir     {
497*cdf0e10cSrcweir         if( !bIsForwardTab )
498*cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
499*cdf0e10cSrcweir         delete pJumpTable;
500*cdf0e10cSrcweir     }
501*cdf0e10cSrcweir     bIsForwardTab = false;
502*cdf0e10cSrcweir 
503*cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr.getLength();
504*cdf0e10cSrcweir     pJumpTable = new TextSearchJumpTable;
505*cdf0e10cSrcweir 
506*cdf0e10cSrcweir     for( n = nLen-1; n > 0; --n )
507*cdf0e10cSrcweir     {
508*cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr[n];
509*cdf0e10cSrcweir         TextSearchJumpTable::value_type aEntry( cCh, n );
510*cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
511*cdf0e10cSrcweir             pJumpTable->insert( aEntry );
512*cdf0e10cSrcweir         if ( !aPair.second )
513*cdf0e10cSrcweir             (*(aPair.first)).second = n;
514*cdf0e10cSrcweir     }
515*cdf0e10cSrcweir }
516*cdf0e10cSrcweir 
517*cdf0e10cSrcweir void TextSearch::MakeBackwardTab2()
518*cdf0e10cSrcweir {
519*cdf0e10cSrcweir     // create the jumptable for the search text
520*cdf0e10cSrcweir     if( pJumpTable2 )
521*cdf0e10cSrcweir     {
522*cdf0e10cSrcweir         if( !bIsForwardTab )
523*cdf0e10cSrcweir             return ;                                        // the jumpTable is ok
524*cdf0e10cSrcweir         delete pJumpTable2;
525*cdf0e10cSrcweir     }
526*cdf0e10cSrcweir     bIsForwardTab = false;
527*cdf0e10cSrcweir 
528*cdf0e10cSrcweir     sal_Int32 n, nLen = sSrchStr2.getLength();
529*cdf0e10cSrcweir     pJumpTable2 = new TextSearchJumpTable;
530*cdf0e10cSrcweir 
531*cdf0e10cSrcweir     for( n = nLen-1; n > 0; --n )
532*cdf0e10cSrcweir     {
533*cdf0e10cSrcweir         sal_Unicode cCh = sSrchStr2[n];
534*cdf0e10cSrcweir         TextSearchJumpTable::value_type aEntry( cCh, n );
535*cdf0e10cSrcweir         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
536*cdf0e10cSrcweir             pJumpTable2->insert( aEntry );
537*cdf0e10cSrcweir         if ( !aPair.second )
538*cdf0e10cSrcweir             (*(aPair.first)).second = n;
539*cdf0e10cSrcweir     }
540*cdf0e10cSrcweir }
541*cdf0e10cSrcweir 
542*cdf0e10cSrcweir sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
543*cdf0e10cSrcweir {
544*cdf0e10cSrcweir     TextSearchJumpTable *pJump;
545*cdf0e10cSrcweir     OUString sSearchKey;
546*cdf0e10cSrcweir 
547*cdf0e10cSrcweir     if ( bUsePrimarySrchStr ) {
548*cdf0e10cSrcweir       pJump = pJumpTable;
549*cdf0e10cSrcweir       sSearchKey = sSrchStr;
550*cdf0e10cSrcweir     } else {
551*cdf0e10cSrcweir       pJump = pJumpTable2;
552*cdf0e10cSrcweir       sSearchKey = sSrchStr2;
553*cdf0e10cSrcweir     }
554*cdf0e10cSrcweir 
555*cdf0e10cSrcweir     TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
556*cdf0e10cSrcweir     if ( iLook == pJump->end() )
557*cdf0e10cSrcweir         return sSearchKey.getLength();
558*cdf0e10cSrcweir     return (*iLook).second;
559*cdf0e10cSrcweir }
560*cdf0e10cSrcweir 
561*cdf0e10cSrcweir 
562*cdf0e10cSrcweir // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#)
563*cdf0e10cSrcweir SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
564*cdf0e10cSrcweir         throw(RuntimeException)
565*cdf0e10cSrcweir {
566*cdf0e10cSrcweir     SearchResult aRet;
567*cdf0e10cSrcweir     aRet.subRegExpressions = 0;
568*cdf0e10cSrcweir 
569*cdf0e10cSrcweir     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
570*cdf0e10cSrcweir 
571*cdf0e10cSrcweir     OUString aStr( searchStr );
572*cdf0e10cSrcweir     sal_Int32 nSuchIdx = aStr.getLength();
573*cdf0e10cSrcweir     sal_Int32 nEnde = endPos;
574*cdf0e10cSrcweir     if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
575*cdf0e10cSrcweir         return aRet;
576*cdf0e10cSrcweir 
577*cdf0e10cSrcweir 
578*cdf0e10cSrcweir     if( nEnde < sSearchKey.getLength() )  // position inside the search region ?
579*cdf0e10cSrcweir         return aRet;
580*cdf0e10cSrcweir 
581*cdf0e10cSrcweir     nEnde -= sSearchKey.getLength();
582*cdf0e10cSrcweir 
583*cdf0e10cSrcweir     if (bUsePrimarySrchStr)
584*cdf0e10cSrcweir       MakeForwardTab();                   // create the jumptable
585*cdf0e10cSrcweir     else
586*cdf0e10cSrcweir       MakeForwardTab2();
587*cdf0e10cSrcweir 
588*cdf0e10cSrcweir     for (sal_Int32 nCmpIdx = startPos; // start position for the search
589*cdf0e10cSrcweir             nCmpIdx <= nEnde;
590*cdf0e10cSrcweir             nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
591*cdf0e10cSrcweir     {
592*cdf0e10cSrcweir         // if the match would be the completed cells, skip it.
593*cdf0e10cSrcweir         if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
594*cdf0e10cSrcweir                     && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
595*cdf0e10cSrcweir             continue;
596*cdf0e10cSrcweir 
597*cdf0e10cSrcweir         nSuchIdx = sSearchKey.getLength() - 1;
598*cdf0e10cSrcweir         while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
599*cdf0e10cSrcweir         {
600*cdf0e10cSrcweir             if( nSuchIdx == 0 )
601*cdf0e10cSrcweir             {
602*cdf0e10cSrcweir                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
603*cdf0e10cSrcweir                 {
604*cdf0e10cSrcweir                     sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
605*cdf0e10cSrcweir                     bool bAtStart = !nCmpIdx;
606*cdf0e10cSrcweir                     bool bAtEnd = nFndEnd == endPos;
607*cdf0e10cSrcweir                     bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
608*cdf0e10cSrcweir                     bool bDelimBehind = IsDelimiter(  aStr, nFndEnd );
609*cdf0e10cSrcweir                     //  *       1 -> only one word in the paragraph
610*cdf0e10cSrcweir                     //  *       2 -> at begin of paragraph
611*cdf0e10cSrcweir                     //  *       3 -> at end of paragraph
612*cdf0e10cSrcweir                     //  *       4 -> inside the paragraph
613*cdf0e10cSrcweir                     if( !(  ( bAtStart && bAtEnd ) ||           // 1
614*cdf0e10cSrcweir                                 ( bAtStart && bDelimBehind ) ||     // 2
615*cdf0e10cSrcweir                                 ( bAtEnd && bDelimBefore ) ||       // 3
616*cdf0e10cSrcweir                                 ( bDelimBefore && bDelimBehind )))  // 4
617*cdf0e10cSrcweir                         break;
618*cdf0e10cSrcweir                 }
619*cdf0e10cSrcweir 
620*cdf0e10cSrcweir                 aRet.subRegExpressions = 1;
621*cdf0e10cSrcweir                 aRet.startOffset.realloc( 1 );
622*cdf0e10cSrcweir                 aRet.startOffset[ 0 ] = nCmpIdx;
623*cdf0e10cSrcweir                 aRet.endOffset.realloc( 1 );
624*cdf0e10cSrcweir                 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
625*cdf0e10cSrcweir 
626*cdf0e10cSrcweir                 return aRet;
627*cdf0e10cSrcweir             }
628*cdf0e10cSrcweir             else
629*cdf0e10cSrcweir                 nSuchIdx--;
630*cdf0e10cSrcweir         }
631*cdf0e10cSrcweir     }
632*cdf0e10cSrcweir     return aRet;
633*cdf0e10cSrcweir }
634*cdf0e10cSrcweir 
635*cdf0e10cSrcweir SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
636*cdf0e10cSrcweir         throw(RuntimeException)
637*cdf0e10cSrcweir {
638*cdf0e10cSrcweir     SearchResult aRet;
639*cdf0e10cSrcweir     aRet.subRegExpressions = 0;
640*cdf0e10cSrcweir 
641*cdf0e10cSrcweir     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
642*cdf0e10cSrcweir 
643*cdf0e10cSrcweir     OUString aStr( searchStr );
644*cdf0e10cSrcweir     sal_Int32 nSuchIdx = aStr.getLength();
645*cdf0e10cSrcweir     sal_Int32 nEnde = endPos;
646*cdf0e10cSrcweir     if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx)
647*cdf0e10cSrcweir         return aRet;
648*cdf0e10cSrcweir 
649*cdf0e10cSrcweir     if (bUsePrimarySrchStr)
650*cdf0e10cSrcweir       MakeBackwardTab();                      // create the jumptable
651*cdf0e10cSrcweir     else
652*cdf0e10cSrcweir       MakeBackwardTab2();
653*cdf0e10cSrcweir 
654*cdf0e10cSrcweir     if( nEnde == nSuchIdx )                 // end position for the search
655*cdf0e10cSrcweir         nEnde = sSearchKey.getLength();
656*cdf0e10cSrcweir     else
657*cdf0e10cSrcweir         nEnde += sSearchKey.getLength();
658*cdf0e10cSrcweir 
659*cdf0e10cSrcweir     sal_Int32 nCmpIdx = startPos;          // start position for the search
660*cdf0e10cSrcweir 
661*cdf0e10cSrcweir     while (nCmpIdx >= nEnde)
662*cdf0e10cSrcweir     {
663*cdf0e10cSrcweir         // if the match would be the completed cells, skip it.
664*cdf0e10cSrcweir         if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
665*cdf0e10cSrcweir                         sSearchKey.getLength() )) && (!checkCTLEnd ||
666*cdf0e10cSrcweir                     isCellStart( aStr, nCmpIdx)))
667*cdf0e10cSrcweir         {
668*cdf0e10cSrcweir             nSuchIdx = 0;
669*cdf0e10cSrcweir             while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
670*cdf0e10cSrcweir                     aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
671*cdf0e10cSrcweir                 nSuchIdx++;
672*cdf0e10cSrcweir             if( nSuchIdx >= sSearchKey.getLength() )
673*cdf0e10cSrcweir             {
674*cdf0e10cSrcweir                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
675*cdf0e10cSrcweir                 {
676*cdf0e10cSrcweir                     sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
677*cdf0e10cSrcweir                     bool bAtStart = !nFndStt;
678*cdf0e10cSrcweir                     bool bAtEnd = nCmpIdx == startPos;
679*cdf0e10cSrcweir                     bool bDelimBehind = IsDelimiter( aStr, nCmpIdx );
680*cdf0e10cSrcweir                     bool bDelimBefore = bAtStart || // begin of paragraph
681*cdf0e10cSrcweir                         IsDelimiter( aStr, nFndStt-1 );
682*cdf0e10cSrcweir                     //  *       1 -> only one word in the paragraph
683*cdf0e10cSrcweir                     //  *       2 -> at begin of paragraph
684*cdf0e10cSrcweir                     //  *       3 -> at end of paragraph
685*cdf0e10cSrcweir                     //  *       4 -> inside the paragraph
686*cdf0e10cSrcweir                     if( ( bAtStart && bAtEnd ) ||           // 1
687*cdf0e10cSrcweir                             ( bAtStart && bDelimBehind ) ||     // 2
688*cdf0e10cSrcweir                             ( bAtEnd && bDelimBefore ) ||       // 3
689*cdf0e10cSrcweir                             ( bDelimBefore && bDelimBehind ))   // 4
690*cdf0e10cSrcweir                     {
691*cdf0e10cSrcweir                         aRet.subRegExpressions = 1;
692*cdf0e10cSrcweir                         aRet.startOffset.realloc( 1 );
693*cdf0e10cSrcweir                         aRet.startOffset[ 0 ] = nCmpIdx;
694*cdf0e10cSrcweir                         aRet.endOffset.realloc( 1 );
695*cdf0e10cSrcweir                         aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
696*cdf0e10cSrcweir                         return aRet;
697*cdf0e10cSrcweir                     }
698*cdf0e10cSrcweir                 }
699*cdf0e10cSrcweir                 else
700*cdf0e10cSrcweir                 {
701*cdf0e10cSrcweir                     aRet.subRegExpressions = 1;
702*cdf0e10cSrcweir                     aRet.startOffset.realloc( 1 );
703*cdf0e10cSrcweir                     aRet.startOffset[ 0 ] = nCmpIdx;
704*cdf0e10cSrcweir                     aRet.endOffset.realloc( 1 );
705*cdf0e10cSrcweir                     aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
706*cdf0e10cSrcweir                     return aRet;
707*cdf0e10cSrcweir                 }
708*cdf0e10cSrcweir             }
709*cdf0e10cSrcweir         }
710*cdf0e10cSrcweir         nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
711*cdf0e10cSrcweir         if( nCmpIdx < nSuchIdx )
712*cdf0e10cSrcweir             return aRet;
713*cdf0e10cSrcweir         nCmpIdx -= nSuchIdx;
714*cdf0e10cSrcweir     }
715*cdf0e10cSrcweir     return aRet;
716*cdf0e10cSrcweir }
717*cdf0e10cSrcweir 
718*cdf0e10cSrcweir 
719*cdf0e10cSrcweir 
//---------------------------------------------------------------------------
// ------- Methods for searching with regular expressions -------------------
722*cdf0e10cSrcweir 
723*cdf0e10cSrcweir SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
724*cdf0e10cSrcweir                                       sal_Int32 startPos, sal_Int32 endPos )
725*cdf0e10cSrcweir             throw(RuntimeException)
726*cdf0e10cSrcweir {
727*cdf0e10cSrcweir     SearchResult aRet;
728*cdf0e10cSrcweir     aRet.subRegExpressions = 0;
729*cdf0e10cSrcweir     OUString aStr( searchStr );
730*cdf0e10cSrcweir 
731*cdf0e10cSrcweir     bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
732*cdf0e10cSrcweir                     SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
733*cdf0e10cSrcweir 
734*cdf0e10cSrcweir     pRegExp->set_line(aStr.getStr(), bSearchInSel ? endPos : aStr.getLength());
735*cdf0e10cSrcweir 
736*cdf0e10cSrcweir     struct re_registers regs;
737*cdf0e10cSrcweir 
738*cdf0e10cSrcweir     // Clear structure
739*cdf0e10cSrcweir     memset((void *)&regs, 0, sizeof(struct re_registers));
740*cdf0e10cSrcweir     if ( ! pRegExp->re_search(&regs, startPos) )
741*cdf0e10cSrcweir     {
742*cdf0e10cSrcweir         if( regs.num_of_match > 0 &&
743*cdf0e10cSrcweir                 (regs.start[0] != -1 && regs.end[0] != -1) )
744*cdf0e10cSrcweir         {
745*cdf0e10cSrcweir             aRet.startOffset.realloc(regs.num_of_match);
746*cdf0e10cSrcweir             aRet.endOffset.realloc(regs.num_of_match);
747*cdf0e10cSrcweir 
748*cdf0e10cSrcweir             sal_Int32 i = 0, j = 0;
749*cdf0e10cSrcweir             while( j < regs.num_of_match )
750*cdf0e10cSrcweir             {
751*cdf0e10cSrcweir                 if( regs.start[j] != -1 && regs.end[j] != -1 )
752*cdf0e10cSrcweir                 {
753*cdf0e10cSrcweir                     aRet.startOffset[i] = regs.start[j];
754*cdf0e10cSrcweir                     aRet.endOffset[i] = regs.end[j];
755*cdf0e10cSrcweir                     ++i;
756*cdf0e10cSrcweir                 }
757*cdf0e10cSrcweir                 ++j;
758*cdf0e10cSrcweir             }
759*cdf0e10cSrcweir             aRet.subRegExpressions = i;
760*cdf0e10cSrcweir         }
761*cdf0e10cSrcweir         if ( regs.num_regs > 0 )
762*cdf0e10cSrcweir         {
763*cdf0e10cSrcweir             if ( regs.start )
764*cdf0e10cSrcweir                 free(regs.start);
765*cdf0e10cSrcweir             if ( regs.end )
766*cdf0e10cSrcweir                 free(regs.end);
767*cdf0e10cSrcweir         }
768*cdf0e10cSrcweir     }
769*cdf0e10cSrcweir 
770*cdf0e10cSrcweir     return aRet;
771*cdf0e10cSrcweir }
772*cdf0e10cSrcweir 
773*cdf0e10cSrcweir /*
774*cdf0e10cSrcweir  * Sucht das Muster aSrchPara.sSrchStr rueckwaerts im String rStr
775*cdf0e10cSrcweir  */
776*cdf0e10cSrcweir SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
777*cdf0e10cSrcweir                                       sal_Int32 startPos, sal_Int32 endPos )
778*cdf0e10cSrcweir             throw(RuntimeException)
779*cdf0e10cSrcweir {
780*cdf0e10cSrcweir     SearchResult aRet;
781*cdf0e10cSrcweir     aRet.subRegExpressions = 0;
782*cdf0e10cSrcweir     OUString aStr( searchStr );
783*cdf0e10cSrcweir 
784*cdf0e10cSrcweir     sal_Int32 nOffset = 0;
785*cdf0e10cSrcweir     sal_Int32 nStrEnde = aStr.getLength() == endPos ? 0 : endPos;
786*cdf0e10cSrcweir 
787*cdf0e10cSrcweir     bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
788*cdf0e10cSrcweir                     SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
789*cdf0e10cSrcweir 
790*cdf0e10cSrcweir     if( startPos )
791*cdf0e10cSrcweir         nOffset = startPos - 1;
792*cdf0e10cSrcweir 
793*cdf0e10cSrcweir     // search only in the subString
794*cdf0e10cSrcweir     if( bSearchInSel && nStrEnde )
795*cdf0e10cSrcweir     {
796*cdf0e10cSrcweir         aStr = aStr.copy( nStrEnde, aStr.getLength() - nStrEnde );
797*cdf0e10cSrcweir         if( nOffset > nStrEnde )
798*cdf0e10cSrcweir             nOffset = nOffset - nStrEnde;
799*cdf0e10cSrcweir         else
800*cdf0e10cSrcweir             nOffset = 0;
801*cdf0e10cSrcweir     }
802*cdf0e10cSrcweir 
803*cdf0e10cSrcweir     // set the length to negative for reverse search
804*cdf0e10cSrcweir     pRegExp->set_line( aStr.getStr(), -(aStr.getLength()) );
805*cdf0e10cSrcweir     struct re_registers regs;
806*cdf0e10cSrcweir 
807*cdf0e10cSrcweir     // Clear structure
808*cdf0e10cSrcweir     memset((void *)&regs, 0, sizeof(struct re_registers));
809*cdf0e10cSrcweir     if ( ! pRegExp->re_search(&regs, nOffset) )
810*cdf0e10cSrcweir     {
811*cdf0e10cSrcweir         if( regs.num_of_match > 0 &&
812*cdf0e10cSrcweir                 (regs.start[0] != -1 && regs.end[0] != -1) )
813*cdf0e10cSrcweir         {
814*cdf0e10cSrcweir             nOffset = bSearchInSel ? nStrEnde : 0;
815*cdf0e10cSrcweir             aRet.startOffset.realloc(regs.num_of_match);
816*cdf0e10cSrcweir             aRet.endOffset.realloc(regs.num_of_match);
817*cdf0e10cSrcweir 
818*cdf0e10cSrcweir             sal_Int32 i = 0, j = 0;
819*cdf0e10cSrcweir             while( j < regs.num_of_match )
820*cdf0e10cSrcweir             {
821*cdf0e10cSrcweir                 if( regs.start[j] != -1 && regs.end[j] != -1 )
822*cdf0e10cSrcweir                 {
823*cdf0e10cSrcweir                     aRet.startOffset[i] = regs.end[j] + nOffset;
824*cdf0e10cSrcweir                     aRet.endOffset[i] = regs.start[j] + nOffset;
825*cdf0e10cSrcweir                     ++i;
826*cdf0e10cSrcweir                 }
827*cdf0e10cSrcweir                 ++j;
828*cdf0e10cSrcweir             }
829*cdf0e10cSrcweir             aRet.subRegExpressions = i;
830*cdf0e10cSrcweir         }
831*cdf0e10cSrcweir         if ( regs.num_regs > 0 )
832*cdf0e10cSrcweir         {
833*cdf0e10cSrcweir             if ( regs.start )
834*cdf0e10cSrcweir                 free(regs.start);
835*cdf0e10cSrcweir             if ( regs.end )
836*cdf0e10cSrcweir                 free(regs.end);
837*cdf0e10cSrcweir         }
838*cdf0e10cSrcweir     }
839*cdf0e10cSrcweir 
840*cdf0e10cSrcweir     return aRet;
841*cdf0e10cSrcweir }
842*cdf0e10cSrcweir 
843*cdf0e10cSrcweir // Phonetische Suche von Worten
844*cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
845*cdf0e10cSrcweir                                           sal_Int32 startPos, sal_Int32 endPos )
846*cdf0e10cSrcweir             throw(RuntimeException)
847*cdf0e10cSrcweir {
848*cdf0e10cSrcweir     SearchResult aRet;
849*cdf0e10cSrcweir     aRet.subRegExpressions = 0;
850*cdf0e10cSrcweir 
851*cdf0e10cSrcweir     if( !xBreak.is() )
852*cdf0e10cSrcweir         return aRet;
853*cdf0e10cSrcweir 
854*cdf0e10cSrcweir     OUString aWTemp( searchStr );
855*cdf0e10cSrcweir 
856*cdf0e10cSrcweir     register sal_Int32 nStt, nEnd;
857*cdf0e10cSrcweir 
858*cdf0e10cSrcweir     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
859*cdf0e10cSrcweir             aSrchPara.Locale,
860*cdf0e10cSrcweir             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
861*cdf0e10cSrcweir 
862*cdf0e10cSrcweir     do
863*cdf0e10cSrcweir     {
864*cdf0e10cSrcweir         if( aWBnd.startPos >= endPos )
865*cdf0e10cSrcweir             break;
866*cdf0e10cSrcweir         nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
867*cdf0e10cSrcweir         nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
868*cdf0e10cSrcweir 
869*cdf0e10cSrcweir         if( nStt < nEnd &&
870*cdf0e10cSrcweir                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
871*cdf0e10cSrcweir         {
872*cdf0e10cSrcweir             aRet.subRegExpressions = 1;
873*cdf0e10cSrcweir             aRet.startOffset.realloc( 1 );
874*cdf0e10cSrcweir             aRet.startOffset[ 0 ] = nStt;
875*cdf0e10cSrcweir             aRet.endOffset.realloc( 1 );
876*cdf0e10cSrcweir             aRet.endOffset[ 0 ] = nEnd;
877*cdf0e10cSrcweir             break;
878*cdf0e10cSrcweir         }
879*cdf0e10cSrcweir 
880*cdf0e10cSrcweir         nStt = nEnd - 1;
881*cdf0e10cSrcweir         aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
882*cdf0e10cSrcweir                 WordType::ANYWORD_IGNOREWHITESPACES);
883*cdf0e10cSrcweir     } while( aWBnd.startPos != aWBnd.endPos ||
884*cdf0e10cSrcweir             (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
885*cdf0e10cSrcweir     // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
886*cdf0e10cSrcweir     // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
887*cdf0e10cSrcweir     // and nextWord() does also => don't loop forever.
888*cdf0e10cSrcweir     return aRet;
889*cdf0e10cSrcweir }
890*cdf0e10cSrcweir 
891*cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
892*cdf0e10cSrcweir                                           sal_Int32 startPos, sal_Int32 endPos )
893*cdf0e10cSrcweir             throw(RuntimeException)
894*cdf0e10cSrcweir {
895*cdf0e10cSrcweir     SearchResult aRet;
896*cdf0e10cSrcweir     aRet.subRegExpressions = 0;
897*cdf0e10cSrcweir 
898*cdf0e10cSrcweir     if( !xBreak.is() )
899*cdf0e10cSrcweir         return aRet;
900*cdf0e10cSrcweir 
901*cdf0e10cSrcweir     OUString aWTemp( searchStr );
902*cdf0e10cSrcweir 
903*cdf0e10cSrcweir     register sal_Int32 nStt, nEnd;
904*cdf0e10cSrcweir 
905*cdf0e10cSrcweir     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
906*cdf0e10cSrcweir             aSrchPara.Locale,
907*cdf0e10cSrcweir             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
908*cdf0e10cSrcweir 
909*cdf0e10cSrcweir     do
910*cdf0e10cSrcweir     {
911*cdf0e10cSrcweir         if( aWBnd.endPos <= endPos )
912*cdf0e10cSrcweir             break;
913*cdf0e10cSrcweir         nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
914*cdf0e10cSrcweir         nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
915*cdf0e10cSrcweir 
916*cdf0e10cSrcweir         if( nStt < nEnd &&
917*cdf0e10cSrcweir                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
918*cdf0e10cSrcweir         {
919*cdf0e10cSrcweir             aRet.subRegExpressions = 1;
920*cdf0e10cSrcweir             aRet.startOffset.realloc( 1 );
921*cdf0e10cSrcweir             aRet.startOffset[ 0 ] = nEnd;
922*cdf0e10cSrcweir             aRet.endOffset.realloc( 1 );
923*cdf0e10cSrcweir             aRet.endOffset[ 0 ] = nStt;
924*cdf0e10cSrcweir             break;
925*cdf0e10cSrcweir         }
926*cdf0e10cSrcweir         if( !nStt )
927*cdf0e10cSrcweir             break;
928*cdf0e10cSrcweir 
929*cdf0e10cSrcweir         aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
930*cdf0e10cSrcweir                 WordType::ANYWORD_IGNOREWHITESPACES);
931*cdf0e10cSrcweir     } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
932*cdf0e10cSrcweir     return aRet;
933*cdf0e10cSrcweir }
934*cdf0e10cSrcweir 
935*cdf0e10cSrcweir 
// Service name: returned by getServiceName_Static() and compared against the
// requested name in TextSearch::supportsService().
static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch";
// Implementation name: returned by getImplementationName_Static() and matched
// against the requested name in component_getFactory().
static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n";
938*cdf0e10cSrcweir 
939*cdf0e10cSrcweir static OUString getServiceName_Static()
940*cdf0e10cSrcweir {
941*cdf0e10cSrcweir     return OUString::createFromAscii( cSearchName );
942*cdf0e10cSrcweir }
943*cdf0e10cSrcweir 
944*cdf0e10cSrcweir static OUString getImplementationName_Static()
945*cdf0e10cSrcweir {
946*cdf0e10cSrcweir     return OUString::createFromAscii( cSearchImpl );
947*cdf0e10cSrcweir }
948*cdf0e10cSrcweir 
949*cdf0e10cSrcweir OUString SAL_CALL
950*cdf0e10cSrcweir TextSearch::getImplementationName()
951*cdf0e10cSrcweir                 throw( RuntimeException )
952*cdf0e10cSrcweir {
953*cdf0e10cSrcweir     return getImplementationName_Static();
954*cdf0e10cSrcweir }
955*cdf0e10cSrcweir 
956*cdf0e10cSrcweir sal_Bool SAL_CALL
957*cdf0e10cSrcweir TextSearch::supportsService(const OUString& rServiceName)
958*cdf0e10cSrcweir                 throw( RuntimeException )
959*cdf0e10cSrcweir {
960*cdf0e10cSrcweir     return !rServiceName.compareToAscii( cSearchName );
961*cdf0e10cSrcweir }
962*cdf0e10cSrcweir 
963*cdf0e10cSrcweir Sequence< OUString > SAL_CALL
964*cdf0e10cSrcweir TextSearch::getSupportedServiceNames(void) throw( RuntimeException )
965*cdf0e10cSrcweir {
966*cdf0e10cSrcweir     Sequence< OUString > aRet(1);
967*cdf0e10cSrcweir     aRet[0] = getServiceName_Static();
968*cdf0e10cSrcweir     return aRet;
969*cdf0e10cSrcweir }
970*cdf0e10cSrcweir 
971*cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
972*cdf0e10cSrcweir SAL_CALL TextSearch_CreateInstance(
973*cdf0e10cSrcweir         const ::com::sun::star::uno::Reference<
974*cdf0e10cSrcweir         ::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
975*cdf0e10cSrcweir {
976*cdf0e10cSrcweir     return ::com::sun::star::uno::Reference<
977*cdf0e10cSrcweir         ::com::sun::star::uno::XInterface >(
978*cdf0e10cSrcweir                 (::cppu::OWeakObject*) new TextSearch( rxMSF ) );
979*cdf0e10cSrcweir }
980*cdf0e10cSrcweir 
981*cdf0e10cSrcweir extern "C"
982*cdf0e10cSrcweir {
983*cdf0e10cSrcweir 
984*cdf0e10cSrcweir void SAL_CALL component_getImplementationEnvironment(
985*cdf0e10cSrcweir         const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ )
986*cdf0e10cSrcweir {
987*cdf0e10cSrcweir     *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
988*cdf0e10cSrcweir }
989*cdf0e10cSrcweir 
990*cdf0e10cSrcweir void* SAL_CALL component_getFactory( const sal_Char* sImplementationName,
991*cdf0e10cSrcweir         void* _pServiceManager, void* /*_pRegistryKey*/ )
992*cdf0e10cSrcweir {
993*cdf0e10cSrcweir     void* pRet = NULL;
994*cdf0e10cSrcweir 
995*cdf0e10cSrcweir     ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
996*cdf0e10cSrcweir         reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* >
997*cdf0e10cSrcweir             ( _pServiceManager );
998*cdf0e10cSrcweir     ::com::sun::star::uno::Reference<
999*cdf0e10cSrcweir             ::com::sun::star::lang::XSingleServiceFactory > xFactory;
1000*cdf0e10cSrcweir 
1001*cdf0e10cSrcweir     if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
1002*cdf0e10cSrcweir     {
1003*cdf0e10cSrcweir         ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1);
1004*cdf0e10cSrcweir         aServiceNames[0] = getServiceName_Static();
1005*cdf0e10cSrcweir         xFactory = ::cppu::createSingleFactory(
1006*cdf0e10cSrcweir                 pServiceManager, getImplementationName_Static(),
1007*cdf0e10cSrcweir                 &TextSearch_CreateInstance, aServiceNames );
1008*cdf0e10cSrcweir     }
1009*cdf0e10cSrcweir 
1010*cdf0e10cSrcweir     if ( xFactory.is() )
1011*cdf0e10cSrcweir     {
1012*cdf0e10cSrcweir         xFactory->acquire();
1013*cdf0e10cSrcweir         pRet = xFactory.get();
1014*cdf0e10cSrcweir     }
1015*cdf0e10cSrcweir 
1016*cdf0e10cSrcweir     return pRet;
1017*cdf0e10cSrcweir }
1018*cdf0e10cSrcweir 
1019*cdf0e10cSrcweir } // extern "C"
1020