/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org.  If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
25cdf0e10cSrcweir * 26cdf0e10cSrcweir ************************************************************************/ 27cdf0e10cSrcweir 28cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 30cdf0e10cSrcweir 31cdf0e10cSrcweir #include "textsearch.hxx" 32cdf0e10cSrcweir #include "levdis.hxx" 33cdf0e10cSrcweir #include <com/sun/star/lang/Locale.hpp> 34cdf0e10cSrcweir #include <com/sun/star/lang/XMultiServiceFactory.hpp> 35cdf0e10cSrcweir #include <comphelper/processfactory.hxx> 36cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp> 37cdf0e10cSrcweir #include <com/sun/star/util/SearchFlags.hpp> 38cdf0e10cSrcweir #include <com/sun/star/i18n/WordType.hpp> 39cdf0e10cSrcweir #include <com/sun/star/i18n/ScriptType.hpp> 40cdf0e10cSrcweir #include <com/sun/star/i18n/CharacterIteratorMode.hpp> 41cdf0e10cSrcweir #include <com/sun/star/i18n/KCharacterType.hpp> 42cdf0e10cSrcweir #include <com/sun/star/registry/XRegistryKey.hpp> 43cdf0e10cSrcweir #include <cppuhelper/factory.hxx> 44cdf0e10cSrcweir #include <cppuhelper/weak.hxx> 45cdf0e10cSrcweir 46cdf0e10cSrcweir #ifdef _MSC_VER 47cdf0e10cSrcweir // get rid of that dumb compiler warning 48cdf0e10cSrcweir // identifier was truncated to '255' characters in the debug information 49cdf0e10cSrcweir // for STL template usage, if .pdb files are to be created 50cdf0e10cSrcweir #pragma warning( disable: 4786 ) 51cdf0e10cSrcweir #endif 52cdf0e10cSrcweir 53cdf0e10cSrcweir #include <string.h> 54cdf0e10cSrcweir 55cdf0e10cSrcweir using namespace ::com::sun::star::util; 56cdf0e10cSrcweir using namespace ::com::sun::star::uno; 57cdf0e10cSrcweir using namespace ::com::sun::star::lang; 58cdf0e10cSrcweir using namespace ::com::sun::star::i18n; 59cdf0e10cSrcweir using namespace ::rtl; 60cdf0e10cSrcweir 61cdf0e10cSrcweir static sal_Int32 COMPLEX_TRANS_MASK_TMP = 62cdf0e10cSrcweir TransliterationModules_ignoreBaFa_ja_JP | 63cdf0e10cSrcweir 
TransliterationModules_ignoreIterationMark_ja_JP | 64cdf0e10cSrcweir TransliterationModules_ignoreTiJi_ja_JP | 65cdf0e10cSrcweir TransliterationModules_ignoreHyuByu_ja_JP | 66cdf0e10cSrcweir TransliterationModules_ignoreSeZe_ja_JP | 67cdf0e10cSrcweir TransliterationModules_ignoreIandEfollowedByYa_ja_JP | 68cdf0e10cSrcweir TransliterationModules_ignoreKiKuFollowedBySa_ja_JP | 69cdf0e10cSrcweir TransliterationModules_ignoreProlongedSoundMark_ja_JP; 70*cc450e3aSHerbert Dürr static const sal_Int32 SIMPLE_TRANS_MASK = ~(COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_WIDTH) | TransliterationModules_FULLWIDTH_HALFWIDTH; 71*cc450e3aSHerbert Dürr static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH; 72cdf0e10cSrcweir // Above 2 transliteration is simple but need to take effect in 73cdf0e10cSrcweir // complex transliteration 74cdf0e10cSrcweir 75cdf0e10cSrcweir TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF) 76cdf0e10cSrcweir : xMSF( rxMSF ) 77cdf0e10cSrcweir , pJumpTable( 0 ) 78cdf0e10cSrcweir , pJumpTable2( 0 ) 79*cc450e3aSHerbert Dürr , pRegexMatcher( NULL ) 80cdf0e10cSrcweir , pWLD( 0 ) 81cdf0e10cSrcweir { 82cdf0e10cSrcweir SearchOptions aOpt; 83cdf0e10cSrcweir aOpt.algorithmType = SearchAlgorithms_ABSOLUTE; 84cdf0e10cSrcweir aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE; 85cdf0e10cSrcweir //aOpt.Locale = ???; 86cdf0e10cSrcweir setOptions( aOpt ); 87cdf0e10cSrcweir } 88cdf0e10cSrcweir 89cdf0e10cSrcweir TextSearch::~TextSearch() 90cdf0e10cSrcweir { 91*cc450e3aSHerbert Dürr delete pRegexMatcher; 92cdf0e10cSrcweir delete pWLD; 93cdf0e10cSrcweir delete pJumpTable; 94cdf0e10cSrcweir delete pJumpTable2; 95cdf0e10cSrcweir } 96cdf0e10cSrcweir 97cdf0e10cSrcweir void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException ) 98cdf0e10cSrcweir { 99cdf0e10cSrcweir aSrchPara = rOptions; 100cdf0e10cSrcweir 101*cc450e3aSHerbert Dürr 
delete pRegexMatcher, pRegexMatcher = NULL; 102cdf0e10cSrcweir delete pWLD, pWLD = 0; 103cdf0e10cSrcweir delete pJumpTable, pJumpTable = 0; 104cdf0e10cSrcweir delete pJumpTable2, pJumpTable2 = 0; 105cdf0e10cSrcweir 106cdf0e10cSrcweir // Create Transliteration class 107cdf0e10cSrcweir if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 108cdf0e10cSrcweir { 109cdf0e10cSrcweir if( !xTranslit.is() ) 110cdf0e10cSrcweir { 111cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 112cdf0e10cSrcweir OUString::createFromAscii( 113cdf0e10cSrcweir "com.sun.star.i18n.Transliteration")); 114cdf0e10cSrcweir if ( xI.is() ) 115cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 116cdf0e10cSrcweir (const Reference< XExtendedTransliteration >*)0)) 117cdf0e10cSrcweir >>= xTranslit; 118cdf0e10cSrcweir } 119cdf0e10cSrcweir // Load transliteration module 120cdf0e10cSrcweir if( xTranslit.is() ) 121cdf0e10cSrcweir xTranslit->loadModule( 122cdf0e10cSrcweir (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ), 123cdf0e10cSrcweir aSrchPara.Locale); 124cdf0e10cSrcweir } 125cdf0e10cSrcweir else if( xTranslit.is() ) 126cdf0e10cSrcweir xTranslit = 0; 127cdf0e10cSrcweir 128cdf0e10cSrcweir // Create Transliteration for 2<->1, 2<->2 transliteration 129cdf0e10cSrcweir if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 130cdf0e10cSrcweir { 131cdf0e10cSrcweir if( !xTranslit2.is() ) 132cdf0e10cSrcweir { 133cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 134cdf0e10cSrcweir OUString::createFromAscii( 135cdf0e10cSrcweir "com.sun.star.i18n.Transliteration")); 136cdf0e10cSrcweir if ( xI.is() ) 137cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 138cdf0e10cSrcweir (const Reference< XExtendedTransliteration >*)0)) 139cdf0e10cSrcweir >>= xTranslit2; 140cdf0e10cSrcweir } 141cdf0e10cSrcweir // Load transliteration module 142cdf0e10cSrcweir if( xTranslit2.is() ) 143cdf0e10cSrcweir xTranslit2->loadModule( 144cdf0e10cSrcweir 
(TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ), 145cdf0e10cSrcweir aSrchPara.Locale); 146cdf0e10cSrcweir } 147cdf0e10cSrcweir 148cdf0e10cSrcweir if ( !xBreak.is() ) 149cdf0e10cSrcweir { 150cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 151cdf0e10cSrcweir OUString::createFromAscii( "com.sun.star.i18n.BreakIterator")); 152cdf0e10cSrcweir if( xI.is() ) 153cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 154cdf0e10cSrcweir (const Reference< XBreakIterator >*)0)) 155cdf0e10cSrcweir >>= xBreak; 156cdf0e10cSrcweir } 157cdf0e10cSrcweir 158cdf0e10cSrcweir sSrchStr = aSrchPara.searchString; 159cdf0e10cSrcweir 160*cc450e3aSHerbert Dürr // use transliteration here 161*cc450e3aSHerbert Dürr if ( xTranslit.is() && 162cdf0e10cSrcweir aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 163cdf0e10cSrcweir sSrchStr = xTranslit->transliterateString2String( 164cdf0e10cSrcweir aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 165cdf0e10cSrcweir 166*cc450e3aSHerbert Dürr if ( xTranslit2.is() && 167cdf0e10cSrcweir aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 168cdf0e10cSrcweir sSrchStr2 = xTranslit2->transliterateString2String( 169cdf0e10cSrcweir aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 170cdf0e10cSrcweir 171cdf0e10cSrcweir // When start or end of search string is a complex script type, we need to 172cdf0e10cSrcweir // make sure the result boundary is not located in the middle of cell. 
173cdf0e10cSrcweir checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) == 174cdf0e10cSrcweir ScriptType::COMPLEX)); 175cdf0e10cSrcweir checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 176cdf0e10cSrcweir sSrchStr.getLength()-1) == ScriptType::COMPLEX)); 177cdf0e10cSrcweir 178*cc450e3aSHerbert Dürr switch( aSrchPara.algorithmType) 179cdf0e10cSrcweir { 180*cc450e3aSHerbert Dürr case SearchAlgorithms_REGEXP: 181*cc450e3aSHerbert Dürr fnForward = &TextSearch::RESrchFrwrd; 182*cc450e3aSHerbert Dürr fnBackward = &TextSearch::RESrchBkwrd; 183*cc450e3aSHerbert Dürr 184*cc450e3aSHerbert Dürr { 185*cc450e3aSHerbert Dürr sal_uInt32 nIcuSearchFlags = 0; 186*cc450e3aSHerbert Dürr // map com::sun::star::util::SearchFlags to ICU uregex.h flags 187*cc450e3aSHerbert Dürr // TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE 188*cc450e3aSHerbert Dürr // REG_NEWLINE is neither defined properly nor used anywhere => not implemented 189*cc450e3aSHerbert Dürr // REG_NOSUB is not used anywhere => not implemented 190*cc450e3aSHerbert Dürr // NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute 191*cc450e3aSHerbert Dürr // LEV_RELAXED is only used for SearchAlgorithm==Approximate 192*cc450e3aSHerbert Dürr // why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it??? 193*cc450e3aSHerbert Dürr if( (aSrchPara.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0) 194*cc450e3aSHerbert Dürr nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE; 195*cc450e3aSHerbert Dürr UErrorCode nIcuErr = U_ZERO_ERROR; 196*cc450e3aSHerbert Dürr // assumption: transliteration doesn't mangle regexp control chars 197*cc450e3aSHerbert Dürr OUString& rPatternStr = (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr 198*cc450e3aSHerbert Dürr : ((aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK) ? 
sSrchStr2 : aSrchPara.searchString); 199*cc450e3aSHerbert Dürr const IcuUniString aIcuSearchPatStr( rPatternStr.getStr(), rPatternStr.getLength()); 200*cc450e3aSHerbert Dürr pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr); 201*cc450e3aSHerbert Dürr if( nIcuErr) 202*cc450e3aSHerbert Dürr { delete pRegexMatcher; pRegexMatcher = NULL;} 203*cc450e3aSHerbert Dürr } break; 204*cc450e3aSHerbert Dürr 205*cc450e3aSHerbert Dürr case SearchAlgorithms_APPROXIMATE: 206cdf0e10cSrcweir fnForward = &TextSearch::ApproxSrchFrwrd; 207cdf0e10cSrcweir fnBackward = &TextSearch::ApproxSrchBkwrd; 208cdf0e10cSrcweir 209cdf0e10cSrcweir pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars, 210cdf0e10cSrcweir aSrchPara.insertedChars, aSrchPara.deletedChars, 211cdf0e10cSrcweir 0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) ); 212cdf0e10cSrcweir 213cdf0e10cSrcweir nLimit = pWLD->GetLimit(); 214*cc450e3aSHerbert Dürr break; 215*cc450e3aSHerbert Dürr 216*cc450e3aSHerbert Dürr default: 217cdf0e10cSrcweir fnForward = &TextSearch::NSrchFrwrd; 218cdf0e10cSrcweir fnBackward = &TextSearch::NSrchBkwrd; 219*cc450e3aSHerbert Dürr break; 220cdf0e10cSrcweir } 221cdf0e10cSrcweir } 222cdf0e10cSrcweir 223cdf0e10cSrcweir sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos ) 224cdf0e10cSrcweir { 225cdf0e10cSrcweir sal_Int32 nRet = 0, nEnd = rOff.getLength(); 226cdf0e10cSrcweir while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet; 227cdf0e10cSrcweir return nRet; 228cdf0e10cSrcweir } 229cdf0e10cSrcweir 230cdf0e10cSrcweir sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos) 231cdf0e10cSrcweir throw( RuntimeException ) 232cdf0e10cSrcweir { 233cdf0e10cSrcweir sal_Int32 nDone; 234cdf0e10cSrcweir return nPos == xBreak->previousCharacters(searchStr, nPos+1, 235cdf0e10cSrcweir aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone); 236cdf0e10cSrcweir } 237cdf0e10cSrcweir 238cdf0e10cSrcweir SearchResult 
TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 239cdf0e10cSrcweir throw( RuntimeException ) 240cdf0e10cSrcweir { 241cdf0e10cSrcweir SearchResult sres; 242cdf0e10cSrcweir 243cdf0e10cSrcweir OUString in_str(searchStr); 244cdf0e10cSrcweir sal_Int32 newStartPos = startPos; 245cdf0e10cSrcweir sal_Int32 newEndPos = endPos; 246cdf0e10cSrcweir 247cdf0e10cSrcweir bUsePrimarySrchStr = true; 248cdf0e10cSrcweir 249cdf0e10cSrcweir if ( xTranslit.is() ) 250cdf0e10cSrcweir { 251cdf0e10cSrcweir // apply normal transliteration (1<->1, 1<->0) 252cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 253cdf0e10cSrcweir in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 254cdf0e10cSrcweir 255cdf0e10cSrcweir // JP 20.6.2001: also the start and end positions must be corrected! 256cdf0e10cSrcweir if( startPos ) 257cdf0e10cSrcweir newStartPos = FindPosInSeq_Impl( offset, startPos ); 258cdf0e10cSrcweir 259cdf0e10cSrcweir if( endPos < searchStr.getLength() ) 260cdf0e10cSrcweir newEndPos = FindPosInSeq_Impl( offset, endPos ); 261cdf0e10cSrcweir else 262cdf0e10cSrcweir newEndPos = in_str.getLength(); 263cdf0e10cSrcweir 264cdf0e10cSrcweir sres = (this->*fnForward)( in_str, newStartPos, newEndPos ); 265cdf0e10cSrcweir 266cdf0e10cSrcweir for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 267cdf0e10cSrcweir { 268cdf0e10cSrcweir if (sres.startOffset[k]) 269cdf0e10cSrcweir sres.startOffset[k] = offset[sres.startOffset[k]]; 270cdf0e10cSrcweir // JP 20.6.2001: end is ever exclusive and then don't return 271cdf0e10cSrcweir // the position of the next character - return the 272cdf0e10cSrcweir // next position behind the last found character! 273cdf0e10cSrcweir // "a b c" find "b" must return 2,3 and not 2,4!!! 
274cdf0e10cSrcweir if (sres.endOffset[k]) 275cdf0e10cSrcweir sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1; 276cdf0e10cSrcweir } 277cdf0e10cSrcweir } 278cdf0e10cSrcweir else 279cdf0e10cSrcweir { 280cdf0e10cSrcweir sres = (this->*fnForward)( in_str, startPos, endPos ); 281cdf0e10cSrcweir } 282cdf0e10cSrcweir 283cdf0e10cSrcweir if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP) 284cdf0e10cSrcweir { 285cdf0e10cSrcweir SearchResult sres2; 286cdf0e10cSrcweir 287cdf0e10cSrcweir in_str = OUString(searchStr); 288cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 289cdf0e10cSrcweir 290cdf0e10cSrcweir in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset ); 291cdf0e10cSrcweir 292cdf0e10cSrcweir if( startPos ) 293cdf0e10cSrcweir startPos = FindPosInSeq_Impl( offset, startPos ); 294cdf0e10cSrcweir 295cdf0e10cSrcweir if( endPos < searchStr.getLength() ) 296cdf0e10cSrcweir endPos = FindPosInSeq_Impl( offset, endPos ); 297cdf0e10cSrcweir else 298cdf0e10cSrcweir endPos = in_str.getLength(); 299cdf0e10cSrcweir 300cdf0e10cSrcweir bUsePrimarySrchStr = false; 301cdf0e10cSrcweir sres2 = (this->*fnForward)( in_str, startPos, endPos ); 302cdf0e10cSrcweir 303cdf0e10cSrcweir for ( int k = 0; k < sres2.startOffset.getLength(); k++ ) 304cdf0e10cSrcweir { 305cdf0e10cSrcweir if (sres2.startOffset[k]) 306cdf0e10cSrcweir sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1; 307cdf0e10cSrcweir if (sres2.endOffset[k]) 308cdf0e10cSrcweir sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1; 309cdf0e10cSrcweir } 310cdf0e10cSrcweir 311cdf0e10cSrcweir // pick first and long one 312cdf0e10cSrcweir if ( sres.subRegExpressions == 0) 313cdf0e10cSrcweir return sres2; 314cdf0e10cSrcweir if ( sres2.subRegExpressions == 1) 315cdf0e10cSrcweir { 316cdf0e10cSrcweir if ( sres.startOffset[0] > sres2.startOffset[0]) 317cdf0e10cSrcweir return sres2; 318cdf0e10cSrcweir else if ( sres.startOffset[0] == sres2.startOffset[0] 
&& 319cdf0e10cSrcweir sres.endOffset[0] < sres2.endOffset[0]) 320cdf0e10cSrcweir return sres2; 321cdf0e10cSrcweir } 322cdf0e10cSrcweir } 323cdf0e10cSrcweir 324cdf0e10cSrcweir return sres; 325cdf0e10cSrcweir } 326cdf0e10cSrcweir 327cdf0e10cSrcweir SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 328cdf0e10cSrcweir throw(RuntimeException) 329cdf0e10cSrcweir { 330cdf0e10cSrcweir SearchResult sres; 331cdf0e10cSrcweir 332cdf0e10cSrcweir OUString in_str(searchStr); 333cdf0e10cSrcweir sal_Int32 newStartPos = startPos; 334cdf0e10cSrcweir sal_Int32 newEndPos = endPos; 335cdf0e10cSrcweir 336cdf0e10cSrcweir bUsePrimarySrchStr = true; 337cdf0e10cSrcweir 338cdf0e10cSrcweir if ( xTranslit.is() ) 339cdf0e10cSrcweir { 340cdf0e10cSrcweir // apply only simple 1<->1 transliteration here 341cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 342cdf0e10cSrcweir in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 343cdf0e10cSrcweir 344cdf0e10cSrcweir // JP 20.6.2001: also the start and end positions must be corrected! 
345cdf0e10cSrcweir if( startPos < searchStr.getLength() ) 346cdf0e10cSrcweir newStartPos = FindPosInSeq_Impl( offset, startPos ); 347cdf0e10cSrcweir else 348cdf0e10cSrcweir newStartPos = in_str.getLength(); 349cdf0e10cSrcweir 350cdf0e10cSrcweir if( endPos ) 351cdf0e10cSrcweir newEndPos = FindPosInSeq_Impl( offset, endPos ); 352cdf0e10cSrcweir 353cdf0e10cSrcweir sres = (this->*fnBackward)( in_str, newStartPos, newEndPos ); 354cdf0e10cSrcweir 355cdf0e10cSrcweir for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 356cdf0e10cSrcweir { 357cdf0e10cSrcweir if (sres.startOffset[k]) 358cdf0e10cSrcweir sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1; 359cdf0e10cSrcweir // JP 20.6.2001: end is ever exclusive and then don't return 360cdf0e10cSrcweir // the position of the next character - return the 361cdf0e10cSrcweir // next position behind the last found character! 362cdf0e10cSrcweir // "a b c" find "b" must return 2,3 and not 2,4!!! 363cdf0e10cSrcweir if (sres.endOffset[k]) 364cdf0e10cSrcweir sres.endOffset[k] = offset[sres.endOffset[k]]; 365cdf0e10cSrcweir } 366cdf0e10cSrcweir } 367cdf0e10cSrcweir else 368cdf0e10cSrcweir { 369cdf0e10cSrcweir sres = (this->*fnBackward)( in_str, startPos, endPos ); 370cdf0e10cSrcweir } 371cdf0e10cSrcweir 372cdf0e10cSrcweir if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP ) 373cdf0e10cSrcweir { 374cdf0e10cSrcweir SearchResult sres2; 375cdf0e10cSrcweir 376cdf0e10cSrcweir in_str = OUString(searchStr); 377cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 378cdf0e10cSrcweir 379cdf0e10cSrcweir in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset); 380cdf0e10cSrcweir 381cdf0e10cSrcweir if( startPos < searchStr.getLength() ) 382cdf0e10cSrcweir startPos = FindPosInSeq_Impl( offset, startPos ); 383cdf0e10cSrcweir else 384cdf0e10cSrcweir startPos = in_str.getLength(); 385cdf0e10cSrcweir 386cdf0e10cSrcweir if( endPos ) 387cdf0e10cSrcweir endPos = 
FindPosInSeq_Impl( offset, endPos ); 388cdf0e10cSrcweir 389cdf0e10cSrcweir bUsePrimarySrchStr = false; 390cdf0e10cSrcweir sres2 = (this->*fnBackward)( in_str, startPos, endPos ); 391cdf0e10cSrcweir 392cdf0e10cSrcweir for( int k = 0; k < sres2.startOffset.getLength(); k++ ) 393cdf0e10cSrcweir { 394cdf0e10cSrcweir if (sres2.startOffset[k]) 395cdf0e10cSrcweir sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1; 396cdf0e10cSrcweir if (sres2.endOffset[k]) 397cdf0e10cSrcweir sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1; 398cdf0e10cSrcweir } 399cdf0e10cSrcweir 400cdf0e10cSrcweir // pick last and long one 401cdf0e10cSrcweir if ( sres.subRegExpressions == 0 ) 402cdf0e10cSrcweir return sres2; 403cdf0e10cSrcweir if ( sres2.subRegExpressions == 1 ) 404cdf0e10cSrcweir { 405cdf0e10cSrcweir if ( sres.startOffset[0] < sres2.startOffset[0] ) 406cdf0e10cSrcweir return sres2; 407cdf0e10cSrcweir if ( sres.startOffset[0] == sres2.startOffset[0] && 408cdf0e10cSrcweir sres.endOffset[0] > sres2.endOffset[0] ) 409cdf0e10cSrcweir return sres2; 410cdf0e10cSrcweir } 411cdf0e10cSrcweir } 412cdf0e10cSrcweir 413cdf0e10cSrcweir return sres; 414cdf0e10cSrcweir } 415cdf0e10cSrcweir 416*cc450e3aSHerbert Dürr //--------------------------------------------------------------------- 417cdf0e10cSrcweir 418cdf0e10cSrcweir bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const 419cdf0e10cSrcweir { 420cdf0e10cSrcweir bool bRet = 1; 421cdf0e10cSrcweir if( '\x7f' != rStr[nPos]) 422cdf0e10cSrcweir { 423cdf0e10cSrcweir if ( !xCharClass.is() ) 424cdf0e10cSrcweir { 425cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 426cdf0e10cSrcweir OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification")); 427cdf0e10cSrcweir if( xI.is() ) 428cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 429cdf0e10cSrcweir (const Reference< XCharacterClassification >*)0)) 430cdf0e10cSrcweir >>= xCharClass; 431cdf0e10cSrcweir } 432cdf0e10cSrcweir if ( xCharClass.is() ) 
433cdf0e10cSrcweir { 434cdf0e10cSrcweir sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos, 435cdf0e10cSrcweir aSrchPara.Locale ); 436cdf0e10cSrcweir if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA | 437cdf0e10cSrcweir KCharacterType::LETTER ) & nCType ) ) 438cdf0e10cSrcweir bRet = 0; 439cdf0e10cSrcweir } 440cdf0e10cSrcweir } 441cdf0e10cSrcweir return bRet; 442cdf0e10cSrcweir } 443cdf0e10cSrcweir 444*cc450e3aSHerbert Dürr // --------- helper methods for Boyer-Moore like text searching ---------- 445*cc450e3aSHerbert Dürr // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available 446cdf0e10cSrcweir 447cdf0e10cSrcweir void TextSearch::MakeForwardTab() 448cdf0e10cSrcweir { 449cdf0e10cSrcweir // create the jumptable for the search text 450cdf0e10cSrcweir if( pJumpTable ) 451cdf0e10cSrcweir { 452cdf0e10cSrcweir if( bIsForwardTab ) 453cdf0e10cSrcweir return ; // the jumpTable is ok 454cdf0e10cSrcweir delete pJumpTable; 455cdf0e10cSrcweir } 456cdf0e10cSrcweir bIsForwardTab = true; 457cdf0e10cSrcweir 458cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr.getLength(); 459cdf0e10cSrcweir pJumpTable = new TextSearchJumpTable; 460cdf0e10cSrcweir 461cdf0e10cSrcweir for( n = 0; n < nLen - 1; ++n ) 462cdf0e10cSrcweir { 463cdf0e10cSrcweir sal_Unicode cCh = sSrchStr[n]; 464cdf0e10cSrcweir sal_Int32 nDiff = nLen - n - 1; 465cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 466cdf0e10cSrcweir 467cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 468cdf0e10cSrcweir pJumpTable->insert( aEntry ); 469cdf0e10cSrcweir if ( !aPair.second ) 470cdf0e10cSrcweir (*(aPair.first)).second = nDiff; 471cdf0e10cSrcweir } 472cdf0e10cSrcweir } 473cdf0e10cSrcweir 474cdf0e10cSrcweir void TextSearch::MakeForwardTab2() 475cdf0e10cSrcweir { 476cdf0e10cSrcweir // create the jumptable for the search text 477cdf0e10cSrcweir if( pJumpTable2 ) 478cdf0e10cSrcweir { 479cdf0e10cSrcweir if( bIsForwardTab ) 480cdf0e10cSrcweir return ; // the 
jumpTable is ok 481cdf0e10cSrcweir delete pJumpTable2; 482cdf0e10cSrcweir } 483cdf0e10cSrcweir bIsForwardTab = true; 484cdf0e10cSrcweir 485cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr2.getLength(); 486cdf0e10cSrcweir pJumpTable2 = new TextSearchJumpTable; 487cdf0e10cSrcweir 488cdf0e10cSrcweir for( n = 0; n < nLen - 1; ++n ) 489cdf0e10cSrcweir { 490cdf0e10cSrcweir sal_Unicode cCh = sSrchStr2[n]; 491cdf0e10cSrcweir sal_Int32 nDiff = nLen - n - 1; 492cdf0e10cSrcweir 493cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 494cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 495cdf0e10cSrcweir pJumpTable2->insert( aEntry ); 496cdf0e10cSrcweir if ( !aPair.second ) 497cdf0e10cSrcweir (*(aPair.first)).second = nDiff; 498cdf0e10cSrcweir } 499cdf0e10cSrcweir } 500cdf0e10cSrcweir 501cdf0e10cSrcweir void TextSearch::MakeBackwardTab() 502cdf0e10cSrcweir { 503cdf0e10cSrcweir // create the jumptable for the search text 504cdf0e10cSrcweir if( pJumpTable ) 505cdf0e10cSrcweir { 506cdf0e10cSrcweir if( !bIsForwardTab ) 507cdf0e10cSrcweir return ; // the jumpTable is ok 508cdf0e10cSrcweir delete pJumpTable; 509cdf0e10cSrcweir } 510cdf0e10cSrcweir bIsForwardTab = false; 511cdf0e10cSrcweir 512cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr.getLength(); 513cdf0e10cSrcweir pJumpTable = new TextSearchJumpTable; 514cdf0e10cSrcweir 515cdf0e10cSrcweir for( n = nLen-1; n > 0; --n ) 516cdf0e10cSrcweir { 517cdf0e10cSrcweir sal_Unicode cCh = sSrchStr[n]; 518cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, n ); 519cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 520cdf0e10cSrcweir pJumpTable->insert( aEntry ); 521cdf0e10cSrcweir if ( !aPair.second ) 522cdf0e10cSrcweir (*(aPair.first)).second = n; 523cdf0e10cSrcweir } 524cdf0e10cSrcweir } 525cdf0e10cSrcweir 526cdf0e10cSrcweir void TextSearch::MakeBackwardTab2() 527cdf0e10cSrcweir { 528cdf0e10cSrcweir // create the jumptable for the search text 529cdf0e10cSrcweir if( 
pJumpTable2 ) 530cdf0e10cSrcweir { 531cdf0e10cSrcweir if( !bIsForwardTab ) 532cdf0e10cSrcweir return ; // the jumpTable is ok 533cdf0e10cSrcweir delete pJumpTable2; 534cdf0e10cSrcweir } 535cdf0e10cSrcweir bIsForwardTab = false; 536cdf0e10cSrcweir 537cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr2.getLength(); 538cdf0e10cSrcweir pJumpTable2 = new TextSearchJumpTable; 539cdf0e10cSrcweir 540cdf0e10cSrcweir for( n = nLen-1; n > 0; --n ) 541cdf0e10cSrcweir { 542cdf0e10cSrcweir sal_Unicode cCh = sSrchStr2[n]; 543cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, n ); 544cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 545cdf0e10cSrcweir pJumpTable2->insert( aEntry ); 546cdf0e10cSrcweir if ( !aPair.second ) 547cdf0e10cSrcweir (*(aPair.first)).second = n; 548cdf0e10cSrcweir } 549cdf0e10cSrcweir } 550cdf0e10cSrcweir 551cdf0e10cSrcweir sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const 552cdf0e10cSrcweir { 553cdf0e10cSrcweir TextSearchJumpTable *pJump; 554cdf0e10cSrcweir OUString sSearchKey; 555cdf0e10cSrcweir 556cdf0e10cSrcweir if ( bUsePrimarySrchStr ) { 557cdf0e10cSrcweir pJump = pJumpTable; 558cdf0e10cSrcweir sSearchKey = sSrchStr; 559cdf0e10cSrcweir } else { 560cdf0e10cSrcweir pJump = pJumpTable2; 561cdf0e10cSrcweir sSearchKey = sSrchStr2; 562cdf0e10cSrcweir } 563cdf0e10cSrcweir 564cdf0e10cSrcweir TextSearchJumpTable::const_iterator iLook = pJump->find( cChr ); 565cdf0e10cSrcweir if ( iLook == pJump->end() ) 566cdf0e10cSrcweir return sSearchKey.getLength(); 567cdf0e10cSrcweir return (*iLook).second; 568cdf0e10cSrcweir } 569cdf0e10cSrcweir 570cdf0e10cSrcweir 571cdf0e10cSrcweir // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#) 572cdf0e10cSrcweir SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 573cdf0e10cSrcweir throw(RuntimeException) 574cdf0e10cSrcweir { 575cdf0e10cSrcweir SearchResult aRet; 576cdf0e10cSrcweir aRet.subRegExpressions = 0; 577cdf0e10cSrcweir 
578cdf0e10cSrcweir OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 579cdf0e10cSrcweir 580cdf0e10cSrcweir OUString aStr( searchStr ); 581cdf0e10cSrcweir sal_Int32 nSuchIdx = aStr.getLength(); 582cdf0e10cSrcweir sal_Int32 nEnde = endPos; 583cdf0e10cSrcweir if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx ) 584cdf0e10cSrcweir return aRet; 585cdf0e10cSrcweir 586cdf0e10cSrcweir 587cdf0e10cSrcweir if( nEnde < sSearchKey.getLength() ) // position inside the search region ? 588cdf0e10cSrcweir return aRet; 589cdf0e10cSrcweir 590cdf0e10cSrcweir nEnde -= sSearchKey.getLength(); 591cdf0e10cSrcweir 592cdf0e10cSrcweir if (bUsePrimarySrchStr) 593cdf0e10cSrcweir MakeForwardTab(); // create the jumptable 594cdf0e10cSrcweir else 595cdf0e10cSrcweir MakeForwardTab2(); 596cdf0e10cSrcweir 597cdf0e10cSrcweir for (sal_Int32 nCmpIdx = startPos; // start position for the search 598cdf0e10cSrcweir nCmpIdx <= nEnde; 599cdf0e10cSrcweir nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1])) 600cdf0e10cSrcweir { 601cdf0e10cSrcweir // if the match would be the completed cells, skip it. 
602cdf0e10cSrcweir if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd 603cdf0e10cSrcweir && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) ) 604cdf0e10cSrcweir continue; 605cdf0e10cSrcweir 606cdf0e10cSrcweir nSuchIdx = sSearchKey.getLength() - 1; 607cdf0e10cSrcweir while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx]) 608cdf0e10cSrcweir { 609cdf0e10cSrcweir if( nSuchIdx == 0 ) 610cdf0e10cSrcweir { 611cdf0e10cSrcweir if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 612cdf0e10cSrcweir { 613cdf0e10cSrcweir sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength(); 614cdf0e10cSrcweir bool bAtStart = !nCmpIdx; 615cdf0e10cSrcweir bool bAtEnd = nFndEnd == endPos; 616cdf0e10cSrcweir bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 ); 617cdf0e10cSrcweir bool bDelimBehind = IsDelimiter( aStr, nFndEnd ); 618cdf0e10cSrcweir // * 1 -> only one word in the paragraph 619cdf0e10cSrcweir // * 2 -> at begin of paragraph 620cdf0e10cSrcweir // * 3 -> at end of paragraph 621cdf0e10cSrcweir // * 4 -> inside the paragraph 622cdf0e10cSrcweir if( !( ( bAtStart && bAtEnd ) || // 1 623cdf0e10cSrcweir ( bAtStart && bDelimBehind ) || // 2 624cdf0e10cSrcweir ( bAtEnd && bDelimBefore ) || // 3 625cdf0e10cSrcweir ( bDelimBefore && bDelimBehind ))) // 4 626cdf0e10cSrcweir break; 627cdf0e10cSrcweir } 628cdf0e10cSrcweir 629cdf0e10cSrcweir aRet.subRegExpressions = 1; 630cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 631cdf0e10cSrcweir aRet.startOffset[ 0 ] = nCmpIdx; 632cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 633cdf0e10cSrcweir aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength(); 634cdf0e10cSrcweir 635cdf0e10cSrcweir return aRet; 636cdf0e10cSrcweir } 637cdf0e10cSrcweir else 638cdf0e10cSrcweir nSuchIdx--; 639cdf0e10cSrcweir } 640cdf0e10cSrcweir } 641cdf0e10cSrcweir return aRet; 642cdf0e10cSrcweir } 643cdf0e10cSrcweir 644cdf0e10cSrcweir SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 
endPos ) 645cdf0e10cSrcweir throw(RuntimeException) 646cdf0e10cSrcweir { 647cdf0e10cSrcweir SearchResult aRet; 648cdf0e10cSrcweir aRet.subRegExpressions = 0; 649cdf0e10cSrcweir 650cdf0e10cSrcweir OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 651cdf0e10cSrcweir 652cdf0e10cSrcweir OUString aStr( searchStr ); 653cdf0e10cSrcweir sal_Int32 nSuchIdx = aStr.getLength(); 654cdf0e10cSrcweir sal_Int32 nEnde = endPos; 655cdf0e10cSrcweir if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx) 656cdf0e10cSrcweir return aRet; 657cdf0e10cSrcweir 658cdf0e10cSrcweir if (bUsePrimarySrchStr) 659cdf0e10cSrcweir MakeBackwardTab(); // create the jumptable 660cdf0e10cSrcweir else 661cdf0e10cSrcweir MakeBackwardTab2(); 662cdf0e10cSrcweir 663cdf0e10cSrcweir if( nEnde == nSuchIdx ) // end position for the search 664cdf0e10cSrcweir nEnde = sSearchKey.getLength(); 665cdf0e10cSrcweir else 666cdf0e10cSrcweir nEnde += sSearchKey.getLength(); 667cdf0e10cSrcweir 668cdf0e10cSrcweir sal_Int32 nCmpIdx = startPos; // start position for the search 669cdf0e10cSrcweir 670cdf0e10cSrcweir while (nCmpIdx >= nEnde) 671cdf0e10cSrcweir { 672cdf0e10cSrcweir // if the match would be the completed cells, skip it. 
673cdf0e10cSrcweir if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx - 674cdf0e10cSrcweir sSearchKey.getLength() )) && (!checkCTLEnd || 675cdf0e10cSrcweir isCellStart( aStr, nCmpIdx))) 676cdf0e10cSrcweir { 677cdf0e10cSrcweir nSuchIdx = 0; 678cdf0e10cSrcweir while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] == 679cdf0e10cSrcweir aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] ) 680cdf0e10cSrcweir nSuchIdx++; 681cdf0e10cSrcweir if( nSuchIdx >= sSearchKey.getLength() ) 682cdf0e10cSrcweir { 683cdf0e10cSrcweir if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 684cdf0e10cSrcweir { 685cdf0e10cSrcweir sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength(); 686cdf0e10cSrcweir bool bAtStart = !nFndStt; 687cdf0e10cSrcweir bool bAtEnd = nCmpIdx == startPos; 688cdf0e10cSrcweir bool bDelimBehind = IsDelimiter( aStr, nCmpIdx ); 689cdf0e10cSrcweir bool bDelimBefore = bAtStart || // begin of paragraph 690cdf0e10cSrcweir IsDelimiter( aStr, nFndStt-1 ); 691cdf0e10cSrcweir // * 1 -> only one word in the paragraph 692cdf0e10cSrcweir // * 2 -> at begin of paragraph 693cdf0e10cSrcweir // * 3 -> at end of paragraph 694cdf0e10cSrcweir // * 4 -> inside the paragraph 695cdf0e10cSrcweir if( ( bAtStart && bAtEnd ) || // 1 696cdf0e10cSrcweir ( bAtStart && bDelimBehind ) || // 2 697cdf0e10cSrcweir ( bAtEnd && bDelimBefore ) || // 3 698cdf0e10cSrcweir ( bDelimBefore && bDelimBehind )) // 4 699cdf0e10cSrcweir { 700cdf0e10cSrcweir aRet.subRegExpressions = 1; 701cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 702cdf0e10cSrcweir aRet.startOffset[ 0 ] = nCmpIdx; 703cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 704cdf0e10cSrcweir aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 705cdf0e10cSrcweir return aRet; 706cdf0e10cSrcweir } 707cdf0e10cSrcweir } 708cdf0e10cSrcweir else 709cdf0e10cSrcweir { 710cdf0e10cSrcweir aRet.subRegExpressions = 1; 711cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 712cdf0e10cSrcweir aRet.startOffset[ 0 ] = nCmpIdx; 713cdf0e10cSrcweir 
aRet.endOffset.realloc( 1 ); 714cdf0e10cSrcweir aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 715cdf0e10cSrcweir return aRet; 716cdf0e10cSrcweir } 717cdf0e10cSrcweir } 718cdf0e10cSrcweir } 719cdf0e10cSrcweir nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] ); 720cdf0e10cSrcweir if( nCmpIdx < nSuchIdx ) 721cdf0e10cSrcweir return aRet; 722cdf0e10cSrcweir nCmpIdx -= nSuchIdx; 723cdf0e10cSrcweir } 724cdf0e10cSrcweir return aRet; 725cdf0e10cSrcweir } 726cdf0e10cSrcweir 727cdf0e10cSrcweir //--------------------------------------------------------------------------- 728cdf0e10cSrcweir 729cdf0e10cSrcweir SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr, 730cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 731cdf0e10cSrcweir throw(RuntimeException) 732cdf0e10cSrcweir { 733*cc450e3aSHerbert Dürr SearchResult aRet; 734*cc450e3aSHerbert Dürr aRet.subRegExpressions = 0; 735*cc450e3aSHerbert Dürr if( !pRegexMatcher) 736*cc450e3aSHerbert Dürr return aRet; 737*cc450e3aSHerbert Dürr 738*cc450e3aSHerbert Dürr if( endPos > searchStr.getLength()) 739*cc450e3aSHerbert Dürr endPos = searchStr.getLength(); 740*cc450e3aSHerbert Dürr 741*cc450e3aSHerbert Dürr // use the ICU RegexMatcher to find the matches 742*cc450e3aSHerbert Dürr UErrorCode nIcuErr = U_ZERO_ERROR; 743*cc450e3aSHerbert Dürr const IcuUniString aSearchTargetStr( searchStr.getStr(), endPos); 744*cc450e3aSHerbert Dürr pRegexMatcher->reset( aSearchTargetStr); 745*cc450e3aSHerbert Dürr if( !pRegexMatcher->find( startPos, nIcuErr)) 746*cc450e3aSHerbert Dürr return aRet; 747*cc450e3aSHerbert Dürr 748*cc450e3aSHerbert Dürr aRet.subRegExpressions = 1; 749*cc450e3aSHerbert Dürr aRet.startOffset.realloc( aRet.subRegExpressions); 750*cc450e3aSHerbert Dürr aRet.endOffset.realloc( aRet.subRegExpressions); 751*cc450e3aSHerbert Dürr aRet.startOffset[0] = pRegexMatcher->start( nIcuErr); 752*cc450e3aSHerbert Dürr aRet.endOffset[0] = pRegexMatcher->end( nIcuErr); 753*cc450e3aSHerbert Dürr 
754*cc450e3aSHerbert Dürr return aRet; 755cdf0e10cSrcweir } 756cdf0e10cSrcweir 757cdf0e10cSrcweir SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr, 758cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 759cdf0e10cSrcweir throw(RuntimeException) 760cdf0e10cSrcweir { 761*cc450e3aSHerbert Dürr // NOTE: for backwards search callers provide startPos/endPos inverted! 762*cc450e3aSHerbert Dürr SearchResult aRet; 763*cc450e3aSHerbert Dürr aRet.subRegExpressions = 0; 764*cc450e3aSHerbert Dürr if( !pRegexMatcher) 765*cc450e3aSHerbert Dürr return aRet; 766*cc450e3aSHerbert Dürr 767*cc450e3aSHerbert Dürr if( startPos > searchStr.getLength()) 768*cc450e3aSHerbert Dürr startPos = searchStr.getLength(); 769*cc450e3aSHerbert Dürr 770*cc450e3aSHerbert Dürr // use the ICU RegexMatcher to find the matches 771*cc450e3aSHerbert Dürr // TODO: use ICU's backward searching once it becomes available 772*cc450e3aSHerbert Dürr UErrorCode nIcuErr = U_ZERO_ERROR; 773*cc450e3aSHerbert Dürr const IcuUniString aSearchTargetStr( searchStr.getStr(), startPos); 774*cc450e3aSHerbert Dürr pRegexMatcher->reset( aSearchTargetStr); 775*cc450e3aSHerbert Dürr if( !pRegexMatcher->find( endPos, nIcuErr)) 776*cc450e3aSHerbert Dürr return aRet; 777*cc450e3aSHerbert Dürr 778*cc450e3aSHerbert Dürr aRet.subRegExpressions = 1; 779*cc450e3aSHerbert Dürr aRet.startOffset.realloc( aRet.subRegExpressions); 780*cc450e3aSHerbert Dürr aRet.endOffset.realloc( aRet.subRegExpressions); 781*cc450e3aSHerbert Dürr 782*cc450e3aSHerbert Dürr do { 783*cc450e3aSHerbert Dürr // NOTE: backward search seems to be expected to have startOfs/endOfs inverted! 
784*cc450e3aSHerbert Dürr aRet.startOffset[0] = pRegexMatcher->end( nIcuErr); 785*cc450e3aSHerbert Dürr aRet.endOffset[0] = pRegexMatcher->start( nIcuErr); 786*cc450e3aSHerbert Dürr } while( pRegexMatcher->find( aRet.endOffset[0]+1, nIcuErr)); 787*cc450e3aSHerbert Dürr 788*cc450e3aSHerbert Dürr return aRet; 789cdf0e10cSrcweir } 790cdf0e10cSrcweir 791*cc450e3aSHerbert Dürr //--------------------------------------------------------------------------- 792*cc450e3aSHerbert Dürr 793*cc450e3aSHerbert Dürr // search for words phonetically 794cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr, 795cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 796cdf0e10cSrcweir throw(RuntimeException) 797cdf0e10cSrcweir { 798cdf0e10cSrcweir SearchResult aRet; 799cdf0e10cSrcweir aRet.subRegExpressions = 0; 800cdf0e10cSrcweir 801cdf0e10cSrcweir if( !xBreak.is() ) 802cdf0e10cSrcweir return aRet; 803cdf0e10cSrcweir 804cdf0e10cSrcweir OUString aWTemp( searchStr ); 805cdf0e10cSrcweir 806cdf0e10cSrcweir register sal_Int32 nStt, nEnd; 807cdf0e10cSrcweir 808cdf0e10cSrcweir Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 809cdf0e10cSrcweir aSrchPara.Locale, 810cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 811cdf0e10cSrcweir 812cdf0e10cSrcweir do 813cdf0e10cSrcweir { 814cdf0e10cSrcweir if( aWBnd.startPos >= endPos ) 815cdf0e10cSrcweir break; 816cdf0e10cSrcweir nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos; 817cdf0e10cSrcweir nEnd = aWBnd.endPos > endPos ? 
endPos : aWBnd.endPos; 818cdf0e10cSrcweir 819cdf0e10cSrcweir if( nStt < nEnd && 820cdf0e10cSrcweir pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 821cdf0e10cSrcweir { 822cdf0e10cSrcweir aRet.subRegExpressions = 1; 823cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 824cdf0e10cSrcweir aRet.startOffset[ 0 ] = nStt; 825cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 826cdf0e10cSrcweir aRet.endOffset[ 0 ] = nEnd; 827cdf0e10cSrcweir break; 828cdf0e10cSrcweir } 829cdf0e10cSrcweir 830cdf0e10cSrcweir nStt = nEnd - 1; 831cdf0e10cSrcweir aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale, 832cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES); 833cdf0e10cSrcweir } while( aWBnd.startPos != aWBnd.endPos || 834cdf0e10cSrcweir (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) ); 835cdf0e10cSrcweir // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only 836cdf0e10cSrcweir // whitespace) in searchStr, getWordBoundary() returned startPos,startPos 837cdf0e10cSrcweir // and nextWord() does also => don't loop forever. 
838cdf0e10cSrcweir return aRet; 839cdf0e10cSrcweir } 840cdf0e10cSrcweir 841cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr, 842cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 843cdf0e10cSrcweir throw(RuntimeException) 844cdf0e10cSrcweir { 845cdf0e10cSrcweir SearchResult aRet; 846cdf0e10cSrcweir aRet.subRegExpressions = 0; 847cdf0e10cSrcweir 848cdf0e10cSrcweir if( !xBreak.is() ) 849cdf0e10cSrcweir return aRet; 850cdf0e10cSrcweir 851cdf0e10cSrcweir OUString aWTemp( searchStr ); 852cdf0e10cSrcweir 853cdf0e10cSrcweir register sal_Int32 nStt, nEnd; 854cdf0e10cSrcweir 855cdf0e10cSrcweir Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 856cdf0e10cSrcweir aSrchPara.Locale, 857cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 858cdf0e10cSrcweir 859cdf0e10cSrcweir do 860cdf0e10cSrcweir { 861cdf0e10cSrcweir if( aWBnd.endPos <= endPos ) 862cdf0e10cSrcweir break; 863cdf0e10cSrcweir nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos; 864cdf0e10cSrcweir nEnd = aWBnd.endPos > startPos ? 
startPos : aWBnd.endPos; 865cdf0e10cSrcweir 866cdf0e10cSrcweir if( nStt < nEnd && 867cdf0e10cSrcweir pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 868cdf0e10cSrcweir { 869cdf0e10cSrcweir aRet.subRegExpressions = 1; 870cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 871cdf0e10cSrcweir aRet.startOffset[ 0 ] = nEnd; 872cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 873cdf0e10cSrcweir aRet.endOffset[ 0 ] = nStt; 874cdf0e10cSrcweir break; 875cdf0e10cSrcweir } 876cdf0e10cSrcweir if( !nStt ) 877cdf0e10cSrcweir break; 878cdf0e10cSrcweir 879cdf0e10cSrcweir aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale, 880cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES); 881cdf0e10cSrcweir } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() ); 882cdf0e10cSrcweir return aRet; 883cdf0e10cSrcweir } 884cdf0e10cSrcweir 885cdf0e10cSrcweir 886cdf0e10cSrcweir static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch"; 887cdf0e10cSrcweir static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n"; 888cdf0e10cSrcweir 889cdf0e10cSrcweir static OUString getServiceName_Static() 890cdf0e10cSrcweir { 891cdf0e10cSrcweir return OUString::createFromAscii( cSearchName ); 892cdf0e10cSrcweir } 893cdf0e10cSrcweir 894cdf0e10cSrcweir static OUString getImplementationName_Static() 895cdf0e10cSrcweir { 896cdf0e10cSrcweir return OUString::createFromAscii( cSearchImpl ); 897cdf0e10cSrcweir } 898cdf0e10cSrcweir 899cdf0e10cSrcweir OUString SAL_CALL 900cdf0e10cSrcweir TextSearch::getImplementationName() 901cdf0e10cSrcweir throw( RuntimeException ) 902cdf0e10cSrcweir { 903cdf0e10cSrcweir return getImplementationName_Static(); 904cdf0e10cSrcweir } 905cdf0e10cSrcweir 906cdf0e10cSrcweir sal_Bool SAL_CALL 907cdf0e10cSrcweir TextSearch::supportsService(const OUString& rServiceName) 908cdf0e10cSrcweir throw( RuntimeException ) 909cdf0e10cSrcweir { 910cdf0e10cSrcweir return !rServiceName.compareToAscii( cSearchName ); 911cdf0e10cSrcweir } 
912cdf0e10cSrcweir 913cdf0e10cSrcweir Sequence< OUString > SAL_CALL 914cdf0e10cSrcweir TextSearch::getSupportedServiceNames(void) throw( RuntimeException ) 915cdf0e10cSrcweir { 916cdf0e10cSrcweir Sequence< OUString > aRet(1); 917cdf0e10cSrcweir aRet[0] = getServiceName_Static(); 918cdf0e10cSrcweir return aRet; 919cdf0e10cSrcweir } 920cdf0e10cSrcweir 921cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface > 922cdf0e10cSrcweir SAL_CALL TextSearch_CreateInstance( 923cdf0e10cSrcweir const ::com::sun::star::uno::Reference< 924cdf0e10cSrcweir ::com::sun::star::lang::XMultiServiceFactory >& rxMSF ) 925cdf0e10cSrcweir { 926cdf0e10cSrcweir return ::com::sun::star::uno::Reference< 927cdf0e10cSrcweir ::com::sun::star::uno::XInterface >( 928cdf0e10cSrcweir (::cppu::OWeakObject*) new TextSearch( rxMSF ) ); 929cdf0e10cSrcweir } 930cdf0e10cSrcweir 931cdf0e10cSrcweir extern "C" 932cdf0e10cSrcweir { 933cdf0e10cSrcweir 934cdf0e10cSrcweir void SAL_CALL component_getImplementationEnvironment( 935cdf0e10cSrcweir const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ ) 936cdf0e10cSrcweir { 937cdf0e10cSrcweir *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME; 938cdf0e10cSrcweir } 939cdf0e10cSrcweir 940cdf0e10cSrcweir void* SAL_CALL component_getFactory( const sal_Char* sImplementationName, 941cdf0e10cSrcweir void* _pServiceManager, void* /*_pRegistryKey*/ ) 942cdf0e10cSrcweir { 943cdf0e10cSrcweir void* pRet = NULL; 944cdf0e10cSrcweir 945cdf0e10cSrcweir ::com::sun::star::lang::XMultiServiceFactory* pServiceManager = 946cdf0e10cSrcweir reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* > 947cdf0e10cSrcweir ( _pServiceManager ); 948cdf0e10cSrcweir ::com::sun::star::uno::Reference< 949cdf0e10cSrcweir ::com::sun::star::lang::XSingleServiceFactory > xFactory; 950cdf0e10cSrcweir 951cdf0e10cSrcweir if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) ) 952cdf0e10cSrcweir { 953cdf0e10cSrcweir ::com::sun::star::uno::Sequence< 
::rtl::OUString > aServiceNames(1); 954cdf0e10cSrcweir aServiceNames[0] = getServiceName_Static(); 955cdf0e10cSrcweir xFactory = ::cppu::createSingleFactory( 956cdf0e10cSrcweir pServiceManager, getImplementationName_Static(), 957cdf0e10cSrcweir &TextSearch_CreateInstance, aServiceNames ); 958cdf0e10cSrcweir } 959cdf0e10cSrcweir 960cdf0e10cSrcweir if ( xFactory.is() ) 961cdf0e10cSrcweir { 962cdf0e10cSrcweir xFactory->acquire(); 963cdf0e10cSrcweir pRet = xFactory.get(); 964cdf0e10cSrcweir } 965cdf0e10cSrcweir 966cdf0e10cSrcweir return pRet; 967cdf0e10cSrcweir } 968cdf0e10cSrcweir 969cdf0e10cSrcweir } // extern "C" 970