1*449ab281SAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*449ab281SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*449ab281SAndrew Rist * or more contributor license agreements. See the NOTICE file 5*449ab281SAndrew Rist * distributed with this work for additional information 6*449ab281SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*449ab281SAndrew Rist * to you under the Apache License, Version 2.0 (the 8*449ab281SAndrew Rist * "License"); you may not use this file except in compliance 9*449ab281SAndrew Rist * with the License. You may obtain a copy of the License at 10*449ab281SAndrew Rist * 11*449ab281SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*449ab281SAndrew Rist * 13*449ab281SAndrew Rist * Unless required by applicable law or agreed to in writing, 14*449ab281SAndrew Rist * software distributed under the License is distributed on an 15*449ab281SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*449ab281SAndrew Rist * KIND, either express or implied. See the License for the 17*449ab281SAndrew Rist * specific language governing permissions and limitations 18*449ab281SAndrew Rist * under the License. 19*449ab281SAndrew Rist * 20*449ab281SAndrew Rist *************************************************************/ 21*449ab281SAndrew Rist 22*449ab281SAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 25cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 26cdf0e10cSrcweir 27cdf0e10cSrcweir #include "textsearch.hxx" 28cdf0e10cSrcweir #include "levdis.hxx" 29cdf0e10cSrcweir #include <com/sun/star/lang/Locale.hpp> 30cdf0e10cSrcweir #include <com/sun/star/lang/XMultiServiceFactory.hpp> 31cdf0e10cSrcweir #include <comphelper/processfactory.hxx> 32cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp> 33cdf0e10cSrcweir #include <com/sun/star/util/SearchFlags.hpp> 34cdf0e10cSrcweir #include <com/sun/star/i18n/WordType.hpp> 35cdf0e10cSrcweir #include <com/sun/star/i18n/ScriptType.hpp> 36cdf0e10cSrcweir #include <com/sun/star/i18n/CharacterIteratorMode.hpp> 37cdf0e10cSrcweir #include <com/sun/star/i18n/KCharacterType.hpp> 38cdf0e10cSrcweir #include <com/sun/star/registry/XRegistryKey.hpp> 39cdf0e10cSrcweir #include <cppuhelper/factory.hxx> 40cdf0e10cSrcweir #include <cppuhelper/weak.hxx> 41cdf0e10cSrcweir 42cdf0e10cSrcweir #ifdef _MSC_VER 43cdf0e10cSrcweir // get rid of that dumb compiler warning 44cdf0e10cSrcweir // identifier was truncated to '255' characters in the debug information 45cdf0e10cSrcweir // for STL template usage, if .pdb files are to be created 46cdf0e10cSrcweir #pragma warning( disable: 4786 ) 47cdf0e10cSrcweir #endif 48cdf0e10cSrcweir 49cdf0e10cSrcweir #include <string.h> 50cdf0e10cSrcweir 51cdf0e10cSrcweir using namespace ::com::sun::star::util; 52cdf0e10cSrcweir using namespace ::com::sun::star::uno; 53cdf0e10cSrcweir using namespace ::com::sun::star::lang; 54cdf0e10cSrcweir using namespace ::com::sun::star::i18n; 55cdf0e10cSrcweir using namespace ::rtl; 56cdf0e10cSrcweir 57cdf0e10cSrcweir static sal_Int32 COMPLEX_TRANS_MASK_TMP = 58cdf0e10cSrcweir TransliterationModules_ignoreBaFa_ja_JP | 59cdf0e10cSrcweir TransliterationModules_ignoreIterationMark_ja_JP | 60cdf0e10cSrcweir TransliterationModules_ignoreTiJi_ja_JP | 61cdf0e10cSrcweir TransliterationModules_ignoreHyuByu_ja_JP | 62cdf0e10cSrcweir TransliterationModules_ignoreSeZe_ja_JP | 63cdf0e10cSrcweir TransliterationModules_ignoreIandEfollowedByYa_ja_JP | 64cdf0e10cSrcweir TransliterationModules_ignoreKiKuFollowedBySa_ja_JP | 65cdf0e10cSrcweir TransliterationModules_ignoreProlongedSoundMark_ja_JP; 66cc450e3aSHerbert Dürr static const sal_Int32 SIMPLE_TRANS_MASK = ~(COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_WIDTH) | TransliterationModules_FULLWIDTH_HALFWIDTH; 67cc450e3aSHerbert Dürr static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH; 68cdf0e10cSrcweir // Above 2 transliteration is simple but need to take effect in 69cdf0e10cSrcweir // complex transliteration 70cdf0e10cSrcweir 71cdf0e10cSrcweir TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF) 72cdf0e10cSrcweir : xMSF( rxMSF ) 73cdf0e10cSrcweir , pJumpTable( 0 ) 74cdf0e10cSrcweir , pJumpTable2( 0 ) 75cc450e3aSHerbert Dürr , pRegexMatcher( NULL ) 76cdf0e10cSrcweir , pWLD( 0 ) 77cdf0e10cSrcweir { 78cdf0e10cSrcweir SearchOptions aOpt; 79cdf0e10cSrcweir aOpt.algorithmType = SearchAlgorithms_ABSOLUTE; 80cdf0e10cSrcweir aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE; 81cdf0e10cSrcweir //aOpt.Locale = ???; 82cdf0e10cSrcweir setOptions( aOpt ); 83cdf0e10cSrcweir } 84cdf0e10cSrcweir 85cdf0e10cSrcweir TextSearch::~TextSearch() 86cdf0e10cSrcweir { 87cc450e3aSHerbert Dürr delete pRegexMatcher; 88cdf0e10cSrcweir delete pWLD; 89cdf0e10cSrcweir delete pJumpTable; 90cdf0e10cSrcweir delete pJumpTable2; 91cdf0e10cSrcweir } 92cdf0e10cSrcweir 93cdf0e10cSrcweir void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException ) 94cdf0e10cSrcweir { 95cdf0e10cSrcweir aSrchPara = rOptions; 96cdf0e10cSrcweir 97cc450e3aSHerbert Dürr delete pRegexMatcher, pRegexMatcher = NULL; 98cdf0e10cSrcweir delete pWLD, pWLD = 0; 99cdf0e10cSrcweir delete pJumpTable, pJumpTable = 0; 100cdf0e10cSrcweir delete pJumpTable2, pJumpTable2 = 0; 101cdf0e10cSrcweir 102cdf0e10cSrcweir // Create Transliteration class 103cdf0e10cSrcweir if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 104cdf0e10cSrcweir { 105cdf0e10cSrcweir if( !xTranslit.is() ) 106cdf0e10cSrcweir { 107cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 108cdf0e10cSrcweir OUString::createFromAscii( 109cdf0e10cSrcweir "com.sun.star.i18n.Transliteration")); 110cdf0e10cSrcweir if ( xI.is() ) 111cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 112cdf0e10cSrcweir (const Reference< XExtendedTransliteration >*)0)) 113cdf0e10cSrcweir >>= xTranslit; 114cdf0e10cSrcweir } 115cdf0e10cSrcweir // Load transliteration module 116cdf0e10cSrcweir if( xTranslit.is() ) 117cdf0e10cSrcweir xTranslit->loadModule( 118cdf0e10cSrcweir (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ), 119cdf0e10cSrcweir aSrchPara.Locale); 120cdf0e10cSrcweir } 121cdf0e10cSrcweir else if( xTranslit.is() ) 122cdf0e10cSrcweir xTranslit = 0; 123cdf0e10cSrcweir 124cdf0e10cSrcweir // Create Transliteration for 2<->1, 2<->2 transliteration 125cdf0e10cSrcweir if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 126cdf0e10cSrcweir { 127cdf0e10cSrcweir if( !xTranslit2.is() ) 128cdf0e10cSrcweir { 129cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 130cdf0e10cSrcweir OUString::createFromAscii( 131cdf0e10cSrcweir "com.sun.star.i18n.Transliteration")); 132cdf0e10cSrcweir if ( xI.is() ) 133cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 134cdf0e10cSrcweir (const Reference< XExtendedTransliteration >*)0)) 135cdf0e10cSrcweir >>= xTranslit2; 136cdf0e10cSrcweir } 137cdf0e10cSrcweir // Load transliteration module 138cdf0e10cSrcweir if( xTranslit2.is() ) 139cdf0e10cSrcweir xTranslit2->loadModule( 140cdf0e10cSrcweir (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ), 141cdf0e10cSrcweir aSrchPara.Locale); 142cdf0e10cSrcweir } 143cdf0e10cSrcweir 144cdf0e10cSrcweir if ( !xBreak.is() ) 145cdf0e10cSrcweir { 146cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 147cdf0e10cSrcweir OUString::createFromAscii( "com.sun.star.i18n.BreakIterator")); 148cdf0e10cSrcweir if( xI.is() ) 149cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 150cdf0e10cSrcweir (const Reference< XBreakIterator >*)0)) 151cdf0e10cSrcweir >>= xBreak; 152cdf0e10cSrcweir } 153cdf0e10cSrcweir 154cdf0e10cSrcweir sSrchStr = aSrchPara.searchString; 155cdf0e10cSrcweir 156cc450e3aSHerbert Dürr // use transliteration here 157cc450e3aSHerbert Dürr if ( xTranslit.is() && 158cdf0e10cSrcweir aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 159cdf0e10cSrcweir sSrchStr = xTranslit->transliterateString2String( 160cdf0e10cSrcweir aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 161cdf0e10cSrcweir 162cc450e3aSHerbert Dürr if ( xTranslit2.is() && 163cdf0e10cSrcweir aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 164cdf0e10cSrcweir sSrchStr2 = xTranslit2->transliterateString2String( 165cdf0e10cSrcweir aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 166cdf0e10cSrcweir 167cdf0e10cSrcweir // When start or end of search string is a complex script type, we need to 168cdf0e10cSrcweir // make sure the result boundary is not located in the middle of cell. 169cdf0e10cSrcweir checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) == 170cdf0e10cSrcweir ScriptType::COMPLEX)); 171cdf0e10cSrcweir checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 172cdf0e10cSrcweir sSrchStr.getLength()-1) == ScriptType::COMPLEX)); 173cdf0e10cSrcweir 174cc450e3aSHerbert Dürr switch( aSrchPara.algorithmType) 175cdf0e10cSrcweir { 176cc450e3aSHerbert Dürr case SearchAlgorithms_REGEXP: 177cc450e3aSHerbert Dürr fnForward = &TextSearch::RESrchFrwrd; 178cc450e3aSHerbert Dürr fnBackward = &TextSearch::RESrchBkwrd; 179cc450e3aSHerbert Dürr 180cc450e3aSHerbert Dürr { 181cc450e3aSHerbert Dürr sal_uInt32 nIcuSearchFlags = 0; 182cc450e3aSHerbert Dürr // map com::sun::star::util::SearchFlags to ICU uregex.h flags 183cc450e3aSHerbert Dürr // TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE 184cc450e3aSHerbert Dürr // REG_NEWLINE is neither defined properly nor used anywhere => not implemented 185cc450e3aSHerbert Dürr // REG_NOSUB is not used anywhere => not implemented 186cc450e3aSHerbert Dürr // NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute 187cc450e3aSHerbert Dürr // LEV_RELAXED is only used for SearchAlgorithm==Approximate 188cc450e3aSHerbert Dürr // why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it??? 189cc450e3aSHerbert Dürr if( (aSrchPara.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0) 190cc450e3aSHerbert Dürr nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE; 191cc450e3aSHerbert Dürr UErrorCode nIcuErr = U_ZERO_ERROR; 192cc450e3aSHerbert Dürr // assumption: transliteration doesn't mangle regexp control chars 193cc450e3aSHerbert Dürr OUString& rPatternStr = (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr 194cc450e3aSHerbert Dürr : ((aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : aSrchPara.searchString); 195cc450e3aSHerbert Dürr const IcuUniString aIcuSearchPatStr( rPatternStr.getStr(), rPatternStr.getLength()); 196cc450e3aSHerbert Dürr pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr); 197cc450e3aSHerbert Dürr if( nIcuErr) 198cc450e3aSHerbert Dürr { delete pRegexMatcher; pRegexMatcher = NULL;} 199cc450e3aSHerbert Dürr } break; 200cc450e3aSHerbert Dürr 201cc450e3aSHerbert Dürr case SearchAlgorithms_APPROXIMATE: 202cdf0e10cSrcweir fnForward = &TextSearch::ApproxSrchFrwrd; 203cdf0e10cSrcweir fnBackward = &TextSearch::ApproxSrchBkwrd; 204cdf0e10cSrcweir 205cdf0e10cSrcweir pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars, 206cdf0e10cSrcweir aSrchPara.insertedChars, aSrchPara.deletedChars, 207cdf0e10cSrcweir 0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) ); 208cdf0e10cSrcweir 209cdf0e10cSrcweir nLimit = pWLD->GetLimit(); 210cc450e3aSHerbert Dürr break; 211cc450e3aSHerbert Dürr 212cc450e3aSHerbert Dürr default: 213cdf0e10cSrcweir fnForward = &TextSearch::NSrchFrwrd; 214cdf0e10cSrcweir fnBackward = &TextSearch::NSrchBkwrd; 215cc450e3aSHerbert Dürr break; 216cdf0e10cSrcweir } 217cdf0e10cSrcweir } 218cdf0e10cSrcweir 219cdf0e10cSrcweir sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos ) 220cdf0e10cSrcweir { 221cdf0e10cSrcweir sal_Int32 nRet = 0, nEnd = rOff.getLength(); 222cdf0e10cSrcweir while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet; 223cdf0e10cSrcweir return nRet; 224cdf0e10cSrcweir } 225cdf0e10cSrcweir 226cdf0e10cSrcweir sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos) 227cdf0e10cSrcweir throw( RuntimeException ) 228cdf0e10cSrcweir { 229cdf0e10cSrcweir sal_Int32 nDone; 230cdf0e10cSrcweir return nPos == xBreak->previousCharacters(searchStr, nPos+1, 231cdf0e10cSrcweir aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone); 232cdf0e10cSrcweir } 233cdf0e10cSrcweir 234cdf0e10cSrcweir SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 235cdf0e10cSrcweir throw( RuntimeException ) 236cdf0e10cSrcweir { 237cdf0e10cSrcweir SearchResult sres; 238cdf0e10cSrcweir 239cdf0e10cSrcweir OUString in_str(searchStr); 240cdf0e10cSrcweir sal_Int32 newStartPos = startPos; 241cdf0e10cSrcweir sal_Int32 newEndPos = endPos; 242cdf0e10cSrcweir 243cdf0e10cSrcweir bUsePrimarySrchStr = true; 244cdf0e10cSrcweir 245cdf0e10cSrcweir if ( xTranslit.is() ) 246cdf0e10cSrcweir { 247cdf0e10cSrcweir // apply normal transliteration (1<->1, 1<->0) 248cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 249cdf0e10cSrcweir in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 250cdf0e10cSrcweir 251cdf0e10cSrcweir // JP 20.6.2001: also the start and end positions must be corrected! 252cdf0e10cSrcweir if( startPos ) 253cdf0e10cSrcweir newStartPos = FindPosInSeq_Impl( offset, startPos ); 254cdf0e10cSrcweir 255cdf0e10cSrcweir if( endPos < searchStr.getLength() ) 256cdf0e10cSrcweir newEndPos = FindPosInSeq_Impl( offset, endPos ); 257cdf0e10cSrcweir else 258cdf0e10cSrcweir newEndPos = in_str.getLength(); 259cdf0e10cSrcweir 260cdf0e10cSrcweir sres = (this->*fnForward)( in_str, newStartPos, newEndPos ); 261cdf0e10cSrcweir 262cdf0e10cSrcweir for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 263cdf0e10cSrcweir { 264cdf0e10cSrcweir if (sres.startOffset[k]) 265cdf0e10cSrcweir sres.startOffset[k] = offset[sres.startOffset[k]]; 266cdf0e10cSrcweir // JP 20.6.2001: end is ever exclusive and then don't return 267cdf0e10cSrcweir // the position of the next character - return the 268cdf0e10cSrcweir // next position behind the last found character! 269cdf0e10cSrcweir // "a b c" find "b" must return 2,3 and not 2,4!!! 270cdf0e10cSrcweir if (sres.endOffset[k]) 271cdf0e10cSrcweir sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1; 272cdf0e10cSrcweir } 273cdf0e10cSrcweir } 274cdf0e10cSrcweir else 275cdf0e10cSrcweir { 276cdf0e10cSrcweir sres = (this->*fnForward)( in_str, startPos, endPos ); 277cdf0e10cSrcweir } 278cdf0e10cSrcweir 279cdf0e10cSrcweir if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP) 280cdf0e10cSrcweir { 281cdf0e10cSrcweir SearchResult sres2; 282cdf0e10cSrcweir 283cdf0e10cSrcweir in_str = OUString(searchStr); 284cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 285cdf0e10cSrcweir 286cdf0e10cSrcweir in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset ); 287cdf0e10cSrcweir 288cdf0e10cSrcweir if( startPos ) 289cdf0e10cSrcweir startPos = FindPosInSeq_Impl( offset, startPos ); 290cdf0e10cSrcweir 291cdf0e10cSrcweir if( endPos < searchStr.getLength() ) 292cdf0e10cSrcweir endPos = FindPosInSeq_Impl( offset, endPos ); 293cdf0e10cSrcweir else 294cdf0e10cSrcweir endPos = in_str.getLength(); 295cdf0e10cSrcweir 296cdf0e10cSrcweir bUsePrimarySrchStr = false; 297cdf0e10cSrcweir sres2 = (this->*fnForward)( in_str, startPos, endPos ); 298cdf0e10cSrcweir 299cdf0e10cSrcweir for ( int k = 0; k < sres2.startOffset.getLength(); k++ ) 300cdf0e10cSrcweir { 301cdf0e10cSrcweir if (sres2.startOffset[k]) 302cdf0e10cSrcweir sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1; 303cdf0e10cSrcweir if (sres2.endOffset[k]) 304cdf0e10cSrcweir sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1; 305cdf0e10cSrcweir } 306cdf0e10cSrcweir 307cdf0e10cSrcweir // pick first and long one 308cdf0e10cSrcweir if ( sres.subRegExpressions == 0) 309cdf0e10cSrcweir return sres2; 310cdf0e10cSrcweir if ( sres2.subRegExpressions == 1) 311cdf0e10cSrcweir { 312cdf0e10cSrcweir if ( sres.startOffset[0] > sres2.startOffset[0]) 313cdf0e10cSrcweir return sres2; 314cdf0e10cSrcweir else if ( sres.startOffset[0] == sres2.startOffset[0] && 315cdf0e10cSrcweir sres.endOffset[0] < sres2.endOffset[0]) 316cdf0e10cSrcweir return sres2; 317cdf0e10cSrcweir } 318cdf0e10cSrcweir } 319cdf0e10cSrcweir 320cdf0e10cSrcweir return sres; 321cdf0e10cSrcweir } 322cdf0e10cSrcweir 323cdf0e10cSrcweir SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 324cdf0e10cSrcweir throw(RuntimeException) 325cdf0e10cSrcweir { 326cdf0e10cSrcweir SearchResult sres; 327cdf0e10cSrcweir 328cdf0e10cSrcweir OUString in_str(searchStr); 329cdf0e10cSrcweir sal_Int32 newStartPos = startPos; 330cdf0e10cSrcweir sal_Int32 newEndPos = endPos; 331cdf0e10cSrcweir 332cdf0e10cSrcweir bUsePrimarySrchStr = true; 333cdf0e10cSrcweir 334cdf0e10cSrcweir if ( xTranslit.is() ) 335cdf0e10cSrcweir { 336cdf0e10cSrcweir // apply only simple 1<->1 transliteration here 337cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 338cdf0e10cSrcweir in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 339cdf0e10cSrcweir 340cdf0e10cSrcweir // JP 20.6.2001: also the start and end positions must be corrected! 341cdf0e10cSrcweir if( startPos < searchStr.getLength() ) 342cdf0e10cSrcweir newStartPos = FindPosInSeq_Impl( offset, startPos ); 343cdf0e10cSrcweir else 344cdf0e10cSrcweir newStartPos = in_str.getLength(); 345cdf0e10cSrcweir 346cdf0e10cSrcweir if( endPos ) 347cdf0e10cSrcweir newEndPos = FindPosInSeq_Impl( offset, endPos ); 348cdf0e10cSrcweir 349cdf0e10cSrcweir sres = (this->*fnBackward)( in_str, newStartPos, newEndPos ); 350cdf0e10cSrcweir 351cdf0e10cSrcweir for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 352cdf0e10cSrcweir { 353cdf0e10cSrcweir if (sres.startOffset[k]) 354cdf0e10cSrcweir sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1; 355cdf0e10cSrcweir // JP 20.6.2001: end is ever exclusive and then don't return 356cdf0e10cSrcweir // the position of the next character - return the 357cdf0e10cSrcweir // next position behind the last found character! 358cdf0e10cSrcweir // "a b c" find "b" must return 2,3 and not 2,4!!! 359cdf0e10cSrcweir if (sres.endOffset[k]) 360cdf0e10cSrcweir sres.endOffset[k] = offset[sres.endOffset[k]]; 361cdf0e10cSrcweir } 362cdf0e10cSrcweir } 363cdf0e10cSrcweir else 364cdf0e10cSrcweir { 365cdf0e10cSrcweir sres = (this->*fnBackward)( in_str, startPos, endPos ); 366cdf0e10cSrcweir } 367cdf0e10cSrcweir 368cdf0e10cSrcweir if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP ) 369cdf0e10cSrcweir { 370cdf0e10cSrcweir SearchResult sres2; 371cdf0e10cSrcweir 372cdf0e10cSrcweir in_str = OUString(searchStr); 373cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 374cdf0e10cSrcweir 375cdf0e10cSrcweir in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset); 376cdf0e10cSrcweir 377cdf0e10cSrcweir if( startPos < searchStr.getLength() ) 378cdf0e10cSrcweir startPos = FindPosInSeq_Impl( offset, startPos ); 379cdf0e10cSrcweir else 380cdf0e10cSrcweir startPos = in_str.getLength(); 381cdf0e10cSrcweir 382cdf0e10cSrcweir if( endPos ) 383cdf0e10cSrcweir endPos = FindPosInSeq_Impl( offset, endPos ); 384cdf0e10cSrcweir 385cdf0e10cSrcweir bUsePrimarySrchStr = false; 386cdf0e10cSrcweir sres2 = (this->*fnBackward)( in_str, startPos, endPos ); 387cdf0e10cSrcweir 388cdf0e10cSrcweir for( int k = 0; k < sres2.startOffset.getLength(); k++ ) 389cdf0e10cSrcweir { 390cdf0e10cSrcweir if (sres2.startOffset[k]) 391cdf0e10cSrcweir sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1; 392cdf0e10cSrcweir if (sres2.endOffset[k]) 393cdf0e10cSrcweir sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1; 394cdf0e10cSrcweir } 395cdf0e10cSrcweir 396cdf0e10cSrcweir // pick last and long one 397cdf0e10cSrcweir if ( sres.subRegExpressions == 0 ) 398cdf0e10cSrcweir return sres2; 399cdf0e10cSrcweir if ( sres2.subRegExpressions == 1 ) 400cdf0e10cSrcweir { 401cdf0e10cSrcweir if ( sres.startOffset[0] < sres2.startOffset[0] ) 402cdf0e10cSrcweir return sres2; 403cdf0e10cSrcweir if ( sres.startOffset[0] == sres2.startOffset[0] && 404cdf0e10cSrcweir sres.endOffset[0] > sres2.endOffset[0] ) 405cdf0e10cSrcweir return sres2; 406cdf0e10cSrcweir } 407cdf0e10cSrcweir } 408cdf0e10cSrcweir 409cdf0e10cSrcweir return sres; 410cdf0e10cSrcweir } 411cdf0e10cSrcweir 412cc450e3aSHerbert Dürr //--------------------------------------------------------------------- 413cdf0e10cSrcweir 414cdf0e10cSrcweir bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const 415cdf0e10cSrcweir { 416cdf0e10cSrcweir bool bRet = 1; 417cdf0e10cSrcweir if( '\x7f' != rStr[nPos]) 418cdf0e10cSrcweir { 419cdf0e10cSrcweir if ( !xCharClass.is() ) 420cdf0e10cSrcweir { 421cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 422cdf0e10cSrcweir OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification")); 423cdf0e10cSrcweir if( xI.is() ) 424cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 425cdf0e10cSrcweir (const Reference< XCharacterClassification >*)0)) 426cdf0e10cSrcweir >>= xCharClass; 427cdf0e10cSrcweir } 428cdf0e10cSrcweir if ( xCharClass.is() ) 429cdf0e10cSrcweir { 430cdf0e10cSrcweir sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos, 431cdf0e10cSrcweir aSrchPara.Locale ); 432cdf0e10cSrcweir if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA | 433cdf0e10cSrcweir KCharacterType::LETTER ) & nCType ) ) 434cdf0e10cSrcweir bRet = 0; 435cdf0e10cSrcweir } 436cdf0e10cSrcweir } 437cdf0e10cSrcweir return bRet; 438cdf0e10cSrcweir } 439cdf0e10cSrcweir 440cc450e3aSHerbert Dürr // --------- helper methods for Boyer-Moore like text searching ---------- 441cc450e3aSHerbert Dürr // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available 442cdf0e10cSrcweir 443cdf0e10cSrcweir void TextSearch::MakeForwardTab() 444cdf0e10cSrcweir { 445cdf0e10cSrcweir // create the jumptable for the search text 446cdf0e10cSrcweir if( pJumpTable ) 447cdf0e10cSrcweir { 448cdf0e10cSrcweir if( bIsForwardTab ) 449cdf0e10cSrcweir return ; // the jumpTable is ok 450cdf0e10cSrcweir delete pJumpTable; 451cdf0e10cSrcweir } 452cdf0e10cSrcweir bIsForwardTab = true; 453cdf0e10cSrcweir 454cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr.getLength(); 455cdf0e10cSrcweir pJumpTable = new TextSearchJumpTable; 456cdf0e10cSrcweir 457cdf0e10cSrcweir for( n = 0; n < nLen - 1; ++n ) 458cdf0e10cSrcweir { 459cdf0e10cSrcweir sal_Unicode cCh = sSrchStr[n]; 460cdf0e10cSrcweir sal_Int32 nDiff = nLen - n - 1; 461cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 462cdf0e10cSrcweir 463cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 464cdf0e10cSrcweir pJumpTable->insert( aEntry ); 465cdf0e10cSrcweir if ( !aPair.second ) 466cdf0e10cSrcweir (*(aPair.first)).second = nDiff; 467cdf0e10cSrcweir } 468cdf0e10cSrcweir } 469cdf0e10cSrcweir 470cdf0e10cSrcweir void TextSearch::MakeForwardTab2() 471cdf0e10cSrcweir { 472cdf0e10cSrcweir // create the jumptable for the search text 473cdf0e10cSrcweir if( pJumpTable2 ) 474cdf0e10cSrcweir { 475cdf0e10cSrcweir if( bIsForwardTab ) 476cdf0e10cSrcweir return ; // the jumpTable is ok 477cdf0e10cSrcweir delete pJumpTable2; 478cdf0e10cSrcweir } 479cdf0e10cSrcweir bIsForwardTab = true; 480cdf0e10cSrcweir 481cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr2.getLength(); 482cdf0e10cSrcweir pJumpTable2 = new TextSearchJumpTable; 483cdf0e10cSrcweir 484cdf0e10cSrcweir for( n = 0; n < nLen - 1; ++n ) 485cdf0e10cSrcweir { 486cdf0e10cSrcweir sal_Unicode cCh = sSrchStr2[n]; 487cdf0e10cSrcweir sal_Int32 nDiff = nLen - n - 1; 488cdf0e10cSrcweir 489cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 490cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 491cdf0e10cSrcweir pJumpTable2->insert( aEntry ); 492cdf0e10cSrcweir if ( !aPair.second ) 493cdf0e10cSrcweir (*(aPair.first)).second = nDiff; 494cdf0e10cSrcweir } 495cdf0e10cSrcweir } 496cdf0e10cSrcweir 497cdf0e10cSrcweir void TextSearch::MakeBackwardTab() 498cdf0e10cSrcweir { 499cdf0e10cSrcweir // create the jumptable for the search text 500cdf0e10cSrcweir if( pJumpTable ) 501cdf0e10cSrcweir { 502cdf0e10cSrcweir if( !bIsForwardTab ) 503cdf0e10cSrcweir return ; // the jumpTable is ok 504cdf0e10cSrcweir delete pJumpTable; 505cdf0e10cSrcweir } 506cdf0e10cSrcweir bIsForwardTab = false; 507cdf0e10cSrcweir 508cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr.getLength(); 509cdf0e10cSrcweir pJumpTable = new TextSearchJumpTable; 510cdf0e10cSrcweir 511cdf0e10cSrcweir for( n = nLen-1; n > 0; --n ) 512cdf0e10cSrcweir { 513cdf0e10cSrcweir sal_Unicode cCh = sSrchStr[n]; 514cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, n ); 515cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 516cdf0e10cSrcweir pJumpTable->insert( aEntry ); 517cdf0e10cSrcweir if ( !aPair.second ) 518cdf0e10cSrcweir (*(aPair.first)).second = n; 519cdf0e10cSrcweir } 520cdf0e10cSrcweir } 521cdf0e10cSrcweir 522cdf0e10cSrcweir void TextSearch::MakeBackwardTab2() 523cdf0e10cSrcweir { 524cdf0e10cSrcweir // create the jumptable for the search text 525cdf0e10cSrcweir if( pJumpTable2 ) 526cdf0e10cSrcweir { 527cdf0e10cSrcweir if( !bIsForwardTab ) 528cdf0e10cSrcweir return ; // the jumpTable is ok 529cdf0e10cSrcweir delete pJumpTable2; 530cdf0e10cSrcweir } 531cdf0e10cSrcweir bIsForwardTab = false; 532cdf0e10cSrcweir 533cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr2.getLength(); 534cdf0e10cSrcweir pJumpTable2 = new TextSearchJumpTable; 535cdf0e10cSrcweir 536cdf0e10cSrcweir for( n = nLen-1; n > 0; --n ) 537cdf0e10cSrcweir { 538cdf0e10cSrcweir sal_Unicode cCh = sSrchStr2[n]; 539cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, n ); 540cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 541cdf0e10cSrcweir pJumpTable2->insert( aEntry ); 542cdf0e10cSrcweir if ( !aPair.second ) 543cdf0e10cSrcweir (*(aPair.first)).second = n; 544cdf0e10cSrcweir } 545cdf0e10cSrcweir } 546cdf0e10cSrcweir 547cdf0e10cSrcweir sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const 548cdf0e10cSrcweir { 549cdf0e10cSrcweir TextSearchJumpTable *pJump; 550cdf0e10cSrcweir OUString sSearchKey; 551cdf0e10cSrcweir 552cdf0e10cSrcweir if ( bUsePrimarySrchStr ) { 553cdf0e10cSrcweir pJump = pJumpTable; 554cdf0e10cSrcweir sSearchKey = sSrchStr; 555cdf0e10cSrcweir } else { 556cdf0e10cSrcweir pJump = pJumpTable2; 557cdf0e10cSrcweir sSearchKey = sSrchStr2; 558cdf0e10cSrcweir } 559cdf0e10cSrcweir 560cdf0e10cSrcweir TextSearchJumpTable::const_iterator iLook = pJump->find( cChr ); 561cdf0e10cSrcweir if ( iLook == pJump->end() ) 562cdf0e10cSrcweir return sSearchKey.getLength(); 563cdf0e10cSrcweir return (*iLook).second; 564cdf0e10cSrcweir } 565cdf0e10cSrcweir 566cdf0e10cSrcweir 567cdf0e10cSrcweir // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#) 568cdf0e10cSrcweir SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 569cdf0e10cSrcweir throw(RuntimeException) 570cdf0e10cSrcweir { 571cdf0e10cSrcweir SearchResult aRet; 572cdf0e10cSrcweir aRet.subRegExpressions = 0; 573cdf0e10cSrcweir 574cdf0e10cSrcweir OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 575cdf0e10cSrcweir 576cdf0e10cSrcweir OUString aStr( searchStr ); 577cdf0e10cSrcweir sal_Int32 nSuchIdx = aStr.getLength(); 578cdf0e10cSrcweir sal_Int32 nEnde = endPos; 579cdf0e10cSrcweir if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx ) 580cdf0e10cSrcweir return aRet; 581cdf0e10cSrcweir 582cdf0e10cSrcweir 583cdf0e10cSrcweir if( nEnde < sSearchKey.getLength() ) // position inside the search region ? 584cdf0e10cSrcweir return aRet; 585cdf0e10cSrcweir 586cdf0e10cSrcweir nEnde -= sSearchKey.getLength(); 587cdf0e10cSrcweir 588cdf0e10cSrcweir if (bUsePrimarySrchStr) 589cdf0e10cSrcweir MakeForwardTab(); // create the jumptable 590cdf0e10cSrcweir else 591cdf0e10cSrcweir MakeForwardTab2(); 592cdf0e10cSrcweir 593cdf0e10cSrcweir for (sal_Int32 nCmpIdx = startPos; // start position for the search 594cdf0e10cSrcweir nCmpIdx <= nEnde; 595cdf0e10cSrcweir nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1])) 596cdf0e10cSrcweir { 597cdf0e10cSrcweir // if the match would be the completed cells, skip it. 598cdf0e10cSrcweir if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd 599cdf0e10cSrcweir && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) ) 600cdf0e10cSrcweir continue; 601cdf0e10cSrcweir 602cdf0e10cSrcweir nSuchIdx = sSearchKey.getLength() - 1; 603cdf0e10cSrcweir while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx]) 604cdf0e10cSrcweir { 605cdf0e10cSrcweir if( nSuchIdx == 0 ) 606cdf0e10cSrcweir { 607cdf0e10cSrcweir if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 608cdf0e10cSrcweir { 609cdf0e10cSrcweir sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength(); 610cdf0e10cSrcweir bool bAtStart = !nCmpIdx; 611cdf0e10cSrcweir bool bAtEnd = nFndEnd == endPos; 612cdf0e10cSrcweir bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 ); 613cdf0e10cSrcweir bool bDelimBehind = IsDelimiter( aStr, nFndEnd ); 614cdf0e10cSrcweir // * 1 -> only one word in the paragraph 615cdf0e10cSrcweir // * 2 -> at begin of paragraph 616cdf0e10cSrcweir // * 3 -> at end of paragraph 617cdf0e10cSrcweir // * 4 -> inside the paragraph 618cdf0e10cSrcweir if( !( ( bAtStart && bAtEnd ) || // 1 619cdf0e10cSrcweir ( bAtStart && bDelimBehind ) || // 2 620cdf0e10cSrcweir ( bAtEnd && bDelimBefore ) || // 3 621cdf0e10cSrcweir ( bDelimBefore && bDelimBehind ))) // 4 622cdf0e10cSrcweir break; 623cdf0e10cSrcweir } 624cdf0e10cSrcweir 625cdf0e10cSrcweir aRet.subRegExpressions = 1; 626cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 627cdf0e10cSrcweir aRet.startOffset[ 0 ] = nCmpIdx; 628cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 629cdf0e10cSrcweir aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength(); 630cdf0e10cSrcweir 631cdf0e10cSrcweir return aRet; 632cdf0e10cSrcweir } 633cdf0e10cSrcweir else 634cdf0e10cSrcweir nSuchIdx--; 635cdf0e10cSrcweir } 636cdf0e10cSrcweir } 637cdf0e10cSrcweir return aRet; 638cdf0e10cSrcweir } 639cdf0e10cSrcweir 640cdf0e10cSrcweir SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 641cdf0e10cSrcweir throw(RuntimeException) 642cdf0e10cSrcweir { 643cdf0e10cSrcweir SearchResult aRet; 644cdf0e10cSrcweir aRet.subRegExpressions = 0; 645cdf0e10cSrcweir 646cdf0e10cSrcweir OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 647cdf0e10cSrcweir 648cdf0e10cSrcweir OUString aStr( searchStr ); 649cdf0e10cSrcweir sal_Int32 nSuchIdx = aStr.getLength(); 650cdf0e10cSrcweir sal_Int32 nEnde = endPos; 651cdf0e10cSrcweir if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx) 652cdf0e10cSrcweir return aRet; 653cdf0e10cSrcweir 654cdf0e10cSrcweir if (bUsePrimarySrchStr) 655cdf0e10cSrcweir MakeBackwardTab(); // create the jumptable 656cdf0e10cSrcweir else 657cdf0e10cSrcweir MakeBackwardTab2(); 658cdf0e10cSrcweir 659cdf0e10cSrcweir if( nEnde == nSuchIdx ) // end position for the search 660cdf0e10cSrcweir nEnde = sSearchKey.getLength(); 661cdf0e10cSrcweir else 662cdf0e10cSrcweir nEnde += sSearchKey.getLength(); 663cdf0e10cSrcweir 664cdf0e10cSrcweir sal_Int32 nCmpIdx = startPos; // start position for the search 665cdf0e10cSrcweir 666cdf0e10cSrcweir while (nCmpIdx >= nEnde) 667cdf0e10cSrcweir { 668cdf0e10cSrcweir // if the match would be the completed cells, skip it. 669cdf0e10cSrcweir if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx - 670cdf0e10cSrcweir sSearchKey.getLength() )) && (!checkCTLEnd || 671cdf0e10cSrcweir isCellStart( aStr, nCmpIdx))) 672cdf0e10cSrcweir { 673cdf0e10cSrcweir nSuchIdx = 0; 674cdf0e10cSrcweir while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] == 675cdf0e10cSrcweir aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] ) 676cdf0e10cSrcweir nSuchIdx++; 677cdf0e10cSrcweir if( nSuchIdx >= sSearchKey.getLength() ) 678cdf0e10cSrcweir { 679cdf0e10cSrcweir if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 680cdf0e10cSrcweir { 681cdf0e10cSrcweir sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength(); 682cdf0e10cSrcweir bool bAtStart = !nFndStt; 683cdf0e10cSrcweir bool bAtEnd = nCmpIdx == startPos; 684cdf0e10cSrcweir bool bDelimBehind = IsDelimiter( aStr, nCmpIdx ); 685cdf0e10cSrcweir bool bDelimBefore = bAtStart || // begin of paragraph 686cdf0e10cSrcweir IsDelimiter( aStr, nFndStt-1 ); 687cdf0e10cSrcweir // * 1 -> only one word in the paragraph 688cdf0e10cSrcweir // * 2 -> at begin of paragraph 689cdf0e10cSrcweir // * 3 -> at end of paragraph 690cdf0e10cSrcweir // * 4 -> inside the paragraph 691cdf0e10cSrcweir if( ( bAtStart && bAtEnd ) || // 1 692cdf0e10cSrcweir ( bAtStart && bDelimBehind ) || // 2 693cdf0e10cSrcweir ( bAtEnd && bDelimBefore ) || // 3 694cdf0e10cSrcweir ( bDelimBefore && bDelimBehind )) // 4 695cdf0e10cSrcweir { 696cdf0e10cSrcweir aRet.subRegExpressions = 1; 697cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 698cdf0e10cSrcweir aRet.startOffset[ 0 ] = nCmpIdx; 699cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 700cdf0e10cSrcweir aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 701cdf0e10cSrcweir return aRet; 702cdf0e10cSrcweir } 703cdf0e10cSrcweir } 704cdf0e10cSrcweir else 705cdf0e10cSrcweir { 706cdf0e10cSrcweir aRet.subRegExpressions = 1; 707cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 708cdf0e10cSrcweir aRet.startOffset[ 0 ] = nCmpIdx; 709cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 710cdf0e10cSrcweir aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 711cdf0e10cSrcweir return aRet; 712cdf0e10cSrcweir } 713cdf0e10cSrcweir } 714cdf0e10cSrcweir } 715cdf0e10cSrcweir nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] ); 716cdf0e10cSrcweir if( nCmpIdx < nSuchIdx ) 717cdf0e10cSrcweir return aRet; 718cdf0e10cSrcweir nCmpIdx -= nSuchIdx; 719cdf0e10cSrcweir } 720cdf0e10cSrcweir return aRet; 721cdf0e10cSrcweir } 722cdf0e10cSrcweir 723cdf0e10cSrcweir //--------------------------------------------------------------------------- 724cdf0e10cSrcweir 725cdf0e10cSrcweir SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr, 726cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 727cdf0e10cSrcweir throw(RuntimeException) 728cdf0e10cSrcweir { 729cc450e3aSHerbert Dürr SearchResult aRet; 730cc450e3aSHerbert Dürr aRet.subRegExpressions = 0; 731cc450e3aSHerbert Dürr if( !pRegexMatcher) 732cc450e3aSHerbert Dürr return aRet; 733cc450e3aSHerbert Dürr 734cc450e3aSHerbert Dürr if( endPos > searchStr.getLength()) 735cc450e3aSHerbert Dürr endPos = searchStr.getLength(); 736cc450e3aSHerbert Dürr 737cc450e3aSHerbert Dürr // use the ICU RegexMatcher to find the matches 738cc450e3aSHerbert Dürr UErrorCode nIcuErr = U_ZERO_ERROR; 739cc450e3aSHerbert Dürr const IcuUniString aSearchTargetStr( searchStr.getStr(), endPos); 740cc450e3aSHerbert Dürr pRegexMatcher->reset( aSearchTargetStr); 741cc450e3aSHerbert Dürr if( !pRegexMatcher->find( startPos, nIcuErr)) 742cc450e3aSHerbert Dürr return aRet; 743cc450e3aSHerbert Dürr 744cc450e3aSHerbert Dürr aRet.subRegExpressions = 1; 745cc450e3aSHerbert Dürr aRet.startOffset.realloc( aRet.subRegExpressions); 746cc450e3aSHerbert Dürr aRet.endOffset.realloc( aRet.subRegExpressions); 747cc450e3aSHerbert Dürr aRet.startOffset[0] = pRegexMatcher->start( nIcuErr); 748cc450e3aSHerbert Dürr aRet.endOffset[0] = pRegexMatcher->end( nIcuErr); 749cc450e3aSHerbert Dürr 750cc450e3aSHerbert Dürr return aRet; 751cdf0e10cSrcweir } 752cdf0e10cSrcweir 753cdf0e10cSrcweir SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr, 754cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 755cdf0e10cSrcweir throw(RuntimeException) 756cdf0e10cSrcweir { 757cc450e3aSHerbert Dürr // NOTE: for backwards search callers provide startPos/endPos inverted! 758cc450e3aSHerbert Dürr SearchResult aRet; 759cc450e3aSHerbert Dürr aRet.subRegExpressions = 0; 760cc450e3aSHerbert Dürr if( !pRegexMatcher) 761cc450e3aSHerbert Dürr return aRet; 762cc450e3aSHerbert Dürr 763cc450e3aSHerbert Dürr if( startPos > searchStr.getLength()) 764cc450e3aSHerbert Dürr startPos = searchStr.getLength(); 765cc450e3aSHerbert Dürr 766cc450e3aSHerbert Dürr // use the ICU RegexMatcher to find the matches 767cc450e3aSHerbert Dürr // TODO: use ICU's backward searching once it becomes available 768cc450e3aSHerbert Dürr UErrorCode nIcuErr = U_ZERO_ERROR; 769cc450e3aSHerbert Dürr const IcuUniString aSearchTargetStr( searchStr.getStr(), startPos); 770cc450e3aSHerbert Dürr pRegexMatcher->reset( aSearchTargetStr); 771cc450e3aSHerbert Dürr if( !pRegexMatcher->find( endPos, nIcuErr)) 772cc450e3aSHerbert Dürr return aRet; 773cc450e3aSHerbert Dürr 774cc450e3aSHerbert Dürr aRet.subRegExpressions = 1; 775cc450e3aSHerbert Dürr aRet.startOffset.realloc( aRet.subRegExpressions); 776cc450e3aSHerbert Dürr aRet.endOffset.realloc( aRet.subRegExpressions); 777cc450e3aSHerbert Dürr 778cc450e3aSHerbert Dürr do { 779cc450e3aSHerbert Dürr // NOTE: backward search seems to be expected to have startOfs/endOfs inverted! 780cc450e3aSHerbert Dürr aRet.startOffset[0] = pRegexMatcher->end( nIcuErr); 781cc450e3aSHerbert Dürr aRet.endOffset[0] = pRegexMatcher->start( nIcuErr); 782cc450e3aSHerbert Dürr } while( pRegexMatcher->find( aRet.endOffset[0]+1, nIcuErr)); 783cc450e3aSHerbert Dürr 784cc450e3aSHerbert Dürr return aRet; 785cdf0e10cSrcweir } 786cdf0e10cSrcweir 787cc450e3aSHerbert Dürr //--------------------------------------------------------------------------- 788cc450e3aSHerbert Dürr 789cc450e3aSHerbert Dürr // search for words phonetically 790cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr, 791cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 792cdf0e10cSrcweir throw(RuntimeException) 793cdf0e10cSrcweir { 794cdf0e10cSrcweir SearchResult aRet; 795cdf0e10cSrcweir aRet.subRegExpressions = 0; 796cdf0e10cSrcweir 797cdf0e10cSrcweir if( !xBreak.is() ) 798cdf0e10cSrcweir return aRet; 799cdf0e10cSrcweir 800cdf0e10cSrcweir OUString aWTemp( searchStr ); 801cdf0e10cSrcweir 802cdf0e10cSrcweir register sal_Int32 nStt, nEnd; 803cdf0e10cSrcweir 804cdf0e10cSrcweir Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 805cdf0e10cSrcweir aSrchPara.Locale, 806cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 807cdf0e10cSrcweir 808cdf0e10cSrcweir do 809cdf0e10cSrcweir { 810cdf0e10cSrcweir if( aWBnd.startPos >= endPos ) 811cdf0e10cSrcweir break; 812cdf0e10cSrcweir nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos; 813cdf0e10cSrcweir nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos; 814cdf0e10cSrcweir 815cdf0e10cSrcweir if( nStt < nEnd && 816cdf0e10cSrcweir pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 817cdf0e10cSrcweir { 818cdf0e10cSrcweir aRet.subRegExpressions = 1; 819cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 820cdf0e10cSrcweir aRet.startOffset[ 0 ] = nStt; 821cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 822cdf0e10cSrcweir aRet.endOffset[ 0 ] = nEnd; 823cdf0e10cSrcweir break; 824cdf0e10cSrcweir } 825cdf0e10cSrcweir 826cdf0e10cSrcweir nStt = nEnd - 1; 827cdf0e10cSrcweir aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale, 828cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES); 829cdf0e10cSrcweir } while( aWBnd.startPos != aWBnd.endPos || 830cdf0e10cSrcweir (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) ); 831cdf0e10cSrcweir // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only 832cdf0e10cSrcweir // whitespace) in searchStr, getWordBoundary() returned startPos,startPos 833cdf0e10cSrcweir // and nextWord() does also => don't loop forever. 834cdf0e10cSrcweir return aRet; 835cdf0e10cSrcweir } 836cdf0e10cSrcweir 837cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr, 838cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 839cdf0e10cSrcweir throw(RuntimeException) 840cdf0e10cSrcweir { 841cdf0e10cSrcweir SearchResult aRet; 842cdf0e10cSrcweir aRet.subRegExpressions = 0; 843cdf0e10cSrcweir 844cdf0e10cSrcweir if( !xBreak.is() ) 845cdf0e10cSrcweir return aRet; 846cdf0e10cSrcweir 847cdf0e10cSrcweir OUString aWTemp( searchStr ); 848cdf0e10cSrcweir 849cdf0e10cSrcweir register sal_Int32 nStt, nEnd; 850cdf0e10cSrcweir 851cdf0e10cSrcweir Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 852cdf0e10cSrcweir aSrchPara.Locale, 853cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 854cdf0e10cSrcweir 855cdf0e10cSrcweir do 856cdf0e10cSrcweir { 857cdf0e10cSrcweir if( aWBnd.endPos <= endPos ) 858cdf0e10cSrcweir break; 859cdf0e10cSrcweir nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos; 860cdf0e10cSrcweir nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos; 861cdf0e10cSrcweir 862cdf0e10cSrcweir if( nStt < nEnd && 863cdf0e10cSrcweir pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 864cdf0e10cSrcweir { 865cdf0e10cSrcweir aRet.subRegExpressions = 1; 866cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 867cdf0e10cSrcweir aRet.startOffset[ 0 ] = nEnd; 868cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 869cdf0e10cSrcweir aRet.endOffset[ 0 ] = nStt; 870cdf0e10cSrcweir break; 871cdf0e10cSrcweir } 872cdf0e10cSrcweir if( !nStt ) 873cdf0e10cSrcweir break; 874cdf0e10cSrcweir 875cdf0e10cSrcweir aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale, 876cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES); 877cdf0e10cSrcweir } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() ); 878cdf0e10cSrcweir return aRet; 879cdf0e10cSrcweir } 880cdf0e10cSrcweir 881cdf0e10cSrcweir 882cdf0e10cSrcweir static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch"; 883cdf0e10cSrcweir static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n"; 884cdf0e10cSrcweir 885cdf0e10cSrcweir static OUString getServiceName_Static() 886cdf0e10cSrcweir { 887cdf0e10cSrcweir return OUString::createFromAscii( cSearchName ); 888cdf0e10cSrcweir } 889cdf0e10cSrcweir 890cdf0e10cSrcweir static OUString getImplementationName_Static() 891cdf0e10cSrcweir { 892cdf0e10cSrcweir return OUString::createFromAscii( cSearchImpl ); 893cdf0e10cSrcweir } 894cdf0e10cSrcweir 895cdf0e10cSrcweir OUString SAL_CALL 896cdf0e10cSrcweir TextSearch::getImplementationName() 897cdf0e10cSrcweir throw( RuntimeException ) 898cdf0e10cSrcweir { 899cdf0e10cSrcweir return getImplementationName_Static(); 900cdf0e10cSrcweir } 901cdf0e10cSrcweir 902cdf0e10cSrcweir sal_Bool SAL_CALL 903cdf0e10cSrcweir TextSearch::supportsService(const OUString& rServiceName) 904cdf0e10cSrcweir throw( RuntimeException ) 905cdf0e10cSrcweir { 906cdf0e10cSrcweir return !rServiceName.compareToAscii( cSearchName ); 907cdf0e10cSrcweir } 908cdf0e10cSrcweir 909cdf0e10cSrcweir Sequence< OUString > SAL_CALL 910cdf0e10cSrcweir TextSearch::getSupportedServiceNames(void) throw( RuntimeException ) 911cdf0e10cSrcweir { 912cdf0e10cSrcweir Sequence< OUString > aRet(1); 913cdf0e10cSrcweir aRet[0] = getServiceName_Static(); 914cdf0e10cSrcweir return aRet; 915cdf0e10cSrcweir } 916cdf0e10cSrcweir 917cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface > 918cdf0e10cSrcweir SAL_CALL TextSearch_CreateInstance( 919cdf0e10cSrcweir const ::com::sun::star::uno::Reference< 920cdf0e10cSrcweir ::com::sun::star::lang::XMultiServiceFactory >& rxMSF ) 921cdf0e10cSrcweir { 922cdf0e10cSrcweir return ::com::sun::star::uno::Reference< 923cdf0e10cSrcweir ::com::sun::star::uno::XInterface >( 924cdf0e10cSrcweir (::cppu::OWeakObject*) new TextSearch( rxMSF ) ); 925cdf0e10cSrcweir } 926cdf0e10cSrcweir 927cdf0e10cSrcweir extern "C" 928cdf0e10cSrcweir { 929cdf0e10cSrcweir 930cdf0e10cSrcweir void SAL_CALL component_getImplementationEnvironment( 931cdf0e10cSrcweir const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ ) 932cdf0e10cSrcweir { 933cdf0e10cSrcweir *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME; 934cdf0e10cSrcweir } 935cdf0e10cSrcweir 936cdf0e10cSrcweir void* SAL_CALL component_getFactory( const sal_Char* sImplementationName, 937cdf0e10cSrcweir void* _pServiceManager, void* /*_pRegistryKey*/ ) 938cdf0e10cSrcweir { 939cdf0e10cSrcweir void* pRet = NULL; 940cdf0e10cSrcweir 941cdf0e10cSrcweir ::com::sun::star::lang::XMultiServiceFactory* pServiceManager = 942cdf0e10cSrcweir reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* > 943cdf0e10cSrcweir ( _pServiceManager ); 944cdf0e10cSrcweir ::com::sun::star::uno::Reference< 945cdf0e10cSrcweir ::com::sun::star::lang::XSingleServiceFactory > xFactory; 946cdf0e10cSrcweir 947cdf0e10cSrcweir if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) ) 948cdf0e10cSrcweir { 949cdf0e10cSrcweir ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1); 950cdf0e10cSrcweir aServiceNames[0] = getServiceName_Static(); 951cdf0e10cSrcweir xFactory = ::cppu::createSingleFactory( 952cdf0e10cSrcweir pServiceManager, getImplementationName_Static(), 953cdf0e10cSrcweir &TextSearch_CreateInstance, aServiceNames ); 954cdf0e10cSrcweir } 955cdf0e10cSrcweir 956cdf0e10cSrcweir if ( xFactory.is() ) 957cdf0e10cSrcweir { 958cdf0e10cSrcweir xFactory->acquire(); 959cdf0e10cSrcweir pRet = xFactory.get(); 960cdf0e10cSrcweir } 961cdf0e10cSrcweir 962cdf0e10cSrcweir return pRet; 963cdf0e10cSrcweir } 964cdf0e10cSrcweir 965cdf0e10cSrcweir } // extern "C" 966