1449ab281SAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3449ab281SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4449ab281SAndrew Rist * or more contributor license agreements. See the NOTICE file 5449ab281SAndrew Rist * distributed with this work for additional information 6449ab281SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7449ab281SAndrew Rist * to you under the Apache License, Version 2.0 (the 8449ab281SAndrew Rist * "License"); you may not use this file except in compliance 9449ab281SAndrew Rist * with the License. You may obtain a copy of the License at 10449ab281SAndrew Rist * 11449ab281SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12449ab281SAndrew Rist * 13449ab281SAndrew Rist * Unless required by applicable law or agreed to in writing, 14449ab281SAndrew Rist * software distributed under the License is distributed on an 15449ab281SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16449ab281SAndrew Rist * KIND, either express or implied. See the License for the 17449ab281SAndrew Rist * specific language governing permissions and limitations 18449ab281SAndrew Rist * under the License. 19449ab281SAndrew Rist * 20449ab281SAndrew Rist *************************************************************/ 21449ab281SAndrew Rist 22449ab281SAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 25cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 26cdf0e10cSrcweir 27cdf0e10cSrcweir #include "textsearch.hxx" 28cdf0e10cSrcweir #include "levdis.hxx" 29cdf0e10cSrcweir #include <com/sun/star/lang/Locale.hpp> 30cdf0e10cSrcweir #include <com/sun/star/lang/XMultiServiceFactory.hpp> 31cdf0e10cSrcweir #include <comphelper/processfactory.hxx> 32cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp> 33cdf0e10cSrcweir #include <com/sun/star/util/SearchFlags.hpp> 34cdf0e10cSrcweir #include <com/sun/star/i18n/WordType.hpp> 35cdf0e10cSrcweir #include <com/sun/star/i18n/ScriptType.hpp> 36cdf0e10cSrcweir #include <com/sun/star/i18n/CharacterIteratorMode.hpp> 37cdf0e10cSrcweir #include <com/sun/star/i18n/KCharacterType.hpp> 38cdf0e10cSrcweir #include <com/sun/star/registry/XRegistryKey.hpp> 39cdf0e10cSrcweir #include <cppuhelper/factory.hxx> 40cdf0e10cSrcweir #include <cppuhelper/weak.hxx> 41cdf0e10cSrcweir 42cdf0e10cSrcweir #ifdef _MSC_VER 43cdf0e10cSrcweir // get rid of that dumb compiler warning 44cdf0e10cSrcweir // identifier was truncated to '255' characters in the debug information 45cdf0e10cSrcweir // for STL template usage, if .pdb files are to be created 46cdf0e10cSrcweir #pragma warning( disable: 4786 ) 47cdf0e10cSrcweir #endif 48cdf0e10cSrcweir 49cdf0e10cSrcweir #include <string.h> 50cdf0e10cSrcweir 51cdf0e10cSrcweir using namespace ::com::sun::star::util; 52cdf0e10cSrcweir using namespace ::com::sun::star::uno; 53cdf0e10cSrcweir using namespace ::com::sun::star::lang; 54cdf0e10cSrcweir using namespace ::com::sun::star::i18n; 55cdf0e10cSrcweir using namespace ::rtl; 56cdf0e10cSrcweir 57cdf0e10cSrcweir static sal_Int32 COMPLEX_TRANS_MASK_TMP = 58cdf0e10cSrcweir TransliterationModules_ignoreBaFa_ja_JP | 59cdf0e10cSrcweir TransliterationModules_ignoreIterationMark_ja_JP | 60cdf0e10cSrcweir TransliterationModules_ignoreTiJi_ja_JP | 61cdf0e10cSrcweir TransliterationModules_ignoreHyuByu_ja_JP | 62cdf0e10cSrcweir TransliterationModules_ignoreSeZe_ja_JP | 63cdf0e10cSrcweir TransliterationModules_ignoreIandEfollowedByYa_ja_JP | 64cdf0e10cSrcweir TransliterationModules_ignoreKiKuFollowedBySa_ja_JP | 65cdf0e10cSrcweir TransliterationModules_ignoreProlongedSoundMark_ja_JP; 66cc450e3aSHerbert Dürr static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH; 67*f33c7e39SHerbert Dürr static const sal_Int32 SIMPLE_TRANS_MASK = ~COMPLEX_TRANS_MASK; 68cdf0e10cSrcweir // Above 2 transliteration is simple but need to take effect in 69cdf0e10cSrcweir // complex transliteration 70cdf0e10cSrcweir 71cdf0e10cSrcweir TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF) 72cdf0e10cSrcweir : xMSF( rxMSF ) 73cdf0e10cSrcweir , pJumpTable( 0 ) 74cdf0e10cSrcweir , pJumpTable2( 0 ) 75cc450e3aSHerbert Dürr , pRegexMatcher( NULL ) 76cdf0e10cSrcweir , pWLD( 0 ) 77cdf0e10cSrcweir { 78cdf0e10cSrcweir SearchOptions aOpt; 79cdf0e10cSrcweir aOpt.algorithmType = SearchAlgorithms_ABSOLUTE; 80cdf0e10cSrcweir aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE; 81cdf0e10cSrcweir //aOpt.Locale = ???; 82cdf0e10cSrcweir setOptions( aOpt ); 83cdf0e10cSrcweir } 84cdf0e10cSrcweir 85cdf0e10cSrcweir TextSearch::~TextSearch() 86cdf0e10cSrcweir { 87cc450e3aSHerbert Dürr delete pRegexMatcher; 88cdf0e10cSrcweir delete pWLD; 89cdf0e10cSrcweir delete pJumpTable; 90cdf0e10cSrcweir delete pJumpTable2; 91cdf0e10cSrcweir } 92cdf0e10cSrcweir 93cdf0e10cSrcweir void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException ) 94cdf0e10cSrcweir { 95cdf0e10cSrcweir aSrchPara = rOptions; 96cdf0e10cSrcweir 97cc450e3aSHerbert Dürr delete pRegexMatcher, pRegexMatcher = NULL; 98cdf0e10cSrcweir delete pWLD, pWLD = 0; 99cdf0e10cSrcweir delete pJumpTable, pJumpTable = 0; 100cdf0e10cSrcweir delete pJumpTable2, pJumpTable2 = 0; 101cdf0e10cSrcweir 102cdf0e10cSrcweir // Create Transliteration class 103cdf0e10cSrcweir if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 104cdf0e10cSrcweir { 105cdf0e10cSrcweir if( !xTranslit.is() ) 106cdf0e10cSrcweir { 107cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 108cdf0e10cSrcweir OUString::createFromAscii( 109cdf0e10cSrcweir "com.sun.star.i18n.Transliteration")); 110cdf0e10cSrcweir if ( xI.is() ) 111cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 112cdf0e10cSrcweir (const Reference< XExtendedTransliteration >*)0)) 113cdf0e10cSrcweir >>= xTranslit; 114cdf0e10cSrcweir } 115cdf0e10cSrcweir // Load transliteration module 116cdf0e10cSrcweir if( xTranslit.is() ) 117cdf0e10cSrcweir xTranslit->loadModule( 118cdf0e10cSrcweir (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ), 119cdf0e10cSrcweir aSrchPara.Locale); 120cdf0e10cSrcweir } 121cdf0e10cSrcweir else if( xTranslit.is() ) 122cdf0e10cSrcweir xTranslit = 0; 123cdf0e10cSrcweir 124cdf0e10cSrcweir // Create Transliteration for 2<->1, 2<->2 transliteration 125cdf0e10cSrcweir if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 126cdf0e10cSrcweir { 127cdf0e10cSrcweir if( !xTranslit2.is() ) 128cdf0e10cSrcweir { 129cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 130cdf0e10cSrcweir OUString::createFromAscii( 131cdf0e10cSrcweir "com.sun.star.i18n.Transliteration")); 132cdf0e10cSrcweir if ( xI.is() ) 133cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 134cdf0e10cSrcweir (const Reference< XExtendedTransliteration >*)0)) 135cdf0e10cSrcweir >>= xTranslit2; 136cdf0e10cSrcweir } 137cdf0e10cSrcweir // Load transliteration module 138cdf0e10cSrcweir if( xTranslit2.is() ) 139cdf0e10cSrcweir xTranslit2->loadModule( 140cdf0e10cSrcweir (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ), 141cdf0e10cSrcweir aSrchPara.Locale); 142cdf0e10cSrcweir } 143cdf0e10cSrcweir 144cdf0e10cSrcweir if ( !xBreak.is() ) 145cdf0e10cSrcweir { 146cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 147cdf0e10cSrcweir OUString::createFromAscii( "com.sun.star.i18n.BreakIterator")); 148cdf0e10cSrcweir if( xI.is() ) 149cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 150cdf0e10cSrcweir (const Reference< XBreakIterator >*)0)) 151cdf0e10cSrcweir >>= xBreak; 152cdf0e10cSrcweir } 153cdf0e10cSrcweir 154cdf0e10cSrcweir sSrchStr = aSrchPara.searchString; 155cdf0e10cSrcweir 156cc450e3aSHerbert Dürr // use transliteration here 157cc450e3aSHerbert Dürr if ( xTranslit.is() && 158cdf0e10cSrcweir aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) 159cdf0e10cSrcweir sSrchStr = xTranslit->transliterateString2String( 160cdf0e10cSrcweir aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 161cdf0e10cSrcweir 162cc450e3aSHerbert Dürr if ( xTranslit2.is() && 163cdf0e10cSrcweir aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) 164cdf0e10cSrcweir sSrchStr2 = xTranslit2->transliterateString2String( 165cdf0e10cSrcweir aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); 166cdf0e10cSrcweir 167cdf0e10cSrcweir // When start or end of search string is a complex script type, we need to 168cdf0e10cSrcweir // make sure the result boundary is not located in the middle of cell. 169cdf0e10cSrcweir checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) == 170cdf0e10cSrcweir ScriptType::COMPLEX)); 171cdf0e10cSrcweir checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 172cdf0e10cSrcweir sSrchStr.getLength()-1) == ScriptType::COMPLEX)); 173cdf0e10cSrcweir 174cc450e3aSHerbert Dürr switch( aSrchPara.algorithmType) 175cdf0e10cSrcweir { 176cc450e3aSHerbert Dürr case SearchAlgorithms_REGEXP: 177cc450e3aSHerbert Dürr fnForward = &TextSearch::RESrchFrwrd; 178cc450e3aSHerbert Dürr fnBackward = &TextSearch::RESrchBkwrd; 1797f9f793fSHerbert Dürr RESrchPrepare( aSrchPara); 1807f9f793fSHerbert Dürr break; 181cc450e3aSHerbert Dürr 182cc450e3aSHerbert Dürr case SearchAlgorithms_APPROXIMATE: 183cdf0e10cSrcweir fnForward = &TextSearch::ApproxSrchFrwrd; 184cdf0e10cSrcweir fnBackward = &TextSearch::ApproxSrchBkwrd; 185cdf0e10cSrcweir 186cdf0e10cSrcweir pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars, 187cdf0e10cSrcweir aSrchPara.insertedChars, aSrchPara.deletedChars, 188cdf0e10cSrcweir 0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) ); 189cdf0e10cSrcweir 190cdf0e10cSrcweir nLimit = pWLD->GetLimit(); 191cc450e3aSHerbert Dürr break; 192cc450e3aSHerbert Dürr 193cc450e3aSHerbert Dürr default: 194cdf0e10cSrcweir fnForward = &TextSearch::NSrchFrwrd; 195cdf0e10cSrcweir fnBackward = &TextSearch::NSrchBkwrd; 196cc450e3aSHerbert Dürr break; 197cdf0e10cSrcweir } 198cdf0e10cSrcweir } 199cdf0e10cSrcweir 200cdf0e10cSrcweir sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos ) 201cdf0e10cSrcweir { 202cdf0e10cSrcweir sal_Int32 nRet = 0, nEnd = rOff.getLength(); 203cdf0e10cSrcweir while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet; 204cdf0e10cSrcweir return nRet; 205cdf0e10cSrcweir } 206cdf0e10cSrcweir 207cdf0e10cSrcweir sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos) 208cdf0e10cSrcweir throw( RuntimeException ) 209cdf0e10cSrcweir { 210cdf0e10cSrcweir sal_Int32 nDone; 211cdf0e10cSrcweir return nPos == xBreak->previousCharacters(searchStr, nPos+1, 212cdf0e10cSrcweir aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone); 213cdf0e10cSrcweir } 214cdf0e10cSrcweir 215cdf0e10cSrcweir SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 216cdf0e10cSrcweir throw( RuntimeException ) 217cdf0e10cSrcweir { 218cdf0e10cSrcweir SearchResult sres; 219cdf0e10cSrcweir 220cdf0e10cSrcweir OUString in_str(searchStr); 221cdf0e10cSrcweir sal_Int32 newStartPos = startPos; 222cdf0e10cSrcweir sal_Int32 newEndPos = endPos; 223cdf0e10cSrcweir 224cdf0e10cSrcweir bUsePrimarySrchStr = true; 225cdf0e10cSrcweir 226cdf0e10cSrcweir if ( xTranslit.is() ) 227cdf0e10cSrcweir { 228cdf0e10cSrcweir // apply normal transliteration (1<->1, 1<->0) 229cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 230cdf0e10cSrcweir in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 231cdf0e10cSrcweir 232cdf0e10cSrcweir // JP 20.6.2001: also the start and end positions must be corrected! 233cdf0e10cSrcweir if( startPos ) 234cdf0e10cSrcweir newStartPos = FindPosInSeq_Impl( offset, startPos ); 235cdf0e10cSrcweir 236cdf0e10cSrcweir if( endPos < searchStr.getLength() ) 237cdf0e10cSrcweir newEndPos = FindPosInSeq_Impl( offset, endPos ); 238cdf0e10cSrcweir else 239cdf0e10cSrcweir newEndPos = in_str.getLength(); 240cdf0e10cSrcweir 241cdf0e10cSrcweir sres = (this->*fnForward)( in_str, newStartPos, newEndPos ); 242cdf0e10cSrcweir 243cdf0e10cSrcweir for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 244cdf0e10cSrcweir { 245cdf0e10cSrcweir if (sres.startOffset[k]) 246cdf0e10cSrcweir sres.startOffset[k] = offset[sres.startOffset[k]]; 247cdf0e10cSrcweir // JP 20.6.2001: end is ever exclusive and then don't return 248cdf0e10cSrcweir // the position of the next character - return the 249cdf0e10cSrcweir // next position behind the last found character! 250cdf0e10cSrcweir // "a b c" find "b" must return 2,3 and not 2,4!!! 251cdf0e10cSrcweir if (sres.endOffset[k]) 252cdf0e10cSrcweir sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1; 253cdf0e10cSrcweir } 254cdf0e10cSrcweir } 255cdf0e10cSrcweir else 256cdf0e10cSrcweir { 257cdf0e10cSrcweir sres = (this->*fnForward)( in_str, startPos, endPos ); 258cdf0e10cSrcweir } 259cdf0e10cSrcweir 260cdf0e10cSrcweir if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP) 261cdf0e10cSrcweir { 262cdf0e10cSrcweir SearchResult sres2; 263cdf0e10cSrcweir 264cdf0e10cSrcweir in_str = OUString(searchStr); 265cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 266cdf0e10cSrcweir 267cdf0e10cSrcweir in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset ); 268cdf0e10cSrcweir 269cdf0e10cSrcweir if( startPos ) 270cdf0e10cSrcweir startPos = FindPosInSeq_Impl( offset, startPos ); 271cdf0e10cSrcweir 272cdf0e10cSrcweir if( endPos < searchStr.getLength() ) 273cdf0e10cSrcweir endPos = FindPosInSeq_Impl( offset, endPos ); 274cdf0e10cSrcweir else 275cdf0e10cSrcweir endPos = in_str.getLength(); 276cdf0e10cSrcweir 277cdf0e10cSrcweir bUsePrimarySrchStr = false; 278cdf0e10cSrcweir sres2 = (this->*fnForward)( in_str, startPos, endPos ); 279cdf0e10cSrcweir 280cdf0e10cSrcweir for ( int k = 0; k < sres2.startOffset.getLength(); k++ ) 281cdf0e10cSrcweir { 282cdf0e10cSrcweir if (sres2.startOffset[k]) 283cdf0e10cSrcweir sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1; 284cdf0e10cSrcweir if (sres2.endOffset[k]) 285cdf0e10cSrcweir sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1; 286cdf0e10cSrcweir } 287cdf0e10cSrcweir 288cdf0e10cSrcweir // pick first and long one 289cdf0e10cSrcweir if ( sres.subRegExpressions == 0) 290cdf0e10cSrcweir return sres2; 291cdf0e10cSrcweir if ( sres2.subRegExpressions == 1) 292cdf0e10cSrcweir { 293cdf0e10cSrcweir if ( sres.startOffset[0] > sres2.startOffset[0]) 294cdf0e10cSrcweir return sres2; 295cdf0e10cSrcweir else if ( sres.startOffset[0] == sres2.startOffset[0] && 296cdf0e10cSrcweir sres.endOffset[0] < sres2.endOffset[0]) 297cdf0e10cSrcweir return sres2; 298cdf0e10cSrcweir } 299cdf0e10cSrcweir } 300cdf0e10cSrcweir 301cdf0e10cSrcweir return sres; 302cdf0e10cSrcweir } 303cdf0e10cSrcweir 304cdf0e10cSrcweir SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 305cdf0e10cSrcweir throw(RuntimeException) 306cdf0e10cSrcweir { 307cdf0e10cSrcweir SearchResult sres; 308cdf0e10cSrcweir 309cdf0e10cSrcweir OUString in_str(searchStr); 310cdf0e10cSrcweir sal_Int32 newStartPos = startPos; 311cdf0e10cSrcweir sal_Int32 newEndPos = endPos; 312cdf0e10cSrcweir 313cdf0e10cSrcweir bUsePrimarySrchStr = true; 314cdf0e10cSrcweir 315cdf0e10cSrcweir if ( xTranslit.is() ) 316cdf0e10cSrcweir { 317cdf0e10cSrcweir // apply only simple 1<->1 transliteration here 318cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 319cdf0e10cSrcweir in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset ); 320cdf0e10cSrcweir 321cdf0e10cSrcweir // JP 20.6.2001: also the start and end positions must be corrected! 322cdf0e10cSrcweir if( startPos < searchStr.getLength() ) 323cdf0e10cSrcweir newStartPos = FindPosInSeq_Impl( offset, startPos ); 324cdf0e10cSrcweir else 325cdf0e10cSrcweir newStartPos = in_str.getLength(); 326cdf0e10cSrcweir 327cdf0e10cSrcweir if( endPos ) 328cdf0e10cSrcweir newEndPos = FindPosInSeq_Impl( offset, endPos ); 329cdf0e10cSrcweir 330cdf0e10cSrcweir sres = (this->*fnBackward)( in_str, newStartPos, newEndPos ); 331cdf0e10cSrcweir 332cdf0e10cSrcweir for ( int k = 0; k < sres.startOffset.getLength(); k++ ) 333cdf0e10cSrcweir { 334cdf0e10cSrcweir if (sres.startOffset[k]) 335cdf0e10cSrcweir sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1; 336cdf0e10cSrcweir // JP 20.6.2001: end is ever exclusive and then don't return 337cdf0e10cSrcweir // the position of the next character - return the 338cdf0e10cSrcweir // next position behind the last found character! 339cdf0e10cSrcweir // "a b c" find "b" must return 2,3 and not 2,4!!! 340cdf0e10cSrcweir if (sres.endOffset[k]) 341cdf0e10cSrcweir sres.endOffset[k] = offset[sres.endOffset[k]]; 342cdf0e10cSrcweir } 343cdf0e10cSrcweir } 344cdf0e10cSrcweir else 345cdf0e10cSrcweir { 346cdf0e10cSrcweir sres = (this->*fnBackward)( in_str, startPos, endPos ); 347cdf0e10cSrcweir } 348cdf0e10cSrcweir 349cdf0e10cSrcweir if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP ) 350cdf0e10cSrcweir { 351cdf0e10cSrcweir SearchResult sres2; 352cdf0e10cSrcweir 353cdf0e10cSrcweir in_str = OUString(searchStr); 354cdf0e10cSrcweir com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength()); 355cdf0e10cSrcweir 356cdf0e10cSrcweir in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset); 357cdf0e10cSrcweir 358cdf0e10cSrcweir if( startPos < searchStr.getLength() ) 359cdf0e10cSrcweir startPos = FindPosInSeq_Impl( offset, startPos ); 360cdf0e10cSrcweir else 361cdf0e10cSrcweir startPos = in_str.getLength(); 362cdf0e10cSrcweir 363cdf0e10cSrcweir if( endPos ) 364cdf0e10cSrcweir endPos = FindPosInSeq_Impl( offset, endPos ); 365cdf0e10cSrcweir 366cdf0e10cSrcweir bUsePrimarySrchStr = false; 367cdf0e10cSrcweir sres2 = (this->*fnBackward)( in_str, startPos, endPos ); 368cdf0e10cSrcweir 369cdf0e10cSrcweir for( int k = 0; k < sres2.startOffset.getLength(); k++ ) 370cdf0e10cSrcweir { 371cdf0e10cSrcweir if (sres2.startOffset[k]) 372cdf0e10cSrcweir sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1; 373cdf0e10cSrcweir if (sres2.endOffset[k]) 374cdf0e10cSrcweir sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1; 375cdf0e10cSrcweir } 376cdf0e10cSrcweir 377cdf0e10cSrcweir // pick last and long one 378cdf0e10cSrcweir if ( sres.subRegExpressions == 0 ) 379cdf0e10cSrcweir return sres2; 380cdf0e10cSrcweir if ( sres2.subRegExpressions == 1 ) 381cdf0e10cSrcweir { 382cdf0e10cSrcweir if ( sres.startOffset[0] < sres2.startOffset[0] ) 383cdf0e10cSrcweir return sres2; 384cdf0e10cSrcweir if ( sres.startOffset[0] == sres2.startOffset[0] && 385cdf0e10cSrcweir sres.endOffset[0] > sres2.endOffset[0] ) 386cdf0e10cSrcweir return sres2; 387cdf0e10cSrcweir } 388cdf0e10cSrcweir } 389cdf0e10cSrcweir 390cdf0e10cSrcweir return sres; 391cdf0e10cSrcweir } 392cdf0e10cSrcweir 393cc450e3aSHerbert Dürr //--------------------------------------------------------------------- 394cdf0e10cSrcweir 395cdf0e10cSrcweir bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const 396cdf0e10cSrcweir { 397cdf0e10cSrcweir bool bRet = 1; 398cdf0e10cSrcweir if( '\x7f' != rStr[nPos]) 399cdf0e10cSrcweir { 400cdf0e10cSrcweir if ( !xCharClass.is() ) 401cdf0e10cSrcweir { 402cdf0e10cSrcweir Reference < XInterface > xI = xMSF->createInstance( 403cdf0e10cSrcweir OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification")); 404cdf0e10cSrcweir if( xI.is() ) 405cdf0e10cSrcweir xI->queryInterface( ::getCppuType( 406cdf0e10cSrcweir (const Reference< XCharacterClassification >*)0)) 407cdf0e10cSrcweir >>= xCharClass; 408cdf0e10cSrcweir } 409cdf0e10cSrcweir if ( xCharClass.is() ) 410cdf0e10cSrcweir { 411cdf0e10cSrcweir sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos, 412cdf0e10cSrcweir aSrchPara.Locale ); 413cdf0e10cSrcweir if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA | 414cdf0e10cSrcweir KCharacterType::LETTER ) & nCType ) ) 415cdf0e10cSrcweir bRet = 0; 416cdf0e10cSrcweir } 417cdf0e10cSrcweir } 418cdf0e10cSrcweir return bRet; 419cdf0e10cSrcweir } 420cdf0e10cSrcweir 421cc450e3aSHerbert Dürr // --------- helper methods for Boyer-Moore like text searching ---------- 422cc450e3aSHerbert Dürr // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available 423cdf0e10cSrcweir 424cdf0e10cSrcweir void TextSearch::MakeForwardTab() 425cdf0e10cSrcweir { 426cdf0e10cSrcweir // create the jumptable for the search text 427cdf0e10cSrcweir if( pJumpTable ) 428cdf0e10cSrcweir { 429cdf0e10cSrcweir if( bIsForwardTab ) 430cdf0e10cSrcweir return ; // the jumpTable is ok 431cdf0e10cSrcweir delete pJumpTable; 432cdf0e10cSrcweir } 433cdf0e10cSrcweir bIsForwardTab = true; 434cdf0e10cSrcweir 435cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr.getLength(); 436cdf0e10cSrcweir pJumpTable = new TextSearchJumpTable; 437cdf0e10cSrcweir 438cdf0e10cSrcweir for( n = 0; n < nLen - 1; ++n ) 439cdf0e10cSrcweir { 440cdf0e10cSrcweir sal_Unicode cCh = sSrchStr[n]; 441cdf0e10cSrcweir sal_Int32 nDiff = nLen - n - 1; 442cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 443cdf0e10cSrcweir 444cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 445cdf0e10cSrcweir pJumpTable->insert( aEntry ); 446cdf0e10cSrcweir if ( !aPair.second ) 447cdf0e10cSrcweir (*(aPair.first)).second = nDiff; 448cdf0e10cSrcweir } 449cdf0e10cSrcweir } 450cdf0e10cSrcweir 451cdf0e10cSrcweir void TextSearch::MakeForwardTab2() 452cdf0e10cSrcweir { 453cdf0e10cSrcweir // create the jumptable for the search text 454cdf0e10cSrcweir if( pJumpTable2 ) 455cdf0e10cSrcweir { 456cdf0e10cSrcweir if( bIsForwardTab ) 457cdf0e10cSrcweir return ; // the jumpTable is ok 458cdf0e10cSrcweir delete pJumpTable2; 459cdf0e10cSrcweir } 460cdf0e10cSrcweir bIsForwardTab = true; 461cdf0e10cSrcweir 462cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr2.getLength(); 463cdf0e10cSrcweir pJumpTable2 = new TextSearchJumpTable; 464cdf0e10cSrcweir 465cdf0e10cSrcweir for( n = 0; n < nLen - 1; ++n ) 466cdf0e10cSrcweir { 467cdf0e10cSrcweir sal_Unicode cCh = sSrchStr2[n]; 468cdf0e10cSrcweir sal_Int32 nDiff = nLen - n - 1; 469cdf0e10cSrcweir 470cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, nDiff ); 471cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 472cdf0e10cSrcweir pJumpTable2->insert( aEntry ); 473cdf0e10cSrcweir if ( !aPair.second ) 474cdf0e10cSrcweir (*(aPair.first)).second = nDiff; 475cdf0e10cSrcweir } 476cdf0e10cSrcweir } 477cdf0e10cSrcweir 478cdf0e10cSrcweir void TextSearch::MakeBackwardTab() 479cdf0e10cSrcweir { 480cdf0e10cSrcweir // create the jumptable for the search text 481cdf0e10cSrcweir if( pJumpTable ) 482cdf0e10cSrcweir { 483cdf0e10cSrcweir if( !bIsForwardTab ) 484cdf0e10cSrcweir return ; // the jumpTable is ok 485cdf0e10cSrcweir delete pJumpTable; 486cdf0e10cSrcweir } 487cdf0e10cSrcweir bIsForwardTab = false; 488cdf0e10cSrcweir 489cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr.getLength(); 490cdf0e10cSrcweir pJumpTable = new TextSearchJumpTable; 491cdf0e10cSrcweir 492cdf0e10cSrcweir for( n = nLen-1; n > 0; --n ) 493cdf0e10cSrcweir { 494cdf0e10cSrcweir sal_Unicode cCh = sSrchStr[n]; 495cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, n ); 496cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 497cdf0e10cSrcweir pJumpTable->insert( aEntry ); 498cdf0e10cSrcweir if ( !aPair.second ) 499cdf0e10cSrcweir (*(aPair.first)).second = n; 500cdf0e10cSrcweir } 501cdf0e10cSrcweir } 502cdf0e10cSrcweir 503cdf0e10cSrcweir void TextSearch::MakeBackwardTab2() 504cdf0e10cSrcweir { 505cdf0e10cSrcweir // create the jumptable for the search text 506cdf0e10cSrcweir if( pJumpTable2 ) 507cdf0e10cSrcweir { 508cdf0e10cSrcweir if( !bIsForwardTab ) 509cdf0e10cSrcweir return ; // the jumpTable is ok 510cdf0e10cSrcweir delete pJumpTable2; 511cdf0e10cSrcweir } 512cdf0e10cSrcweir bIsForwardTab = false; 513cdf0e10cSrcweir 514cdf0e10cSrcweir sal_Int32 n, nLen = sSrchStr2.getLength(); 515cdf0e10cSrcweir pJumpTable2 = new TextSearchJumpTable; 516cdf0e10cSrcweir 517cdf0e10cSrcweir for( n = nLen-1; n > 0; --n ) 518cdf0e10cSrcweir { 519cdf0e10cSrcweir sal_Unicode cCh = sSrchStr2[n]; 520cdf0e10cSrcweir TextSearchJumpTable::value_type aEntry( cCh, n ); 521cdf0e10cSrcweir ::std::pair< TextSearchJumpTable::iterator, bool > aPair = 522cdf0e10cSrcweir pJumpTable2->insert( aEntry ); 523cdf0e10cSrcweir if ( !aPair.second ) 524cdf0e10cSrcweir (*(aPair.first)).second = n; 525cdf0e10cSrcweir } 526cdf0e10cSrcweir } 527cdf0e10cSrcweir 528cdf0e10cSrcweir sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const 529cdf0e10cSrcweir { 530cdf0e10cSrcweir TextSearchJumpTable *pJump; 531cdf0e10cSrcweir OUString sSearchKey; 532cdf0e10cSrcweir 533cdf0e10cSrcweir if ( bUsePrimarySrchStr ) { 534cdf0e10cSrcweir pJump = pJumpTable; 535cdf0e10cSrcweir sSearchKey = sSrchStr; 536cdf0e10cSrcweir } else { 537cdf0e10cSrcweir pJump = pJumpTable2; 538cdf0e10cSrcweir sSearchKey = sSrchStr2; 539cdf0e10cSrcweir } 540cdf0e10cSrcweir 541cdf0e10cSrcweir TextSearchJumpTable::const_iterator iLook = pJump->find( cChr ); 542cdf0e10cSrcweir if ( iLook == pJump->end() ) 543cdf0e10cSrcweir return sSearchKey.getLength(); 544cdf0e10cSrcweir return (*iLook).second; 545cdf0e10cSrcweir } 546cdf0e10cSrcweir 547cdf0e10cSrcweir 548cdf0e10cSrcweir // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#) 549cdf0e10cSrcweir SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 550cdf0e10cSrcweir throw(RuntimeException) 551cdf0e10cSrcweir { 552cdf0e10cSrcweir SearchResult aRet; 553cdf0e10cSrcweir aRet.subRegExpressions = 0; 554cdf0e10cSrcweir 555cdf0e10cSrcweir OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 556cdf0e10cSrcweir 557cdf0e10cSrcweir OUString aStr( searchStr ); 558cdf0e10cSrcweir sal_Int32 nSuchIdx = aStr.getLength(); 559cdf0e10cSrcweir sal_Int32 nEnde = endPos; 560cdf0e10cSrcweir if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx ) 561cdf0e10cSrcweir return aRet; 562cdf0e10cSrcweir 563cdf0e10cSrcweir 564cdf0e10cSrcweir if( nEnde < sSearchKey.getLength() ) // position inside the search region ? 565cdf0e10cSrcweir return aRet; 566cdf0e10cSrcweir 567cdf0e10cSrcweir nEnde -= sSearchKey.getLength(); 568cdf0e10cSrcweir 569cdf0e10cSrcweir if (bUsePrimarySrchStr) 570cdf0e10cSrcweir MakeForwardTab(); // create the jumptable 571cdf0e10cSrcweir else 572cdf0e10cSrcweir MakeForwardTab2(); 573cdf0e10cSrcweir 574cdf0e10cSrcweir for (sal_Int32 nCmpIdx = startPos; // start position for the search 575cdf0e10cSrcweir nCmpIdx <= nEnde; 576cdf0e10cSrcweir nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1])) 577cdf0e10cSrcweir { 578cdf0e10cSrcweir // if the match would be the completed cells, skip it. 579cdf0e10cSrcweir if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd 580cdf0e10cSrcweir && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) ) 581cdf0e10cSrcweir continue; 582cdf0e10cSrcweir 583cdf0e10cSrcweir nSuchIdx = sSearchKey.getLength() - 1; 584cdf0e10cSrcweir while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx]) 585cdf0e10cSrcweir { 586cdf0e10cSrcweir if( nSuchIdx == 0 ) 587cdf0e10cSrcweir { 588cdf0e10cSrcweir if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 589cdf0e10cSrcweir { 590cdf0e10cSrcweir sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength(); 591cdf0e10cSrcweir bool bAtStart = !nCmpIdx; 592cdf0e10cSrcweir bool bAtEnd = nFndEnd == endPos; 593cdf0e10cSrcweir bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 ); 594cdf0e10cSrcweir bool bDelimBehind = IsDelimiter( aStr, nFndEnd ); 595cdf0e10cSrcweir // * 1 -> only one word in the paragraph 596cdf0e10cSrcweir // * 2 -> at begin of paragraph 597cdf0e10cSrcweir // * 3 -> at end of paragraph 598cdf0e10cSrcweir // * 4 -> inside the paragraph 599cdf0e10cSrcweir if( !( ( bAtStart && bAtEnd ) || // 1 600cdf0e10cSrcweir ( bAtStart && bDelimBehind ) || // 2 601cdf0e10cSrcweir ( bAtEnd && bDelimBefore ) || // 3 602cdf0e10cSrcweir ( bDelimBefore && bDelimBehind ))) // 4 603cdf0e10cSrcweir break; 604cdf0e10cSrcweir } 605cdf0e10cSrcweir 606cdf0e10cSrcweir aRet.subRegExpressions = 1; 607cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 608cdf0e10cSrcweir aRet.startOffset[ 0 ] = nCmpIdx; 609cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 610cdf0e10cSrcweir aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength(); 611cdf0e10cSrcweir 612cdf0e10cSrcweir return aRet; 613cdf0e10cSrcweir } 614cdf0e10cSrcweir else 615cdf0e10cSrcweir nSuchIdx--; 616cdf0e10cSrcweir } 617cdf0e10cSrcweir } 618cdf0e10cSrcweir return aRet; 619cdf0e10cSrcweir } 620cdf0e10cSrcweir 621cdf0e10cSrcweir SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) 622cdf0e10cSrcweir throw(RuntimeException) 623cdf0e10cSrcweir { 624cdf0e10cSrcweir SearchResult aRet; 625cdf0e10cSrcweir aRet.subRegExpressions = 0; 626cdf0e10cSrcweir 627cdf0e10cSrcweir OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2; 628cdf0e10cSrcweir 629cdf0e10cSrcweir OUString aStr( searchStr ); 630cdf0e10cSrcweir sal_Int32 nSuchIdx = aStr.getLength(); 631cdf0e10cSrcweir sal_Int32 nEnde = endPos; 632cdf0e10cSrcweir if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx) 633cdf0e10cSrcweir return aRet; 634cdf0e10cSrcweir 635cdf0e10cSrcweir if (bUsePrimarySrchStr) 636cdf0e10cSrcweir MakeBackwardTab(); // create the jumptable 637cdf0e10cSrcweir else 638cdf0e10cSrcweir MakeBackwardTab2(); 639cdf0e10cSrcweir 640cdf0e10cSrcweir if( nEnde == nSuchIdx ) // end position for the search 641cdf0e10cSrcweir nEnde = sSearchKey.getLength(); 642cdf0e10cSrcweir else 643cdf0e10cSrcweir nEnde += sSearchKey.getLength(); 644cdf0e10cSrcweir 645cdf0e10cSrcweir sal_Int32 nCmpIdx = startPos; // start position for the search 646cdf0e10cSrcweir 647cdf0e10cSrcweir while (nCmpIdx >= nEnde) 648cdf0e10cSrcweir { 649cdf0e10cSrcweir // if the match would be the completed cells, skip it. 650cdf0e10cSrcweir if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx - 651cdf0e10cSrcweir sSearchKey.getLength() )) && (!checkCTLEnd || 652cdf0e10cSrcweir isCellStart( aStr, nCmpIdx))) 653cdf0e10cSrcweir { 654cdf0e10cSrcweir nSuchIdx = 0; 655cdf0e10cSrcweir while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] == 656cdf0e10cSrcweir aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] ) 657cdf0e10cSrcweir nSuchIdx++; 658cdf0e10cSrcweir if( nSuchIdx >= sSearchKey.getLength() ) 659cdf0e10cSrcweir { 660cdf0e10cSrcweir if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag ) 661cdf0e10cSrcweir { 662cdf0e10cSrcweir sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength(); 663cdf0e10cSrcweir bool bAtStart = !nFndStt; 664cdf0e10cSrcweir bool bAtEnd = nCmpIdx == startPos; 665cdf0e10cSrcweir bool bDelimBehind = IsDelimiter( aStr, nCmpIdx ); 666cdf0e10cSrcweir bool bDelimBefore = bAtStart || // begin of paragraph 667cdf0e10cSrcweir IsDelimiter( aStr, nFndStt-1 ); 668cdf0e10cSrcweir // * 1 -> only one word in the paragraph 669cdf0e10cSrcweir // * 2 -> at begin of paragraph 670cdf0e10cSrcweir // * 3 -> at end of paragraph 671cdf0e10cSrcweir // * 4 -> inside the paragraph 672cdf0e10cSrcweir if( ( bAtStart && bAtEnd ) || // 1 673cdf0e10cSrcweir ( bAtStart && bDelimBehind ) || // 2 674cdf0e10cSrcweir ( bAtEnd && bDelimBefore ) || // 3 675cdf0e10cSrcweir ( bDelimBefore && bDelimBehind )) // 4 676cdf0e10cSrcweir { 677cdf0e10cSrcweir aRet.subRegExpressions = 1; 678cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 679cdf0e10cSrcweir aRet.startOffset[ 0 ] = nCmpIdx; 680cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 681cdf0e10cSrcweir aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 682cdf0e10cSrcweir return aRet; 683cdf0e10cSrcweir } 684cdf0e10cSrcweir } 685cdf0e10cSrcweir else 686cdf0e10cSrcweir { 687cdf0e10cSrcweir aRet.subRegExpressions = 1; 688cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 689cdf0e10cSrcweir aRet.startOffset[ 0 ] = nCmpIdx; 690cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 691cdf0e10cSrcweir aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength(); 692cdf0e10cSrcweir return aRet; 693cdf0e10cSrcweir } 694cdf0e10cSrcweir } 695cdf0e10cSrcweir } 696cdf0e10cSrcweir nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] ); 697cdf0e10cSrcweir if( nCmpIdx < nSuchIdx ) 698cdf0e10cSrcweir return aRet; 699cdf0e10cSrcweir nCmpIdx -= nSuchIdx; 700cdf0e10cSrcweir } 701cdf0e10cSrcweir return aRet; 702cdf0e10cSrcweir } 703cdf0e10cSrcweir 7047f9f793fSHerbert Dürr void TextSearch::RESrchPrepare( const ::com::sun::star::util::SearchOptions& rOptions) 7057f9f793fSHerbert Dürr { 7067f9f793fSHerbert Dürr // select the transliterated pattern string 7077f9f793fSHerbert Dürr const OUString& rPatternStr = 7087f9f793fSHerbert Dürr (rOptions.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr 7097f9f793fSHerbert Dürr : ((rOptions.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : rOptions.searchString); 7107f9f793fSHerbert Dürr 7117c5e76a7SHerbert Dürr sal_uInt32 nIcuSearchFlags = UREGEX_UWORD; // request UAX#29 unicode capability 7127f9f793fSHerbert Dürr // map com::sun::star::util::SearchFlags to ICU uregex.h flags 7137f9f793fSHerbert Dürr // TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE 7147f9f793fSHerbert Dürr // REG_NEWLINE is neither properly defined nor used anywhere => not implemented 7157f9f793fSHerbert Dürr // REG_NOSUB is not used anywhere => not implemented 7167f9f793fSHerbert Dürr // NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute 7177f9f793fSHerbert Dürr // LEV_RELAXED is only used for SearchAlgorithm==Approximate 7187f9f793fSHerbert Dürr // why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it??? 7197f9f793fSHerbert Dürr if( (rOptions.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0) 7207f9f793fSHerbert Dürr nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE; 7217f9f793fSHerbert Dürr UErrorCode nIcuErr = U_ZERO_ERROR; 7227f9f793fSHerbert Dürr // assumption: transliteration didn't mangle regexp control chars 72303c97e34SYuri Dario #ifdef OS2 72403c97e34SYuri Dario IcuUniString aIcuSearchPatStr( (const UChar*)rPatternStr.getStr(), rPatternStr.getLength()); 72503c97e34SYuri Dario #else 7267f9f793fSHerbert Dürr IcuUniString aIcuSearchPatStr( rPatternStr.getStr(), rPatternStr.getLength()); 72703c97e34SYuri Dario #endif 7287f9f793fSHerbert Dürr #if 1 7297f9f793fSHerbert Dürr // for conveniance specific syntax elements of the old regex engine are emulated 7307f9f793fSHerbert Dürr // by using regular word boundary matching \b to replace \< and \> 7317f9f793fSHerbert Dürr static const IcuUniString aChevronPattern( "\\<|\\>", -1, IcuUniString::kInvariant); 7327f9f793fSHerbert Dürr static const IcuUniString aChevronReplace( "\\b", -1, IcuUniString::kInvariant); 7337f9f793fSHerbert Dürr static RegexMatcher aChevronMatcher( aChevronPattern, 0, nIcuErr); 7347f9f793fSHerbert Dürr aChevronMatcher.reset( aIcuSearchPatStr); 7357f9f793fSHerbert Dürr aIcuSearchPatStr = aChevronMatcher.replaceAll( aChevronReplace, nIcuErr); 7367f9f793fSHerbert Dürr aChevronMatcher.reset(); 7377f9f793fSHerbert Dürr #endif 7387f9f793fSHerbert Dürr pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr); 7397f9f793fSHerbert Dürr if( nIcuErr) 7407f9f793fSHerbert Dürr { delete pRegexMatcher; pRegexMatcher = NULL;} 7417f9f793fSHerbert Dürr } 7427f9f793fSHerbert Dürr 743cdf0e10cSrcweir //--------------------------------------------------------------------------- 744cdf0e10cSrcweir 745cdf0e10cSrcweir SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr, 746cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 747cdf0e10cSrcweir throw(RuntimeException) 748cdf0e10cSrcweir { 749cc450e3aSHerbert Dürr SearchResult aRet; 750cc450e3aSHerbert Dürr aRet.subRegExpressions = 0; 751cc450e3aSHerbert Dürr if( !pRegexMatcher) 752cc450e3aSHerbert Dürr return aRet; 753cc450e3aSHerbert Dürr 754cc450e3aSHerbert Dürr if( endPos > searchStr.getLength()) 755cc450e3aSHerbert Dürr endPos = searchStr.getLength(); 756cc450e3aSHerbert Dürr 757cc450e3aSHerbert Dürr // use the ICU RegexMatcher to find the matches 758cc450e3aSHerbert Dürr UErrorCode nIcuErr = U_ZERO_ERROR; 75903c97e34SYuri Dario #ifdef OS2 76003c97e34SYuri Dario const IcuUniString aSearchTargetStr( (const UChar*)searchStr.getStr(), endPos); 76103c97e34SYuri Dario #else 762cc450e3aSHerbert Dürr const IcuUniString aSearchTargetStr( searchStr.getStr(), endPos); 76303c97e34SYuri Dario #endif 764cc450e3aSHerbert Dürr pRegexMatcher->reset( aSearchTargetStr); 76516b8677bSHerbert Dürr // search until there is a valid match 76616b8677bSHerbert Dürr for(;;) 76716b8677bSHerbert Dürr { 76816b8677bSHerbert Dürr if( !pRegexMatcher->find( startPos, nIcuErr)) 76916b8677bSHerbert Dürr return aRet; 77016b8677bSHerbert Dürr 77116b8677bSHerbert Dürr // #i118887# ignore zero-length matches e.g. "a*" in "bc" 77216b8677bSHerbert Dürr int nStartOfs = pRegexMatcher->start( nIcuErr); 77316b8677bSHerbert Dürr int nEndOfs = pRegexMatcher->end( nIcuErr); 77416b8677bSHerbert Dürr if( nStartOfs < nEndOfs) 77516b8677bSHerbert Dürr break; 77616b8677bSHerbert Dürr // try at next position if there was a zero-length match 77716b8677bSHerbert Dürr if( ++startPos >= endPos) 77816b8677bSHerbert Dürr return aRet; 77916b8677bSHerbert Dürr } 780cc450e3aSHerbert Dürr 78116b8677bSHerbert Dürr // extract the result of the search 7820c7ce76dSHerbert Dürr const int nGroupCount = pRegexMatcher->groupCount(); 7830c7ce76dSHerbert Dürr aRet.subRegExpressions = nGroupCount + 1; 784cc450e3aSHerbert Dürr aRet.startOffset.realloc( aRet.subRegExpressions); 785cc450e3aSHerbert Dürr aRet.endOffset.realloc( aRet.subRegExpressions); 786cc450e3aSHerbert Dürr aRet.startOffset[0] = pRegexMatcher->start( nIcuErr); 787cc450e3aSHerbert Dürr aRet.endOffset[0] = pRegexMatcher->end( nIcuErr); 7880c7ce76dSHerbert Dürr for( int i = 1; i <= nGroupCount; ++i) { 7890c7ce76dSHerbert Dürr aRet.startOffset[i] = pRegexMatcher->start( i, nIcuErr); 7900c7ce76dSHerbert Dürr aRet.endOffset[i] = pRegexMatcher->end( i, nIcuErr); 7910c7ce76dSHerbert Dürr } 792cc450e3aSHerbert Dürr 793cc450e3aSHerbert Dürr return aRet; 794cdf0e10cSrcweir } 795cdf0e10cSrcweir 796cdf0e10cSrcweir SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr, 797cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 798cdf0e10cSrcweir throw(RuntimeException) 799cdf0e10cSrcweir { 800cc450e3aSHerbert Dürr // NOTE: for backwards search callers provide startPos/endPos inverted! 801cc450e3aSHerbert Dürr SearchResult aRet; 802cc450e3aSHerbert Dürr aRet.subRegExpressions = 0; 803cc450e3aSHerbert Dürr if( !pRegexMatcher) 804cc450e3aSHerbert Dürr return aRet; 805cc450e3aSHerbert Dürr 806cc450e3aSHerbert Dürr if( startPos > searchStr.getLength()) 807cc450e3aSHerbert Dürr startPos = searchStr.getLength(); 808cc450e3aSHerbert Dürr 809cc450e3aSHerbert Dürr // use the ICU RegexMatcher to find the matches 810cc450e3aSHerbert Dürr // TODO: use ICU's backward searching once it becomes available 8110c7ce76dSHerbert Dürr // as its replacement using forward search is not as good as the real thing 812cc450e3aSHerbert Dürr UErrorCode nIcuErr = U_ZERO_ERROR; 81303c97e34SYuri Dario #ifdef OS2 81403c97e34SYuri Dario const IcuUniString aSearchTargetStr( (const UChar*)searchStr.getStr(), startPos); 81503c97e34SYuri Dario #else 816cc450e3aSHerbert Dürr const IcuUniString aSearchTargetStr( searchStr.getStr(), startPos); 81703c97e34SYuri Dario #endif 818cc450e3aSHerbert Dürr pRegexMatcher->reset( aSearchTargetStr); 819cc450e3aSHerbert Dürr if( !pRegexMatcher->find( endPos, nIcuErr)) 820cc450e3aSHerbert Dürr return aRet; 821cc450e3aSHerbert Dürr 8220c7ce76dSHerbert Dürr // find the last match 8230c7ce76dSHerbert Dürr int nLastPos = 0; 8240c7ce76dSHerbert Dürr do { 8250c7ce76dSHerbert Dürr nLastPos = pRegexMatcher->start( nIcuErr); 8260c7ce76dSHerbert Dürr } while( pRegexMatcher->find( nLastPos + 1, nIcuErr)); 8270c7ce76dSHerbert Dürr 8280c7ce76dSHerbert Dürr // find last match again to get its details 8290c7ce76dSHerbert Dürr pRegexMatcher->find( nLastPos, nIcuErr); 8300c7ce76dSHerbert Dürr 8310c7ce76dSHerbert Dürr // fill in the details of the last match 8320c7ce76dSHerbert Dürr const int nGroupCount = pRegexMatcher->groupCount(); 8330c7ce76dSHerbert Dürr aRet.subRegExpressions = nGroupCount + 1; 834cc450e3aSHerbert Dürr aRet.startOffset.realloc( aRet.subRegExpressions); 835cc450e3aSHerbert Dürr aRet.endOffset.realloc( aRet.subRegExpressions); 8360c7ce76dSHerbert Dürr // NOTE: existing users of backward search seem to expect startOfs/endOfs being inverted! 8370c7ce76dSHerbert Dürr aRet.startOffset[0] = pRegexMatcher->end( nIcuErr); 8380c7ce76dSHerbert Dürr aRet.endOffset[0] = pRegexMatcher->start( nIcuErr); 8390c7ce76dSHerbert Dürr for( int i = 1; i <= nGroupCount; ++i) { 8400c7ce76dSHerbert Dürr aRet.startOffset[i] = pRegexMatcher->end( i, nIcuErr); 8410c7ce76dSHerbert Dürr aRet.endOffset[i] = pRegexMatcher->start( i, nIcuErr); 8420c7ce76dSHerbert Dürr } 843cc450e3aSHerbert Dürr 844cc450e3aSHerbert Dürr return aRet; 845cdf0e10cSrcweir } 846cdf0e10cSrcweir 847cc450e3aSHerbert Dürr //--------------------------------------------------------------------------- 848cc450e3aSHerbert Dürr 849cc450e3aSHerbert Dürr // search for words phonetically 850cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr, 851cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 852cdf0e10cSrcweir throw(RuntimeException) 853cdf0e10cSrcweir { 854cdf0e10cSrcweir SearchResult aRet; 855cdf0e10cSrcweir aRet.subRegExpressions = 0; 856cdf0e10cSrcweir 857cdf0e10cSrcweir if( !xBreak.is() ) 858cdf0e10cSrcweir return aRet; 859cdf0e10cSrcweir 860cdf0e10cSrcweir OUString aWTemp( searchStr ); 861cdf0e10cSrcweir 862cdf0e10cSrcweir register sal_Int32 nStt, nEnd; 863cdf0e10cSrcweir 864cdf0e10cSrcweir Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 865cdf0e10cSrcweir aSrchPara.Locale, 866cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 867cdf0e10cSrcweir 868cdf0e10cSrcweir do 869cdf0e10cSrcweir { 870cdf0e10cSrcweir if( aWBnd.startPos >= endPos ) 871cdf0e10cSrcweir break; 872cdf0e10cSrcweir nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos; 873cdf0e10cSrcweir nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos; 874cdf0e10cSrcweir 875cdf0e10cSrcweir if( nStt < nEnd && 876cdf0e10cSrcweir pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 877cdf0e10cSrcweir { 878cdf0e10cSrcweir aRet.subRegExpressions = 1; 879cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 880cdf0e10cSrcweir aRet.startOffset[ 0 ] = nStt; 881cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 882cdf0e10cSrcweir aRet.endOffset[ 0 ] = nEnd; 883cdf0e10cSrcweir break; 884cdf0e10cSrcweir } 885cdf0e10cSrcweir 886cdf0e10cSrcweir nStt = nEnd - 1; 887cdf0e10cSrcweir aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale, 888cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES); 889cdf0e10cSrcweir } while( aWBnd.startPos != aWBnd.endPos || 890cdf0e10cSrcweir (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) ); 891cdf0e10cSrcweir // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only 892cdf0e10cSrcweir // whitespace) in searchStr, getWordBoundary() returned startPos,startPos 893cdf0e10cSrcweir // and nextWord() does also => don't loop forever. 894cdf0e10cSrcweir return aRet; 895cdf0e10cSrcweir } 896cdf0e10cSrcweir 897cdf0e10cSrcweir SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr, 898cdf0e10cSrcweir sal_Int32 startPos, sal_Int32 endPos ) 899cdf0e10cSrcweir throw(RuntimeException) 900cdf0e10cSrcweir { 901cdf0e10cSrcweir SearchResult aRet; 902cdf0e10cSrcweir aRet.subRegExpressions = 0; 903cdf0e10cSrcweir 904cdf0e10cSrcweir if( !xBreak.is() ) 905cdf0e10cSrcweir return aRet; 906cdf0e10cSrcweir 907cdf0e10cSrcweir OUString aWTemp( searchStr ); 908cdf0e10cSrcweir 909cdf0e10cSrcweir register sal_Int32 nStt, nEnd; 910cdf0e10cSrcweir 911cdf0e10cSrcweir Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos, 912cdf0e10cSrcweir aSrchPara.Locale, 913cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES, sal_True ); 914cdf0e10cSrcweir 915cdf0e10cSrcweir do 916cdf0e10cSrcweir { 917cdf0e10cSrcweir if( aWBnd.endPos <= endPos ) 918cdf0e10cSrcweir break; 919cdf0e10cSrcweir nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos; 920cdf0e10cSrcweir nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos; 921cdf0e10cSrcweir 922cdf0e10cSrcweir if( nStt < nEnd && 923cdf0e10cSrcweir pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit ) 924cdf0e10cSrcweir { 925cdf0e10cSrcweir aRet.subRegExpressions = 1; 926cdf0e10cSrcweir aRet.startOffset.realloc( 1 ); 927cdf0e10cSrcweir aRet.startOffset[ 0 ] = nEnd; 928cdf0e10cSrcweir aRet.endOffset.realloc( 1 ); 929cdf0e10cSrcweir aRet.endOffset[ 0 ] = nStt; 930cdf0e10cSrcweir break; 931cdf0e10cSrcweir } 932cdf0e10cSrcweir if( !nStt ) 933cdf0e10cSrcweir break; 934cdf0e10cSrcweir 935cdf0e10cSrcweir aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale, 936cdf0e10cSrcweir WordType::ANYWORD_IGNOREWHITESPACES); 937cdf0e10cSrcweir } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() ); 938cdf0e10cSrcweir return aRet; 939cdf0e10cSrcweir } 940cdf0e10cSrcweir 941cdf0e10cSrcweir 942cdf0e10cSrcweir static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch"; 943cdf0e10cSrcweir static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n"; 944cdf0e10cSrcweir 945cdf0e10cSrcweir static OUString getServiceName_Static() 946cdf0e10cSrcweir { 947cdf0e10cSrcweir return OUString::createFromAscii( cSearchName ); 948cdf0e10cSrcweir } 949cdf0e10cSrcweir 950cdf0e10cSrcweir static OUString getImplementationName_Static() 951cdf0e10cSrcweir { 952cdf0e10cSrcweir return OUString::createFromAscii( cSearchImpl ); 953cdf0e10cSrcweir } 954cdf0e10cSrcweir 955cdf0e10cSrcweir OUString SAL_CALL 956cdf0e10cSrcweir TextSearch::getImplementationName() 957cdf0e10cSrcweir throw( RuntimeException ) 958cdf0e10cSrcweir { 959cdf0e10cSrcweir return getImplementationName_Static(); 960cdf0e10cSrcweir } 961cdf0e10cSrcweir 962cdf0e10cSrcweir sal_Bool SAL_CALL 963cdf0e10cSrcweir TextSearch::supportsService(const OUString& rServiceName) 964cdf0e10cSrcweir throw( RuntimeException ) 965cdf0e10cSrcweir { 966cdf0e10cSrcweir return !rServiceName.compareToAscii( cSearchName ); 967cdf0e10cSrcweir } 968cdf0e10cSrcweir 969cdf0e10cSrcweir Sequence< OUString > SAL_CALL 970cdf0e10cSrcweir TextSearch::getSupportedServiceNames(void) throw( RuntimeException ) 971cdf0e10cSrcweir { 972cdf0e10cSrcweir Sequence< OUString > aRet(1); 973cdf0e10cSrcweir aRet[0] = getServiceName_Static(); 974cdf0e10cSrcweir return aRet; 975cdf0e10cSrcweir } 976cdf0e10cSrcweir 977cdf0e10cSrcweir ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface > 978cdf0e10cSrcweir SAL_CALL TextSearch_CreateInstance( 979cdf0e10cSrcweir const ::com::sun::star::uno::Reference< 980cdf0e10cSrcweir ::com::sun::star::lang::XMultiServiceFactory >& rxMSF ) 981cdf0e10cSrcweir { 982cdf0e10cSrcweir return ::com::sun::star::uno::Reference< 983cdf0e10cSrcweir ::com::sun::star::uno::XInterface >( 984cdf0e10cSrcweir (::cppu::OWeakObject*) new TextSearch( rxMSF ) ); 985cdf0e10cSrcweir } 986cdf0e10cSrcweir 987cdf0e10cSrcweir extern "C" 988cdf0e10cSrcweir { 989cdf0e10cSrcweir 990cdf0e10cSrcweir void SAL_CALL component_getImplementationEnvironment( 991cdf0e10cSrcweir const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ ) 992cdf0e10cSrcweir { 993cdf0e10cSrcweir *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME; 994cdf0e10cSrcweir } 995cdf0e10cSrcweir 996cdf0e10cSrcweir void* SAL_CALL component_getFactory( const sal_Char* sImplementationName, 997cdf0e10cSrcweir void* _pServiceManager, void* /*_pRegistryKey*/ ) 998cdf0e10cSrcweir { 999cdf0e10cSrcweir void* pRet = NULL; 1000cdf0e10cSrcweir 1001cdf0e10cSrcweir ::com::sun::star::lang::XMultiServiceFactory* pServiceManager = 1002cdf0e10cSrcweir reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* > 1003cdf0e10cSrcweir ( _pServiceManager ); 1004cdf0e10cSrcweir ::com::sun::star::uno::Reference< 1005cdf0e10cSrcweir ::com::sun::star::lang::XSingleServiceFactory > xFactory; 1006cdf0e10cSrcweir 1007cdf0e10cSrcweir if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) ) 1008cdf0e10cSrcweir { 1009cdf0e10cSrcweir ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1); 1010cdf0e10cSrcweir aServiceNames[0] = getServiceName_Static(); 1011cdf0e10cSrcweir xFactory = ::cppu::createSingleFactory( 1012cdf0e10cSrcweir pServiceManager, getImplementationName_Static(), 1013cdf0e10cSrcweir &TextSearch_CreateInstance, aServiceNames ); 1014cdf0e10cSrcweir } 1015cdf0e10cSrcweir 1016cdf0e10cSrcweir if ( xFactory.is() ) 1017cdf0e10cSrcweir { 1018cdf0e10cSrcweir xFactory->acquire(); 1019cdf0e10cSrcweir pRet = xFactory.get(); 1020cdf0e10cSrcweir } 1021cdf0e10cSrcweir 1022cdf0e10cSrcweir return pRet; 1023cdf0e10cSrcweir } 1024cdf0e10cSrcweir 1025cdf0e10cSrcweir } // extern "C" 1026