/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org. If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/

// NOTE(review): the guard macro names "lang" although the declarations below
// live in module com::sun::star::util. Left unchanged on purpose; confirm no
// other file relies on this macro before renaming it.
#ifndef __com_sun_star_lang_XTextSearch_idl__
#define __com_sun_star_lang_XTextSearch_idl__


#include <com/sun/star/lang/Locale.idl>
#include <com/sun/star/uno/XInterface.idl>
//#include <com/sun/star/lang/CascadeTransliterator.idl>

//=============================================================================

module com { module sun { module star { module util {

//=============================================================================


/// Search algorithm to be used, see <member>SearchOptions::algorithmType</member>.
published enum SearchAlgorithms
{
    /// Literal
    ABSOLUTE,   // implemented as a kind of Boyer-Moore
    /// Regular expression
    REGEXP,
    /// Weighted Levenshtein Distance
    APPROXIMATE
};

/// Flags for search methods
published constants SearchFlags
{
    /**
        @deprecated The constant ALL_IGNORE_CASE is never supported - use
                    <const scope="com::sun::star::i18n">TransliterationModules::IGNORE_CASE</const>
                    with
                    <member>SearchOptions::transliterateFlags</member>
                    instead.

        @see <type scope="com::sun::star::i18n">TransliterationModules</type>
    */
    const long ALL_IGNORE_CASE = 0x00000001;

    /** Flag for normal (Boyer-Moore) search / Search for word only. */
    const long NORM_WORD_ONLY = 0x00000010;

    /** Flag for "regular expression" search / Interpret as extended
        regular expression.

        @deprecated The flag is currently not supported by OOo.
    */
    const long REG_EXTENDED = 0x00000100;

    /** Flag for "regular expression" search / No register information
        or backreferences, i.e., avoid sub expressions. Return only
        true/false if matched or not.

        @deprecated The flag is currently not supported by OOo.
    */
    const long REG_NOSUB = 0x00000200;

    /** Flag for "regular expression" search / Special new line
        treatment.

        <p> A NEWLINE character in string will not be matched by a
        period outside bracket expression or by any form of a non
        matching list. </p>

        <p> A circumflex (^) in pattern when used to specify expression
        anchoring will match the zero length string immediately after a
        newline in string, regardless of the setting of
        REG_NOT_BEGINOFLINE. </p>

        <p> A dollar-sign ($) in pattern when used to specify expression
        anchoring, will match zero-length string immediately before a
        new line in string, regardless of the setting of
        REG_NOT_ENDOFLINE. </p>

        @deprecated The flag is currently not supported by OOo.
    */
    const long REG_NEWLINE = 0x00000400;

    /** The first character in the string is not the beginning of the
        line therefore ^ will not match with first character of the
        string.
    */
    const long REG_NOT_BEGINOFLINE = 0x00000800;

    /** The last character in the string is not the end of the line
        therefore $ will not match with last character of the string.
    */
    const long REG_NOT_ENDOFLINE = 0x00001000;

    /** Flag for "Weighted Levenshtein Distance" search / Relaxed
        checking of limit, split weight pools.

        <p> If not specified (<b>strict</b>), the search is successful if
        the WLD is within a calculated limit where each insertion,
        deletion and replacement adds a weight to a common pool of
        weights. This is the mathematically correct WLD. </p>

        <p> From a user's point of view the strict WLD is an
        exclusive-OR of the arguments given, for example if allowed
        insertions=2 and allowed replacements=2, the search fails if 2
        characters had been inserted and an additional operation would
        be needed to match. Depending on the weights it may also fail if
        1 character was inserted and 1 character replaced and an
        additional operation would be needed to match. The strict
        algorithm may match less than expected from a first glance of
        the specified arguments, but does not return false positives. </p>

        <p> If specified (<b>relaxed</b>), the search is also successful
        if the combined pool for insertions and deletions is below a
        doubled calculated limit and replacements are treated
        differently. Additionally, swapped characters are counted as one
        replacement. </p>

        <p> From a user's point of view the relaxed WLD is an
        inclusive-OR of the arguments given, for example if allowed
        insertions=2 and allowed replacements=2, the search succeeds if
        2 characters had been inserted and an additional replacement is
        needed to match. The relaxed algorithm may return false
        positives, but meets user expectation better. </p>
    */
    const long LEV_RELAXED = 0x00010000;
};


/** Options that configure a search, passed to
    <member>XTextSearch::setOptions</member>.
 */
published struct SearchOptions {
    //-------------------------------------------------------------------------
    /** search type */
    SearchAlgorithms algorithmType;

    /** some flags - can be mixed

        @see <type>SearchFlags</type>
    */
    long searchFlag;

    /** The text or pattern to be searched. */
    string searchString;

    /** The replacement text
        (is for optional replacing - SearchOptions is only the data
        container for it) */
    string replaceString;

    /** The locale for case insensitive search. */
    ::com::sun::star::lang::Locale Locale;

    /** This many characters can be different (as a replacement) between
        the found word and the search pattern in a "Weighted Levenshtein
        Distance" search. */
    long changedChars;

    /** This many characters can be missing in the found word in a
        "Weighted Levenshtein Distance" search. */
    long deletedChars;

    /** This many characters can be additional in the found word in a
        "Weighted Levenshtein Distance" search. */
    long insertedChars;

    /** Flags for the transliteration. Same meaning as the enum of
        <type scope="com::sun::star::i18n">TransliterationModules</type>
    */
    long transliterateFlags;
};


/** Result of a search, returned by
    <member>XTextSearch::searchForward</member> and
    <member>XTextSearch::searchBackward</member>.
 */
published struct SearchResult {
    //-------------------------------------------------------------------------
    /** Number of subexpressions.

        <p> If it is 0, then no match was found. The value is 1 for
        ABSOLUTE and APPROXIMATE matches; for regular expressions it can
        be greater than 1. </p>

        <p> If the value is 1, startOffset[0] and endOffset[0] point to
        the matching substring. If the value is greater than 1,
        startOffset[0] and endOffset[0] still point to the substring
        matching the whole regular expression, and startOffset[i] and
        endOffset[i] point to the substring matching the i-th
        subexpression. </p>

        <p> The start and end offsets always depend on the search
        direction. For example, if you search "X" in the text "-X-" the
        offsets are: </p>

        <ul>
        <li> for forward:  start = 1, end = 2 </li>
        <li> for backward: start = 2, end = 1 </li>
        </ul>

        <p> Forward, the startOffset is inclusive, the endOffset exclusive.
        Backward, the startOffset is exclusive, the endOffset inclusive. </p>
    */
    long subRegExpressions;

    /** Start offsets of the matches; inclusive for a forward search,
        exclusive for a backward search. */
    sequence<long> startOffset;

    /** End offsets of the matches; exclusive for a forward search,
        inclusive for a backward search. */
    sequence<long> endOffset;
};



/** enables an object to search in its content.
 */
published interface XTextSearch : com::sun::star::uno::XInterface
{
    //-------------------------------------------------------------------------
    /** set the options for the forward or backward search.

        @param options
            the search options to use, see <type>SearchOptions</type>.
     */
    void setOptions ([in] SearchOptions options);

    //-------------------------------------------------------------------------
    /** search forward in the searchStr, starts at startPos and ends at endPos.

        @param searchStr
            the text to search in.
        @param startPos
            the position in searchStr at which the search starts.
        @param endPos
            the position in searchStr at which the search ends.
        @returns
            the result of the search, see <type>SearchResult</type>.
     */
    SearchResult searchForward ([in] string searchStr, [in] long startPos, [in] long endPos );

    //-------------------------------------------------------------------------
    /** search backward in the searchStr, starts at startPos and ends at endPos.
        The endPos must be lower than the startPos, because the function
        searches backward!

        @param searchStr
            the text to search in.
        @param startPos
            the position in searchStr at which the search starts.
        @param endPos
            the position in searchStr at which the search ends; must be
            lower than startPos.
        @returns
            the result of the search, see <type>SearchResult</type>.
     */
    SearchResult searchBackward ([in] string searchStr, [in] long startPos, [in] long endPos );
};

//=============================================================================
}; }; }; };

#endif