/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org. If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/

// NOTE: the guard macro says "lang" although everything below is declared in
// module com::sun::star::util. The macro name is intentionally left unchanged.
#ifndef __com_sun_star_lang_XTextSearch_idl__
#define __com_sun_star_lang_XTextSearch_idl__


#include <com/sun/star/lang/Locale.idl>
#include <com/sun/star/uno/XInterface.idl>
//#include <com/sun/star/lang/CascadeTransliterator.idl>

//=============================================================================

module com { module sun { module star { module util {

//=============================================================================


/** The search algorithm to use.

    @see SearchOptions::algorithmType
 */
published enum SearchAlgorithms
{
    /// Literal
    ABSOLUTE,   // implemented as a kind of Boyer-Moore
    /// Regular expression
    REGEXP,
    /// Weighted Levenshtein Distance
    APPROXIMATE
};

/// Flags for search methods
published constants SearchFlags
{
    /**
        @deprecated The constant ALL_IGNORE_CASE is never supported - use
        <const scope="com::sun::star::i18n">TransliterationModules::IGNORE_CASE</const>
        with
        <member>SearchOptions::transliterateFlags</member>
        instead.

        @see <type scope="com::sun::star::i18n">TransliterationModules</type>
     */
    const long ALL_IGNORE_CASE = 0x00000001;

    /** Flag for normal (Boyer-Moore) search / Search for word only. */
    const long NORM_WORD_ONLY = 0x00000010;

    /** Flag for "regular expression" search / Interpret as extended
        regular expression.

        @deprecated The flag is currently not supported by OOo.
     */
    const long REG_EXTENDED = 0x00000100;

    /** Flag for "regular expression" search / No register information
        or backreferences, i.e., avoid sub expressions. Return only
        true/false if matched or not.

        @deprecated The flag is currently not supported by OOo.
     */
    const long REG_NOSUB = 0x00000200;

    /** Flag for "regular expression" search / Special new line
        treatment.

        <p> A NEWLINE character in string will not be matched by a
        period outside bracket expression or by any form of a non
        matching list. </p>

        <p> A circumflex (^) in pattern when used to specify expression
        anchoring will match the zero length string immediately after a
        newline in string, regardless of the setting of
        REG_NOT_BEGINOFLINE. </p>

        <p> A dollar-sign ($) in pattern when used to specify expression
        anchoring, will match zero-length string immediately before a
        new line in string, regardless of the setting of
        REG_NOT_ENDOFLINE. </p>

        @deprecated The flag is currently not supported by OOo.
     */
    const long REG_NEWLINE = 0x00000400;

    /** The first character in the string is not the beginning of the
        line therefore ^ will not match with first character of the
        string.
     */
    const long REG_NOT_BEGINOFLINE = 0x00000800;

    /** The last character in the string is not the end of the line
        therefore $ will not match with last character of the string.
     */
    const long REG_NOT_ENDOFLINE = 0x00001000;

    /** Flag for "Weighted Levenshtein Distance" search / Relaxed
        checking of limit, split weight pools.

        <p> If not specified (<b>strict</b>), the search is successful if
        the WLD is within a calculated limit where each insertion,
        deletion and replacement adds a weight to a common pool of
        weights. This is the mathematically correct WLD. </p>

        <p> From a user's point of view the strict WLD is an
        exclusive-OR of the arguments given, for example if allowed
        insertions=2 and allowed replacements=2, the search fails if 2
        characters had been inserted and an additional operation would
        be needed to match. Depending on the weights it may also fail if
        1 character was inserted and 1 character replaced and an
        additional operation would be needed to match. The strict
        algorithm may match less than expected from a first glance of
        the specified arguments, but does not return false positives. </p>

        <p> If specified (<b>relaxed</b>), the search is also successful
        if the combined pool for insertions and deletions is below a
        doubled calculated limit and replacements are treated
        differently. Additionally, swapped characters are counted as one
        replacement. </p>

        <p> From a user's point of view the relaxed WLD is an
        inclusive-OR of the arguments given, for example if allowed
        insertions=2 and allowed replacements=2, the search succeeds if
        2 characters had been inserted and an additional replacement is
        needed to match. The relaxed algorithm may return false
        positives, but meets user expectation better. </p>
     */
    const long LEV_RELAXED = 0x00010000;
};


/** Options describing what to search for and how.

    @see XTextSearch::setOptions
 */
published struct SearchOptions {
    //-------------------------------------------------------------------------
    /** search type */
    SearchAlgorithms algorithmType;

    /** some flags - can be mixed

        @see <type>SearchFlags</type>
     */
    long searchFlag;

    /** The text or pattern to be searched. */
    string searchString;

    /** The replacement text
        (is for optional replacing - SearchOptions is only the data container for it) */
    string replaceString;

    /** The locale for case insensitive search. */
    ::com::sun::star::lang::Locale Locale;

    /** This many characters can be different (as a replacement) between
        the found word and the search pattern in a "Weighted Levenshtein
        Distance" search. */
    long changedChars;

    /** This many characters can be missing in the found word in a
        "Weighted Levenshtein Distance" search. */
    long deletedChars;

    /** This many characters can be additional in the found word in a
        "Weighted Levenshtein Distance" search. */
    long insertedChars;

    /** Flags for the transliteration. Same meaning as the enum of
        <type scope="com::sun::star::i18n">TransliterationModules</type>
     */
    long transliterateFlags;
};


/** Result of a search, as returned by
    <member>XTextSearch::searchForward</member> and
    <member>XTextSearch::searchBackward</member>.
 */
published struct SearchResult {
    //-------------------------------------------------------------------------
    /** Number of subexpressions.

        <p> If it is 0, then no match was found; this value is 1 for
        ABSOLUTE and APPROXIMATE match. For regular expressions it can
        be greater than 1. </p>

        <p> The startOffset and endOffset are always dependent on the
        search direction. For example, if you search "X" in the text
        "-X-" the offsets are: </p>
        <ul>
        <li> for forward:  start = 1, end = 2 </li>
        <li> for backward: start = 2, end = 1 </li>
        </ul>
        <p> Forward, the startOffset is inclusive, the endOffset exclusive.
        Backward, the startOffset is exclusive, the endOffset inclusive. </p>

        <p> If the value is 1, startOffset[0] and endOffset[0] point to
        the matching substring. If the value is greater than 1,
        startOffset[0] and endOffset[0] still point to the substring
        matching the whole regular expression, and startOffset[i] and
        endOffset[i] point to the substring matching the i-th
        subexpression. </p>
     */
    long subRegExpressions;

    /** Start offsets of the matches; inclusive for a forward search,
        exclusive for a backward search. */
    sequence<long> startOffset;

    /** End offsets of the matches; exclusive for a forward search,
        inclusive for a backward search. */
    sequence<long> endOffset;
};



/** enables an object to search in its content.
 */
published interface XTextSearch : com::sun::star::uno::XInterface
{
    //-------------------------------------------------------------------------
    /** set the options for the forward or backward search.

        @param options
            the search options to use.
     */
    void setOptions ([in] SearchOptions options);
    //-------------------------------------------------------------------------
    /** search forward in the searchStr, starts at startPos and ends by endPos.
        The result is returned in the SearchResult.

        @param searchStr
            the string to search in.
        @param startPos
            the position in searchStr at which the search starts.
        @param endPos
            the position in searchStr at which the search ends.
        @returns
            the search result; <member>SearchResult::subRegExpressions</member>
            is 0 if no match was found.
     */
    SearchResult searchForward ([in] string searchStr, [in] long startPos, [in] long endPos );
    //-------------------------------------------------------------------------
    /** search backward in the searchStr, starts at startPos and ends by endPos.
        The endPos must be lower than the startPos, because the function searches backward!
        The result is returned in the SearchResult.

        @param searchStr
            the string to search in.
        @param startPos
            the position in searchStr at which the search starts.
        @param endPos
            the position in searchStr at which the search ends; must be
            lower than startPos.
        @returns
            the search result; <member>SearchResult::subRegExpressions</member>
            is 0 if no match was found.
     */
    SearchResult searchBackward ([in] string searchStr, [in] long startPos, [in] long endPos );
};

//=============================================================================
}; }; }; };

#endif