/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/



// NOTE: the guard macro says "lang" although the declarations below live in
// module "util".  The name is kept unchanged for compatibility.
#ifndef __com_sun_star_lang_XTextSearch_idl__
#define __com_sun_star_lang_XTextSearch_idl__


#include <com/sun/star/lang/Locale.idl>
#include <com/sun/star/uno/XInterface.idl>
//#include <com/sun/star/lang/CascadeTransliterator.idl>

//=============================================================================

module com { module sun { module star { module util {

//=============================================================================


/// Search algorithms selectable via <member>SearchOptions::algorithmType</member>.
published enum SearchAlgorithms
{
    /// Literal
    ABSOLUTE,   // implemented as a kind of Boyer-Moore
    /// Regular expression
    REGEXP,
    /// Weighted Levenshtein Distance
    APPROXIMATE
};

/// Flags for search methods
published constants SearchFlags
{
    /**
        @deprecated The constant ALL_IGNORE_CASE is never supported - use
        <const scope="com::sun::star::i18n">TransliterationModules::IGNORE_CASE</const>
        with
        <member>SearchOptions::transliterateFlags</member>
        instead.

        @see <type scope="com::sun::star::i18n">TransliterationModules</type>
    */
    const long ALL_IGNORE_CASE      = 0x00000001;

    /** Flag for normal (Boyer-Moore) search / Search for word only. */
    const long NORM_WORD_ONLY       = 0x00000010;

    /** Flag for "regular expression" search / Interpret as extended
        regular expression.

        @deprecated The flag is currently not supported by OOo.
    */
    const long REG_EXTENDED         = 0x00000100;

    /** Flag for "regular expression" search / No register information
        or backreferences, i.e., avoid sub expressions. Return only
        true/false if matched or not.

        @deprecated The flag is currently not supported by OOo.
    */
    const long REG_NOSUB            = 0x00000200;

    /** Flag for "regular expression" search / Special new line
        treatment.

        @deprecated The flag is currently not supported by OOo.

        <p> A NEWLINE character in string will not be matched by a
        period outside bracket expression or by any form of a non
        matching list. </p>

        <p> A circumflex (^) in pattern when used to specify expression
        anchoring will match the zero length string immediately after a
        newline in string, regardless of the setting of
        REG_NOT_BEGINOFLINE. </p>

        <p> A dollar-sign ($) in pattern when used to specify expression
        anchoring, will match zero-length string immediately before a
        new line in string, regardless of the setting of
        REG_NOT_ENDOFLINE. </p>
    */
    const long REG_NEWLINE          = 0x00000400;

    /** The first character in the string is not the beginning of the
        line therefore ^ will not match with first character of the
        string.
    */
    const long REG_NOT_BEGINOFLINE  = 0x00000800;

    /** The last character in the string is not the end of the line
        therefore $ will not match with last character of the string.
    */
    const long REG_NOT_ENDOFLINE    = 0x00001000;

    /** Flag for "Weighted Levenshtein Distance" search / Relaxed
        checking of limit, split weight pools.

        <p> If not specified (<b>strict</b>), the search is successful if
        the WLD is within a calculated limit where each insertion,
        deletion and replacement adds a weight to a common pool of
        weights. This is the mathematically correct WLD. </p>

        <p> From a user's point of view the strict WLD is an
        exclusive-OR of the arguments given, for example if allowed
        insertions=2 and allowed replacements=2, the search fails if 2
        characters had been inserted and an additional operation would
        be needed to match. Depending on the weights it may also fail if
        1 character was inserted and 1 character replaced and an
        additional operation would be needed to match. The strict
        algorithm may match less than expected from a first glance of
        the specified arguments, but does not return false positives. </p>

        <p> If specified (<b>relaxed</b>), the search is also successful
        if the combined pool for insertions and deletions is below a
        doubled calculated limit and replacements are treated
        differently. Additionally, swapped characters are counted as one
        replacement. </p>

        <p> From a user's point of view the relaxed WLD is an
        inclusive-OR of the arguments given, for example if allowed
        insertions=2 and allowed replacements=2, the search succeeds if
        2 characters had been inserted and an additional replacement is
        needed to match. The relaxed algorithm may return false
        positives, but meets user expectation better. </p>
    */
    const long LEV_RELAXED          = 0x00010000;
};


/// Options describing what to search for and how; passed to XTextSearch::setOptions().
published struct SearchOptions  {
    //-------------------------------------------------------------------------
    /** search type */
    SearchAlgorithms    algorithmType;

    /** some flags - can be mixed

        @see <type>SearchFlags</type>
    */
    long            searchFlag;

    /** The text or pattern to be searched. */
    string          searchString;

    /** The replacement text
        (is for optional replacing - SearchOptions is only the data container for it) */
    string          replaceString;

    /** The locale for case insensitive search. */
    ::com::sun::star::lang::Locale  Locale;

    /** This many characters can be different (as a replacement) between
        the found word and the search pattern in a "Weighted Levenshtein
        Distance" search. */
    long            changedChars;

    /** This many characters can be missing in the found word in a
        "Weighted Levenshtein Distance" search. */
    long            deletedChars;

    /** This many characters can be additional in the found word in a
        "Weighted Levenshtein Distance" search. */
    long            insertedChars;

    /** Flags for the transliteration. Same meaning as the enum of
        <type scope="com::sun::star::i18n">TransliterationModules</type>
    */
    long            transliterateFlags;
};


/// Result of a search performed with XTextSearch.
published struct SearchResult  {
    //-------------------------------------------------------------------------
    /** Number of sub expressions.

        <p> If it is 0, then no match was found; this value is 1 for
        ABSOLUTE and APPROXIMATE match. For regular expressions it can
        be greater than 1. </p>

        <p> If the value is 1, startOffset[0] and endOffset[0] point to
        the matching substring. If the value is > 1, startOffset[0] and
        endOffset[0] still point to the matching substring for the whole
        regular expression, and startOffset[i] and endOffset[i] point to
        the substring matched by the i-th sub expression. </p>

        <p> The start and end offsets are always dependent on the search
        direction. For example, if you search "X" in the text "-X-" the
        offsets are: </p>
        <ul>
        <li> for forward:  start = 1, end = 2 </li>
        <li> for backward: start = 2, end = 1 </li>
        </ul>

        <p> Forward, the startOffset is inclusive, the endOffset exclusive.
        Backward, the startOffset is exclusive, the endOffset inclusive. </p>
    */
    long subRegExpressions;

    /** Start offsets of the match and of its sub expressions;
        inclusive for a forward search, exclusive for a backward search.
    */
    sequence<long> startOffset;

    /** End offsets of the match and of its sub expressions;
        exclusive for a forward search, inclusive for a backward search.
    */
    sequence<long> endOffset;
};



/** enables an object to search in its content.
 */
published interface XTextSearch : com::sun::star::uno::XInterface
{
    //-------------------------------------------------------------------------
    /** set the options for the forward or backward search.

        @param options
            the search options to use.

        @see SearchOptions
     */
    void setOptions ([in] SearchOptions options);
    //-------------------------------------------------------------------------
    /** search forward in the searchStr, starting at startPos and ending
        at endPos.

        @param searchStr
            the text to search in.
        @param startPos
            the position in searchStr where the search starts.
        @param endPos
            the position in searchStr where the search ends.

        @returns
            the result of the search as a SearchResult.
     */
    SearchResult searchForward  ([in] string searchStr, [in] long startPos, [in] long endPos );
    //-------------------------------------------------------------------------
    /** search backward in the searchStr, starting at startPos and ending
        at endPos.

        <p> The endPos must be lower than the startPos, because the
        function searches backward! </p>

        @param searchStr
            the text to search in.
        @param startPos
            the position in searchStr where the search starts.
        @param endPos
            the position in searchStr where the search ends.

        @returns
            the result of the search as a SearchResult.
     */
    SearchResult searchBackward ([in] string searchStr, [in] long startPos, [in] long endPos );
};

//=============================================================================
}; }; }; };

#endif