xref: /trunk/main/offapi/com/sun/star/util/XTextSearch.idl (revision cdf0e10c)
1/*************************************************************************
2 *
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
6 *
7 * OpenOffice.org - a multi-platform office productivity suite
8 *
9 * This file is part of OpenOffice.org.
10 *
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
14 *
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
20 *
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org.  If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
25 *
26 ************************************************************************/
27
28#ifndef __com_sun_star_lang_XTextSearch_idl__
29#define __com_sun_star_lang_XTextSearch_idl__
30
31
32#include <com/sun/star/lang/Locale.idl>
33#include <com/sun/star/uno/XInterface.idl>
34//#include <com/sun/star/lang/CascadeTransliterator.idl>
35
36//=============================================================================
37
38module com { module sun { module star { module util {
39
40//=============================================================================
41
42/// Search algorithms; one of them is selected through SearchOptions::algorithmType.
43published enum SearchAlgorithms
44{
45    /// Literal (plain text) search.
46    ABSOLUTE,   // implemented as a kind of Boyer-Moore
47    /// Regular expression search.
48    REGEXP,
49    /// Approximate search based on the Weighted Levenshtein Distance.
50    APPROXIMATE
51};
52
53/// Flags for search methods; each value is a distinct bit, combined in SearchOptions::searchFlag.
54published constants SearchFlags
55{
56    /**
57        @deprecated The constant ALL_IGNORE_CASE is never supported - use
58                    <const scope="com::sun::star::i18n">TransliterationModules::IGNORE_CASE</const>
59                    with
60                    <member>SearchOptions::transliterateFlags</member>
61                    instead.
62
63        @see <type scope="com::sun::star::i18n">TransliterationModules</type>
64    */
65    const long  ALL_IGNORE_CASE     = 0x00000001;
66
67    /** Flag for normal (Boyer-Moore) search / Search for word only. */
68    const long  NORM_WORD_ONLY      = 0x00000010;
69
70    /** Flag for "regular expression" search / Interpret as extended
71        regular expression.
72
73        @deprecated The flag is currently not supported by OOo.
74    */
75    const long  REG_EXTENDED        = 0x00000100;
76
77    /** Flag for "regular expression" search / No register information
78        or backreferences, i.e., avoid sub expressions. Return only
79        true/false if matched or not.
80
81        @deprecated The flag is currently not supported by OOo.
82    */
83    const long  REG_NOSUB           = 0x00000200;
84
85    /** Flag for "regular expression" search / Special new line
86        treatment.
87
88        <p> A NEWLINE character in string will not be matched by a
89        period outside bracket expression or by any form of a non
90        matching list. </p>
91
92        <p> A circumflex (^) in pattern when used to specify expression
93        anchoring will match the zero length string immediately after a
94        newline in string, regardless of the setting of
95        REG_NOT_BEGINOFLINE. </p>
96
97        <p> A dollar-sign ($) in pattern when used to specify expression
98        anchoring, will match zero-length string immediately before a
99        new line in string, regardless of the setting of
100        REG_NOT_ENDOFLINE. </p>
101
102        @deprecated The flag is currently not supported by OOo.
103    */
104    const long  REG_NEWLINE         = 0x00000400;
105
106    /** Flag for "regular expression" search / The first character in
107        the string is not the beginning of the line, therefore ^ will
108        not match at the first character of the string.
109    */
110    const long  REG_NOT_BEGINOFLINE = 0x00000800;
111
112    /** Flag for "regular expression" search / The last character in the string
113        is not the end of the line, therefore $ will not match at the last character.
114    */
115    const long  REG_NOT_ENDOFLINE   = 0x00001000;
116
117    /** Flag for "Weighted Levenshtein Distance" search / Relaxed
118        checking of limit, split weight pools.
119
120        <p> If not specified (<b>strict</b>), the search is successful if
121        the WLD is within a calculated limit where each insertion,
122        deletion and replacement adds a weight to a common pool of
123        weights. This is the mathematically correct WLD. </p>
124
125        <p> From a user's point of view the strict WLD is an
126        exclusive-OR of the arguments given, for example if allowed
127        insertions=2 and allowed replacements=2, the search fails if 2
128        characters had been inserted and an additional operation would
129        be needed to match. Depending on the weights it may also fail if
130        1 character was inserted and 1 character replaced and an
131        additional operation would be needed to match. The strict
132        algorithm may match less than expected from a first glance of
133        the specified arguments, but does not return false positives. </p>
134
135        <p> If specified (<b>relaxed</b>), the search is also successful
136        if the combined pool for insertions and deletions is below a
137        doubled calculated limit and replacements are treated
138        differently. Additionally, swapped characters are counted as one
139        replacement. </p>
140
141        <p> From a user's point of view the relaxed WLD is an
142        inclusive-OR of the arguments given, for example if allowed
143        insertions=2 and allowed replacements=2, the search succeeds if
144        2 characters had been inserted and an additional replacement is
145        needed to match. The relaxed algorithm may return false
146        positives, but meets user expectation better. </p>
147    */
148    const long  LEV_RELAXED     = 0x00010000;
149};
150
151/** Options that configure a search; passed to XTextSearch::setOptions(). */
152published  struct SearchOptions  {
153	//-------------------------------------------------------------------------
154    /** The search algorithm to use. */
155	SearchAlgorithms	algorithmType;
156
157	/** Search flags - several can be combined.
158
159		@see <type>SearchFlags</type>
160	*/
161	long 			searchFlag;
162
163    /** The text or pattern to be searched. */
164	string			searchString;
165
166    /** The replacement text (for optional replacing -
167        SearchOptions is only the data container for it). */
168	string			replaceString;
169
170    /** The locale for case insensitive search. */
171	::com::sun::star::lang::Locale  Locale;
172
173    /** This many characters can be different (as a replacement) between
174        the found word and the search pattern in a "Weighted Levenshtein
175        Distance" search. */
176	long			changedChars;
177
178    /** This many characters can be missing in the found word in a
179        "Weighted Levenshtein Distance" search. */
180	long			deletedChars;
181
182    /** This many characters can be additional in the found word in a
183        "Weighted Levenshtein Distance" search. */
184	long			insertedChars;
185
186    /** Flags for the transliteration. Same meaning as the enum of
187        <type scope="com::sun::star::i18n">TransliterationModules</type>
188	*/
189	long			transliterateFlags;
190};
191
192/** Result of a search, as returned by XTextSearch::searchForward() and searchBackward(). */
193published  struct SearchResult  {
194	//-------------------------------------------------------------------------
195	/** Number of subexpressions.
196	If it is 0, no match was found; the value is 1 for an ABSOLUTE or APPROXIMATE match.
197	startOffset and endOffset always depend on the search direction.
198	For example,
199	if you search "X" in the text "-X-" the offsets are:
200		for forward: 	start = 1, end = 2
201        for backward:   start = 2, end = 1
202    Forward, the startOffset is inclusive, the endOffset exclusive.
203    Backward, the startOffset is exclusive, the endOffset inclusive.
204
205	For regular expressions the value can be greater than 1.
206	If the value is 1, startOffset[0] and endOffset[0] delimit the matching substring.
207	If the value is > 1, startOffset[0] and endOffset[0] still delimit the match of the whole regular expression,
208	and startOffset[i] and endOffset[i] delimit the match of the i-th subexpression.
209	*/
210	long subRegExpressions;
211	sequence<long> startOffset;		// inclusive when searching forward, exclusive when searching backward
212	sequence<long> endOffset;  		// exclusive when searching forward, inclusive when searching backward
213};
214
215
216
217/** enables an object to search within a text that the caller passes in,
218    using the options previously set with setOptions(). */
219published interface XTextSearch : com::sun::star::uno::XInterface
220{
221	//-------------------------------------------------------------------------
222	/** set the options for the forward or backward search.
223		@param options  the search settings, see SearchOptions.
224	*/
225	void setOptions ([in] SearchOptions options);
226	//-------------------------------------------------------------------------
227	/** search forward in searchStr, starting at startPos and ending at endPos.
228		The result is returned in the SearchResult.
229		Forward, SearchResult::startOffset is inclusive and endOffset is exclusive.
230	*/
231	SearchResult  searchForward  ([in] string searchStr, [in] long startPos, [in] long endPos );
232	//-------------------------------------------------------------------------
233	/** search backward in searchStr, starting at startPos and ending at endPos.
234		endPos must be lower than startPos, because the function searches backward!
235		The result is returned in the SearchResult.
236		Backward, SearchResult::startOffset is exclusive and endOffset is inclusive.
237	*/
238	SearchResult  searchBackward ([in] string searchStr, [in] long startPos, [in] long endPos );
239};
240
241//=============================================================================
242}; }; }; };
243
244#endif
245