1*d1766043SAndrew Rist/**************************************************************
2cdf0e10cSrcweir *
3*d1766043SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*d1766043SAndrew Rist * or more contributor license agreements.  See the NOTICE file
5*d1766043SAndrew Rist * distributed with this work for additional information
6*d1766043SAndrew Rist * regarding copyright ownership.  The ASF licenses this file
7*d1766043SAndrew Rist * to you under the Apache License, Version 2.0 (the
8*d1766043SAndrew Rist * "License"); you may not use this file except in compliance
9*d1766043SAndrew Rist * with the License.  You may obtain a copy of the License at
10*d1766043SAndrew Rist *
11*d1766043SAndrew Rist *   http://www.apache.org/licenses/LICENSE-2.0
12*d1766043SAndrew Rist *
13*d1766043SAndrew Rist * Unless required by applicable law or agreed to in writing,
14*d1766043SAndrew Rist * software distributed under the License is distributed on an
15*d1766043SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*d1766043SAndrew Rist * KIND, either express or implied.  See the License for the
17*d1766043SAndrew Rist * specific language governing permissions and limitations
18*d1766043SAndrew Rist * under the License.
19*d1766043SAndrew Rist *
20*d1766043SAndrew Rist *************************************************************/
21*d1766043SAndrew Rist
22*d1766043SAndrew Rist
23cdf0e10cSrcweir#ifndef __com_sun_star_i18n_XBreakIterator_idl__
24cdf0e10cSrcweir#define __com_sun_star_i18n_XBreakIterator_idl__
25cdf0e10cSrcweir
26cdf0e10cSrcweir#ifndef __com_sun_star_lang_Locale_idl__
27cdf0e10cSrcweir#include <com/sun/star/lang/Locale.idl>
28cdf0e10cSrcweir#endif
29cdf0e10cSrcweir
30cdf0e10cSrcweir#ifndef __com_sun_star_i18n_LineBreakUserOptions_idl__
31cdf0e10cSrcweir#include <com/sun/star/i18n/LineBreakUserOptions.idl>
32cdf0e10cSrcweir#endif
33cdf0e10cSrcweir
34cdf0e10cSrcweir#ifndef __com_sun_star_i18n_LineBreakHyphenationOptions_idl__
35cdf0e10cSrcweir#include <com/sun/star/i18n/LineBreakHyphenationOptions.idl>
36cdf0e10cSrcweir#endif
37cdf0e10cSrcweir
38cdf0e10cSrcweir#ifndef __com_sun_star_i18n_LineBreakResults_idl__
39cdf0e10cSrcweir#include <com/sun/star/i18n/LineBreakResults.idl>
40cdf0e10cSrcweir#endif
41cdf0e10cSrcweir
42cdf0e10cSrcweir#ifndef __com_sun_star_i18n_Boundary_idl__
43cdf0e10cSrcweir#include <com/sun/star/i18n/Boundary.idl>
44cdf0e10cSrcweir#endif
45cdf0e10cSrcweir
46cdf0e10cSrcweir//============================================================================
47cdf0e10cSrcweir
48cdf0e10cSrcweirmodule com {  module sun {  module star {  module i18n {
49cdf0e10cSrcweir
50cdf0e10cSrcweir//============================================================================
51cdf0e10cSrcweir
52cdf0e10cSrcweir/**
53cdf0e10cSrcweir    Contains the base routines for iterating over a Unicode string. Iterates over
54cdf0e10cSrcweir    characters, words, sentences and line breaks.
55cdf0e10cSrcweir
56cdf0e10cSrcweir    <p> Assumption: StartPos is inclusive and EndPos is exclusive. </p>
57cdf0e10cSrcweir */
58cdf0e10cSrcweir
59cdf0e10cSrcweirpublished interface XBreakIterator: com::sun::star::uno::XInterface
60cdf0e10cSrcweir{
61cdf0e10cSrcweir    //------------------------------------------------------------------------
62cdf0e10cSrcweir    /** Traverses the specified number of characters/cells in Text from
63cdf0e10cSrcweir        <em>nStartPos</em> forwards.
64cdf0e10cSrcweir        <type>CharacterIteratorMode</type> can be cell based or
65cdf0e10cSrcweir        character based. A cell is made of more than one character.
66cdf0e10cSrcweir
67cdf0e10cSrcweir        @param nCount
68cdf0e10cSrcweir            Number of characters to traverse; it should not be less than 0.
69cdf0e10cSrcweir            If you want to traverse in the opposite direction, use
70cdf0e10cSrcweir            <member>XBreakIterator::previousCharacters()</member> instead.
71cdf0e10cSrcweir     */
72cdf0e10cSrcweir    long nextCharacters( [in] string aText, [in] long nStartPos,
73cdf0e10cSrcweir                     [in] ::com::sun::star::lang::Locale aLocale,
74cdf0e10cSrcweir                     [in] short nCharacterIteratorMode,
75cdf0e10cSrcweir                     [in] long nCount, [out] long nDone );
76cdf0e10cSrcweir
77cdf0e10cSrcweir    //------------------------------------------------------------------------
78cdf0e10cSrcweir    /** Traverses the specified number of characters/cells in Text from
79cdf0e10cSrcweir        <em>nStartPos</em> backwards.
80cdf0e10cSrcweir        <type>CharacterIteratorMode</type> can be cell based or
81cdf0e10cSrcweir        character based. A cell is made of more than one character.
82cdf0e10cSrcweir
83cdf0e10cSrcweir        @param nCount
84cdf0e10cSrcweir            Number of characters to traverse; it should not be less than 0.
85cdf0e10cSrcweir            If you want to traverse in the opposite direction, use
86cdf0e10cSrcweir            <member>XBreakIterator::nextCharacters()</member> instead.
87cdf0e10cSrcweir     */
88cdf0e10cSrcweir    long previousCharacters( [in] string aText, [in] long nStartPos,
89cdf0e10cSrcweir                     [in] ::com::sun::star::lang::Locale aLocale,
90cdf0e10cSrcweir                     [in] short nCharacterIteratorMode,
91cdf0e10cSrcweir                     [in] long nCount, [out] long nDone );
92cdf0e10cSrcweir
93cdf0e10cSrcweir    //------------------------------------------------------------------------
94cdf0e10cSrcweir    /** Traverses one word in Text from <em>nStartPos</em> forwards.
95cdf0e10cSrcweir
96cdf0e10cSrcweir        @param nWordType
97cdf0e10cSrcweir            One of <type>WordType</type>; specifies the type of
98cdf0e10cSrcweir            travelling.
99cdf0e10cSrcweir
100cdf0e10cSrcweir        @returns
101cdf0e10cSrcweir            The <type>Boundary</type> of the found word. Normally used for
102cdf0e10cSrcweir            CTRL-Right.
103cdf0e10cSrcweir     */
104cdf0e10cSrcweir    Boundary nextWord( [in] string aText, [in] long nStartPos,
105cdf0e10cSrcweir                   [in] ::com::sun::star::lang::Locale aLocale,
106cdf0e10cSrcweir                   [in] short nWordType);
107cdf0e10cSrcweir
108cdf0e10cSrcweir    //------------------------------------------------------------------------
109cdf0e10cSrcweir    /** Traverses one word in Text from <em>nStartPos</em> backwards.
110cdf0e10cSrcweir
111cdf0e10cSrcweir        @param aLocale
112cdf0e10cSrcweir            The locale of the character preceding <em>nStartPos</em>.
113cdf0e10cSrcweir
114cdf0e10cSrcweir            <p> If the previous character is a space character and
115cdf0e10cSrcweir            <em>nWordType</em> indicates spaces should be skipped, and
116cdf0e10cSrcweir            if the first non-space character is an Asian character,
117cdf0e10cSrcweir            then, since Asian word break needs language specific
118cdf0e10cSrcweir            wordbreak dictionaries, the method will return -1 in
119cdf0e10cSrcweir            <member>Boundary::endPos</member> and the position after the
120cdf0e10cSrcweir            Asian character (i.e. the space character) in
121cdf0e10cSrcweir            <member>Boundary::startPos</member>. The caller then has to
122cdf0e10cSrcweir            call this method again with a correct <em>aLocale</em>
123cdf0e10cSrcweir            referring to the Asian character, which is then the character
124cdf0e10cSrcweir            preceding the space character that <em>nStartPos</em>
125cdf0e10cSrcweir            points to. </p>
126cdf0e10cSrcweir
127cdf0e10cSrcweir            <p> <b>Note</b> that the OpenOffice.org 1.0 / StarOffice 6.0
128cdf0e10cSrcweir            / StarSuite 6.0 i18n framework doesn't behave like this and
129cdf0e10cSrcweir            mixed Western/CJK text may lead to wrong word iteration.
130cdf0e10cSrcweir            This is fixed in later versions. </p>
131cdf0e10cSrcweir
132cdf0e10cSrcweir        @param nWordType
133cdf0e10cSrcweir            One of <type>WordType</type>; specifies the type of
134cdf0e10cSrcweir            travelling.
135cdf0e10cSrcweir
136cdf0e10cSrcweir        @returns
137cdf0e10cSrcweir            The <type>Boundary</type> of the found word. Normally used for
138cdf0e10cSrcweir            CTRL-Left.
139cdf0e10cSrcweir    */
140cdf0e10cSrcweir    Boundary previousWord( [in] string aText, [in] long nStartPos,
141cdf0e10cSrcweir                       [in] ::com::sun::star::lang::Locale aLocale,
142cdf0e10cSrcweir                       [in] short nWordType);
143cdf0e10cSrcweir
144cdf0e10cSrcweir    //------------------------------------------------------------------------
145cdf0e10cSrcweir    /** Identifies StartPos and EndPos of the current word.
146cdf0e10cSrcweir
147cdf0e10cSrcweir        <p> If <em>nPos</em> is the boundary of a word, it is the StartPos
148cdf0e10cSrcweir        of one word and the EndPos of the previous word. In this situation
149cdf0e10cSrcweir        the outcome of the algorithm would be indeterminate, so the
150cdf0e10cSrcweir        <em>bPreferForward</em> flag is used. If bPreferForward ==
151cdf0e10cSrcweir        <FALSE/>, <em>nPos</em> is considered to be the end of the word
152cdf0e10cSrcweir        and we look backwards for the beginning of the word, otherwise
153cdf0e10cSrcweir        <em>nPos</em> is considered to be the start of the next word and
154cdf0e10cSrcweir        we look forwards for the end of the word. </p>
155cdf0e10cSrcweir
156cdf0e10cSrcweir        @param nWordType
157cdf0e10cSrcweir            One of <type>WordType</type>.
158cdf0e10cSrcweir
159cdf0e10cSrcweir        @returns
160cdf0e10cSrcweir            The <type>Boundary</type> of the current word.
161cdf0e10cSrcweir    */
162cdf0e10cSrcweir    Boundary getWordBoundary( [in] string aText, [in] long nPos,
163cdf0e10cSrcweir                      [in] ::com::sun::star::lang::Locale aLocale,
164cdf0e10cSrcweir                      [in] short nWordType,
165cdf0e10cSrcweir                      [in] boolean bPreferForward );
166cdf0e10cSrcweir
167cdf0e10cSrcweir    //------------------------------------------------------------------------
168cdf0e10cSrcweir    /** @deprecated
169cdf0e10cSrcweir        Gets the <type>WordType</type> of the word that starts at
170cdf0e10cSrcweir        position <em>nPos</em>.
171cdf0e10cSrcweir
172cdf0e10cSrcweir        <p> This method is mis-defined, since <type>WordType</type>
173cdf0e10cSrcweir        is not an attribute of a word, but a way to break words,
174cdf0e10cSrcweir        like excluding or including trailing spaces for spellchecker
175cdf0e10cSrcweir        or cursor travelling. It always returns 0.
176cdf0e10cSrcweir        </p>
177cdf0e10cSrcweir     */
178cdf0e10cSrcweir    short getWordType( [in] string aText, [in] long nPos,
179cdf0e10cSrcweir                   [in] ::com::sun::star::lang::Locale aLocale);
180cdf0e10cSrcweir
181cdf0e10cSrcweir    //------------------------------------------------------------------------
182cdf0e10cSrcweir    /** Checks whether a word starts at position <em>nPos</em>.
183cdf0e10cSrcweir
184cdf0e10cSrcweir        <p> It is possible for both this method and the following
185cdf0e10cSrcweir        method <em>isEndWord</em> to return <TRUE/> for the same
186cdf0e10cSrcweir        position, since the StartPos of a word is inclusive
187cdf0e10cSrcweir        while the EndPos of a word is exclusive.
188cdf0e10cSrcweir        </p>
189cdf0e10cSrcweir
190cdf0e10cSrcweir     */
191cdf0e10cSrcweir    boolean isBeginWord( [in] string aText, [in] long nPos,
192cdf0e10cSrcweir                     [in] ::com::sun::star::lang::Locale aLocale,
193cdf0e10cSrcweir                     [in] short nWordType);
194cdf0e10cSrcweir
195cdf0e10cSrcweir    //------------------------------------------------------------------------
196cdf0e10cSrcweir    /** Checks whether a word ends at position <em>nPos</em>.
197cdf0e10cSrcweir     */
198cdf0e10cSrcweir    boolean isEndWord( [in] string aText, [in] long nPos,
199cdf0e10cSrcweir                   [in] ::com::sun::star::lang::Locale aLocale,
200cdf0e10cSrcweir                   [in] short nWordType);
201cdf0e10cSrcweir
202cdf0e10cSrcweir    //------------------------------------------------------------------------
203cdf0e10cSrcweir    /** Traverses in Text from <em>nStartPos</em> to the start of a
204cdf0e10cSrcweir        sentence.
205cdf0e10cSrcweir
206cdf0e10cSrcweir        @returns
207cdf0e10cSrcweir            The position where the sentence starts.
208cdf0e10cSrcweir     */
209cdf0e10cSrcweir    long beginOfSentence( [in] string aText, [in] long nStartPos,
210cdf0e10cSrcweir                      [in] ::com::sun::star::lang::Locale aLocale );
211cdf0e10cSrcweir
212cdf0e10cSrcweir    //------------------------------------------------------------------------
213cdf0e10cSrcweir    /** Traverses in Text from <em>nStartPos</em> to the end of a
214cdf0e10cSrcweir        sentence.
215cdf0e10cSrcweir
216cdf0e10cSrcweir        @returns
217cdf0e10cSrcweir            The position where the sentence ends.
218cdf0e10cSrcweir     */
219cdf0e10cSrcweir    long endOfSentence( [in] string aText, [in] long nStartPos,
220cdf0e10cSrcweir                    [in] ::com::sun::star::lang::Locale aLocale );
221cdf0e10cSrcweir
222cdf0e10cSrcweir    //------------------------------------------------------------------------
223cdf0e10cSrcweir    /** Calculates the line break position in the Text from the specified
224cdf0e10cSrcweir        <em>nStartPos</em>.
225cdf0e10cSrcweir
226cdf0e10cSrcweir        @param nMinBreakPos
227cdf0e10cSrcweir            Defines a minimum break position for hyphenated line break.
228cdf0e10cSrcweir            When the position for hyphenated line break is less than
229cdf0e10cSrcweir            <em>nMinBreakPos</em>, the break position in
230cdf0e10cSrcweir            <type>LineBreakResults</type> is set to -1.
231cdf0e10cSrcweir
232cdf0e10cSrcweir        @param aHyphOptions
233cdf0e10cSrcweir            Defines if the hyphenator is to be used.
234cdf0e10cSrcweir
235cdf0e10cSrcweir        @param aUserOptions
236cdf0e10cSrcweir            Defines how to handle hanging punctuation and forbidden
237cdf0e10cSrcweir            characters at the start/end of a line.
238cdf0e10cSrcweir
239cdf0e10cSrcweir        @returns
240cdf0e10cSrcweir            The <type>LineBreakResults</type> contains the break
241cdf0e10cSrcweir            position of the line, <type>BreakType</type> and
242cdf0e10cSrcweir            <type scope="com::sun::star::linguistic2">XHyphenatedWord</type>.
243cdf0e10cSrcweir     */
244cdf0e10cSrcweir    LineBreakResults getLineBreak( [in] string aText, [in] long nStartPos,
245cdf0e10cSrcweir                    [in] ::com::sun::star::lang::Locale aLocale,
246cdf0e10cSrcweir                    [in] long nMinBreakPos,
247cdf0e10cSrcweir                    [in] LineBreakHyphenationOptions aHyphOptions,
248cdf0e10cSrcweir                    [in] LineBreakUserOptions aUserOptions );
249cdf0e10cSrcweir
250cdf0e10cSrcweir    //------------------------------------------------------------------------
251cdf0e10cSrcweir    /** Traverses in Text from <em>nStartPos</em> to the beginning of
252cdf0e10cSrcweir        the specified script type.
253cdf0e10cSrcweir
254cdf0e10cSrcweir        @param nScriptType
255cdf0e10cSrcweir            One of <type>ScriptType</type>.
256cdf0e10cSrcweir
257cdf0e10cSrcweir        @returns
258cdf0e10cSrcweir            The position where the script type starts.
259cdf0e10cSrcweir     */
260cdf0e10cSrcweir    long beginOfScript( [in] string aText, [in] long nStartPos,
261cdf0e10cSrcweir                    [in] short nScriptType );
262cdf0e10cSrcweir
263cdf0e10cSrcweir    //------------------------------------------------------------------------
264cdf0e10cSrcweir    /** Traverses in Text from <em>nStartPos</em> to the end of the
265cdf0e10cSrcweir        specified script type.
266cdf0e10cSrcweir
267cdf0e10cSrcweir        @param nScriptType
268cdf0e10cSrcweir            One of <type>ScriptType</type>.
269cdf0e10cSrcweir
270cdf0e10cSrcweir        @returns
271cdf0e10cSrcweir            The position where the script type ends.
272cdf0e10cSrcweir     */
273cdf0e10cSrcweir    long endOfScript( [in] string aText, [in] long nStartPos,
274cdf0e10cSrcweir                  [in] short nScriptType );
275cdf0e10cSrcweir
276cdf0e10cSrcweir    //------------------------------------------------------------------------
277cdf0e10cSrcweir    /** Traverses in Text from <em>nStartPos</em> to the next start of
278cdf0e10cSrcweir        the specified script type.
279cdf0e10cSrcweir
280cdf0e10cSrcweir        @param nScriptType
281cdf0e10cSrcweir            One of <type>ScriptType</type>.
282cdf0e10cSrcweir
283cdf0e10cSrcweir        @returns
284cdf0e10cSrcweir            The position where the next script type starts.
285cdf0e10cSrcweir     */
286cdf0e10cSrcweir    long nextScript( [in] string aText, [in] long nStartPos,
287cdf0e10cSrcweir                 [in] short nScriptType );
288cdf0e10cSrcweir
289cdf0e10cSrcweir    //------------------------------------------------------------------------
290cdf0e10cSrcweir    /** Traverses in Text from <em>nStartPos</em> to the previous start
291cdf0e10cSrcweir        of the specified script type.
292cdf0e10cSrcweir
293cdf0e10cSrcweir        @param nScriptType
294cdf0e10cSrcweir            One of <type>ScriptType</type>.
295cdf0e10cSrcweir
296cdf0e10cSrcweir        @returns
297cdf0e10cSrcweir            The position where the previous script type starts.
298cdf0e10cSrcweir     */
299cdf0e10cSrcweir    long previousScript( [in] string aText, [in] long nStartPos,
300cdf0e10cSrcweir                      [in] short nScriptType );
301cdf0e10cSrcweir
302cdf0e10cSrcweir    //------------------------------------------------------------------------
303cdf0e10cSrcweir    /** Gets the script type of the character at position <em>nPos</em>.
304cdf0e10cSrcweir
305cdf0e10cSrcweir        @returns
306cdf0e10cSrcweir            One of <type>ScriptType</type>.
307cdf0e10cSrcweir     */
308cdf0e10cSrcweir    short   getScriptType( [in] string aText, [in] long nPos);
309cdf0e10cSrcweir
310cdf0e10cSrcweir    //------------------------------------------------------------------------
311cdf0e10cSrcweir    /** Traverses in Text from <em>nStartPos</em> to the beginning of
312cdf0e10cSrcweir        the specified character type.
313cdf0e10cSrcweir
314cdf0e10cSrcweir        @param nCharType
315cdf0e10cSrcweir            One of <type>CharType</type>.
316cdf0e10cSrcweir
317cdf0e10cSrcweir        @returns
318cdf0e10cSrcweir            The position where the character type starts.
319cdf0e10cSrcweir     */
320cdf0e10cSrcweir    long beginOfCharBlock( [in] string aText, [in] long nStartPos,
321cdf0e10cSrcweir                       [in] ::com::sun::star::lang::Locale aLocale,
322cdf0e10cSrcweir                       [in] short nCharType );
323cdf0e10cSrcweir
324cdf0e10cSrcweir    //------------------------------------------------------------------------
325cdf0e10cSrcweir    /** Traverses in Text from <em>nStartPos</em> to the end of the
326cdf0e10cSrcweir        specified character type.
327cdf0e10cSrcweir
328cdf0e10cSrcweir        @param nCharType
329cdf0e10cSrcweir            One of <type>CharType</type>.
330cdf0e10cSrcweir
331cdf0e10cSrcweir        @returns
332cdf0e10cSrcweir            The position where the character type ends.
333cdf0e10cSrcweir     */
334cdf0e10cSrcweir    long endOfCharBlock( [in] string aText, [in] long nStartPos,
335cdf0e10cSrcweir                     [in] ::com::sun::star::lang::Locale aLocale,
336cdf0e10cSrcweir                     [in] short nCharType );
337cdf0e10cSrcweir
338cdf0e10cSrcweir    //------------------------------------------------------------------------
339cdf0e10cSrcweir    /** Traverses in Text from <em>nStartPos</em> to the next start of
340cdf0e10cSrcweir        the specified character type.
341cdf0e10cSrcweir
342cdf0e10cSrcweir        @param nCharType
343cdf0e10cSrcweir            One of <type>CharType</type>.
344cdf0e10cSrcweir
345cdf0e10cSrcweir        @returns
346cdf0e10cSrcweir            The position where the next character type starts.
347cdf0e10cSrcweir     */
348cdf0e10cSrcweir    long nextCharBlock( [in] string aText, [in] long nStartPos,
349cdf0e10cSrcweir                    [in] ::com::sun::star::lang::Locale aLocale,
350cdf0e10cSrcweir                    [in] short nCharType );
351cdf0e10cSrcweir
352cdf0e10cSrcweir    //------------------------------------------------------------------------
353cdf0e10cSrcweir    /** Traverses in Text from <em>nStartPos</em> to the previous start
354cdf0e10cSrcweir        of the specified character type.
355cdf0e10cSrcweir
356cdf0e10cSrcweir        @param nCharType
357cdf0e10cSrcweir            One of <type>CharType</type>.
358cdf0e10cSrcweir
359cdf0e10cSrcweir        @returns
360cdf0e10cSrcweir            The position where the previous character type starts.
361cdf0e10cSrcweir     */
362cdf0e10cSrcweir    long previousCharBlock ( [in] string aText, [in] long nStartPos,
363cdf0e10cSrcweir                     [in] ::com::sun::star::lang::Locale aLocale,
364cdf0e10cSrcweir                     [in] short nCharType );
365cdf0e10cSrcweir};
366cdf0e10cSrcweir
367cdf0e10cSrcweir//============================================================================
368cdf0e10cSrcweir}; }; }; };
369cdf0e10cSrcweir
370cdf0e10cSrcweir#endif
371