/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/


#ifndef __com_sun_star_i18n_XBreakIterator_idl__
#define __com_sun_star_i18n_XBreakIterator_idl__

// The interface below derives from XInterface, so include its declaration
// directly instead of relying on a transitive include.
#ifndef __com_sun_star_uno_XInterface_idl__
#include <com/sun/star/uno/XInterface.idl>
#endif

#ifndef __com_sun_star_lang_Locale_idl__
#include <com/sun/star/lang/Locale.idl>
#endif

#ifndef __com_sun_star_i18n_LineBreakUserOptions_idl__
#include <com/sun/star/i18n/LineBreakUserOptions.idl>
#endif

#ifndef __com_sun_star_i18n_LineBreakHyphenationOptions_idl__
#include <com/sun/star/i18n/LineBreakHyphenationOptions.idl>
#endif

#ifndef __com_sun_star_i18n_LineBreakResults_idl__
#include <com/sun/star/i18n/LineBreakResults.idl>
#endif

#ifndef __com_sun_star_i18n_Boundary_idl__
#include <com/sun/star/i18n/Boundary.idl>
#endif

//============================================================================

module com { module sun { module star { module i18n {

//============================================================================

/**
    Contains the base routines for iterating over a Unicode string. Iterates
    over characters, words, sentences and line breaks.

    <p> Assumption: StartPos is inclusive and EndPos is exclusive. </p>
 */

published interface XBreakIterator: com::sun::star::uno::XInterface
{
    //------------------------------------------------------------------------
    /** Traverses the specified number of characters/cells in Text from
        <em>nStartPos</em> forwards.
        <type>CharacterIteratorMode</type> can be cell based or
        character based. A cell is made of more than one character.

        @param aText
            The text to traverse.

        @param nStartPos
            The position in <em>aText</em> at which the traversal starts.

        @param nCharacterIteratorMode
            One of <type>CharacterIteratorMode</type>, selects cell based
            or character based traversal.

        @param nCount
            Number of characters to traverse, it should not be less than 0.
            If you want to traverse in the opposite direction use
            <member>XBreakIterator::previousCharacters()</member> instead.

        @param nDone
            Out parameter. Presumably receives the number of
            characters/cells actually traversed; this is not stated by the
            original documentation, confirm against the implementation.

        @returns
            Presumably the position reached after the traversal; this is
            not stated by the original documentation, confirm against the
            implementation.
     */
    long nextCharacters( [in] string aText, [in] long nStartPos,
                         [in] ::com::sun::star::lang::Locale aLocale,
                         [in] short nCharacterIteratorMode,
                         [in] long nCount, [out] long nDone );

    //------------------------------------------------------------------------
    /** Traverses the specified number of characters/cells in Text from
        <em>nStartPos</em> backwards.
        <type>CharacterIteratorMode</type> can be cell based or
        character based. A cell is made of more than one character.

        @param aText
            The text to traverse.

        @param nStartPos
            The position in <em>aText</em> at which the traversal starts.

        @param nCharacterIteratorMode
            One of <type>CharacterIteratorMode</type>, selects cell based
            or character based traversal.

        @param nCount
            Number of characters to traverse, it should not be less than 0.
            If you want to traverse in the opposite direction use
            <member>XBreakIterator::nextCharacters()</member> instead.

        @param nDone
            Out parameter. Presumably receives the number of
            characters/cells actually traversed; this is not stated by the
            original documentation, confirm against the implementation.

        @returns
            Presumably the position reached after the traversal; this is
            not stated by the original documentation, confirm against the
            implementation.
     */
    long previousCharacters( [in] string aText, [in] long nStartPos,
                             [in] ::com::sun::star::lang::Locale aLocale,
                             [in] short nCharacterIteratorMode,
                             [in] long nCount, [out] long nDone );

    //------------------------------------------------------------------------
    /** Traverses one word in Text from <em>nStartPos</em> forwards.

        @param nWordType
            One of <type>WordType</type>, specifies the type of
            travelling.

        @returns
            The <type>Boundary</type> of the found word. Normally used for
            CTRL-Right.
     */
    Boundary nextWord( [in] string aText, [in] long nStartPos,
                       [in] ::com::sun::star::lang::Locale aLocale,
                       [in] short nWordType);

    //------------------------------------------------------------------------
    /** Traverses one word in Text from <em>nStartPos</em> backwards.

        @param aLocale
            The locale of the character preceding <em>nStartPos</em>.

            <p> If the previous character is a space character and
            <em>nWordType</em> indicates spaces should be skipped, and
            if the first non-space character is an Asian character,
            then, since Asian word break needs language specific
            wordbreak dictionaries, the method will return -1 in
            <member>Boundary::endPos</member> and the position after the
            Asian character (i.e. the space character) in
            <member>Boundary::startPos</member>. The caller then has to
            call this method again with a correct <em>aLocale</em>
            referring to the Asian character, which is then the previous
            character of the space character where <em>nStartPos</em>
            points to. </p>

            <p> <b>Note</b> that the OpenOffice.org 1.0 / StarOffice 6.0
            / StarSuite 6.0 i18n framework doesn't behave like this and
            mixed Western/CJK text may lead to wrong word iteration.
            This is fixed in later versions. </p>

        @param nWordType
            One of <type>WordType</type>, specifies the type of
            travelling.

        @returns
            The <type>Boundary</type> of the found word. Normally used for
            CTRL-Left.
     */
    Boundary previousWord( [in] string aText, [in] long nStartPos,
                           [in] ::com::sun::star::lang::Locale aLocale,
                           [in] short nWordType);

    //------------------------------------------------------------------------
    /** Identifies StartPos and EndPos of the current word.

        <p> If <em>nPos</em> is the boundary of a word, it is StartPos
        of one word and EndPos of the previous word, so the outcome of
        the algorithm can be indeterminate. In this situation the
        <em>bPreferForward</em> flag is used. If bPreferForward ==
        <FALSE/>, <em>nPos</em> is considered to be the end of the word
        and we look backwards for the beginning of the word, otherwise
        <em>nPos</em> is considered to be the start of the next word and
        we look forwards for the end of the word. </p>

        @param nWordType
            One of <type>WordType</type>.

        @param bPreferForward
            Resolves the ambiguity when <em>nPos</em> is a word boundary,
            as described above.

        @returns
            The <type>Boundary</type> of the current word.
     */
    Boundary getWordBoundary( [in] string aText, [in] long nPos,
                              [in] ::com::sun::star::lang::Locale aLocale,
                              [in] short nWordType,
                              [in] boolean bPreferForward );

    //------------------------------------------------------------------------
    /** @deprecated
        Get the <type>WordType</type> of the word that starts at
        position <em>nPos</em>.

        <p> This method is mis-defined, since <type>WordType</type>
        is not an attribute of a word, but a way to break words,
        like excluding or including tail spaces for spellchecker
        or cursor travelling. It always returns 0.
        </p>
     */
    short getWordType( [in] string aText, [in] long nPos,
                       [in] ::com::sun::star::lang::Locale aLocale);

    //------------------------------------------------------------------------
    /** Determines whether a word starts at position <em>nPos</em>.

        <p> It is possible for both this method and the following
        method <em>isEndWord</em> to return <TRUE/> for the same
        position, since StartPos of a word is inclusive while EndPos
        of a word is exclusive.
        </p>

        @param nWordType
            One of <type>WordType</type>.
     */
    boolean isBeginWord( [in] string aText, [in] long nPos,
                         [in] ::com::sun::star::lang::Locale aLocale,
                         [in] short nWordType);

    //------------------------------------------------------------------------
    /** Determines whether a word ends at position <em>nPos</em>.

        @param nWordType
            One of <type>WordType</type>.
     */
    boolean isEndWord( [in] string aText, [in] long nPos,
                       [in] ::com::sun::star::lang::Locale aLocale,
                       [in] short nWordType);

    //------------------------------------------------------------------------
    /** Traverses in Text from <em>nStartPos</em> to the start of a
        sentence.

        @returns
            The position where the sentence starts.
     */
    long beginOfSentence( [in] string aText, [in] long nStartPos,
                          [in] ::com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Traverses in Text from <em>nStartPos</em> to the end of a
        sentence.

        @returns
            The position where the sentence ends.
     */
    long endOfSentence( [in] string aText, [in] long nStartPos,
                        [in] ::com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Calculates the line break position in the Text from the specified
        <em>nStartPos</em>.

        @param nMinBreakPos
            Defines a minimum break position for hyphenated line break.
            When the position for hyphenated line break is less than
            <em>nMinBreakPos</em>, break position in
            <type>LineBreakResults</type> is set to -1.

        @param aHyphOptions
            Defines if the hyphenator is to be used.

        @param aUserOptions
            Defines how to handle hanging punctuations and forbidden
            characters at the start/end of a line.

        @returns
            The <type>LineBreakResults</type> contain the break
            position of the line, <type>BreakType</type> and
            <type scope="com::sun::star::linguistic2">XHyphenatedWord</type>.
     */
    LineBreakResults getLineBreak( [in] string aText, [in] long nStartPos,
                         [in] ::com::sun::star::lang::Locale aLocale,
                         [in] long nMinBreakPos,
                         [in] LineBreakHyphenationOptions aHyphOptions,
                         [in] LineBreakUserOptions aUserOptions );

    //------------------------------------------------------------------------
    /** Traverses in Text from <em>nStartPos</em> to the beginning of
        the specified script type.

        @param nScriptType
            One of <type>ScriptType</type>.

        @returns
            The position where the script type starts.
     */
    long beginOfScript( [in] string aText, [in] long nStartPos,
                        [in] short nScriptType );

    //------------------------------------------------------------------------
    /** Traverses in Text from <em>nStartPos</em> to the end of the
        specified script type.

        @param nScriptType
            One of <type>ScriptType</type>.

        @returns
            The position where the script type ends.
     */
    long endOfScript( [in] string aText, [in] long nStartPos,
                      [in] short nScriptType );

    //------------------------------------------------------------------------
    /** Traverses in Text from <em>nStartPos</em> to the next start of
        the specified script type.

        @param nScriptType
            One of <type>ScriptType</type>.

        @returns
            The position where the next script type starts.
     */
    long nextScript( [in] string aText, [in] long nStartPos,
                     [in] short nScriptType );

    //------------------------------------------------------------------------
    /** Traverses in Text from <em>nStartPos</em> to the previous start
        of the specified script type.

        @param nScriptType
            One of <type>ScriptType</type>.

        @returns
            The position where the previous script type starts.
     */
    long previousScript( [in] string aText, [in] long nStartPos,
                         [in] short nScriptType );

    //------------------------------------------------------------------------
    /** Gets the script type of the character at position <em>nPos</em>.

        @returns
            One of <type>ScriptType</type>.
     */
    short getScriptType( [in] string aText, [in] long nPos);

    //------------------------------------------------------------------------
    /** Traverses in Text from <em>nStartPos</em> to the beginning of
        the specified character type.

        @param nCharType
            One of <type>CharType</type>.

        @returns
            The position where the character type starts.
     */
    long beginOfCharBlock( [in] string aText, [in] long nStartPos,
                           [in] ::com::sun::star::lang::Locale aLocale,
                           [in] short nCharType );

    //------------------------------------------------------------------------
    /** Traverses in Text from <em>nStartPos</em> to the end of the
        specified character type.

        @param nCharType
            One of <type>CharType</type>.

        @returns
            The position where the character type ends.
     */
    long endOfCharBlock( [in] string aText, [in] long nStartPos,
                         [in] ::com::sun::star::lang::Locale aLocale,
                         [in] short nCharType );

    //------------------------------------------------------------------------
    /** Traverses in Text from <em>nStartPos</em> to the next start of
        the specified character type.

        @param nCharType
            One of <type>CharType</type>.

        @returns
            The position where the next character type starts.
     */
    long nextCharBlock( [in] string aText, [in] long nStartPos,
                        [in] ::com::sun::star::lang::Locale aLocale,
                        [in] short nCharType );

    //------------------------------------------------------------------------
    /** Traverses in Text from <em>nStartPos</em> to the previous start
        of the specified character type.

        @param nCharType
            One of <type>CharType</type>.

        @returns
            The position where the previous character type starts.
     */
    long previousCharBlock ( [in] string aText, [in] long nStartPos,
                             [in] ::com::sun::star::lang::Locale aLocale,
                             [in] short nCharType );
};

//============================================================================
}; }; }; };

#endif