1d1766043SAndrew Rist/************************************************************** 2*90ed883bSmseidel * 3d1766043SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4d1766043SAndrew Rist * or more contributor license agreements. See the NOTICE file 5d1766043SAndrew Rist * distributed with this work for additional information 6d1766043SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7d1766043SAndrew Rist * to you under the Apache License, Version 2.0 (the 8d1766043SAndrew Rist * "License"); you may not use this file except in compliance 9d1766043SAndrew Rist * with the License. You may obtain a copy of the License at 10*90ed883bSmseidel * 11d1766043SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*90ed883bSmseidel * 13d1766043SAndrew Rist * Unless required by applicable law or agreed to in writing, 14d1766043SAndrew Rist * software distributed under the License is distributed on an 15d1766043SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16d1766043SAndrew Rist * KIND, either express or implied. See the License for the 17d1766043SAndrew Rist * specific language governing permissions and limitations 18d1766043SAndrew Rist * under the License. 
19*90ed883bSmseidel * 20d1766043SAndrew Rist *************************************************************/ 21d1766043SAndrew Rist 22d1766043SAndrew Rist 23*90ed883bSmseidel 24cdf0e10cSrcweir#ifndef __com_sun_star_i18n_XTransliteration_idl__ 25cdf0e10cSrcweir#define __com_sun_star_i18n_XTransliteration_idl__ 26cdf0e10cSrcweir 27cdf0e10cSrcweir#include <com/sun/star/lang/Locale.idl> 28cdf0e10cSrcweir#include <com/sun/star/uno/XInterface.idl> 29cdf0e10cSrcweir#include <com/sun/star/i18n/TransliterationModules.idl> 30cdf0e10cSrcweir#include <com/sun/star/i18n/TransliterationModulesNew.idl> 31cdf0e10cSrcweir 32cdf0e10cSrcweir//============================================================================= 33cdf0e10cSrcweir 34cdf0e10cSrcweirmodule com { module sun { module star { module i18n { 35cdf0e10cSrcweir 36cdf0e10cSrcweir//============================================================================= 37cdf0e10cSrcweir 38cdf0e10cSrcweir/** 39cdf0e10cSrcweir Character conversions like case folding or Hiragana to Katakana. 40cdf0e10cSrcweir 41cdf0e10cSrcweir <p> Transliteration is a character to character conversion but it is 42cdf0e10cSrcweir not always a one to one mapping between characters. Transliteration 43cdf0e10cSrcweir modules are primarily used by collation, and search and replace 44cdf0e10cSrcweir modules to perform approximate search. It can also be used to format 45cdf0e10cSrcweir the numbers in different numbering systems. </p> 46cdf0e10cSrcweir 47cdf0e10cSrcweir <p> In order to select transliteration modules for different 48cdf0e10cSrcweir purposes, they are classified with attributes of 49cdf0e10cSrcweir <type>TransliterationType</type>. </p> 50cdf0e10cSrcweir 51cdf0e10cSrcweir <p> For Western languages there would be three transliteration 52cdf0e10cSrcweir modules available to compare two mixed case strings: upper to lower, 53cdf0e10cSrcweir lower to upper, and ignore case. 
</p> 54cdf0e10cSrcweir 55cdf0e10cSrcweir <p> A typical calling sequence of transliteration is 56cdf0e10cSrcweir <ol> 57cdf0e10cSrcweir <li> getAvailableModules() </li> 58cdf0e10cSrcweir <li> loadModulesByImplNames() </li> 59cdf0e10cSrcweir <li> equals() </li> 60cdf0e10cSrcweir </ol> 61cdf0e10cSrcweir or another one is 62cdf0e10cSrcweir <ol> 63cdf0e10cSrcweir <li> loadModule() </li> 64cdf0e10cSrcweir <li> transliterate() </li> 65cdf0e10cSrcweir </ol> 66cdf0e10cSrcweir </p> 67cdf0e10cSrcweir 68cdf0e10cSrcweir*/ 69cdf0e10cSrcweir 70cdf0e10cSrcweir/* comment: 71cdf0e10cSrcweir * 0. 72cdf0e10cSrcweir * All the IGNORE-type functionalities (Range, equals) are based on mapping, 73cdf0e10cSrcweir * except the equals() method in IGNORE_CASE, which is based on locale-independent 74cdf0e10cSrcweir * case folding. 75*90ed883bSmseidel * (This second assumption is very complicated and may cause confusion in use) 76cdf0e10cSrcweir * 77cdf0e10cSrcweir * 1. 78cdf0e10cSrcweir * We are assuming Upper to Lower mapping as one kind of transliteration. 79cdf0e10cSrcweir * The mapping depends on Locale. 80cdf0e10cSrcweir * Upper <-> Lower methods are just wrappers to provide equals() and Range() 81cdf0e10cSrcweir * 82cdf0e10cSrcweir * 2. 83cdf0e10cSrcweir * equals() in IGNORE_CASE module is locale-independent and 84cdf0e10cSrcweir * we don't provide locale-sensitive ones. 85cdf0e10cSrcweir * The reason we provided locale-independent ones is that IGNORE_CASE is mainly 86cdf0e10cSrcweir * dedicated to StarOffice internal code. 87cdf0e10cSrcweir * 88cdf0e10cSrcweir * 3. 89cdf0e10cSrcweir * TransliterationModules is used just for convenience without calling 90cdf0e10cSrcweir * getAvailableModules(). 91cdf0e10cSrcweir * 92cdf0e10cSrcweir * 4. 93cdf0e10cSrcweir * Implementation name in the methods below is not the same as 94*90ed883bSmseidel * the true implementation name registered. 
95cdf0e10cSrcweir * In particular, for generic modules:"UPPERCASE_LOWERCASE", 96cdf0e10cSrcweir * "LOWERCASE_UPPERCASE", "IGNORE_CASE", there is no registered name. 97cdf0e10cSrcweir */ 98cdf0e10cSrcweir 99cdf0e10cSrcweir 100cdf0e10cSrcweirpublished interface XTransliteration: com::sun::star::uno::XInterface 101cdf0e10cSrcweir{ 102cdf0e10cSrcweir 103cdf0e10cSrcweir //------------------------------------------------------------------------ 104cdf0e10cSrcweir /** Unique ASCII name to identify a module. This name is used 105cdf0e10cSrcweir to get its localized name for menus, dialogs etc. The behavior 106cdf0e10cSrcweir is undefined for <const>TransliterationType::CASCADE</const> 107cdf0e10cSrcweir modules. 108cdf0e10cSrcweir */ 109cdf0e10cSrcweir string getName(); 110cdf0e10cSrcweir 111cdf0e10cSrcweir //------------------------------------------------------------------------ 112cdf0e10cSrcweir /** Return the attribute(s) associated with this transliterator 113cdf0e10cSrcweir object, as defined in <type>TransliterationType</type>. The 114cdf0e10cSrcweir value is determined by the transliteration modules. For example, 115cdf0e10cSrcweir for UPPERCASE_LOWERCASE, ONE_TO_ONE is returned, for 116cdf0e10cSrcweir IGNORE_CASE, IGNORE is returned. 117cdf0e10cSrcweir */ 118cdf0e10cSrcweir short getType(); 119cdf0e10cSrcweir 120cdf0e10cSrcweir //------------------------------------------------------------------------ 121cdf0e10cSrcweir /** Load instance of predefined module - old style method, superseded by <member>XTransliteration::loadModuleNew()</member>. 122cdf0e10cSrcweir */ 123cdf0e10cSrcweir void loadModule( [in] TransliterationModules eModType, 124cdf0e10cSrcweir [in] ::com::sun::star::lang::Locale aLocale ); 125cdf0e10cSrcweir 126cdf0e10cSrcweir //------------------------------------------------------------------------ 127cdf0e10cSrcweir /** Load a sequence of instances of predefined modules - supersedes 128cdf0e10cSrcweir method <member>XTransliteration::loadModule()</member>. 
129cdf0e10cSrcweir */ 130cdf0e10cSrcweir void loadModuleNew( [in] sequence <TransliterationModulesNew> aModType, 131cdf0e10cSrcweir [in] ::com::sun::star::lang::Locale aLocale ); 132cdf0e10cSrcweir 133cdf0e10cSrcweir //------------------------------------------------------------------------ 134cdf0e10cSrcweir /** Load instance of UNO registered module. 135cdf0e10cSrcweir 136cdf0e10cSrcweir <p> Each transliteration module is registered under a different 137cdf0e10cSrcweir service name. The convention for the service name is 138cdf0e10cSrcweir com.sun.star.i18n.Transliteration.l10n.{implName}. The 139cdf0e10cSrcweir {implName} is a unique name used to identify a module. The 140cdf0e10cSrcweir implName is used to get a localized name for the transliteration 141cdf0e10cSrcweir module. The implName is used in locale data to list the 142cdf0e10cSrcweir available transliteration modules for the locale. There are some 143cdf0e10cSrcweir transliteration modules that are always available. The names of 144cdf0e10cSrcweir those modules are listed as enum 145cdf0e10cSrcweir <type>TransliterationModules</type> names. For modules not 146cdf0e10cSrcweir listed there it is possible to load them directly by their 147cdf0e10cSrcweir implName. </p> 148cdf0e10cSrcweir 149cdf0e10cSrcweir @param aImplName 150cdf0e10cSrcweir The module's {implName} under which it is registered with 151cdf0e10cSrcweir com.sun.star.i18n.Transliteration.l10n.{implName}. 152cdf0e10cSrcweir */ 153cdf0e10cSrcweir void loadModuleByImplName( [in] string aImplName, 154cdf0e10cSrcweir [in] ::com::sun::star::lang::Locale aLocale ); 155cdf0e10cSrcweir 156cdf0e10cSrcweir //------------------------------------------------------------------------ 157cdf0e10cSrcweir /** Load a sequence of instances of transliteration modules. 158cdf0e10cSrcweir Output of one module is fed as input to the next module in 159cdf0e10cSrcweir the sequence. 
The object created by this call has 160cdf0e10cSrcweir <type>TransliterationType</type> CASCADE and IGNORE types. 161cdf0e10cSrcweir 162cdf0e10cSrcweir @param aImplNameList 163cdf0e10cSrcweir Only IGNORE type modules can be specified. 164cdf0e10cSrcweir */ 165cdf0e10cSrcweir void loadModulesByImplNames( [in] sequence <string> aImplNameList, 166cdf0e10cSrcweir [in] ::com::sun::star::lang::Locale aLocale ); 167cdf0e10cSrcweir 168cdf0e10cSrcweir //------------------------------------------------------------------------ 169cdf0e10cSrcweir /** List the available transliteration modules for a given locale. 170*90ed883bSmseidel It can be filtered based on its type. 171cdf0e10cSrcweir 172cdf0e10cSrcweir @param nType 173cdf0e10cSrcweir A bitmask field of values defined in 174cdf0e10cSrcweir <type>TransliterationType</type> 175cdf0e10cSrcweir */ 176cdf0e10cSrcweir sequence<string> getAvailableModules( 177cdf0e10cSrcweir [in] ::com::sun::star::lang::Locale aLocale, 178cdf0e10cSrcweir [in] short nType ); 179cdf0e10cSrcweir 180cdf0e10cSrcweir 181cdf0e10cSrcweir //------------------------------------------------------------------------ 182cdf0e10cSrcweir /** Transliterate a substring. This method can be called if the 183cdf0e10cSrcweir object doesn't have <type>TransliterationType</type> IGNORE 184cdf0e10cSrcweir attribute. 185cdf0e10cSrcweir 186cdf0e10cSrcweir @param aInStr 187cdf0e10cSrcweir The input string. 188cdf0e10cSrcweir 189cdf0e10cSrcweir @param nStartPos 190cdf0e10cSrcweir Start position within aInStr from where transliteration starts. 191*90ed883bSmseidel 192cdf0e10cSrcweir @param nCount 193cdf0e10cSrcweir Number of code points to be transliterated. 
194cdf0e10cSrcweir 195cdf0e10cSrcweir @param rOffset 196cdf0e10cSrcweir To find the grapheme of input string corresponding to the 197cdf0e10cSrcweir grapheme of output string, rOffset provides the offset array 198cdf0e10cSrcweir whose index is the offset of output string, the element 199cdf0e10cSrcweir containing the position within the input string before 200cdf0e10cSrcweir transliteration. 201cdf0e10cSrcweir */ 202cdf0e10cSrcweir string transliterate( [in] string aInStr, [in] long nStartPos, 203cdf0e10cSrcweir [in] long nCount, [out] sequence <long> rOffset ); 204cdf0e10cSrcweir 205cdf0e10cSrcweir //------------------------------------------------------------------------ 206cdf0e10cSrcweir /** @deprecated 207cdf0e10cSrcweir For internal use, this method is supported to get the 208cdf0e10cSrcweir "transliteration", which equals() is based on. 209cdf0e10cSrcweir */ 210cdf0e10cSrcweir string folding( [in] string aInStr, [in] long nStartPos, 211cdf0e10cSrcweir [in] long nCount, [out] sequence <long> rOffset ); 212cdf0e10cSrcweir 213cdf0e10cSrcweir //------------------------------------------------------------------------ 214cdf0e10cSrcweir /** Match two substrings and find if they are equivalent as per this 215cdf0e10cSrcweir transliteration. 216*90ed883bSmseidel 217cdf0e10cSrcweir <p> This method can be called if the object has 218cdf0e10cSrcweir <type>TransliterationType</type> IGNORE attribute. </p> 219cdf0e10cSrcweir 220cdf0e10cSrcweir <p> Returns the number of matched code points in any case, even if 221cdf0e10cSrcweir strings are not equal, for example: <br/> 222cdf0e10cSrcweir equals( "a", 0, 1, nMatch1, "aaa", 0, 3, nMatch2 ) <br/> 223cdf0e10cSrcweir returns <FALSE/> and nMatch1:=1 and nMatch2:=1 <br/> 224cdf0e10cSrcweir equals( "aab", 0, 3, nMatch1, "aaa", 0, 3, nMatch2 ) <br/> 225cdf0e10cSrcweir returns <FALSE/> and nMatch1:=2 and nMatch2:=2 <br/> </p> 226cdf0e10cSrcweir 227cdf0e10cSrcweir @param aStr1 228cdf0e10cSrcweir First string to match. 
229cdf0e10cSrcweir 230cdf0e10cSrcweir @param nPos1 231cdf0e10cSrcweir Start position within aStr1. 232cdf0e10cSrcweir 233cdf0e10cSrcweir @param nCount1 234cdf0e10cSrcweir Number of code points to use of aStr1. 235cdf0e10cSrcweir 236cdf0e10cSrcweir @param rMatch1 237cdf0e10cSrcweir Returns number of matched code points in aStr1. 238cdf0e10cSrcweir 239cdf0e10cSrcweir @param aStr2 240cdf0e10cSrcweir Second string to match. 241cdf0e10cSrcweir 242cdf0e10cSrcweir @param nPos2 243cdf0e10cSrcweir Start position within aStr2. 244cdf0e10cSrcweir 245cdf0e10cSrcweir @param nCount2 246cdf0e10cSrcweir Number of code points to use of aStr2. 247cdf0e10cSrcweir 248cdf0e10cSrcweir @param rMatch2 249cdf0e10cSrcweir Returns number of matched code points in aStr2. 250cdf0e10cSrcweir 251cdf0e10cSrcweir @returns 252cdf0e10cSrcweir <TRUE/> if the substrings are equal per this 253cdf0e10cSrcweir transliteration <br/> 254cdf0e10cSrcweir <FALSE/> else. 255cdf0e10cSrcweir */ 256cdf0e10cSrcweir 257*90ed883bSmseidel boolean equals( [in] string aStr1, [in] long nPos1, [in] long nCount1, 258cdf0e10cSrcweir [out] long rMatch1, 259cdf0e10cSrcweir [in] string aStr2, [in] long nPos2, [in] long nCount2, 260cdf0e10cSrcweir [out] long rMatch2 ); 261cdf0e10cSrcweir 262cdf0e10cSrcweir //------------------------------------------------------------------------ 263cdf0e10cSrcweir /** Transliterate one set of characters to another. 264*90ed883bSmseidel 265cdf0e10cSrcweir <p> This method is intended for getting corresponding ranges and 266cdf0e10cSrcweir can be called if the object has <type>TransliterationType</type> 267cdf0e10cSrcweir IGNORE attribute. </p> 268*90ed883bSmseidel 269cdf0e10cSrcweir <p> For example: generic IGNORE_CASE transliterateRange( "a", "i" ) 270cdf0e10cSrcweir returns {"A","I","a","i"}, transliterateRange( "a", "a" ) 271cdf0e10cSrcweir returns {"A","A","a","a"}. 
</p> 272*90ed883bSmseidel 273*90ed883bSmseidel <p> Use this transliteration to create regular expressions like 274cdf0e10cSrcweir [a-i] --> [A-Ia-i]. </p> 275*90ed883bSmseidel 276cdf0e10cSrcweir @returns 277cdf0e10cSrcweir String sequence containing corresponding transliterated 278cdf0e10cSrcweir pairs of characters to represent a range. 279cdf0e10cSrcweir */ 280cdf0e10cSrcweir sequence <string> transliterateRange( [in] string aStr1, [in] string aStr2 ); 281cdf0e10cSrcweir 282cdf0e10cSrcweir //------------------------------------------------------------------------ 283*90ed883bSmseidel /** Compare 2 substrings as per this transliteration. It transliterates both 284cdf0e10cSrcweir substrings before comparing them. 285cdf0e10cSrcweir 286cdf0e10cSrcweir @param aStr1 287cdf0e10cSrcweir First string. 288cdf0e10cSrcweir 289cdf0e10cSrcweir @param nOff1 290cdf0e10cSrcweir Offset (from 0) of the first substring. 291cdf0e10cSrcweir 292cdf0e10cSrcweir @param nLen1 293cdf0e10cSrcweir Length (from offset) of the first substring. 294cdf0e10cSrcweir 295cdf0e10cSrcweir @param aStr2 296cdf0e10cSrcweir Second string. 297cdf0e10cSrcweir 298cdf0e10cSrcweir @param nOff2 299cdf0e10cSrcweir Offset (from 0) of the second substring. 300cdf0e10cSrcweir 301cdf0e10cSrcweir @param nLen2 302cdf0e10cSrcweir Length (from offset) of the second substring. 
303cdf0e10cSrcweir 304cdf0e10cSrcweir @returns 305cdf0e10cSrcweir 1 if the first substring is greater than the second substring <br/> 306cdf0e10cSrcweir 0 if the first substring is equal to the second substring <br/> 307cdf0e10cSrcweir -1 if the first substring is less than the second substring 308cdf0e10cSrcweir */ 309cdf0e10cSrcweir long compareSubstring( [in] string aStr1, [in] long nOff1, [in] long nLen1, 310cdf0e10cSrcweir [in] string aStr2, [in] long nOff2, [in] long nLen2 ); 311cdf0e10cSrcweir 312cdf0e10cSrcweir //------------------------------------------------------------------------ 313cdf0e10cSrcweir /** Compare 2 strings as per this transliteration. It transliterates both 314cdf0e10cSrcweir strings before comparing them. 315cdf0e10cSrcweir 316cdf0e10cSrcweir @returns 317cdf0e10cSrcweir 1 if the first string is greater than the second string <br/> 318cdf0e10cSrcweir 0 if the first string is equal to the second string <br/> 319cdf0e10cSrcweir -1 if the first string is less than the second string 320cdf0e10cSrcweir */ 321cdf0e10cSrcweir long compareString( [in] string aStr1, [in] string aStr2 ); 322cdf0e10cSrcweir 323cdf0e10cSrcweir}; 324cdf0e10cSrcweir 325cdf0e10cSrcweir//============================================================================= 326*90ed883bSmseidel 327cdf0e10cSrcweir}; }; }; }; 328cdf0e10cSrcweir 329cdf0e10cSrcweir#endif 330