1d1766043SAndrew Rist/**************************************************************
2*90ed883bSmseidel *
3d1766043SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4d1766043SAndrew Rist * or more contributor license agreements.  See the NOTICE file
5d1766043SAndrew Rist * distributed with this work for additional information
6d1766043SAndrew Rist * regarding copyright ownership.  The ASF licenses this file
7d1766043SAndrew Rist * to you under the Apache License, Version 2.0 (the
8d1766043SAndrew Rist * "License"); you may not use this file except in compliance
9d1766043SAndrew Rist * with the License.  You may obtain a copy of the License at
10*90ed883bSmseidel *
11d1766043SAndrew Rist *   http://www.apache.org/licenses/LICENSE-2.0
12*90ed883bSmseidel *
13d1766043SAndrew Rist * Unless required by applicable law or agreed to in writing,
14d1766043SAndrew Rist * software distributed under the License is distributed on an
15d1766043SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16d1766043SAndrew Rist * KIND, either express or implied.  See the License for the
17d1766043SAndrew Rist * specific language governing permissions and limitations
18d1766043SAndrew Rist * under the License.
19*90ed883bSmseidel *
20d1766043SAndrew Rist *************************************************************/
21d1766043SAndrew Rist
22d1766043SAndrew Rist
23*90ed883bSmseidel
24cdf0e10cSrcweir#ifndef __com_sun_star_i18n_XTransliteration_idl__
25cdf0e10cSrcweir#define __com_sun_star_i18n_XTransliteration_idl__
26cdf0e10cSrcweir
27cdf0e10cSrcweir#include <com/sun/star/lang/Locale.idl>
28cdf0e10cSrcweir#include <com/sun/star/uno/XInterface.idl>
29cdf0e10cSrcweir#include <com/sun/star/i18n/TransliterationModules.idl>
30cdf0e10cSrcweir#include <com/sun/star/i18n/TransliterationModulesNew.idl>
31cdf0e10cSrcweir
32cdf0e10cSrcweir//=============================================================================
33cdf0e10cSrcweir
34cdf0e10cSrcweirmodule com { module sun { module star { module i18n {
35cdf0e10cSrcweir
36cdf0e10cSrcweir//=============================================================================
37cdf0e10cSrcweir
38cdf0e10cSrcweir/**
39cdf0e10cSrcweir    Character conversions like case folding or Hiragana to Katakana.
40cdf0e10cSrcweir
41cdf0e10cSrcweir    <p> Transliteration is a character to character conversion but it is
42cdf0e10cSrcweir    not always a one to one mapping between characters. Transliteration
43cdf0e10cSrcweir    modules are primarily used by collation, and search and replace
44cdf0e10cSrcweir    modules to perform approximate search. It can also be used to format
45cdf0e10cSrcweir    the numbers in different numbering systems. </p>
46cdf0e10cSrcweir
47cdf0e10cSrcweir    <p> In order to select transliteration modules for different
48cdf0e10cSrcweir    purposes, they are classified with attributes of
49cdf0e10cSrcweir    <type>TransliterationType</type>. </p>
50cdf0e10cSrcweir
51cdf0e10cSrcweir    <p> For Western languages there would be three transliteration
52cdf0e10cSrcweir    modules available to compare two mixed case strings: upper to lower,
53cdf0e10cSrcweir    lower to upper, and ignore case. </p>
54cdf0e10cSrcweir
55cdf0e10cSrcweir    <p> A typical calling sequence of transliteration is
56cdf0e10cSrcweir        <ol>
57cdf0e10cSrcweir            <li> getAvailableModules() </li>
58cdf0e10cSrcweir            <li> loadModulesByImplNames() </li>
59cdf0e10cSrcweir            <li> equals() </li>
60cdf0e10cSrcweir        </ol>
61cdf0e10cSrcweir    or another one is
62cdf0e10cSrcweir        <ol>
63cdf0e10cSrcweir            <li> loadModule() </li>
64cdf0e10cSrcweir            <li> transliterate() </li>
65cdf0e10cSrcweir        </ol>
66cdf0e10cSrcweir    </p>
67cdf0e10cSrcweir
68cdf0e10cSrcweir*/
69cdf0e10cSrcweir
70cdf0e10cSrcweir/* comment:
71cdf0e10cSrcweir * 0.
72cdf0e10cSrcweir * All the IGNORE-type functionalities (Range, equals) are based on mapping,
73cdf0e10cSrcweir * except the equals() method in IGNORE_CASE, which is based on
74cdf0e10cSrcweir * locale-independent case folding.
75*90ed883bSmseidel * (This second assumption is very complicated and may cause confusion in use.)
76cdf0e10cSrcweir *
77cdf0e10cSrcweir * 1.
78cdf0e10cSrcweir * We are assuming Upper to Lower mapping as one of transliteration.
79cdf0e10cSrcweir * The mapping depends on Locale.
80cdf0e10cSrcweir * Upper <-> Lower methods are just wrappers to provide equals() and Range()
81cdf0e10cSrcweir *
82cdf0e10cSrcweir * 2.
83cdf0e10cSrcweir * equals() in IGNORE_CASE module is locale-independent and
84cdf0e10cSrcweir * we don't provide locale-sensitive ones.
85cdf0e10cSrcweir * The reason we provided locale-independent ones is that IGNORE_CASE is mainly
86cdf0e10cSrcweir * dedicated to StarOffice internal code.
87cdf0e10cSrcweir *
88cdf0e10cSrcweir * 3.
89cdf0e10cSrcweir * TransliterationModules is used just for convenience without calling
90cdf0e10cSrcweir * getAvailableModules().
91cdf0e10cSrcweir *
92cdf0e10cSrcweir * 4.
93cdf0e10cSrcweir * Implementation name in the methods below is not the same as
94*90ed883bSmseidel * the true implementation name registered.
95cdf0e10cSrcweir * In particular, for generic modules: "UPPERCASE_LOWERCASE",
96cdf0e10cSrcweir * "LOWERCASE_UPPERCASE", "IGNORE_CASE", there is no registered name.
97cdf0e10cSrcweir */
98cdf0e10cSrcweir
99cdf0e10cSrcweir
100cdf0e10cSrcweirpublished interface XTransliteration: com::sun::star::uno::XInterface
101cdf0e10cSrcweir{
102cdf0e10cSrcweir
103cdf0e10cSrcweir    //------------------------------------------------------------------------
104cdf0e10cSrcweir    /** Unique ASCII name to identify a module. This name is used
105cdf0e10cSrcweir        to get its localized name for menus, dialogs etc. The behavior
106cdf0e10cSrcweir        is undefined for <const>TransliterationType::CASCADE</const>
107cdf0e10cSrcweir        modules.
108cdf0e10cSrcweir     */
109cdf0e10cSrcweir    string  getName();
110cdf0e10cSrcweir
111cdf0e10cSrcweir    //------------------------------------------------------------------------
112cdf0e10cSrcweir    /** Return the attribute(s) associated with this transliterator
113cdf0e10cSrcweir        object, as defined in <type>TransliterationType</type>. The
114cdf0e10cSrcweir        value is determined by the transliteration modules. For example,
115cdf0e10cSrcweir        for UPPERCASE_LOWERCASE, a ONE_TO_ONE is returned, for
116cdf0e10cSrcweir        IGNORE_CASE, IGNORE is returned.
117cdf0e10cSrcweir     */
118cdf0e10cSrcweir    short   getType();
119cdf0e10cSrcweir
120cdf0e10cSrcweir    //------------------------------------------------------------------------
121cdf0e10cSrcweir    /** Load instance of predefined module - old style method.
122cdf0e10cSrcweir     */
123cdf0e10cSrcweir    void    loadModule( [in] TransliterationModules eModType,
124cdf0e10cSrcweir                        [in] ::com::sun::star::lang::Locale aLocale );
125cdf0e10cSrcweir
126cdf0e10cSrcweir    //------------------------------------------------------------------------
127cdf0e10cSrcweir    /** Load a sequence of instances of predefined modules - supersedes
128cdf0e10cSrcweir        method <member>XTransliteration::loadModule()</member>.
129cdf0e10cSrcweir     */
130cdf0e10cSrcweir    void    loadModuleNew(  [in] sequence <TransliterationModulesNew> aModType,
131cdf0e10cSrcweir                            [in] ::com::sun::star::lang::Locale aLocale );
132cdf0e10cSrcweir
133cdf0e10cSrcweir    //------------------------------------------------------------------------
134cdf0e10cSrcweir    /** Load instance of UNO registered module.
135cdf0e10cSrcweir
136cdf0e10cSrcweir        <p> Each transliteration module is registered under a different
137cdf0e10cSrcweir        service name. The convention for the service name is
138cdf0e10cSrcweir        com.sun.star.i18n.Transliteration.l10n.{implName}. The
139cdf0e10cSrcweir        {implName} is a unique name used to identify a module. The
140cdf0e10cSrcweir        implName is used to get a localized name for the transliteration
141cdf0e10cSrcweir        module. The implName is used in locale data to list the
142cdf0e10cSrcweir        available transliteration modules for the locale. There are some
143cdf0e10cSrcweir        transliteration modules that are always available. The names of
144cdf0e10cSrcweir        those modules are listed as enum
145cdf0e10cSrcweir        <type>TransliterationModules</type> names. For modules not
146cdf0e10cSrcweir        listed there it is possible to load them directly by their
147cdf0e10cSrcweir        implName.
148cdf0e10cSrcweir
149cdf0e10cSrcweir        @param aImplName
150cdf0e10cSrcweir            The module's {implName} under which it is registered with
151cdf0e10cSrcweir            com.sun.star.i18n.Transliteration.l10n.{implName}.
152cdf0e10cSrcweir     */
153cdf0e10cSrcweir    void    loadModuleByImplName(   [in] string aImplName,
154cdf0e10cSrcweir                                    [in] ::com::sun::star::lang::Locale aLocale );
155cdf0e10cSrcweir
156cdf0e10cSrcweir    //------------------------------------------------------------------------
157cdf0e10cSrcweir    /** Load a sequence of instances of transliteration modules.
158cdf0e10cSrcweir        Output of one module is fed as input to the next module in
159cdf0e10cSrcweir        the sequence. The object created by this call has
160cdf0e10cSrcweir        <type>TransliterationType</type> CASCADE and IGNORE types.
161cdf0e10cSrcweir
162cdf0e10cSrcweir        @param aImplNameList
163cdf0e10cSrcweir            Only IGNORE type modules can be specified.
164cdf0e10cSrcweir    */
165cdf0e10cSrcweir    void    loadModulesByImplNames( [in] sequence <string> aImplNameList,
166cdf0e10cSrcweir                                    [in] ::com::sun::star::lang::Locale aLocale );
167cdf0e10cSrcweir
168cdf0e10cSrcweir    //------------------------------------------------------------------------
169cdf0e10cSrcweir    /** List the available transliteration modules for a given locale.
170*90ed883bSmseidel        It can be filtered based on its type.
171cdf0e10cSrcweir
172cdf0e10cSrcweir        @param nType
173cdf0e10cSrcweir            A bitmask field of values defined in
174cdf0e10cSrcweir            <type>TransliterationType</type>
175cdf0e10cSrcweir    */
176cdf0e10cSrcweir    sequence<string>    getAvailableModules(
177cdf0e10cSrcweir                            [in] ::com::sun::star::lang::Locale aLocale,
178cdf0e10cSrcweir                            [in] short nType );
179cdf0e10cSrcweir
180cdf0e10cSrcweir
181cdf0e10cSrcweir    //------------------------------------------------------------------------
182cdf0e10cSrcweir    /** Transliterate a substring. This method can be called if the
183cdf0e10cSrcweir        object doesn't have <type>TransliterationType</type> IGNORE
184cdf0e10cSrcweir        attribute.
185cdf0e10cSrcweir
186cdf0e10cSrcweir        @param aInStr
187cdf0e10cSrcweir            The input string.
188cdf0e10cSrcweir
189cdf0e10cSrcweir        @param nStartPos
190cdf0e10cSrcweir            Start position within aInStr from where transliteration starts.
191*90ed883bSmseidel
192cdf0e10cSrcweir        @param nCount
193cdf0e10cSrcweir            Number of code points to be transliterated.
194cdf0e10cSrcweir
195cdf0e10cSrcweir        @param rOffset
196cdf0e10cSrcweir            To find the grapheme of input string corresponding to the
197cdf0e10cSrcweir            grapheme of output string, rOffset provides the offset array
198cdf0e10cSrcweir            whose index is the offset of output string, the element
199cdf0e10cSrcweir            containing the position within the input string before
200cdf0e10cSrcweir            transliteration.
201cdf0e10cSrcweir     */
202cdf0e10cSrcweir    string  transliterate(  [in] string aInStr, [in] long nStartPos,
203cdf0e10cSrcweir                            [in] long nCount, [out] sequence <long> rOffset );
204cdf0e10cSrcweir
205cdf0e10cSrcweir    //------------------------------------------------------------------------
206cdf0e10cSrcweir    /** @deprecated
207cdf0e10cSrcweir        For internal use, this method is supported to get the
208cdf0e10cSrcweir        "transliteration", which equals() is based on.
209cdf0e10cSrcweir     */
210cdf0e10cSrcweir    string  folding(    [in] string aInStr, [in] long nStartPos,
211cdf0e10cSrcweir                        [in] long nCount, [out] sequence <long> rOffset );
212cdf0e10cSrcweir
213cdf0e10cSrcweir    //------------------------------------------------------------------------
214cdf0e10cSrcweir    /** Match two substrings and find if they are equivalent as per this
215cdf0e10cSrcweir        transliteration.
216*90ed883bSmseidel
217cdf0e10cSrcweir        <p> This method can be called if the object has
218cdf0e10cSrcweir        <type>TransliterationType</type> IGNORE attribute. </p>
219cdf0e10cSrcweir
220cdf0e10cSrcweir        <p> Returns the number of matched code points in any case, even if
221cdf0e10cSrcweir        strings are not equal, for example: <br/>
222cdf0e10cSrcweir        equals( "a", 0, 1, nMatch1, "aaa", 0, 3, nMatch2 ) <br/>
223cdf0e10cSrcweir        returns <FALSE/> and nMatch1:=1 and nMatch2:=1 <br/>
224cdf0e10cSrcweir        equals( "aab", 0, 3, nMatch1, "aaa", 0, 3, nMatch2 ) <br/>
225cdf0e10cSrcweir        returns <FALSE/> and nMatch1:=2 and nMatch2:=2 <br/> </p>
226cdf0e10cSrcweir
227cdf0e10cSrcweir        @param aStr1
228cdf0e10cSrcweir            First string to match.
229cdf0e10cSrcweir
230cdf0e10cSrcweir        @param nPos1
231cdf0e10cSrcweir            Start position within aStr1.
232cdf0e10cSrcweir
233cdf0e10cSrcweir        @param nCount1
234cdf0e10cSrcweir            Number of code points to use of aStr1.
235cdf0e10cSrcweir
236cdf0e10cSrcweir        @param rMatch1
237cdf0e10cSrcweir            Returns number of matched code points in aStr1.
238cdf0e10cSrcweir
239cdf0e10cSrcweir        @param aStr2
240cdf0e10cSrcweir            Second string to match.
241cdf0e10cSrcweir
242cdf0e10cSrcweir        @param nPos2
243cdf0e10cSrcweir            Start position within aStr2.
244cdf0e10cSrcweir
245cdf0e10cSrcweir        @param nCount2
246cdf0e10cSrcweir            Number of code points to use of aStr2.
247cdf0e10cSrcweir
248cdf0e10cSrcweir        @param rMatch2
249cdf0e10cSrcweir            Returns number of matched code points in aStr2.
250cdf0e10cSrcweir
251cdf0e10cSrcweir        @returns
252cdf0e10cSrcweir            <TRUE/> if the substrings are equal per this
253cdf0e10cSrcweir            transliteration <br/>
254cdf0e10cSrcweir            <FALSE/> else.
255cdf0e10cSrcweir     */
256cdf0e10cSrcweir
257*90ed883bSmseidel    boolean equals( [in] string aStr1, [in] long nPos1, [in] long nCount1,
258cdf0e10cSrcweir                    [out] long rMatch1,
259cdf0e10cSrcweir                    [in] string aStr2, [in] long nPos2, [in] long nCount2,
260cdf0e10cSrcweir                    [out] long rMatch2 );
261cdf0e10cSrcweir
262cdf0e10cSrcweir    //------------------------------------------------------------------------
263cdf0e10cSrcweir    /** Transliterate one set of characters to another.
264*90ed883bSmseidel
265cdf0e10cSrcweir        <p> This method is intended for getting corresponding ranges and
266cdf0e10cSrcweir        can be called if the object has <type>TransliterationType</type>
267cdf0e10cSrcweir        IGNORE attribute. </p>
268*90ed883bSmseidel
269cdf0e10cSrcweir        <p> For example: generic IGNORE_CASE transliterateRange( "a", "i" )
270cdf0e10cSrcweir        returns {"A","I","a","i"}, transliterateRange( "a", "a" )
271cdf0e10cSrcweir        returns {"A","A","a","a"}. </p>
272*90ed883bSmseidel
273*90ed883bSmseidel        <p> Use this transliteration to create regular expressions like
274cdf0e10cSrcweir        [a-i] --> [A-Ia-i]. </p>
275*90ed883bSmseidel
276cdf0e10cSrcweir        @returns
277cdf0e10cSrcweir            String sequence containing corresponding transliterated
278cdf0e10cSrcweir            pairs of characters to represent a range.
279cdf0e10cSrcweir     */
280cdf0e10cSrcweir    sequence <string>   transliterateRange( [in] string aStr1, [in] string aStr2 );
281cdf0e10cSrcweir
282cdf0e10cSrcweir    //------------------------------------------------------------------------
283*90ed883bSmseidel    /** Compare 2 substrings as per this transliteration. It transliterates both
284cdf0e10cSrcweir        substrings before comparing them.
285cdf0e10cSrcweir
286cdf0e10cSrcweir        @param aStr1
287cdf0e10cSrcweir            First string.
288cdf0e10cSrcweir
289cdf0e10cSrcweir        @param nOff1
290cdf0e10cSrcweir            Offset (from 0) of the first substring.
291cdf0e10cSrcweir
292cdf0e10cSrcweir        @param nLen1
293cdf0e10cSrcweir            Length (from offset) of the first substring.
294cdf0e10cSrcweir
295cdf0e10cSrcweir        @param aStr2
296cdf0e10cSrcweir            Second string.
297cdf0e10cSrcweir
298cdf0e10cSrcweir        @param nOff2
299cdf0e10cSrcweir            Offset (from 0) of the second substring.
300cdf0e10cSrcweir
301cdf0e10cSrcweir        @param nLen2
302cdf0e10cSrcweir            Length (from offset) of the second substring.
303cdf0e10cSrcweir
304cdf0e10cSrcweir        @returns
305cdf0e10cSrcweir            1 if the first substring is greater than the second substring <br/>
306cdf0e10cSrcweir            0 if the first substring is equal to the second substring <br/>
307cdf0e10cSrcweir            -1 if the first substring is less than the second substring
308cdf0e10cSrcweir    */
309cdf0e10cSrcweir    long    compareSubstring(   [in] string aStr1, [in] long nOff1, [in] long nLen1,
310cdf0e10cSrcweir                                [in] string aStr2, [in] long nOff2, [in] long nLen2 );
311cdf0e10cSrcweir
312cdf0e10cSrcweir    //------------------------------------------------------------------------
313cdf0e10cSrcweir    /** Compare 2 strings as per this transliteration. It transliterates both
314cdf0e10cSrcweir        strings before comparing them.
315cdf0e10cSrcweir
316cdf0e10cSrcweir        @returns
317cdf0e10cSrcweir            1 if the first string is greater than the second string <br/>
318cdf0e10cSrcweir            0 if the first string is equal to the second string <br/>
319cdf0e10cSrcweir            -1 if the first string is less than the second string
320cdf0e10cSrcweir    */
321cdf0e10cSrcweir    long    compareString( [in] string aStr1, [in] string aStr2 );
322cdf0e10cSrcweir
323cdf0e10cSrcweir};
324cdf0e10cSrcweir
325cdf0e10cSrcweir//=============================================================================
326*90ed883bSmseidel
327cdf0e10cSrcweir}; }; }; };
328cdf0e10cSrcweir
329cdf0e10cSrcweir#endif
330