1*d1766043SAndrew Rist/**************************************************************
2cdf0e10cSrcweir *
3*d1766043SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*d1766043SAndrew Rist * or more contributor license agreements.  See the NOTICE file
5*d1766043SAndrew Rist * distributed with this work for additional information
6*d1766043SAndrew Rist * regarding copyright ownership.  The ASF licenses this file
7*d1766043SAndrew Rist * to you under the Apache License, Version 2.0 (the
8*d1766043SAndrew Rist * "License"); you may not use this file except in compliance
9*d1766043SAndrew Rist * with the License.  You may obtain a copy of the License at
10*d1766043SAndrew Rist *
11*d1766043SAndrew Rist *   http://www.apache.org/licenses/LICENSE-2.0
12*d1766043SAndrew Rist *
13*d1766043SAndrew Rist * Unless required by applicable law or agreed to in writing,
14*d1766043SAndrew Rist * software distributed under the License is distributed on an
15*d1766043SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*d1766043SAndrew Rist * KIND, either express or implied.  See the License for the
17*d1766043SAndrew Rist * specific language governing permissions and limitations
18*d1766043SAndrew Rist * under the License.
19*d1766043SAndrew Rist *
20*d1766043SAndrew Rist *************************************************************/
21*d1766043SAndrew Rist
22*d1766043SAndrew Rist
23cdf0e10cSrcweir#ifndef __com_sun_star_i18n_XTransliteration_idl__
24cdf0e10cSrcweir#define __com_sun_star_i18n_XTransliteration_idl__
25cdf0e10cSrcweir
26cdf0e10cSrcweir#include <com/sun/star/lang/Locale.idl>
27cdf0e10cSrcweir#include <com/sun/star/uno/XInterface.idl>
28cdf0e10cSrcweir#include <com/sun/star/i18n/TransliterationModules.idl>
29cdf0e10cSrcweir#include <com/sun/star/i18n/TransliterationModulesNew.idl>
30cdf0e10cSrcweir
31cdf0e10cSrcweir//=============================================================================
32cdf0e10cSrcweir
33cdf0e10cSrcweirmodule com { module sun { module star { module i18n {
34cdf0e10cSrcweir
35cdf0e10cSrcweir//=============================================================================
36cdf0e10cSrcweir
37cdf0e10cSrcweir/**
38cdf0e10cSrcweir    Character conversions like case folding or Hiragana to Katakana.
39cdf0e10cSrcweir
40cdf0e10cSrcweir    <p> Transliteration is a character to character conversion but it is
41cdf0e10cSrcweir    not always a one to one mapping between characters. Transliteration
42cdf0e10cSrcweir    modules are primarily used by collation, and search and replace
43cdf0e10cSrcweir    modules to perform approximate search. It can also be used to format
44cdf0e10cSrcweir    the numbers in different numbering systems. </p>
45cdf0e10cSrcweir
46cdf0e10cSrcweir    <p> In order to select transliteration modules for different
47cdf0e10cSrcweir    purposes, they are classified with attributes of
48cdf0e10cSrcweir    <type>TransliterationType</type>. </p>
49cdf0e10cSrcweir
50cdf0e10cSrcweir    <p> For Western languages there would be three transliteration
51cdf0e10cSrcweir    modules available to compare two mixed case strings: upper to lower,
52cdf0e10cSrcweir    lower to upper, and ignore case. </p>
53cdf0e10cSrcweir
54cdf0e10cSrcweir    <p> A typical calling sequence of transliteration is
55cdf0e10cSrcweir        <ol>
56cdf0e10cSrcweir            <li> getAvailableModules() </li>
57cdf0e10cSrcweir            <li> loadModulesByImplNames() </li>
58cdf0e10cSrcweir            <li> equals() </li>
59cdf0e10cSrcweir        </ol>
60cdf0e10cSrcweir    or another one is
61cdf0e10cSrcweir        <ol>
62cdf0e10cSrcweir            <li> loadModule() </li>
63cdf0e10cSrcweir            <li> transliterate() </li>
64cdf0e10cSrcweir        </ol>
65cdf0e10cSrcweir    </p>
66cdf0e10cSrcweir
67cdf0e10cSrcweir*/
68cdf0e10cSrcweir
69cdf0e10cSrcweir/* comment:
70cdf0e10cSrcweir * 0.
71cdf0e10cSrcweir * All the IGNORE-type functionalities (Range, equals) are based on mapping,
72cdf0e10cSrcweir * except the equals() method in IGNORE_CASE, which is based on locale-independent
73cdf0e10cSrcweir * case folding.
74cdf0e10cSrcweir * (This second assumption is very complicated and may cause confusion in use.)
75cdf0e10cSrcweir *
76cdf0e10cSrcweir * 1.
77cdf0e10cSrcweir * We are assuming Upper to Lower mapping as one of transliteration.
78cdf0e10cSrcweir * The mapping depends on Locale.
79cdf0e10cSrcweir * Upper <-> Lower methods are just wrappers to provide equals() and Range()
80cdf0e10cSrcweir *
81cdf0e10cSrcweir * 2.
82cdf0e10cSrcweir * equals() in IGNORE_CASE module is locale-independent and
83cdf0e10cSrcweir * we don't provide locale-sensitive ones.
84cdf0e10cSrcweir * The reason we provided locale-independent ones is that IGNORE_CASE is mainly
85cdf0e10cSrcweir * dedicated to StarOffice internal code.
86cdf0e10cSrcweir *
87cdf0e10cSrcweir * 3.
88cdf0e10cSrcweir * TransliterationModules is used just for convenience without calling
89cdf0e10cSrcweir * getAvailableModules().
90cdf0e10cSrcweir *
91cdf0e10cSrcweir * 4.
92cdf0e10cSrcweir * Implementation name in the methods below is not the same as
93cdf0e10cSrcweir * the true implementation name registered.
94cdf0e10cSrcweir * In particular, for generic modules: "UPPERCASE_LOWERCASE",
95cdf0e10cSrcweir * "LOWERCASE_UPPERCASE", "IGNORE_CASE", there is no registered name.
96cdf0e10cSrcweir */
97cdf0e10cSrcweir
98cdf0e10cSrcweir
99cdf0e10cSrcweirpublished interface XTransliteration: com::sun::star::uno::XInterface
100cdf0e10cSrcweir{
101cdf0e10cSrcweir
102cdf0e10cSrcweir    //------------------------------------------------------------------------
103cdf0e10cSrcweir    /** Unique ASCII name to identify a module. This name is used
104cdf0e10cSrcweir        to get its localized name for menus, dialogs etc. The behavior
105cdf0e10cSrcweir        is undefined for <const>TransliterationType::CASCADE</const>
106cdf0e10cSrcweir        modules.
107cdf0e10cSrcweir     */
108cdf0e10cSrcweir    string  getName();
109cdf0e10cSrcweir
110cdf0e10cSrcweir    //------------------------------------------------------------------------
111cdf0e10cSrcweir    /** Return the attribute(s) associated with this transliterator
112cdf0e10cSrcweir        object, as defined in <type>TransliterationType</type>. The
113cdf0e10cSrcweir        value is determined by the transliteration modules. For example,
114cdf0e10cSrcweir        for UPPERCASE_LOWERCASE, a ONE_TO_ONE is returned, for
115cdf0e10cSrcweir        IGNORE_CASE, IGNORE is returned.
116cdf0e10cSrcweir     */
117cdf0e10cSrcweir    short   getType();
118cdf0e10cSrcweir
119cdf0e10cSrcweir    //------------------------------------------------------------------------
120cdf0e10cSrcweir    /** Load instance of predefined module - old style method.
121cdf0e10cSrcweir     */
122cdf0e10cSrcweir    void    loadModule( [in] TransliterationModules eModType,
123cdf0e10cSrcweir                        [in] ::com::sun::star::lang::Locale aLocale );
124cdf0e10cSrcweir
125cdf0e10cSrcweir    //------------------------------------------------------------------------
126cdf0e10cSrcweir    /** Load a sequence of instances of predefined modules - supersedes
127cdf0e10cSrcweir        method <member>XTransliteration::loadModule()</member>.
128cdf0e10cSrcweir     */
129cdf0e10cSrcweir    void    loadModuleNew(  [in] sequence <TransliterationModulesNew> aModType,
130cdf0e10cSrcweir                            [in] ::com::sun::star::lang::Locale aLocale );
131cdf0e10cSrcweir
132cdf0e10cSrcweir    //------------------------------------------------------------------------
133cdf0e10cSrcweir    /** Load instance of UNO registered module.
134cdf0e10cSrcweir
135cdf0e10cSrcweir        <p> Each transliteration module is registered under a different
136cdf0e10cSrcweir        service name. The convention for the service name is
137cdf0e10cSrcweir        com.sun.star.i18n.Transliteration.l10n.{implName}. The
138cdf0e10cSrcweir        {implName} is a unique name used to identify a module. The
139cdf0e10cSrcweir        implName is used to get a localized name for the transliteration
140cdf0e10cSrcweir        module. The implName is used in locale data to list the
141cdf0e10cSrcweir        available transliteration modules for the locale. There are some
142cdf0e10cSrcweir        transliteration modules that are always available. The names of
143cdf0e10cSrcweir        those modules are listed as enum
144cdf0e10cSrcweir        <type>TransliterationModules</type> names. For modules not
145cdf0e10cSrcweir        listed there it is possible to load them directly by their
146cdf0e10cSrcweir        implName.
147cdf0e10cSrcweir
148cdf0e10cSrcweir        @param aImplName
149cdf0e10cSrcweir            The module's {implName} under which it is registered with
150cdf0e10cSrcweir            com.sun.star.i18n.Transliteration.l10n.{implName}.
151cdf0e10cSrcweir     */
152cdf0e10cSrcweir    void    loadModuleByImplName(   [in] string aImplName,
153cdf0e10cSrcweir                                    [in] ::com::sun::star::lang::Locale aLocale );
154cdf0e10cSrcweir
155cdf0e10cSrcweir    //------------------------------------------------------------------------
156cdf0e10cSrcweir    /** Load a sequence of instances of transliteration modules.
157cdf0e10cSrcweir        Output of one module is fed as input to the next module in
158cdf0e10cSrcweir        the sequence. The object created by this call has
159cdf0e10cSrcweir        <type>TransliterationType</type> CASCADE and IGNORE types.
160cdf0e10cSrcweir
161cdf0e10cSrcweir        @param aImplNameList
162cdf0e10cSrcweir            Only IGNORE type modules can be specified.
163cdf0e10cSrcweir    */
164cdf0e10cSrcweir    void    loadModulesByImplNames( [in] sequence <string> aImplNameList,
165cdf0e10cSrcweir                                    [in] ::com::sun::star::lang::Locale aLocale );
166cdf0e10cSrcweir
167cdf0e10cSrcweir    //------------------------------------------------------------------------
168cdf0e10cSrcweir    /** List the available transliteration modules for a given locale.
169cdf0e10cSrcweir        It can be filtered based on its type.
170cdf0e10cSrcweir
171cdf0e10cSrcweir        @param nType
172cdf0e10cSrcweir            A bitmask field of values defined in
173cdf0e10cSrcweir            <type>TransliterationType</type>
174cdf0e10cSrcweir    */
175cdf0e10cSrcweir    sequence<string>    getAvailableModules(
176cdf0e10cSrcweir                            [in] ::com::sun::star::lang::Locale aLocale,
177cdf0e10cSrcweir                            [in] short nType );
178cdf0e10cSrcweir
179cdf0e10cSrcweir
180cdf0e10cSrcweir    //------------------------------------------------------------------------
181cdf0e10cSrcweir    /** Transliterate a substring. This method can be called if the
182cdf0e10cSrcweir        object doesn't have <type>TransliterationType</type> IGNORE
183cdf0e10cSrcweir        attribute.
184cdf0e10cSrcweir
185cdf0e10cSrcweir        @param aInStr
186cdf0e10cSrcweir            The input string.
187cdf0e10cSrcweir
188cdf0e10cSrcweir        @param nStartPos
189cdf0e10cSrcweir            Start position within aInStr from where transliteration starts.
190cdf0e10cSrcweir
191cdf0e10cSrcweir        @param nCount
192cdf0e10cSrcweir            Number of code points to be transliterated.
193cdf0e10cSrcweir
194cdf0e10cSrcweir        @param rOffset
195cdf0e10cSrcweir            To find the grapheme of input string corresponding to the
196cdf0e10cSrcweir            grapheme of output string, rOffset provides the offset array
197cdf0e10cSrcweir            whose index is the offset of output string, the element
198cdf0e10cSrcweir            containing the position within the input string before
199cdf0e10cSrcweir            transliteration.
200cdf0e10cSrcweir     */
201cdf0e10cSrcweir    string  transliterate(  [in] string aInStr, [in] long nStartPos,
202cdf0e10cSrcweir                            [in] long nCount, [out] sequence <long> rOffset );
203cdf0e10cSrcweir
204cdf0e10cSrcweir    //------------------------------------------------------------------------
205cdf0e10cSrcweir    /** @deprecated
206cdf0e10cSrcweir        For internal use, this method is supported to get the
207cdf0e10cSrcweir        "transliteration", which equals() is based on.
208cdf0e10cSrcweir     */
209cdf0e10cSrcweir    string  folding(    [in] string aInStr, [in] long nStartPos,
210cdf0e10cSrcweir                        [in] long nCount, [out] sequence <long> rOffset );
211cdf0e10cSrcweir
212cdf0e10cSrcweir    //------------------------------------------------------------------------
213cdf0e10cSrcweir    /** Match two substrings and find if they are equivalent as per this
214cdf0e10cSrcweir        transliteration.
215cdf0e10cSrcweir
216cdf0e10cSrcweir        <p> This method can be called if the object has
217cdf0e10cSrcweir        <type>TransliterationType</type> IGNORE attribute. </p>
218cdf0e10cSrcweir
219cdf0e10cSrcweir        <p> Returns the number of matched code points in any case, even if
220cdf0e10cSrcweir        strings are not equal, for example: <br/>
221cdf0e10cSrcweir        equals( "a", 0, 1, nMatch1, "aaa", 0, 3, nMatch2 ) <br/>
222cdf0e10cSrcweir        returns <FALSE/> and nMatch1:=1 and nMatch2:=1 <br/>
223cdf0e10cSrcweir        equals( "aab", 0, 3, nMatch1, "aaa", 0, 3, nMatch2 ) <br/>
224cdf0e10cSrcweir        returns <FALSE/> and nMatch1:=2 and nMatch2:=2 <br/> </p>
225cdf0e10cSrcweir
226cdf0e10cSrcweir        @param aStr1
227cdf0e10cSrcweir            First string to match.
228cdf0e10cSrcweir
229cdf0e10cSrcweir        @param nPos1
230cdf0e10cSrcweir            Start position within aStr1.
231cdf0e10cSrcweir
232cdf0e10cSrcweir        @param nCount1
233cdf0e10cSrcweir            Number of code points to use of aStr1.
234cdf0e10cSrcweir
235cdf0e10cSrcweir        @param rMatch1
236cdf0e10cSrcweir            Returns number of matched code points in aStr1.
237cdf0e10cSrcweir
238cdf0e10cSrcweir        @param aStr2
239cdf0e10cSrcweir            Second string to match.
240cdf0e10cSrcweir
241cdf0e10cSrcweir        @param nPos2
242cdf0e10cSrcweir            Start position within aStr2.
243cdf0e10cSrcweir
244cdf0e10cSrcweir        @param nCount2
245cdf0e10cSrcweir            Number of code points to use of aStr2.
246cdf0e10cSrcweir
247cdf0e10cSrcweir        @param rMatch2
248cdf0e10cSrcweir            Returns number of matched code points in aStr2.
249cdf0e10cSrcweir
250cdf0e10cSrcweir        @returns
251cdf0e10cSrcweir            <TRUE/> if the substrings are equal per this
252cdf0e10cSrcweir            transliteration <br/>
253cdf0e10cSrcweir            <FALSE/> else.
254cdf0e10cSrcweir     */
255cdf0e10cSrcweir
256cdf0e10cSrcweir    boolean equals( [in] string aStr1, [in] long nPos1, [in] long nCount1,
257cdf0e10cSrcweir                    [out] long rMatch1,
258cdf0e10cSrcweir                    [in] string aStr2, [in] long nPos2, [in] long nCount2,
259cdf0e10cSrcweir                    [out] long rMatch2 );
260cdf0e10cSrcweir
261cdf0e10cSrcweir    //------------------------------------------------------------------------
262cdf0e10cSrcweir    /** Transliterate one set of characters to another.
263cdf0e10cSrcweir
264cdf0e10cSrcweir        <p> This method is intended for getting corresponding ranges and
265cdf0e10cSrcweir        can be called if the object has <type>TransliterationType</type>
266cdf0e10cSrcweir        IGNORE attribute. </p>
267cdf0e10cSrcweir
268cdf0e10cSrcweir        <p> For example: generic IGNORE_CASE transliterateRange( "a", "i" )
269cdf0e10cSrcweir        returns {"A","I","a","i"}, transliterateRange( "a", "a" )
270cdf0e10cSrcweir        returns {"A","A","a","a"}. </p>
271cdf0e10cSrcweir
272cdf0e10cSrcweir        <p> Use this transliteration to create regular expressions like
273cdf0e10cSrcweir        [a-i] --> [A-Ia-i]. </p>
274cdf0e10cSrcweir
275cdf0e10cSrcweir        @returns
276cdf0e10cSrcweir            String sequence containing corresponding transliterated
277cdf0e10cSrcweir            pairs of characters to represent a range.
278cdf0e10cSrcweir     */
279cdf0e10cSrcweir    sequence <string>   transliterateRange( [in] string aStr1, [in] string aStr2 );
280cdf0e10cSrcweir
281cdf0e10cSrcweir    //------------------------------------------------------------------------
282cdf0e10cSrcweir    /** Compare 2 substrings as per this transliteration. It transliterates
283cdf0e10cSrcweir        both substrings before comparing them.
284cdf0e10cSrcweir
285cdf0e10cSrcweir        @param aStr1
286cdf0e10cSrcweir            First string.
287cdf0e10cSrcweir
288cdf0e10cSrcweir        @param nOff1
289cdf0e10cSrcweir            Offset (from 0) of the first substring.
290cdf0e10cSrcweir
291cdf0e10cSrcweir        @param nLen1
292cdf0e10cSrcweir            Length (from offset) of the first substring.
293cdf0e10cSrcweir
294cdf0e10cSrcweir        @param aStr2
295cdf0e10cSrcweir            Second string.
296cdf0e10cSrcweir
297cdf0e10cSrcweir        @param nOff2
298cdf0e10cSrcweir            Offset (from 0) of the second substring.
299cdf0e10cSrcweir
300cdf0e10cSrcweir        @param nLen2
301cdf0e10cSrcweir            Length (from offset) of the second substring.
302cdf0e10cSrcweir
303cdf0e10cSrcweir        @returns
304cdf0e10cSrcweir            1 if the first substring is greater than the second substring <br/>
305cdf0e10cSrcweir            0 if the first substring is equal to the second substring <br/>
306cdf0e10cSrcweir            -1 if the first substring is less than the second substring
307cdf0e10cSrcweir    */
308cdf0e10cSrcweir    long    compareSubstring(   [in] string aStr1, [in] long nOff1, [in] long nLen1,
309cdf0e10cSrcweir                                [in] string aStr2, [in] long nOff2, [in] long nLen2 );
310cdf0e10cSrcweir
311cdf0e10cSrcweir    //------------------------------------------------------------------------
312cdf0e10cSrcweir    /** Compare 2 strings as per this transliteration. It transliterates
313cdf0e10cSrcweir        both strings before comparing them.
314cdf0e10cSrcweir
315cdf0e10cSrcweir        @returns
316cdf0e10cSrcweir            1 if the first string is greater than the second string <br/>
317cdf0e10cSrcweir            0 if the first string is equal to the second string <br/>
318cdf0e10cSrcweir            -1 if the first string is less than the second string
319cdf0e10cSrcweir    */
320cdf0e10cSrcweir    long    compareString( [in] string aStr1, [in] string aStr2 );
321cdf0e10cSrcweir
322cdf0e10cSrcweir};
323cdf0e10cSrcweir
324cdf0e10cSrcweir//=============================================================================
325cdf0e10cSrcweir}; }; }; };
326cdf0e10cSrcweir
327cdf0e10cSrcweir#endif
328