1*75272fefSAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*75272fefSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*75272fefSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*75272fefSAndrew Rist  * distributed with this work for additional information
6*75272fefSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*75272fefSAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*75272fefSAndrew Rist  * "License"); you may not use this file except in compliance
9*75272fefSAndrew Rist  * with the License.  You may obtain a copy of the License at
10*75272fefSAndrew Rist  *
11*75272fefSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*75272fefSAndrew Rist  *
13*75272fefSAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*75272fefSAndrew Rist  * software distributed under the License is distributed on an
15*75272fefSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*75272fefSAndrew Rist  * KIND, either express or implied.  See the License for the
17*75272fefSAndrew Rist  * specific language governing permissions and limitations
18*75272fefSAndrew Rist  * under the License.
19*75272fefSAndrew Rist  *
20*75272fefSAndrew Rist  *************************************************************/
21*75272fefSAndrew Rist 
22*75272fefSAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir #include "i18nutil/casefolding.hxx"
25cdf0e10cSrcweir #include "casefolding_data.h"
26cdf0e10cSrcweir #include "i18nutil/widthfolding.hxx"
27cdf0e10cSrcweir 
28cdf0e10cSrcweir using namespace com::sun::star::lang;
29cdf0e10cSrcweir using namespace com::sun::star::uno;
30cdf0e10cSrcweir 
31cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
32cdf0e10cSrcweir 
33cdf0e10cSrcweir static Mapping mapping_03a3[] = {{0, 1, {0x03c2, 0, 0}},{0, 1, {0x03c3, 0, 0}}};
34cdf0e10cSrcweir static Mapping mapping_0307[] = {{0, 0, {0, 0, 0}},{0, 1, {0x0307, 0, 0}}};
35cdf0e10cSrcweir static Mapping mapping_004a[] = {{0, 2, {0x006a, 0x0307, 0}},{0, 1, {0x006a, 0, 0}}};
36cdf0e10cSrcweir static Mapping mapping_012e[] = {{0, 2, {0x012f, 0x0307, 0}},{0, 1, {0x012f, 0, 0}}};
37cdf0e10cSrcweir static Mapping mapping_00cc[] = {{0, 3, {0x0069, 0x0307, 0x0300}},{0, 1, {0x00ec, 0, 0}}};
38cdf0e10cSrcweir static Mapping mapping_00cd[] = {{0, 3, {0x0069, 0x0307, 0x0301}},{0, 1, {0x00ed, 0, 0}}};
39cdf0e10cSrcweir static Mapping mapping_0128[] = {{0, 3, {0x0069, 0x0307, 0x0303}},{0, 1, {0x0129, 0, 0}}};
40cdf0e10cSrcweir static Mapping mapping_0049[] = {{0, 2, {0x0069, 0x0307, 0}},{0, 1, {0x0131, 0, 0}},{0, 1, {0x0069, 0, 0}}};
41cdf0e10cSrcweir static Mapping mapping_0069[] = {{0, 1, {0x0130, 0, 0}},{0, 1, {0x0049, 0, 0}}};
42cdf0e10cSrcweir static Mapping mapping_0130[] = {{0, 1, {0x0069, 0, 0}},{0, 1, {0x0130, 0, 0}}};
43cdf0e10cSrcweir 
// True when the language of the Locale named aLocale in the expanding scope
// equals the given ASCII language code.
#define langIs(lang) (aLocale.Language.compareToAscii(lang) == 0)

// Simplified test for the "after i" context: only plain 'i' (U+0069) and
// 'j' (U+006A) are recognised; the more complicated cases are not checked.
#define type_i(ch) ((ch) == 0x0069 || (ch) == 0x006a)

// True when the case-mapping tables have a page for ch and the entry for ch
// carries the CasedLetter flag.
#define cased_letter(ch) \
    (CaseMappingIndex[(ch)>>8] >= 0 && \
     (CaseMappingValue[(CaseMappingIndex[(ch)>>8] << 8) + ((ch)&0xff)].type & CasedLetter))

// Lithuanian: an explicit dot above is produced when lowercasing capital I's
// and J's that are followed by one of these combining accents.
#define accent_above(ch) \
    (((ch) >= 0x0300 && (ch) <= 0x0314) || \
     ((ch) >= 0x033D && (ch) <= 0x0344) || \
     (ch) == 0x0346 || \
     ((ch) >= 0x034A && (ch) <= 0x034C))
54cdf0e10cSrcweir 
getConditionalValue(const sal_Unicode * str,sal_Int32 pos,sal_Int32 len,Locale & aLocale,sal_uInt8 nMappingType)55cdf0e10cSrcweir Mapping& casefolding::getConditionalValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale& aLocale, sal_uInt8 nMappingType) throw (RuntimeException)
56cdf0e10cSrcweir {
57cdf0e10cSrcweir         switch(str[pos]) {
58cdf0e10cSrcweir         case 0x03a3:
59cdf0e10cSrcweir             // final_sigma (not followed by cased and preceded by cased character)
60cdf0e10cSrcweir             // DOES NOT check ignorable sequence yet (more complicated implementation).
61cdf0e10cSrcweir             return !(pos < len && cased_letter(str[pos+1])) && (pos > 0 && cased_letter(str[pos-1])) ?
62cdf0e10cSrcweir                 mapping_03a3[0] : mapping_03a3[1];
63cdf0e10cSrcweir         case 0x0307:
64cdf0e10cSrcweir             return (((nMappingType == MappingTypeLowerToUpper && langIs("lt")) ||
65cdf0e10cSrcweir                 (nMappingType == MappingTypeUpperToLower && (langIs("tr") || langIs("az")))) &&
66cdf0e10cSrcweir                 (pos > 0 && type_i(str[pos-1]))) ?      // after_i
67cdf0e10cSrcweir                     mapping_0307[0] : mapping_0307[1];
68cdf0e10cSrcweir         case 0x0130:
69cdf0e10cSrcweir             return (langIs("tr") || langIs("az")) ? mapping_0130[0] : mapping_0130[1];
70cdf0e10cSrcweir         case 0x0069:
71cdf0e10cSrcweir             return (langIs("tr") || langIs("az")) ? mapping_0069[0] : mapping_0069[1];
72cdf0e10cSrcweir         case 0x0049: return langIs("lt") && pos > len && accent_above(str[pos+1]) ? mapping_0049[0] :
73cdf0e10cSrcweir                     (langIs("tr") || langIs("az")) ? mapping_0049[1] : mapping_0049[2];
74cdf0e10cSrcweir         case 0x004a: return langIs("lt") && pos > len && accent_above(str[pos+1]) ? mapping_004a[0] : mapping_004a[1];
75cdf0e10cSrcweir         case 0x012e: return langIs("lt") && pos > len && accent_above(str[pos+1]) ? mapping_012e[0] : mapping_012e[1];
76cdf0e10cSrcweir         case 0x00cc: return langIs("lt") ? mapping_00cc[0] : mapping_00cc[1];
77cdf0e10cSrcweir         case 0x00cd: return langIs("lt") ? mapping_00cd[0] : mapping_00cd[1];
78cdf0e10cSrcweir         case 0x0128: return langIs("lt") ? mapping_0128[0] : mapping_0128[1];
79cdf0e10cSrcweir         }
80cdf0e10cSrcweir         // Should not come here
81cdf0e10cSrcweir         throw RuntimeException();
82cdf0e10cSrcweir }
83cdf0e10cSrcweir 
getValue(const sal_Unicode * str,sal_Int32 pos,sal_Int32 len,Locale & aLocale,sal_uInt8 nMappingType)84cdf0e10cSrcweir Mapping& casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale& aLocale, sal_uInt8 nMappingType) throw (RuntimeException)
85cdf0e10cSrcweir {
86cdf0e10cSrcweir         static Mapping dummy = { 0, 1, { 0, 0, 0 } };
87cdf0e10cSrcweir         sal_Int16 address = CaseMappingIndex[str[pos] >> 8] << 8;
88cdf0e10cSrcweir 
89cdf0e10cSrcweir         dummy.map[0] = str[pos];
90cdf0e10cSrcweir 
91cdf0e10cSrcweir         if (address >= 0 && (CaseMappingValue[address += (str[pos] & 0xFF)].type & nMappingType)) {
92cdf0e10cSrcweir             sal_uInt8 type = CaseMappingValue[address].type;
93cdf0e10cSrcweir             if (type & ValueTypeNotValue) {
94cdf0e10cSrcweir                 if (CaseMappingValue[address].value == 0)
95cdf0e10cSrcweir                     return getConditionalValue(str, pos, len, aLocale, nMappingType);
96cdf0e10cSrcweir                 else {
97cdf0e10cSrcweir                     for (int map = CaseMappingValue[address].value;
98cdf0e10cSrcweir                             map < CaseMappingValue[address].value + MaxCaseMappingExtras; map++) {
99cdf0e10cSrcweir                         if (CaseMappingExtra[map].type & nMappingType) {
100cdf0e10cSrcweir                             if (CaseMappingExtra[map].type & ValueTypeNotValue)
101cdf0e10cSrcweir                                 return getConditionalValue(str, pos, len, aLocale, nMappingType);
102cdf0e10cSrcweir                             else
103cdf0e10cSrcweir                                 return CaseMappingExtra[map];
104cdf0e10cSrcweir                         }
105cdf0e10cSrcweir                     }
106cdf0e10cSrcweir                     // Should not come here
107cdf0e10cSrcweir                     throw RuntimeException();
108cdf0e10cSrcweir                 }
109cdf0e10cSrcweir             } else
110cdf0e10cSrcweir                 dummy.map[0] = CaseMappingValue[address].value;
111cdf0e10cSrcweir         }
112cdf0e10cSrcweir         return dummy;
113cdf0e10cSrcweir }
114cdf0e10cSrcweir 
115cdf0e10cSrcweir inline sal_Bool SAL_CALL
is_ja_voice_sound_mark(sal_Unicode & current,sal_Unicode next)116cdf0e10cSrcweir is_ja_voice_sound_mark(sal_Unicode& current, sal_Unicode next)
117cdf0e10cSrcweir {
118cdf0e10cSrcweir         sal_Unicode c = 0;
119cdf0e10cSrcweir 
120cdf0e10cSrcweir         if ((next == 0x3099 || next == 0x309a) && ( (c = widthfolding::getCompositionChar(current, next)) != 0 ))
121cdf0e10cSrcweir             current = c;
122cdf0e10cSrcweir         return c != 0;
123cdf0e10cSrcweir }
124cdf0e10cSrcweir 
// Deliver the next folded character of str and advance idx past the source
// characters that were consumed.
//
//   e             carries the current case mapping between calls so that a
//                 multi-character mapping is handed out one unit per call
//   moduleLoaded  bit set selecting the IGNORE_CASE / IGNORE_KANA /
//                 IGNORE_WIDTH folding steps, applied in that order
//
// Returns 0 and resets e once idx has reached len.
sal_Unicode casefolding::getNextChar(const sal_Unicode *str, sal_Int32& idx, sal_Int32 len, MappingElement& e, Locale& aLocale, sal_uInt8 nMappingType, TransliterationModules moduleLoaded) throw (RuntimeException)
{
        // NOTE(review): this return is taken even while e still holds
        // undelivered units of a multi-character mapping - confirm callers
        // do not expect to receive them.
        if( idx >= len )
        {
            e = MappingElement();
            return 0;
        }

        sal_Unicode ch;

        if (!(moduleLoaded & TransliterationModules_IGNORE_CASE)) {
            ch = str[idx++];
        } else {
            // Fetch a fresh mapping only after the previous one is used up.
            if( e.current >= e.element.nmap ) {
                const sal_Int32 at = idx++;
                e.element = getValue(str, at, len, aLocale, nMappingType);
                e.current = 0;
            }
            ch = e.element.map[e.current];
            e.current++;
        }

        if (moduleLoaded & TransliterationModules_IGNORE_KANA) {
            // Shift the two ranges U+3040..U+3094 and U+309D..U+309F up by 0x60.
            const bool lowerRange = 0x3040 <= ch && ch <= 0x3094;
            const bool upperRange = 0x309d <= ch && ch <= 0x309f;
            if (lowerRange || upperRange)
                ch += 0x60;
        }

        // composition: KA + voice-mark --> GA. see halfwidthToFullwidth.cxx for detail
        if (moduleLoaded & TransliterationModules_IGNORE_WIDTH) {
            static oneToOneMapping& half2fullTable = widthfolding::gethalf2fullTable();
            ch = half2fullTable[ch];
            const bool inKanaBlock = 0x3040 <= ch && ch <= 0x30ff;
            if (inKanaBlock && idx < len) {
                // A following sound mark is merged into ch and consumed.
                if (is_ja_voice_sound_mark(ch, half2fullTable[str[idx]]))
                    idx++;
            }
        }

        return ch;
}
161cdf0e10cSrcweir 
162cdf0e10cSrcweir } } } }
163cdf0e10cSrcweir 
164