1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 #include "i18nutil/casefolding.hxx"
29 #include "casefolding_data.h"
30 #include "i18nutil/widthfolding.hxx"
31 
32 using namespace com::sun::star::lang;
33 using namespace com::sun::star::uno;
34 
35 namespace com { namespace sun { namespace star { namespace i18n {
36 
37 static Mapping mapping_03a3[] = {{0, 1, {0x03c2, 0, 0}},{0, 1, {0x03c3, 0, 0}}};
38 static Mapping mapping_0307[] = {{0, 0, {0, 0, 0}},{0, 1, {0x0307, 0, 0}}};
39 static Mapping mapping_004a[] = {{0, 2, {0x006a, 0x0307, 0}},{0, 1, {0x006a, 0, 0}}};
40 static Mapping mapping_012e[] = {{0, 2, {0x012f, 0x0307, 0}},{0, 1, {0x012f, 0, 0}}};
41 static Mapping mapping_00cc[] = {{0, 3, {0x0069, 0x0307, 0x0300}},{0, 1, {0x00ec, 0, 0}}};
42 static Mapping mapping_00cd[] = {{0, 3, {0x0069, 0x0307, 0x0301}},{0, 1, {0x00ed, 0, 0}}};
43 static Mapping mapping_0128[] = {{0, 3, {0x0069, 0x0307, 0x0303}},{0, 1, {0x0129, 0, 0}}};
44 static Mapping mapping_0049[] = {{0, 2, {0x0069, 0x0307, 0}},{0, 1, {0x0131, 0, 0}},{0, 1, {0x0069, 0, 0}}};
45 static Mapping mapping_0069[] = {{0, 1, {0x0130, 0, 0}},{0, 1, {0x0049, 0, 0}}};
46 static Mapping mapping_0130[] = {{0, 1, {0x0069, 0, 0}},{0, 1, {0x0130, 0, 0}}};
47 
// True when the language code of the Locale variable named `aLocale` at the
// point of use equals the given ASCII string.
#define langIs(lang) \
    (0 == aLocale.Language.compareToAscii(lang))

// Simplified test for an "i-like" base letter (small i or small j).
// Only this simple case is checked; more complicated cases are not handled yet.
#define type_i(ch) \
    (0x0069 == (ch) || 0x006a == (ch))

// True when the case mapping tables flag ch as a cased letter.  The page index
// is tested for >= 0 before it is used to address CaseMappingValue.
#define cased_letter(ch) \
    (CaseMappingIndex[(ch) >> 8] >= 0 && \
     (CaseMappingValue[(CaseMappingIndex[(ch) >> 8] << 8) + ((ch) & 0xff)].type & CasedLetter))

// Lithuanian: combining marks regarded as "accent above".  When a capital I or J
// is followed by one of them, lowercasing has to make the dot above explicit.
#define accent_above(ch) \
    ((0x0300 <= (ch) && (ch) <= 0x0314) || \
     (0x033D <= (ch) && (ch) <= 0x0344) || \
     0x0346 == (ch) || \
     (0x034A <= (ch) && (ch) <= 0x034C))
58 
59 Mapping& casefolding::getConditionalValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale& aLocale, sal_uInt8 nMappingType) throw (RuntimeException)
60 {
61         switch(str[pos]) {
62         case 0x03a3:
63             // final_sigma (not followed by cased and preceded by cased character)
64             // DOES NOT check ignorable sequence yet (more complicated implementation).
65             return !(pos < len && cased_letter(str[pos+1])) && (pos > 0 && cased_letter(str[pos-1])) ?
66                 mapping_03a3[0] : mapping_03a3[1];
67         case 0x0307:
68             return (((nMappingType == MappingTypeLowerToUpper && langIs("lt")) ||
69                 (nMappingType == MappingTypeUpperToLower && (langIs("tr") || langIs("az")))) &&
70                 (pos > 0 && type_i(str[pos-1]))) ?      // after_i
71                     mapping_0307[0] : mapping_0307[1];
72         case 0x0130:
73             return (langIs("tr") || langIs("az")) ? mapping_0130[0] : mapping_0130[1];
74         case 0x0069:
75             return (langIs("tr") || langIs("az")) ? mapping_0069[0] : mapping_0069[1];
76         case 0x0049: return langIs("lt") && pos > len && accent_above(str[pos+1]) ? mapping_0049[0] :
77                     (langIs("tr") || langIs("az")) ? mapping_0049[1] : mapping_0049[2];
78         case 0x004a: return langIs("lt") && pos > len && accent_above(str[pos+1]) ? mapping_004a[0] : mapping_004a[1];
79         case 0x012e: return langIs("lt") && pos > len && accent_above(str[pos+1]) ? mapping_012e[0] : mapping_012e[1];
80         case 0x00cc: return langIs("lt") ? mapping_00cc[0] : mapping_00cc[1];
81         case 0x00cd: return langIs("lt") ? mapping_00cd[0] : mapping_00cd[1];
82         case 0x0128: return langIs("lt") ? mapping_0128[0] : mapping_0128[1];
83         }
84         // Should not come here
85         throw RuntimeException();
86 }
87 
88 Mapping& casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale& aLocale, sal_uInt8 nMappingType) throw (RuntimeException)
89 {
90         static Mapping dummy = { 0, 1, { 0, 0, 0 } };
91         sal_Int16 address = CaseMappingIndex[str[pos] >> 8] << 8;
92 
93         dummy.map[0] = str[pos];
94 
95         if (address >= 0 && (CaseMappingValue[address += (str[pos] & 0xFF)].type & nMappingType)) {
96             sal_uInt8 type = CaseMappingValue[address].type;
97             if (type & ValueTypeNotValue) {
98                 if (CaseMappingValue[address].value == 0)
99                     return getConditionalValue(str, pos, len, aLocale, nMappingType);
100                 else {
101                     for (int map = CaseMappingValue[address].value;
102                             map < CaseMappingValue[address].value + MaxCaseMappingExtras; map++) {
103                         if (CaseMappingExtra[map].type & nMappingType) {
104                             if (CaseMappingExtra[map].type & ValueTypeNotValue)
105                                 return getConditionalValue(str, pos, len, aLocale, nMappingType);
106                             else
107                                 return CaseMappingExtra[map];
108                         }
109                     }
110                     // Should not come here
111                     throw RuntimeException();
112                 }
113             } else
114                 dummy.map[0] = CaseMappingValue[address].value;
115         }
116         return dummy;
117 }
118 
119 inline sal_Bool SAL_CALL
120 is_ja_voice_sound_mark(sal_Unicode& current, sal_Unicode next)
121 {
122         sal_Unicode c = 0;
123 
124         if ((next == 0x3099 || next == 0x309a) && ( (c = widthfolding::getCompositionChar(current, next)) != 0 ))
125             current = c;
126         return c != 0;
127 }
128 
129 sal_Unicode casefolding::getNextChar(const sal_Unicode *str, sal_Int32& idx, sal_Int32 len, MappingElement& e, Locale& aLocale, sal_uInt8 nMappingType, TransliterationModules moduleLoaded) throw (RuntimeException)
130 {
131         if( idx >= len )
132         {
133             e = MappingElement();
134             return 0;
135         }
136 
137         sal_Unicode c;
138 
139         if (moduleLoaded & TransliterationModules_IGNORE_CASE) {
140             if( e.current >= e.element.nmap ) {
141                 e.element = getValue(str, idx++, len, aLocale, nMappingType);
142                 e.current = 0;
143             }
144             c = e.element.map[e.current++];
145         } else {
146             c = *(str + idx++);
147         }
148 
149         if (moduleLoaded & TransliterationModules_IGNORE_KANA) {
150             if ((0x3040 <= c && c <= 0x3094) || (0x309d <= c && c <= 0x309f))
151                 c += 0x60;
152         }
153 
154         // composition: KA + voice-mark --> GA. see halfwidthToFullwidth.cxx for detail
155         if (moduleLoaded & TransliterationModules_IGNORE_WIDTH) {
156             static oneToOneMapping& half2fullTable = widthfolding::gethalf2fullTable();
157             c = half2fullTable[c];
158             if (0x3040 <= c && c <= 0x30ff && idx < len &&
159                     is_ja_voice_sound_mark(c, half2fullTable[*(str + idx)]))
160                 idx++;
161         }
162 
163         return c;
164 }
165 
166 } } } }
167 
168