1*75272fefSAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*75272fefSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*75272fefSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*75272fefSAndrew Rist  * distributed with this work for additional information
6*75272fefSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*75272fefSAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*75272fefSAndrew Rist  * "License"); you may not use this file except in compliance
9*75272fefSAndrew Rist  * with the License.  You may obtain a copy of the License at
10*75272fefSAndrew Rist  *
11*75272fefSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*75272fefSAndrew Rist  *
13*75272fefSAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*75272fefSAndrew Rist  * software distributed under the License is distributed on an
15*75272fefSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*75272fefSAndrew Rist  * KIND, either express or implied.  See the License for the
17*75272fefSAndrew Rist  * specific language governing permissions and limitations
18*75272fefSAndrew Rist  * under the License.
19*75272fefSAndrew Rist  *
20*75272fefSAndrew Rist  *************************************************************/
21*75272fefSAndrew Rist 
22*75272fefSAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir #include <com/sun/star/i18n/UnicodeType.hpp>
25cdf0e10cSrcweir #include <com/sun/star/i18n/KCharacterType.hpp>
26cdf0e10cSrcweir #include <i18nutil/unicode.hxx>
27cdf0e10cSrcweir #include "unicode_data.h"
28cdf0e10cSrcweir 
29cdf0e10cSrcweir using namespace ::com::sun::star::i18n;
30cdf0e10cSrcweir 
31cdf0e10cSrcweir static ScriptTypeList defaultTypeList[] = {
32cdf0e10cSrcweir     { UnicodeScript_kBasicLatin,
33cdf0e10cSrcweir       UnicodeScript_kBasicLatin,
34cdf0e10cSrcweir       UnicodeScript_kBasicLatin },      // 0,
35cdf0e10cSrcweir     { UnicodeScript_kLatin1Supplement,
36cdf0e10cSrcweir       UnicodeScript_kLatin1Supplement,
37cdf0e10cSrcweir       UnicodeScript_kLatin1Supplement },// 1,
38cdf0e10cSrcweir     { UnicodeScript_kLatinExtendedA,
39cdf0e10cSrcweir       UnicodeScript_kLatinExtendedA,
40cdf0e10cSrcweir       UnicodeScript_kLatinExtendedA }, // 2,
41cdf0e10cSrcweir     { UnicodeScript_kLatinExtendedB,
42cdf0e10cSrcweir       UnicodeScript_kLatinExtendedB,
43cdf0e10cSrcweir       UnicodeScript_kLatinExtendedB }, // 3,
44cdf0e10cSrcweir     { UnicodeScript_kIPAExtension,
45cdf0e10cSrcweir       UnicodeScript_kIPAExtension,
46cdf0e10cSrcweir       UnicodeScript_kIPAExtension }, // 4,
47cdf0e10cSrcweir     { UnicodeScript_kSpacingModifier,
48cdf0e10cSrcweir       UnicodeScript_kSpacingModifier,
49cdf0e10cSrcweir       UnicodeScript_kSpacingModifier }, // 5,
50cdf0e10cSrcweir     { UnicodeScript_kCombiningDiacritical,
51cdf0e10cSrcweir       UnicodeScript_kCombiningDiacritical,
52cdf0e10cSrcweir       UnicodeScript_kCombiningDiacritical }, // 6,
53cdf0e10cSrcweir     { UnicodeScript_kGreek,
54cdf0e10cSrcweir       UnicodeScript_kGreek,
55cdf0e10cSrcweir       UnicodeScript_kGreek }, // 7,
56cdf0e10cSrcweir     { UnicodeScript_kCyrillic,
57cdf0e10cSrcweir       UnicodeScript_kCyrillic,
58cdf0e10cSrcweir       UnicodeScript_kCyrillic }, // 8,
59cdf0e10cSrcweir     { UnicodeScript_kArmenian,
60cdf0e10cSrcweir       UnicodeScript_kArmenian,
61cdf0e10cSrcweir       UnicodeScript_kArmenian }, // 9,
62cdf0e10cSrcweir     { UnicodeScript_kHebrew,
63cdf0e10cSrcweir       UnicodeScript_kHebrew,
64cdf0e10cSrcweir       UnicodeScript_kHebrew }, // 10,
65cdf0e10cSrcweir     { UnicodeScript_kArabic,
66cdf0e10cSrcweir       UnicodeScript_kArabic,
67cdf0e10cSrcweir       UnicodeScript_kArabic }, // 11,
68cdf0e10cSrcweir     { UnicodeScript_kSyriac,
69cdf0e10cSrcweir       UnicodeScript_kSyriac,
70cdf0e10cSrcweir       UnicodeScript_kSyriac }, // 12,
71cdf0e10cSrcweir     { UnicodeScript_kThaana,
72cdf0e10cSrcweir       UnicodeScript_kThaana,
73cdf0e10cSrcweir       UnicodeScript_kThaana }, // 13,
74cdf0e10cSrcweir     { UnicodeScript_kDevanagari,
75cdf0e10cSrcweir       UnicodeScript_kDevanagari,
76cdf0e10cSrcweir       UnicodeScript_kDevanagari }, // 14,
77cdf0e10cSrcweir     { UnicodeScript_kBengali,
78cdf0e10cSrcweir       UnicodeScript_kBengali,
79cdf0e10cSrcweir       UnicodeScript_kBengali }, // 15,
80cdf0e10cSrcweir     { UnicodeScript_kGurmukhi,
81cdf0e10cSrcweir       UnicodeScript_kGurmukhi,
82cdf0e10cSrcweir       UnicodeScript_kGurmukhi }, // 16,
83cdf0e10cSrcweir     { UnicodeScript_kGujarati,
84cdf0e10cSrcweir       UnicodeScript_kGujarati,
85cdf0e10cSrcweir       UnicodeScript_kGujarati }, // 17,
86cdf0e10cSrcweir     { UnicodeScript_kOriya,
87cdf0e10cSrcweir       UnicodeScript_kOriya,
88cdf0e10cSrcweir       UnicodeScript_kOriya }, // 18,
89cdf0e10cSrcweir     { UnicodeScript_kTamil,
90cdf0e10cSrcweir       UnicodeScript_kTamil,
91cdf0e10cSrcweir       UnicodeScript_kTamil }, // 19,
92cdf0e10cSrcweir     { UnicodeScript_kTelugu,
93cdf0e10cSrcweir       UnicodeScript_kTelugu,
94cdf0e10cSrcweir       UnicodeScript_kTelugu }, // 20,
95cdf0e10cSrcweir     { UnicodeScript_kKannada,
96cdf0e10cSrcweir       UnicodeScript_kKannada,
97cdf0e10cSrcweir       UnicodeScript_kKannada }, // 21,
98cdf0e10cSrcweir     { UnicodeScript_kMalayalam,
99cdf0e10cSrcweir       UnicodeScript_kMalayalam,
100cdf0e10cSrcweir       UnicodeScript_kMalayalam }, // 22,
101cdf0e10cSrcweir     { UnicodeScript_kSinhala,
102cdf0e10cSrcweir       UnicodeScript_kSinhala,
103cdf0e10cSrcweir       UnicodeScript_kSinhala }, // 23,
104cdf0e10cSrcweir     { UnicodeScript_kThai,
105cdf0e10cSrcweir       UnicodeScript_kThai,
106cdf0e10cSrcweir       UnicodeScript_kThai }, // 24,
107cdf0e10cSrcweir     { UnicodeScript_kLao,
108cdf0e10cSrcweir       UnicodeScript_kLao,
109cdf0e10cSrcweir       UnicodeScript_kLao }, // 25,
110cdf0e10cSrcweir     { UnicodeScript_kTibetan,
111cdf0e10cSrcweir       UnicodeScript_kTibetan,
112cdf0e10cSrcweir       UnicodeScript_kTibetan }, // 26,
113cdf0e10cSrcweir     { UnicodeScript_kMyanmar,
114cdf0e10cSrcweir       UnicodeScript_kMyanmar,
115cdf0e10cSrcweir       UnicodeScript_kMyanmar }, // 27,
116cdf0e10cSrcweir     { UnicodeScript_kGeorgian,
117cdf0e10cSrcweir       UnicodeScript_kGeorgian,
118cdf0e10cSrcweir       UnicodeScript_kGeorgian }, // 28,
119cdf0e10cSrcweir     { UnicodeScript_kHangulJamo,
120cdf0e10cSrcweir       UnicodeScript_kHangulJamo,
121cdf0e10cSrcweir       UnicodeScript_kHangulJamo }, // 29,
122cdf0e10cSrcweir     { UnicodeScript_kEthiopic,
123cdf0e10cSrcweir       UnicodeScript_kEthiopic,
124cdf0e10cSrcweir       UnicodeScript_kEthiopic }, // 30,
125cdf0e10cSrcweir     { UnicodeScript_kCherokee,
126cdf0e10cSrcweir       UnicodeScript_kCherokee,
127cdf0e10cSrcweir       UnicodeScript_kCherokee }, // 31,
128cdf0e10cSrcweir     { UnicodeScript_kUnifiedCanadianAboriginalSyllabics,
129cdf0e10cSrcweir       UnicodeScript_kUnifiedCanadianAboriginalSyllabics,
130cdf0e10cSrcweir       UnicodeScript_kUnifiedCanadianAboriginalSyllabics }, // 32,
131cdf0e10cSrcweir     { UnicodeScript_kOgham,
132cdf0e10cSrcweir       UnicodeScript_kOgham,
133cdf0e10cSrcweir       UnicodeScript_kOgham }, // 33,
134cdf0e10cSrcweir     { UnicodeScript_kRunic,
135cdf0e10cSrcweir       UnicodeScript_kRunic,
136cdf0e10cSrcweir       UnicodeScript_kRunic }, // 34,
137cdf0e10cSrcweir     { UnicodeScript_kKhmer,
138cdf0e10cSrcweir       UnicodeScript_kKhmer,
139cdf0e10cSrcweir       UnicodeScript_kKhmer }, // 35,
140cdf0e10cSrcweir     { UnicodeScript_kMongolian,
141cdf0e10cSrcweir       UnicodeScript_kMongolian,
142cdf0e10cSrcweir       UnicodeScript_kMongolian }, // 36,
143cdf0e10cSrcweir     { UnicodeScript_kLatinExtendedAdditional,
144cdf0e10cSrcweir       UnicodeScript_kLatinExtendedAdditional,
145cdf0e10cSrcweir       UnicodeScript_kLatinExtendedAdditional }, // 37,
146cdf0e10cSrcweir     { UnicodeScript_kGreekExtended,
147cdf0e10cSrcweir       UnicodeScript_kGreekExtended,
148cdf0e10cSrcweir       UnicodeScript_kGreekExtended }, // 38,
149cdf0e10cSrcweir     { UnicodeScript_kGeneralPunctuation,
150cdf0e10cSrcweir       UnicodeScript_kGeneralPunctuation,
151cdf0e10cSrcweir       UnicodeScript_kGeneralPunctuation }, // 39,
152cdf0e10cSrcweir     { UnicodeScript_kSuperSubScript,
153cdf0e10cSrcweir       UnicodeScript_kSuperSubScript,
154cdf0e10cSrcweir       UnicodeScript_kSuperSubScript }, // 40,
155cdf0e10cSrcweir     { UnicodeScript_kCurrencySymbolScript,
156cdf0e10cSrcweir       UnicodeScript_kCurrencySymbolScript,
157cdf0e10cSrcweir       UnicodeScript_kCurrencySymbolScript }, // 41,
158cdf0e10cSrcweir     { UnicodeScript_kSymbolCombiningMark,
159cdf0e10cSrcweir       UnicodeScript_kSymbolCombiningMark,
160cdf0e10cSrcweir       UnicodeScript_kSymbolCombiningMark }, // 42,
161cdf0e10cSrcweir     { UnicodeScript_kLetterlikeSymbol,
162cdf0e10cSrcweir       UnicodeScript_kLetterlikeSymbol,
163cdf0e10cSrcweir       UnicodeScript_kLetterlikeSymbol }, // 43,
164cdf0e10cSrcweir     { UnicodeScript_kNumberForm,
165cdf0e10cSrcweir       UnicodeScript_kNumberForm,
166cdf0e10cSrcweir       UnicodeScript_kNumberForm }, // 44,
167cdf0e10cSrcweir     { UnicodeScript_kArrow,
168cdf0e10cSrcweir       UnicodeScript_kArrow,
169cdf0e10cSrcweir       UnicodeScript_kArrow }, // 45,
170cdf0e10cSrcweir     { UnicodeScript_kMathOperator,
171cdf0e10cSrcweir       UnicodeScript_kMathOperator,
172cdf0e10cSrcweir       UnicodeScript_kMathOperator }, // 46,
173cdf0e10cSrcweir     { UnicodeScript_kMiscTechnical,
174cdf0e10cSrcweir       UnicodeScript_kMiscTechnical,
175cdf0e10cSrcweir       UnicodeScript_kMiscTechnical }, // 47,
176cdf0e10cSrcweir     { UnicodeScript_kControlPicture,
177cdf0e10cSrcweir       UnicodeScript_kControlPicture,
178cdf0e10cSrcweir       UnicodeScript_kControlPicture }, // 48,
179cdf0e10cSrcweir     { UnicodeScript_kOpticalCharacter,
180cdf0e10cSrcweir       UnicodeScript_kOpticalCharacter,
181cdf0e10cSrcweir       UnicodeScript_kOpticalCharacter }, // 49,
182cdf0e10cSrcweir     { UnicodeScript_kEnclosedAlphanumeric,
183cdf0e10cSrcweir       UnicodeScript_kEnclosedAlphanumeric,
184cdf0e10cSrcweir       UnicodeScript_kEnclosedAlphanumeric }, // 50,
185cdf0e10cSrcweir     { UnicodeScript_kBoxDrawing,
186cdf0e10cSrcweir       UnicodeScript_kBoxDrawing,
187cdf0e10cSrcweir       UnicodeScript_kBoxDrawing }, // 51,
188cdf0e10cSrcweir     { UnicodeScript_kBlockElement,
189cdf0e10cSrcweir       UnicodeScript_kBlockElement,
190cdf0e10cSrcweir       UnicodeScript_kBlockElement }, // 52,
191cdf0e10cSrcweir     { UnicodeScript_kGeometricShape,
192cdf0e10cSrcweir       UnicodeScript_kGeometricShape,
193cdf0e10cSrcweir       UnicodeScript_kGeometricShape }, // 53,
194cdf0e10cSrcweir     { UnicodeScript_kMiscSymbol,
195cdf0e10cSrcweir       UnicodeScript_kMiscSymbol,
196cdf0e10cSrcweir       UnicodeScript_kMiscSymbol }, // 54,
197cdf0e10cSrcweir     { UnicodeScript_kDingbat,
198cdf0e10cSrcweir       UnicodeScript_kDingbat,
199cdf0e10cSrcweir       UnicodeScript_kDingbat }, // 55,
200cdf0e10cSrcweir     { UnicodeScript_kBraillePatterns,
201cdf0e10cSrcweir       UnicodeScript_kBraillePatterns,
202cdf0e10cSrcweir       UnicodeScript_kBraillePatterns }, // 56,
203cdf0e10cSrcweir     { UnicodeScript_kCJKRadicalsSupplement,
204cdf0e10cSrcweir       UnicodeScript_kCJKRadicalsSupplement,
205cdf0e10cSrcweir       UnicodeScript_kCJKRadicalsSupplement }, // 57,
206cdf0e10cSrcweir     { UnicodeScript_kKangxiRadicals,
207cdf0e10cSrcweir       UnicodeScript_kKangxiRadicals,
208cdf0e10cSrcweir       UnicodeScript_kKangxiRadicals }, // 58,
209cdf0e10cSrcweir     { UnicodeScript_kIdeographicDescriptionCharacters,
210cdf0e10cSrcweir       UnicodeScript_kIdeographicDescriptionCharacters,
211cdf0e10cSrcweir       UnicodeScript_kIdeographicDescriptionCharacters }, // 59,
212cdf0e10cSrcweir     { UnicodeScript_kCJKSymbolPunctuation,
213cdf0e10cSrcweir       UnicodeScript_kCJKSymbolPunctuation,
214cdf0e10cSrcweir       UnicodeScript_kCJKSymbolPunctuation }, // 60,
215cdf0e10cSrcweir     { UnicodeScript_kHiragana,
216cdf0e10cSrcweir       UnicodeScript_kHiragana,
217cdf0e10cSrcweir       UnicodeScript_kHiragana }, // 61,
218cdf0e10cSrcweir     { UnicodeScript_kKatakana,
219cdf0e10cSrcweir       UnicodeScript_kKatakana,
220cdf0e10cSrcweir       UnicodeScript_kKatakana }, // 62,
221cdf0e10cSrcweir     { UnicodeScript_kBopomofo,
222cdf0e10cSrcweir       UnicodeScript_kBopomofo,
223cdf0e10cSrcweir       UnicodeScript_kBopomofo }, // 63,
224cdf0e10cSrcweir     { UnicodeScript_kHangulCompatibilityJamo,
225cdf0e10cSrcweir       UnicodeScript_kHangulCompatibilityJamo,
226cdf0e10cSrcweir       UnicodeScript_kHangulCompatibilityJamo }, // 64,
227cdf0e10cSrcweir     { UnicodeScript_kKanbun,
228cdf0e10cSrcweir       UnicodeScript_kKanbun,
229cdf0e10cSrcweir       UnicodeScript_kKanbun }, // 65,
230cdf0e10cSrcweir     { UnicodeScript_kBopomofoExtended,
231cdf0e10cSrcweir       UnicodeScript_kBopomofoExtended,
232cdf0e10cSrcweir       UnicodeScript_kBopomofoExtended }, // 66,
233cdf0e10cSrcweir     { UnicodeScript_kEnclosedCJKLetterMonth,
234cdf0e10cSrcweir       UnicodeScript_kEnclosedCJKLetterMonth,
235cdf0e10cSrcweir       UnicodeScript_kEnclosedCJKLetterMonth }, // 67,
236cdf0e10cSrcweir     { UnicodeScript_kCJKCompatibility,
237cdf0e10cSrcweir       UnicodeScript_kCJKCompatibility,
238cdf0e10cSrcweir       UnicodeScript_kCJKCompatibility }, // 68,
239cdf0e10cSrcweir     { UnicodeScript_k_CJKUnifiedIdeographsExtensionA,
240cdf0e10cSrcweir       UnicodeScript_k_CJKUnifiedIdeographsExtensionA,
241cdf0e10cSrcweir       UnicodeScript_k_CJKUnifiedIdeographsExtensionA }, // 69,
242cdf0e10cSrcweir     { UnicodeScript_kCJKUnifiedIdeograph,
243cdf0e10cSrcweir       UnicodeScript_kCJKUnifiedIdeograph,
244cdf0e10cSrcweir       UnicodeScript_kCJKUnifiedIdeograph }, // 70,
245cdf0e10cSrcweir     { UnicodeScript_kYiSyllables,
246cdf0e10cSrcweir       UnicodeScript_kYiSyllables,
247cdf0e10cSrcweir       UnicodeScript_kYiSyllables }, // 71,
248cdf0e10cSrcweir     { UnicodeScript_kYiRadicals,
249cdf0e10cSrcweir       UnicodeScript_kYiRadicals,
250cdf0e10cSrcweir       UnicodeScript_kYiRadicals }, // 72,
251cdf0e10cSrcweir     { UnicodeScript_kHangulSyllable,
252cdf0e10cSrcweir       UnicodeScript_kHangulSyllable,
253cdf0e10cSrcweir       UnicodeScript_kHangulSyllable }, // 73,
254cdf0e10cSrcweir     { UnicodeScript_kHighSurrogate,
255cdf0e10cSrcweir       UnicodeScript_kHighSurrogate,
256cdf0e10cSrcweir       UnicodeScript_kHighSurrogate }, // 74,
257cdf0e10cSrcweir     { UnicodeScript_kHighPrivateUseSurrogate,
258cdf0e10cSrcweir       UnicodeScript_kHighPrivateUseSurrogate,
259cdf0e10cSrcweir       UnicodeScript_kHighPrivateUseSurrogate }, // 75,
260cdf0e10cSrcweir     { UnicodeScript_kLowSurrogate,
261cdf0e10cSrcweir       UnicodeScript_kLowSurrogate,
262cdf0e10cSrcweir       UnicodeScript_kLowSurrogate }, // 76,
263cdf0e10cSrcweir     { UnicodeScript_kPrivateUse,
264cdf0e10cSrcweir       UnicodeScript_kPrivateUse,
265cdf0e10cSrcweir       UnicodeScript_kPrivateUse }, // 77,
266cdf0e10cSrcweir     { UnicodeScript_kCJKCompatibilityIdeograph,
267cdf0e10cSrcweir       UnicodeScript_kCJKCompatibilityIdeograph,
268cdf0e10cSrcweir       UnicodeScript_kCJKCompatibilityIdeograph }, // 78,
269cdf0e10cSrcweir     { UnicodeScript_kAlphabeticPresentation,
270cdf0e10cSrcweir       UnicodeScript_kAlphabeticPresentation,
271cdf0e10cSrcweir       UnicodeScript_kAlphabeticPresentation }, // 79,
272cdf0e10cSrcweir     { UnicodeScript_kArabicPresentationA,
273cdf0e10cSrcweir       UnicodeScript_kArabicPresentationA,
274cdf0e10cSrcweir       UnicodeScript_kArabicPresentationA }, // 80,
275cdf0e10cSrcweir     { UnicodeScript_kCombiningHalfMark,
276cdf0e10cSrcweir       UnicodeScript_kCombiningHalfMark,
277cdf0e10cSrcweir       UnicodeScript_kCombiningHalfMark }, // 81,
278cdf0e10cSrcweir     { UnicodeScript_kCJKCompatibilityForm,
279cdf0e10cSrcweir       UnicodeScript_kCJKCompatibilityForm,
280cdf0e10cSrcweir       UnicodeScript_kCJKCompatibilityForm }, // 82,
281cdf0e10cSrcweir     { UnicodeScript_kSmallFormVariant,
282cdf0e10cSrcweir       UnicodeScript_kSmallFormVariant,
283cdf0e10cSrcweir       UnicodeScript_kSmallFormVariant }, // 83,
284cdf0e10cSrcweir     { UnicodeScript_kArabicPresentationB,
285cdf0e10cSrcweir       UnicodeScript_kArabicPresentationB,
286cdf0e10cSrcweir       UnicodeScript_kArabicPresentationB }, // 84,
287cdf0e10cSrcweir     { UnicodeScript_kNoScript,
288cdf0e10cSrcweir       UnicodeScript_kNoScript,
289cdf0e10cSrcweir       UnicodeScript_kNoScript }, // 85,
290cdf0e10cSrcweir     { UnicodeScript_kHalfwidthFullwidthForm,
291cdf0e10cSrcweir       UnicodeScript_kHalfwidthFullwidthForm,
292cdf0e10cSrcweir       UnicodeScript_kHalfwidthFullwidthForm }, // 86,
293cdf0e10cSrcweir     { UnicodeScript_kScriptCount,
294cdf0e10cSrcweir       UnicodeScript_kScriptCount,
295cdf0e10cSrcweir       UnicodeScript_kNoScript } // 87,
296cdf0e10cSrcweir };
297cdf0e10cSrcweir 
298cdf0e10cSrcweir sal_Int16 SAL_CALL
getUnicodeScriptType(const sal_Unicode ch,ScriptTypeList * typeList,sal_Int16 unknownType)299cdf0e10cSrcweir unicode::getUnicodeScriptType( const sal_Unicode ch, ScriptTypeList* typeList, sal_Int16 unknownType ) {
300cdf0e10cSrcweir 
301cdf0e10cSrcweir     if (!typeList) {
302cdf0e10cSrcweir         typeList = defaultTypeList;
303cdf0e10cSrcweir         unknownType = UnicodeScript_kNoScript;
304cdf0e10cSrcweir     }
305cdf0e10cSrcweir 
306cdf0e10cSrcweir     sal_Int16 i = 0, type = typeList[0].to;
307cdf0e10cSrcweir     while (type < UnicodeScript_kScriptCount && ch > UnicodeScriptType[type][UnicodeScriptTypeTo]) {
308cdf0e10cSrcweir         type = typeList[++i].to;
309cdf0e10cSrcweir     }
310cdf0e10cSrcweir 
311cdf0e10cSrcweir     return (type < UnicodeScript_kScriptCount &&
312cdf0e10cSrcweir             ch >= UnicodeScriptType[typeList[i].from][UnicodeScriptTypeFrom]) ?
313cdf0e10cSrcweir             typeList[i].value : unknownType;
314cdf0e10cSrcweir }
315cdf0e10cSrcweir 
316cdf0e10cSrcweir sal_Bool SAL_CALL
isUnicodeScriptType(const sal_Unicode ch,sal_Int16 type)317cdf0e10cSrcweir unicode::isUnicodeScriptType( const sal_Unicode ch, sal_Int16 type) {
318cdf0e10cSrcweir     return ch >= UnicodeScriptType[type][UnicodeScriptTypeFrom] &&
319cdf0e10cSrcweir         ch <= UnicodeScriptType[type][UnicodeScriptTypeTo];
320cdf0e10cSrcweir }
321cdf0e10cSrcweir 
322cdf0e10cSrcweir sal_Unicode SAL_CALL
getUnicodeScriptStart(UnicodeScript type)323cdf0e10cSrcweir unicode::getUnicodeScriptStart( UnicodeScript type) {
324cdf0e10cSrcweir     return UnicodeScriptType[type][UnicodeScriptTypeFrom];
325cdf0e10cSrcweir }
326cdf0e10cSrcweir 
327cdf0e10cSrcweir sal_Unicode SAL_CALL
getUnicodeScriptEnd(UnicodeScript type)328cdf0e10cSrcweir unicode::getUnicodeScriptEnd( UnicodeScript type) {
329cdf0e10cSrcweir     return UnicodeScriptType[type][UnicodeScriptTypeTo];
330cdf0e10cSrcweir }
331cdf0e10cSrcweir 
332cdf0e10cSrcweir sal_Int16 SAL_CALL
getUnicodeType(const sal_Unicode ch)333cdf0e10cSrcweir unicode::getUnicodeType( const sal_Unicode ch ) {
334cdf0e10cSrcweir     static sal_Unicode c = 0x00;
335cdf0e10cSrcweir     static sal_Int16 r = 0x00;
336cdf0e10cSrcweir 
337cdf0e10cSrcweir     if (ch == c) return r;
338cdf0e10cSrcweir     else c = ch;
339cdf0e10cSrcweir 
340cdf0e10cSrcweir     sal_Int16 address = UnicodeTypeIndex[ch >> 8];
341cdf0e10cSrcweir     return r = (sal_Int16)((address < UnicodeTypeNumberBlock) ? UnicodeTypeBlockValue[address] :
342cdf0e10cSrcweir         UnicodeTypeValue[((address - UnicodeTypeNumberBlock) << 8) + (ch & 0xff)]);
343cdf0e10cSrcweir }
344cdf0e10cSrcweir 
345cdf0e10cSrcweir sal_uInt8 SAL_CALL
getUnicodeDirection(const sal_Unicode ch)346cdf0e10cSrcweir unicode::getUnicodeDirection( const sal_Unicode ch ) {
347cdf0e10cSrcweir     static sal_Unicode c = 0x00;
348cdf0e10cSrcweir     static sal_uInt8 r = 0x00;
349cdf0e10cSrcweir 
350cdf0e10cSrcweir     if (ch == c) return r;
351cdf0e10cSrcweir     else c = ch;
352cdf0e10cSrcweir 
353cdf0e10cSrcweir     sal_Int16 address = UnicodeDirectionIndex[ch >> 8];
354cdf0e10cSrcweir     return r = ((address < UnicodeDirectionNumberBlock) ? UnicodeDirectionBlockValue[address] :
355cdf0e10cSrcweir         UnicodeDirectionValue[((address - UnicodeDirectionNumberBlock) << 8) + (ch & 0xff)]);
356cdf0e10cSrcweir 
357cdf0e10cSrcweir }
358cdf0e10cSrcweir 
// Bit masks over UnicodeType:: category codes. Each category code N is
// represented by bit N, so a set of categories is an OR of bit() values and a
// membership test is a single AND.
//
// Every mask body is wrapped in parentheses so that it stays one operand
// wherever it is expanded (e.g. `MASK & x`), and bit() shifts an unsigned 1
// so that bit(31) does not shift into the sign bit of an int.
#define bit(name)   (1u << (name))

#define UPPERMASK   (bit(UnicodeType::UPPERCASE_LETTER))

#define LOWERMASK   (bit(UnicodeType::LOWERCASE_LETTER))

#define TITLEMASK   (bit(UnicodeType::TITLECASE_LETTER))

// All three number categories count as "digit" here.
#define DIGITMASK   (bit(UnicodeType::DECIMAL_DIGIT_NUMBER)|\
            bit(UnicodeType::LETTER_NUMBER)|\
            bit(UnicodeType::OTHER_NUMBER))

#define ALPHAMASK   (UPPERMASK|LOWERMASK|TITLEMASK|\
            bit(UnicodeType::MODIFIER_LETTER)|\
            bit(UnicodeType::OTHER_LETTER))

// Letters, numbers and the three mark categories.
#define BASEMASK    (DIGITMASK|ALPHAMASK|\
            bit(UnicodeType::NON_SPACING_MARK)|\
            bit(UnicodeType::ENCLOSING_MARK)|\
            bit(UnicodeType::COMBINING_SPACING_MARK))

#define SPACEMASK   (bit(UnicodeType::SPACE_SEPARATOR)|\
            bit(UnicodeType::LINE_SEPARATOR)|\
            bit(UnicodeType::PARAGRAPH_SEPARATOR))

#define PUNCTUATIONMASK (bit(UnicodeType::DASH_PUNCTUATION)|\
            bit(UnicodeType::INITIAL_PUNCTUATION)|\
            bit(UnicodeType::FINAL_PUNCTUATION)|\
            bit(UnicodeType::CONNECTOR_PUNCTUATION)|\
            bit(UnicodeType::OTHER_PUNCTUATION))

#define SYMBOLMASK  (bit(UnicodeType::MATH_SYMBOL)|\
            bit(UnicodeType::CURRENCY_SYMBOL)|\
            bit(UnicodeType::MODIFIER_SYMBOL)|\
            bit(UnicodeType::OTHER_SYMBOL))

#define PRINTMASK   (BASEMASK|SPACEMASK|PUNCTUATIONMASK|SYMBOLMASK)

// Line and paragraph separators are members of both SPACEMASK and CONTROLMASK.
#define CONTROLMASK (bit(UnicodeType::CONTROL)|\
            bit(UnicodeType::FORMAT)|\
            bit(UnicodeType::LINE_SEPARATOR)|\
            bit(UnicodeType::PARAGRAPH_SEPARATOR))
401cdf0e10cSrcweir 
402cdf0e10cSrcweir #define IsType(func, mask)  \
403cdf0e10cSrcweir sal_Bool SAL_CALL func( const sal_Unicode ch) {\
404cdf0e10cSrcweir     return (bit(getUnicodeType(ch)) & (mask)) != 0;\
405cdf0e10cSrcweir }
406cdf0e10cSrcweir 
IsType(unicode::isUpper,UPPERMASK)407cdf0e10cSrcweir IsType(unicode::isUpper, UPPERMASK)
408cdf0e10cSrcweir IsType(unicode::isLower, LOWERMASK)
409cdf0e10cSrcweir IsType(unicode::isTitle, DIGITMASK)
410cdf0e10cSrcweir IsType(unicode::isControl, CONTROLMASK)
411cdf0e10cSrcweir IsType(unicode::isPrint, PRINTMASK)
412cdf0e10cSrcweir IsType(unicode::isAlpha, ALPHAMASK)
413cdf0e10cSrcweir IsType(unicode::isDigit, DIGITMASK)
414cdf0e10cSrcweir IsType(unicode::isAlphaDigit, ALPHAMASK|DIGITMASK)
415cdf0e10cSrcweir IsType(unicode::isSpace, SPACEMASK)
416cdf0e10cSrcweir IsType(unicode::isBase, BASEMASK)
417cdf0e10cSrcweir IsType(unicode::isPunctuation, PUNCTUATIONMASK)
418cdf0e10cSrcweir 
419cdf0e10cSrcweir #define CONTROLSPACE    bit(0x09)|bit(0x0a)|bit(0x0b)|bit(0x0c)|bit(0x0d)|\
420cdf0e10cSrcweir             bit(0x1c)|bit(0x1d)|bit(0x1e)|bit(0x1f)
421cdf0e10cSrcweir 
422cdf0e10cSrcweir sal_Bool SAL_CALL unicode::isWhiteSpace( const sal_Unicode ch) {
423cdf0e10cSrcweir     return (ch != 0xa0 && isSpace(ch)) || (ch <= 0x1F && (bit(ch) & (CONTROLSPACE)));
424cdf0e10cSrcweir }
425cdf0e10cSrcweir 
getCharType(const sal_Unicode ch)426cdf0e10cSrcweir sal_Int32 SAL_CALL unicode::getCharType( const sal_Unicode ch )
427cdf0e10cSrcweir {
428cdf0e10cSrcweir     using namespace ::com::sun::star::i18n::KCharacterType;
429cdf0e10cSrcweir 
430cdf0e10cSrcweir     switch ( getUnicodeType( ch ) ) {
431cdf0e10cSrcweir     // Upper
432cdf0e10cSrcweir     case UnicodeType::UPPERCASE_LETTER :
433cdf0e10cSrcweir         return UPPER|LETTER|PRINTABLE|BASE_FORM;
434cdf0e10cSrcweir 
435cdf0e10cSrcweir     // Lower
436cdf0e10cSrcweir     case UnicodeType::LOWERCASE_LETTER :
437cdf0e10cSrcweir         return LOWER|LETTER|PRINTABLE|BASE_FORM;
438cdf0e10cSrcweir 
439cdf0e10cSrcweir     // Title
440cdf0e10cSrcweir     case UnicodeType::TITLECASE_LETTER :
441cdf0e10cSrcweir         return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
442cdf0e10cSrcweir 
443cdf0e10cSrcweir     // Letter
444cdf0e10cSrcweir     case UnicodeType::MODIFIER_LETTER :
445cdf0e10cSrcweir     case UnicodeType::OTHER_LETTER :
446cdf0e10cSrcweir         return LETTER|PRINTABLE|BASE_FORM;
447cdf0e10cSrcweir 
448cdf0e10cSrcweir     // Digit
449cdf0e10cSrcweir     case UnicodeType::DECIMAL_DIGIT_NUMBER:
450cdf0e10cSrcweir     case UnicodeType::LETTER_NUMBER:
451cdf0e10cSrcweir     case UnicodeType::OTHER_NUMBER:
452cdf0e10cSrcweir         return DIGIT|PRINTABLE|BASE_FORM;
453cdf0e10cSrcweir 
454cdf0e10cSrcweir     // Base
455cdf0e10cSrcweir     case UnicodeType::NON_SPACING_MARK:
456cdf0e10cSrcweir     case UnicodeType::ENCLOSING_MARK:
457cdf0e10cSrcweir     case UnicodeType::COMBINING_SPACING_MARK:
458cdf0e10cSrcweir         return BASE_FORM|PRINTABLE;
459cdf0e10cSrcweir 
460cdf0e10cSrcweir     // Print
461cdf0e10cSrcweir     case UnicodeType::SPACE_SEPARATOR:
462cdf0e10cSrcweir 
463cdf0e10cSrcweir     case UnicodeType::DASH_PUNCTUATION:
464cdf0e10cSrcweir     case UnicodeType::INITIAL_PUNCTUATION:
465cdf0e10cSrcweir     case UnicodeType::FINAL_PUNCTUATION:
466cdf0e10cSrcweir     case UnicodeType::CONNECTOR_PUNCTUATION:
467cdf0e10cSrcweir     case UnicodeType::OTHER_PUNCTUATION:
468cdf0e10cSrcweir 
469cdf0e10cSrcweir     case UnicodeType::MATH_SYMBOL:
470cdf0e10cSrcweir     case UnicodeType::CURRENCY_SYMBOL:
471cdf0e10cSrcweir     case UnicodeType::MODIFIER_SYMBOL:
472cdf0e10cSrcweir     case UnicodeType::OTHER_SYMBOL:
473cdf0e10cSrcweir         return PRINTABLE;
474cdf0e10cSrcweir 
475cdf0e10cSrcweir     // Control
476cdf0e10cSrcweir     case UnicodeType::CONTROL:
477cdf0e10cSrcweir     case UnicodeType::FORMAT:
478cdf0e10cSrcweir         return CONTROL;
479cdf0e10cSrcweir 
480cdf0e10cSrcweir     case UnicodeType::LINE_SEPARATOR:
481cdf0e10cSrcweir     case UnicodeType::PARAGRAPH_SEPARATOR:
482cdf0e10cSrcweir         return CONTROL|PRINTABLE;
483cdf0e10cSrcweir 
484cdf0e10cSrcweir     // for all others
485cdf0e10cSrcweir     default:
486cdf0e10cSrcweir         return 0;
487cdf0e10cSrcweir     }
488cdf0e10cSrcweir }
489cdf0e10cSrcweir 
490cdf0e10cSrcweir 
491