1*449ab281SAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*449ab281SAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*449ab281SAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*449ab281SAndrew Rist  * distributed with this work for additional information
6*449ab281SAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*449ab281SAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*449ab281SAndrew Rist  * "License"); you may not use this file except in compliance
9*449ab281SAndrew Rist  * with the License.  You may obtain a copy of the License at
10*449ab281SAndrew Rist  *
11*449ab281SAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*449ab281SAndrew Rist  *
13*449ab281SAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*449ab281SAndrew Rist  * software distributed under the License is distributed on an
15*449ab281SAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*449ab281SAndrew Rist  * KIND, either express or implied.  See the License for the
17*449ab281SAndrew Rist  * specific language governing permissions and limitations
18*449ab281SAndrew Rist  * under the License.
19*449ab281SAndrew Rist  *
20*449ab281SAndrew Rist  *************************************************************/
21*449ab281SAndrew Rist 
22*449ab281SAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_i18npool.hxx"
26cdf0e10cSrcweir #include <indexentrysupplier_default.hxx>
27cdf0e10cSrcweir #include <localedata.hxx>
28cdf0e10cSrcweir #include <i18nutil/unicode.hxx>
29cdf0e10cSrcweir #include <com/sun/star/i18n/CollatorOptions.hpp>
30cdf0e10cSrcweir 
31cdf0e10cSrcweir using namespace ::com::sun::star::uno;
32cdf0e10cSrcweir using namespace ::com::sun::star::lang;
33cdf0e10cSrcweir using namespace ::rtl;
34cdf0e10cSrcweir 
35cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n {
36cdf0e10cSrcweir 
37cdf0e10cSrcweir IndexEntrySupplier_Unicode::IndexEntrySupplier_Unicode(
38cdf0e10cSrcweir     const com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory >& rxMSF ) :
39cdf0e10cSrcweir     IndexEntrySupplier_Common(rxMSF)
40cdf0e10cSrcweir {
41cdf0e10cSrcweir 	implementationName = "com.sun.star.i18n.IndexEntrySupplier_Unicode";
42cdf0e10cSrcweir     index = new Index(rxMSF);
43cdf0e10cSrcweir }
44cdf0e10cSrcweir 
45cdf0e10cSrcweir IndexEntrySupplier_Unicode::~IndexEntrySupplier_Unicode()
46cdf0e10cSrcweir {
47cdf0e10cSrcweir     delete index;
48cdf0e10cSrcweir }
49cdf0e10cSrcweir 
50cdf0e10cSrcweir sal_Bool SAL_CALL IndexEntrySupplier_Unicode::loadAlgorithm( const lang::Locale& rLocale,
51cdf0e10cSrcweir 	const OUString& rAlgorithm, sal_Int32 collatorOptions ) throw (RuntimeException)
52cdf0e10cSrcweir {
53cdf0e10cSrcweir     index->init(rLocale, rAlgorithm);
54cdf0e10cSrcweir     return IndexEntrySupplier_Common::loadAlgorithm(rLocale, rAlgorithm, collatorOptions);
55cdf0e10cSrcweir }
56cdf0e10cSrcweir 
57cdf0e10cSrcweir OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexKey( const OUString& rIndexEntry,
58cdf0e10cSrcweir 	const OUString& rPhoneticEntry, const lang::Locale& rLocale ) throw (RuntimeException)
59cdf0e10cSrcweir {
60cdf0e10cSrcweir     return index->getIndexDescription(getEntry(rIndexEntry, rPhoneticEntry, rLocale));
61cdf0e10cSrcweir }
62cdf0e10cSrcweir 
63cdf0e10cSrcweir sal_Int16 SAL_CALL IndexEntrySupplier_Unicode::compareIndexEntry(
64cdf0e10cSrcweir 	const OUString& rIndexEntry1, const OUString& rPhoneticEntry1, const lang::Locale& rLocale1,
65cdf0e10cSrcweir 	const OUString& rIndexEntry2, const OUString& rPhoneticEntry2, const lang::Locale& rLocale2 )
66cdf0e10cSrcweir 	throw (RuntimeException)
67cdf0e10cSrcweir {
68cdf0e10cSrcweir     sal_Int16 result =
69cdf0e10cSrcweir             index->getIndexWeight(getEntry(rIndexEntry1, rPhoneticEntry1, rLocale1)) -
70cdf0e10cSrcweir             index->getIndexWeight(getEntry(rIndexEntry2, rPhoneticEntry2, rLocale2));
71cdf0e10cSrcweir     if (result == 0)
72cdf0e10cSrcweir         return IndexEntrySupplier_Common::compareIndexEntry(
73cdf0e10cSrcweir                     rIndexEntry1, rPhoneticEntry1, rLocale1,
74cdf0e10cSrcweir                     rIndexEntry2, rPhoneticEntry2, rLocale2);
75cdf0e10cSrcweir     return result > 0 ? 1 : -1;
76cdf0e10cSrcweir }
77cdf0e10cSrcweir 
78cdf0e10cSrcweir OUString SAL_CALL IndexEntrySupplier_Unicode::getIndexCharacter( const OUString& rIndexEntry,
79cdf0e10cSrcweir 	const lang::Locale& rLocale, const OUString& rAlgorithm ) throw (RuntimeException) {
80cdf0e10cSrcweir 
81cdf0e10cSrcweir     if (loadAlgorithm( rLocale, rAlgorithm, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT))
82cdf0e10cSrcweir         return index->getIndexDescription(rIndexEntry);
83cdf0e10cSrcweir     else
84cdf0e10cSrcweir         return IndexEntrySupplier_Common::getIndexCharacter(rIndexEntry, rLocale, rAlgorithm);
85cdf0e10cSrcweir }
86cdf0e10cSrcweir 
87cdf0e10cSrcweir IndexTable::IndexTable()
88cdf0e10cSrcweir {
89cdf0e10cSrcweir     table = NULL;
90cdf0e10cSrcweir }
91cdf0e10cSrcweir 
92cdf0e10cSrcweir IndexTable::~IndexTable()
93cdf0e10cSrcweir {
94cdf0e10cSrcweir     if (table) free(table);
95cdf0e10cSrcweir }
96cdf0e10cSrcweir 
97cdf0e10cSrcweir void IndexTable::init(sal_Unicode start_, sal_Unicode end_, IndexKey *keys, sal_Int16 key_count, Index *index)
98cdf0e10cSrcweir {
99cdf0e10cSrcweir     start=start_;
100cdf0e10cSrcweir     end=end_;
101cdf0e10cSrcweir     table = (sal_uInt8*) malloc((end-start+1)*sizeof(sal_uInt8));
102cdf0e10cSrcweir     for (sal_Unicode i = start; i <= end; i++) {
103cdf0e10cSrcweir         sal_Int16 j;
104cdf0e10cSrcweir         for (j = 0; j < key_count; j++) {
105cdf0e10cSrcweir             if (keys[j].key > 0 && (i == keys[j].key || index->compare(i, keys[j].key) == 0)) {
106cdf0e10cSrcweir                 table[i-start] = sal::static_int_cast<sal_uInt8>(j);
107cdf0e10cSrcweir                 break;
108cdf0e10cSrcweir             }
109cdf0e10cSrcweir         }
110cdf0e10cSrcweir         if (j == key_count)
111cdf0e10cSrcweir             table[i-start] = 0xFF;
112cdf0e10cSrcweir     }
113cdf0e10cSrcweir }
114cdf0e10cSrcweir 
115cdf0e10cSrcweir Index::Index(const com::sun::star::uno::Reference < com::sun::star::lang::XMultiServiceFactory >& rxMSF)
116cdf0e10cSrcweir {
117cdf0e10cSrcweir 	collator = new CollatorImpl(rxMSF);
118cdf0e10cSrcweir }
119cdf0e10cSrcweir 
120cdf0e10cSrcweir Index::~Index()
121cdf0e10cSrcweir {
122cdf0e10cSrcweir     delete collator;
123cdf0e10cSrcweir }
124cdf0e10cSrcweir 
125cdf0e10cSrcweir sal_Int16 Index::compare(sal_Unicode c1, sal_Unicode c2)
126cdf0e10cSrcweir {
127cdf0e10cSrcweir     return sal::static_int_cast<sal_Int16>( collator->compareString(OUString(&c1, 1), OUString(&c2, 1)) );
128cdf0e10cSrcweir }
129cdf0e10cSrcweir 
130cdf0e10cSrcweir sal_Int16 Index::getIndexWeight(const OUString& rIndexEntry)
131cdf0e10cSrcweir {
132cdf0e10cSrcweir     sal_Int32 startPos=0;
133cdf0e10cSrcweir     if (skipping_chars.getLength() > 0)
134cdf0e10cSrcweir         while (skipping_chars.indexOf(rIndexEntry[startPos]) >= 0)
135cdf0e10cSrcweir             startPos++;
136cdf0e10cSrcweir     if (mkey_count > 0) {
137cdf0e10cSrcweir         for (sal_Int16 i = 0; i < mkey_count; i++) {
138cdf0e10cSrcweir             sal_Int32 len = keys[mkeys[i]].mkey.getLength();
139cdf0e10cSrcweir             if (collator->compareSubstring(rIndexEntry, startPos, len,
140cdf0e10cSrcweir                                     keys[mkeys[i]].mkey, 0, len) == 0)
141cdf0e10cSrcweir                 return mkeys[i];
142cdf0e10cSrcweir         }
143cdf0e10cSrcweir     }
144cdf0e10cSrcweir     sal_Unicode code = rIndexEntry[startPos];
145cdf0e10cSrcweir     for (sal_Int16 i = 0; i < table_count; i++) {
146cdf0e10cSrcweir         if (tables[i].start <= code && code <= tables[i].end)
147cdf0e10cSrcweir             return tables[i].table[code-tables[i].start];
148cdf0e10cSrcweir     }
149cdf0e10cSrcweir     return 0xFF;
150cdf0e10cSrcweir }
151cdf0e10cSrcweir 
152cdf0e10cSrcweir OUString Index::getIndexDescription(const OUString& rIndexEntry)
153cdf0e10cSrcweir {
154cdf0e10cSrcweir     sal_Int16 wgt = getIndexWeight(rIndexEntry);
155cdf0e10cSrcweir     if (wgt < MAX_KEYS) {
156cdf0e10cSrcweir         if (keys[wgt].desc.getLength())
157cdf0e10cSrcweir             return keys[wgt].desc;
158cdf0e10cSrcweir         else if (keys[wgt].key > 0)
159cdf0e10cSrcweir             return OUString(&keys[wgt].key, 1);
160cdf0e10cSrcweir         else
161cdf0e10cSrcweir             return keys[wgt].mkey;
162cdf0e10cSrcweir     }
163cdf0e10cSrcweir     sal_Int32 nPos=0;
164cdf0e10cSrcweir     sal_uInt32 indexChar=rIndexEntry.iterateCodePoints(&nPos, 0);
165cdf0e10cSrcweir     return OUString(&indexChar, 1);
166cdf0e10cSrcweir }
167cdf0e10cSrcweir 
// Locale with language "en" and empty country/variant. Used below as the
// fallback when the requested locale supplies no index keys or scripts.
#define LOCALE_EN \
    lang::Locale(OUString::createFromAscii("en"), OUString(), OUString())
169cdf0e10cSrcweir 
170cdf0e10cSrcweir void Index::makeIndexKeys(const lang::Locale &rLocale, const OUString &algorithm) throw (RuntimeException)
171cdf0e10cSrcweir {
172cdf0e10cSrcweir     OUString keyStr = LocaleData().getIndexKeysByAlgorithm(rLocale, algorithm);
173cdf0e10cSrcweir 
174cdf0e10cSrcweir     if (!keyStr.getLength()) {
175cdf0e10cSrcweir         keyStr = LocaleData().getIndexKeysByAlgorithm(LOCALE_EN,
176cdf0e10cSrcweir                     LocaleData().getDefaultIndexAlgorithm(LOCALE_EN));
177cdf0e10cSrcweir         if (!keyStr)
178cdf0e10cSrcweir             throw RuntimeException();
179cdf0e10cSrcweir     }
180cdf0e10cSrcweir 
181cdf0e10cSrcweir     sal_Int16 len = sal::static_int_cast<sal_Int16>( keyStr.getLength() );
182cdf0e10cSrcweir     mkey_count=key_count=0;
183cdf0e10cSrcweir     skipping_chars=OUString();
184cdf0e10cSrcweir     sal_Int16 i, j;
185cdf0e10cSrcweir 
186cdf0e10cSrcweir     for (i = 0; i < len && key_count < MAX_KEYS; i++)
187cdf0e10cSrcweir     {
188cdf0e10cSrcweir         sal_Unicode curr = keyStr[i];
189cdf0e10cSrcweir         sal_Unicode close = sal_Unicode(')');
190cdf0e10cSrcweir 
191cdf0e10cSrcweir         if (unicode::isWhiteSpace(curr))
192cdf0e10cSrcweir             continue;
193cdf0e10cSrcweir 
194cdf0e10cSrcweir         switch(curr) {
195cdf0e10cSrcweir             case sal_Unicode('-'):
196cdf0e10cSrcweir                 if (key_count > 0 && i + 1 < len ) {
197cdf0e10cSrcweir                     for (curr = keyStr[++i]; key_count < MAX_KEYS && keys[key_count-1].key < curr; key_count++) {
198cdf0e10cSrcweir                         keys[key_count].key = keys[key_count-1].key+1;
199cdf0e10cSrcweir                         keys[key_count].desc = OUString();
200cdf0e10cSrcweir                     }
201cdf0e10cSrcweir                 } else
202cdf0e10cSrcweir                     throw RuntimeException();
203cdf0e10cSrcweir                 break;
204cdf0e10cSrcweir             case sal_Unicode('['):
205cdf0e10cSrcweir                 for (i++; i < len && keyStr[i] != sal_Unicode(']'); i++) {
206cdf0e10cSrcweir                     if (unicode::isWhiteSpace(keyStr[i])) {
207cdf0e10cSrcweir                         continue;
208cdf0e10cSrcweir                     } else if (keyStr[i] == sal_Unicode('_')) {
209cdf0e10cSrcweir                         for (curr=keyStr[i-1]+1;  curr <= keyStr[i+1]; curr++)
210cdf0e10cSrcweir                             skipping_chars+=OUString(curr);
211cdf0e10cSrcweir                         i+=2;
212cdf0e10cSrcweir                     } else {
213cdf0e10cSrcweir                         skipping_chars+=OUString(keyStr[i]);
214cdf0e10cSrcweir                     }
215cdf0e10cSrcweir                 }
216cdf0e10cSrcweir                 break;
217cdf0e10cSrcweir             case sal_Unicode('{'):
218cdf0e10cSrcweir                 close = sal_Unicode('}');
219cdf0e10cSrcweir             case sal_Unicode('('):
220cdf0e10cSrcweir                 if (key_count > 0) {
221cdf0e10cSrcweir                     sal_Int16 end = i+1;
222cdf0e10cSrcweir                     for (end=i+1; end < len && keyStr[end] != close; end++) ;
223cdf0e10cSrcweir 
224cdf0e10cSrcweir                     if (end >= len) // no found
225cdf0e10cSrcweir                         throw RuntimeException();
226cdf0e10cSrcweir                     if (close == sal_Unicode(')'))
227cdf0e10cSrcweir                         keys[key_count-1].desc = keyStr.copy(i+1, end-i-1);
228cdf0e10cSrcweir                     else {
229cdf0e10cSrcweir                         mkeys[mkey_count++]=key_count;
230cdf0e10cSrcweir                         keys[key_count].key = 0;
231cdf0e10cSrcweir                         keys[key_count].mkey = keyStr.copy(i+1, end-i-1);
232cdf0e10cSrcweir                         keys[key_count++].desc=OUString();
233cdf0e10cSrcweir                     }
234cdf0e10cSrcweir                     i=end+1;
235cdf0e10cSrcweir                 } else
236cdf0e10cSrcweir                     throw RuntimeException();
237cdf0e10cSrcweir                 break;
238cdf0e10cSrcweir             default:
239cdf0e10cSrcweir                 keys[key_count].key = curr;
240cdf0e10cSrcweir                 keys[key_count++].desc = OUString();
241cdf0e10cSrcweir                 break;
242cdf0e10cSrcweir         }
243cdf0e10cSrcweir     }
244cdf0e10cSrcweir     for (i = 0; i < mkey_count; i++) {
245cdf0e10cSrcweir         for (j=i+1; j < mkey_count; j++) {
246cdf0e10cSrcweir             if (keys[mkeys[i]].mkey.getLength() < keys[mkeys[j]].mkey.getLength()) {
247cdf0e10cSrcweir                 sal_Int16 k = mkeys[i];
248cdf0e10cSrcweir                 mkeys[i] = mkeys[j];
249cdf0e10cSrcweir                 mkeys[j] = k;
250cdf0e10cSrcweir             }
251cdf0e10cSrcweir         }
252cdf0e10cSrcweir     }
253cdf0e10cSrcweir }
254cdf0e10cSrcweir 
255cdf0e10cSrcweir void Index::init(const lang::Locale &rLocale, const OUString& algorithm) throw (RuntimeException)
256cdf0e10cSrcweir {
257cdf0e10cSrcweir     makeIndexKeys(rLocale, algorithm);
258cdf0e10cSrcweir 
259cdf0e10cSrcweir     Sequence< UnicodeScript > scriptList = LocaleData().getUnicodeScripts( rLocale );
260cdf0e10cSrcweir 
261cdf0e10cSrcweir     if (scriptList.getLength() == 0) {
262cdf0e10cSrcweir         scriptList = LocaleData().getUnicodeScripts(LOCALE_EN);
263cdf0e10cSrcweir         if (scriptList.getLength() == 0)
264cdf0e10cSrcweir             throw RuntimeException();
265cdf0e10cSrcweir     }
266cdf0e10cSrcweir 
267cdf0e10cSrcweir     table_count = sal::static_int_cast<sal_Int16>( scriptList.getLength() );
268cdf0e10cSrcweir     if (table_count > MAX_TABLES)
269cdf0e10cSrcweir         throw RuntimeException();
270cdf0e10cSrcweir 
271cdf0e10cSrcweir     collator->loadCollatorAlgorithm(algorithm, rLocale, CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT);
272cdf0e10cSrcweir     sal_Int16 j=0;
273cdf0e10cSrcweir     sal_Unicode start = unicode::getUnicodeScriptStart((UnicodeScript)0);
274cdf0e10cSrcweir     sal_Unicode end = unicode::getUnicodeScriptEnd((UnicodeScript)0);
275cdf0e10cSrcweir     for (sal_Int16 i= (scriptList[0] == (UnicodeScript)0) ? 1 : 0; i< scriptList.getLength(); i++) {
276cdf0e10cSrcweir         if (unicode::getUnicodeScriptStart(scriptList[i]) != end+1) {
277cdf0e10cSrcweir             tables[j++].init(start, end, keys, key_count, this);
278cdf0e10cSrcweir             start = unicode::getUnicodeScriptStart(scriptList[i]);
279cdf0e10cSrcweir         }
280cdf0e10cSrcweir         end = unicode::getUnicodeScriptEnd(scriptList[i]);
281cdf0e10cSrcweir     }
282cdf0e10cSrcweir     tables[j++].init(start, end, keys, key_count, this);
283cdf0e10cSrcweir     table_count = j;
284cdf0e10cSrcweir }
285cdf0e10cSrcweir 
286cdf0e10cSrcweir } } } }
287