1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_i18npool.hxx"
26 
27 #include <cclass_unicode.hxx>
28 #include <com/sun/star/i18n/UnicodeScript.hpp>
29 #include <com/sun/star/i18n/UnicodeType.hpp>
30 #include <com/sun/star/i18n/KCharacterType.hpp>
31 #include <unicode/uchar.h>
32 #include <i18nutil/x_rtl_ustring.h>
33 #include <breakiteratorImpl.hxx>
34 
35 using namespace ::com::sun::star::uno;
36 using namespace ::com::sun::star::lang;
37 using namespace ::rtl;
38 
39 namespace com { namespace sun { namespace star { namespace i18n {
//	----------------------------------------------------
//	class cclass_Unicode
//	----------------------------------------------------
43 
cclass_Unicode(uno::Reference<XMultiServiceFactory> xSMgr)44 cclass_Unicode::cclass_Unicode( uno::Reference < XMultiServiceFactory > xSMgr ) : xMSF( xSMgr ),
45 		pTable( NULL ),
46 		pStart( NULL ),
47 		pCont( NULL ),
48 		nStartTypes( 0 ),
49 		nContTypes( 0 ),
50 		eState( ssGetChar ),
51 		cGroupSep( ',' ),
52 		cDecimalSep( '.' )
53 {
54 	trans = new Transliteration_casemapping();
55 	cClass = "com.sun.star.i18n.CharacterClassification_Unicode";
56 }
57 
~cclass_Unicode()58 cclass_Unicode::~cclass_Unicode() {
59 	destroyParserTable();
60 	delete trans;
61 }
62 
63 
75 
76 OUString SAL_CALL
toLower(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)77 cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
78     sal_Int32 len = Text.getLength();
79     if (nPos >= len)
80         return OUString();
81     if (nCount + nPos > len)
82         nCount = len - nPos;
83 
84     trans->setMappingType(MappingTypeToLower, rLocale);
85     return trans->transliterateString2String(Text, nPos, nCount);
86 }
87 
88 OUString SAL_CALL
toTitle(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale & rLocale)89 cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
90     sal_Int32 len = Text.getLength();
91     if (nPos >= len)
92         return OUString();
93     if (nCount + nPos > len)
94         nCount = len - nPos;
95 
96     trans->setMappingType(MappingTypeToTitle, rLocale);
97     rtl_uString* pStr = x_rtl_uString_new_WithLength( nCount ); // defined in x_rtl_ustring.h
98     sal_Unicode* out = pStr->buffer;
99     BreakIteratorImpl brk(xMSF);
100     Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale,
101                 WordType::ANYWORD_IGNOREWHITESPACES, sal_True);
102     for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
103         if (i >= bdy.endPos)
104             bdy = brk.nextWord(Text, bdy.endPos, rLocale,
105                         WordType::ANYWORD_IGNOREWHITESPACES);
106         *out = (i == bdy.startPos) ?
107             trans->transliterateChar2Char(Text[i]) : Text[i];
108     }
109     *out = 0;
110     return OUString( pStr, SAL_NO_ACQUIRE );
111 }
112 
113 sal_Int16 SAL_CALL
getType(const OUString & Text,sal_Int32 nPos)114 cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
115     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
116     return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0));
117 }
118 
119 sal_Int16 SAL_CALL
getCharacterDirection(const OUString & Text,sal_Int32 nPos)120 cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
121     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
122     return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0));
123 }
124 
125 
126 sal_Int16 SAL_CALL
getScript(const OUString & Text,sal_Int32 nPos)127 cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
128     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
129     // ICU Unicode script type UBlockCode starts from 1 for Basci Latin,
130     // while OO.o enum UnicideScript starts from 0.
131     // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
132     return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1;
133 }
134 
135 
136 sal_Int32 SAL_CALL
getCharType(const OUString & Text,sal_Int32 * nPos,sal_Int32 increment)137 cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
138     using namespace ::com::sun::star::i18n::KCharacterType;
139 
140 	sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
141 	if (increment > 0) ch = Text.iterateCodePoints(nPos, 0);
142     switch ( u_charType(ch) ) {
143     // Upper
144     case U_UPPERCASE_LETTER :
145         return UPPER|LETTER|PRINTABLE|BASE_FORM;
146 
147     // Lower
148     case U_LOWERCASE_LETTER :
149         return LOWER|LETTER|PRINTABLE|BASE_FORM;
150 
151     // Title
152     case U_TITLECASE_LETTER :
153         return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
154 
155     // Letter
156     case U_MODIFIER_LETTER :
157     case U_OTHER_LETTER :
158         return LETTER|PRINTABLE|BASE_FORM;
159 
160     // Digit
161     case U_DECIMAL_DIGIT_NUMBER:
162     case U_LETTER_NUMBER:
163     case U_OTHER_NUMBER:
164         return DIGIT|PRINTABLE|BASE_FORM;
165 
166     // Base
167     case U_NON_SPACING_MARK:
168     case U_ENCLOSING_MARK:
169     case U_COMBINING_SPACING_MARK:
170         return BASE_FORM|PRINTABLE;
171 
172     // Print
173     case U_SPACE_SEPARATOR:
174 
175     case U_DASH_PUNCTUATION:
176     case U_INITIAL_PUNCTUATION:
177     case U_FINAL_PUNCTUATION:
178     case U_CONNECTOR_PUNCTUATION:
179     case U_OTHER_PUNCTUATION:
180 
181     case U_MATH_SYMBOL:
182     case U_CURRENCY_SYMBOL:
183     case U_MODIFIER_SYMBOL:
184     case U_OTHER_SYMBOL:
185         return PRINTABLE;
186 
187     // Control
188     case U_CONTROL_CHAR:
189     case U_FORMAT_CHAR:
190         return CONTROL;
191 
192     case U_LINE_SEPARATOR:
193     case U_PARAGRAPH_SEPARATOR:
194         return CONTROL|PRINTABLE;
195 
196     // for all others
197     default:
198         return U_GENERAL_OTHER_TYPES;
199     }
200 }
201 
202 sal_Int32 SAL_CALL
getCharacterType(const OUString & Text,sal_Int32 nPos,const Locale &)203 cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) {
204     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
205     return getCharType(Text, &nPos, 0);
206 
207 }
208 
209 sal_Int32 SAL_CALL
getStringType(const OUString & Text,sal_Int32 nPos,sal_Int32 nCount,const Locale &)210 cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) {
211     if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
212 
213     sal_Int32 result = getCharType(Text, &nPos, 0);
214     for (sal_Int32 i = 1; i < nCount && nPos < Text.getLength(); i++)
215         result |= getCharType(Text, &nPos, 1);
216     return result;
217 }
218 
parseAnyToken(const OUString & Text,sal_Int32 nPos,const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)219 ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
220 			const OUString& Text,
221 			sal_Int32 nPos,
222 			const Locale& rLocale,
223 			sal_Int32 startCharTokenType,
224 			const OUString& userDefinedCharactersStart,
225 			sal_Int32 contCharTokenType,
226 			const OUString& userDefinedCharactersCont )
227 				throw(RuntimeException)
228 {
229 	ParseResult r;
230 	if ( Text.getLength() <= nPos )
231 		return r;
232 
233 	setupParserTable( rLocale,
234 		startCharTokenType, userDefinedCharactersStart,
235 		contCharTokenType, userDefinedCharactersCont );
236 	parseText( r, Text, nPos );
237 
238 	return r;
239 }
240 
241 
parsePredefinedToken(sal_Int32 nTokenType,const OUString & Text,sal_Int32 nPos,const Locale & rLocale,sal_Int32 startCharTokenType,const OUString & userDefinedCharactersStart,sal_Int32 contCharTokenType,const OUString & userDefinedCharactersCont)242 ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
243 			sal_Int32 nTokenType,
244 			const OUString& Text,
245 			sal_Int32 nPos,
246 			const Locale& rLocale,
247 			sal_Int32 startCharTokenType,
248 			const OUString& userDefinedCharactersStart,
249 			sal_Int32 contCharTokenType,
250 			const OUString& userDefinedCharactersCont )
251 				throw(RuntimeException)
252 {
253 	ParseResult r;
254 	if ( Text.getLength() <= nPos )
255 		return r;
256 
257 	setupParserTable( rLocale,
258 		startCharTokenType, userDefinedCharactersStart,
259 		contCharTokenType, userDefinedCharactersCont );
260 	parseText( r, Text, nPos, nTokenType );
261 
262 	return r;
263 }
264 
getImplementationName()265 OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException )
266 {
267     return OUString::createFromAscii(cClass);
268 }
269 
270 
supportsService(const OUString & rServiceName)271 sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException )
272 {
273     return !rServiceName.compareToAscii(cClass);
274 }
275 
getSupportedServiceNames()276 Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException )
277 {
278     Sequence< OUString > aRet(1);
279     aRet[0] = OUString::createFromAscii(cClass);
280     return aRet;
281 }
282 
283 } } } }
284 
285