1*449ab281SAndrew Rist /************************************************************** 2cdf0e10cSrcweir * 3*449ab281SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*449ab281SAndrew Rist * or more contributor license agreements. See the NOTICE file 5*449ab281SAndrew Rist * distributed with this work for additional information 6*449ab281SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*449ab281SAndrew Rist * to you under the Apache License, Version 2.0 (the 8*449ab281SAndrew Rist * "License"); you may not use this file except in compliance 9*449ab281SAndrew Rist * with the License. You may obtain a copy of the License at 10*449ab281SAndrew Rist * 11*449ab281SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*449ab281SAndrew Rist * 13*449ab281SAndrew Rist * Unless required by applicable law or agreed to in writing, 14*449ab281SAndrew Rist * software distributed under the License is distributed on an 15*449ab281SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*449ab281SAndrew Rist * KIND, either express or implied. See the License for the 17*449ab281SAndrew Rist * specific language governing permissions and limitations 18*449ab281SAndrew Rist * under the License. 19*449ab281SAndrew Rist * 20*449ab281SAndrew Rist *************************************************************/ 21*449ab281SAndrew Rist 22*449ab281SAndrew Rist 23cdf0e10cSrcweir 24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 25cdf0e10cSrcweir #include "precompiled_i18npool.hxx" 26cdf0e10cSrcweir 27cdf0e10cSrcweir #include <assert.h> 28cdf0e10cSrcweir #include <textconversion.hxx> 29cdf0e10cSrcweir #include <com/sun/star/i18n/TextConversionType.hpp> 30cdf0e10cSrcweir #include <com/sun/star/i18n/TextConversionOption.hpp> 31cdf0e10cSrcweir #include <com/sun/star/linguistic2/ConversionDirection.hpp> 32cdf0e10cSrcweir #include <com/sun/star/linguistic2/ConversionDictionaryType.hpp> 33cdf0e10cSrcweir #include <i18nutil/x_rtl_ustring.h> 34cdf0e10cSrcweir 35cdf0e10cSrcweir using namespace com::sun::star::lang; 36cdf0e10cSrcweir using namespace com::sun::star::i18n; 37cdf0e10cSrcweir using namespace com::sun::star::linguistic2; 38cdf0e10cSrcweir using namespace com::sun::star::uno; 39cdf0e10cSrcweir using namespace rtl; 40cdf0e10cSrcweir 41cdf0e10cSrcweir namespace com { namespace sun { namespace star { namespace i18n { 42cdf0e10cSrcweir 43cdf0e10cSrcweir TextConversion_zh::TextConversion_zh( const Reference < XMultiServiceFactory >& xMSF ) 44cdf0e10cSrcweir { 45cdf0e10cSrcweir Reference < XInterface > xI; 46cdf0e10cSrcweir xI = xMSF->createInstance( 47cdf0e10cSrcweir OUString::createFromAscii( "com.sun.star.linguistic2.ConversionDictionaryList" )); 48cdf0e10cSrcweir if ( xI.is() ) 49cdf0e10cSrcweir xI->queryInterface( getCppuType((const Reference< XConversionDictionaryList>*)0) ) >>= xCDL; 50cdf0e10cSrcweir 51cdf0e10cSrcweir implementationName = "com.sun.star.i18n.TextConversion_zh"; 52cdf0e10cSrcweir } 53cdf0e10cSrcweir 54cdf0e10cSrcweir sal_Unicode SAL_CALL getOneCharConversion(sal_Unicode ch, const sal_Unicode* Data, const sal_uInt16* Index) 55cdf0e10cSrcweir { 56cdf0e10cSrcweir if (Data && Index) { 57cdf0e10cSrcweir sal_Unicode address = Index[ch>>8]; 58cdf0e10cSrcweir if (address != 0xFFFF) 59cdf0e10cSrcweir address = Data[address + (ch & 0xFF)]; 60cdf0e10cSrcweir return (address != 0xFFFF) ? address : ch; 61cdf0e10cSrcweir } else { 62cdf0e10cSrcweir return ch; 63cdf0e10cSrcweir } 64cdf0e10cSrcweir } 65cdf0e10cSrcweir 66cdf0e10cSrcweir OUString SAL_CALL 67cdf0e10cSrcweir TextConversion_zh::getCharConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, sal_Bool toSChinese, sal_Int32 nConversionOptions) 68cdf0e10cSrcweir { 69cdf0e10cSrcweir const sal_Unicode *Data; 70cdf0e10cSrcweir const sal_uInt16 *Index; 71cdf0e10cSrcweir 72cdf0e10cSrcweir if (toSChinese) { 73cdf0e10cSrcweir Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_T2S"))(); 74cdf0e10cSrcweir Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_T2S"))(); 75cdf0e10cSrcweir } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) { 76cdf0e10cSrcweir Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_S2V"))(); 77cdf0e10cSrcweir Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_S2V"))(); 78cdf0e10cSrcweir } else { 79cdf0e10cSrcweir Data = ((const sal_Unicode* (*)())getFunctionBySymbol("getSTC_CharData_S2T"))(); 80cdf0e10cSrcweir Index = ((const sal_uInt16* (*)())getFunctionBySymbol("getSTC_CharIndex_S2T"))(); 81cdf0e10cSrcweir } 82cdf0e10cSrcweir 83cdf0e10cSrcweir rtl_uString * newStr = x_rtl_uString_new_WithLength( nLength ); // defined in x_rtl_ustring.h 84cdf0e10cSrcweir for (sal_Int32 i = 0; i < nLength; i++) 85cdf0e10cSrcweir newStr->buffer[i] = 86cdf0e10cSrcweir getOneCharConversion(aText[nStartPos+i], Data, Index); 87cdf0e10cSrcweir return OUString( newStr->buffer, nLength); 88cdf0e10cSrcweir } 89cdf0e10cSrcweir 90cdf0e10cSrcweir OUString SAL_CALL 91cdf0e10cSrcweir TextConversion_zh::getWordConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, sal_Bool toSChinese, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset) 92cdf0e10cSrcweir { 93cdf0e10cSrcweir sal_Int32 dictLen = 0; 94cdf0e10cSrcweir sal_Int32 maxLen = 0; 95cdf0e10cSrcweir const sal_uInt16 *index; 96cdf0e10cSrcweir const sal_uInt16 *entry; 97cdf0e10cSrcweir const sal_Unicode *charData; 98cdf0e10cSrcweir const sal_uInt16 *charIndex; 99cdf0e10cSrcweir sal_Bool one2one=sal_True; 100cdf0e10cSrcweir 101cdf0e10cSrcweir const sal_Unicode *wordData = ((const sal_Unicode* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordData"))(dictLen); 102cdf0e10cSrcweir if (toSChinese) { 103cdf0e10cSrcweir index = ((const sal_uInt16* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordIndex_T2S"))(maxLen); 104cdf0e10cSrcweir entry = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_WordEntry_T2S"))(); 105cdf0e10cSrcweir charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_T2S"))(); 106cdf0e10cSrcweir charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_T2S"))(); 107cdf0e10cSrcweir } else { 108cdf0e10cSrcweir index = ((const sal_uInt16* (*)(sal_Int32&)) getFunctionBySymbol("getSTC_WordIndex_S2T"))(maxLen); 109cdf0e10cSrcweir entry = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_WordEntry_S2T"))(); 110cdf0e10cSrcweir if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) { 111cdf0e10cSrcweir charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_S2V"))(); 112cdf0e10cSrcweir charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_S2V"))(); 113cdf0e10cSrcweir } else { 114cdf0e10cSrcweir charData = ((const sal_Unicode* (*)()) getFunctionBySymbol("getSTC_CharData_S2T"))(); 115cdf0e10cSrcweir charIndex = ((const sal_uInt16* (*)()) getFunctionBySymbol("getSTC_CharIndex_S2T"))(); 116cdf0e10cSrcweir } 117cdf0e10cSrcweir } 118cdf0e10cSrcweir 119cdf0e10cSrcweir if ((!wordData || !index || !entry) && !xCDL.is()) // no word mapping defined, do char2char conversion. 120cdf0e10cSrcweir return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions); 121cdf0e10cSrcweir 122cdf0e10cSrcweir rtl_uString * newStr = x_rtl_uString_new_WithLength( nLength * 2 ); // defined in x_rtl_ustring.h 123cdf0e10cSrcweir sal_Int32 currPos = 0, count = 0; 124cdf0e10cSrcweir while (currPos < nLength) { 125cdf0e10cSrcweir sal_Int32 len = nLength - currPos; 126cdf0e10cSrcweir sal_Bool found = sal_False; 127cdf0e10cSrcweir if (len > maxLen) 128cdf0e10cSrcweir len = maxLen; 129cdf0e10cSrcweir for (; len > 0 && ! found; len--) { 130cdf0e10cSrcweir OUString word = aText.copy(nStartPos + currPos, len); 131cdf0e10cSrcweir sal_Int32 current = 0; 132cdf0e10cSrcweir // user dictionary 133cdf0e10cSrcweir if (xCDL.is()) { 134cdf0e10cSrcweir Sequence < OUString > conversions; 135cdf0e10cSrcweir try { 136cdf0e10cSrcweir conversions = xCDL->queryConversions(word, 0, len, 137cdf0e10cSrcweir aLocale, ConversionDictionaryType::SCHINESE_TCHINESE, 138cdf0e10cSrcweir /*toSChinese ?*/ ConversionDirection_FROM_LEFT /*: ConversionDirection_FROM_RIGHT*/, 139cdf0e10cSrcweir nConversionOptions); 140cdf0e10cSrcweir } 141cdf0e10cSrcweir catch ( NoSupportException & ) { 142cdf0e10cSrcweir // clear reference (when there is no user dictionary) in order 143cdf0e10cSrcweir // to not always have to catch this exception again 144cdf0e10cSrcweir // in further calls. (save time) 145cdf0e10cSrcweir xCDL = 0; 146cdf0e10cSrcweir } 147cdf0e10cSrcweir catch (...) { 148cdf0e10cSrcweir // catch all other exceptions to allow 149cdf0e10cSrcweir // querying the system dictionary in the next line 150cdf0e10cSrcweir } 151cdf0e10cSrcweir if (conversions.getLength() > 0) { 152cdf0e10cSrcweir if (offset.getLength() > 0) { 153cdf0e10cSrcweir if (word.getLength() != conversions[0].getLength()) 154cdf0e10cSrcweir one2one=sal_False; 155cdf0e10cSrcweir while (current < conversions[0].getLength()) { 156cdf0e10cSrcweir offset[count] = nStartPos + currPos + (current * 157cdf0e10cSrcweir word.getLength() / conversions[0].getLength()); 158cdf0e10cSrcweir newStr->buffer[count++] = conversions[0][current++]; 159cdf0e10cSrcweir } 160cdf0e10cSrcweir // offset[count-1] = nStartPos + currPos + word.getLength() - 1; 161cdf0e10cSrcweir } else { 162cdf0e10cSrcweir while (current < conversions[0].getLength()) 163cdf0e10cSrcweir newStr->buffer[count++] = conversions[0][current++]; 164cdf0e10cSrcweir } 165cdf0e10cSrcweir currPos += word.getLength(); 166cdf0e10cSrcweir found = sal_True; 167cdf0e10cSrcweir } 168cdf0e10cSrcweir } 169cdf0e10cSrcweir 170cdf0e10cSrcweir if (!found && index[len+1] - index[len] > 0) { 171cdf0e10cSrcweir sal_Int32 bottom = (sal_Int32) index[len]; 172cdf0e10cSrcweir sal_Int32 top = (sal_Int32) index[len+1] - 1; 173cdf0e10cSrcweir 174cdf0e10cSrcweir while (bottom <= top && !found) { 175cdf0e10cSrcweir current = (top + bottom) / 2; 176cdf0e10cSrcweir const sal_Int32 result = word.compareTo(wordData + entry[current]); 177cdf0e10cSrcweir if (result < 0) 178cdf0e10cSrcweir top = current - 1; 179cdf0e10cSrcweir else if (result > 0) 180cdf0e10cSrcweir bottom = current + 1; 181cdf0e10cSrcweir else { 182cdf0e10cSrcweir if (toSChinese) // Traditionary/Simplified conversion, 183cdf0e10cSrcweir for (current = entry[current]-1; current > 0 && wordData[current-1]; current--) ; 184cdf0e10cSrcweir else // Simplified/Traditionary conversion, forwards search for next word 185cdf0e10cSrcweir current = entry[current] + word.getLength() + 1; 186cdf0e10cSrcweir sal_Int32 start=current; 187cdf0e10cSrcweir if (offset.getLength() > 0) { 188cdf0e10cSrcweir if (word.getLength() != OUString(&wordData[current]).getLength()) 189cdf0e10cSrcweir one2one=sal_False; 190cdf0e10cSrcweir sal_Int32 convertedLength=OUString(&wordData[current]).getLength(); 191cdf0e10cSrcweir while (wordData[current]) { 192cdf0e10cSrcweir offset[count]=nStartPos + currPos + ((current-start) * 193cdf0e10cSrcweir word.getLength() / convertedLength); 194cdf0e10cSrcweir newStr->buffer[count++] = wordData[current++]; 195cdf0e10cSrcweir } 196cdf0e10cSrcweir // offset[count-1]=nStartPos + currPos + word.getLength() - 1; 197cdf0e10cSrcweir } else { 198cdf0e10cSrcweir while (wordData[current]) 199cdf0e10cSrcweir newStr->buffer[count++] = wordData[current++]; 200cdf0e10cSrcweir } 201cdf0e10cSrcweir currPos += word.getLength(); 202cdf0e10cSrcweir found = sal_True; 203cdf0e10cSrcweir } 204cdf0e10cSrcweir } 205cdf0e10cSrcweir } 206cdf0e10cSrcweir } 207cdf0e10cSrcweir if (!found) { 208cdf0e10cSrcweir if (offset.getLength() > 0) 209cdf0e10cSrcweir offset[count]=nStartPos+currPos; 210cdf0e10cSrcweir newStr->buffer[count++] = 211cdf0e10cSrcweir getOneCharConversion(aText[nStartPos+currPos], charData, charIndex); 212cdf0e10cSrcweir currPos++; 213cdf0e10cSrcweir } 214cdf0e10cSrcweir } 215cdf0e10cSrcweir if (offset.getLength() > 0) 216cdf0e10cSrcweir offset.realloc(one2one ? 0 : count); 217cdf0e10cSrcweir return OUString( newStr->buffer, count); 218cdf0e10cSrcweir } 219cdf0e10cSrcweir 220cdf0e10cSrcweir TextConversionResult SAL_CALL 221cdf0e10cSrcweir TextConversion_zh::getConversions( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, 222cdf0e10cSrcweir const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions) 223cdf0e10cSrcweir throw( RuntimeException, IllegalArgumentException, NoSupportException ) 224cdf0e10cSrcweir { 225cdf0e10cSrcweir TextConversionResult result; 226cdf0e10cSrcweir 227cdf0e10cSrcweir result.Candidates.realloc(1); 228cdf0e10cSrcweir result.Candidates[0] = getConversion( aText, nStartPos, nLength, rLocale, nConversionType, nConversionOptions); 229cdf0e10cSrcweir result.Boundary.startPos = nStartPos; 230cdf0e10cSrcweir result.Boundary.endPos = nStartPos + nLength; 231cdf0e10cSrcweir 232cdf0e10cSrcweir return result; 233cdf0e10cSrcweir } 234cdf0e10cSrcweir 235cdf0e10cSrcweir OUString SAL_CALL 236cdf0e10cSrcweir TextConversion_zh::getConversion( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, 237cdf0e10cSrcweir const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions) 238cdf0e10cSrcweir throw( RuntimeException, IllegalArgumentException, NoSupportException ) 239cdf0e10cSrcweir { 240cdf0e10cSrcweir if (rLocale.Language.equalsAscii("zh") && 241cdf0e10cSrcweir ( nConversionType == TextConversionType::TO_SCHINESE || 242cdf0e10cSrcweir nConversionType == TextConversionType::TO_TCHINESE) ) { 243cdf0e10cSrcweir 244cdf0e10cSrcweir aLocale=rLocale; 245cdf0e10cSrcweir sal_Bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE; 246cdf0e10cSrcweir 247cdf0e10cSrcweir if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER) 248cdf0e10cSrcweir // char to char dictionary 249cdf0e10cSrcweir return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions); 250cdf0e10cSrcweir else { 251cdf0e10cSrcweir Sequence <sal_Int32> offset; 252cdf0e10cSrcweir // word to word dictionary 253cdf0e10cSrcweir return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset); 254cdf0e10cSrcweir } 255cdf0e10cSrcweir } else 256cdf0e10cSrcweir throw NoSupportException(); // Conversion type is not supported in this service. 257cdf0e10cSrcweir } 258cdf0e10cSrcweir 259cdf0e10cSrcweir OUString SAL_CALL 260cdf0e10cSrcweir TextConversion_zh::getConversionWithOffset( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, 261cdf0e10cSrcweir const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset) 262cdf0e10cSrcweir throw( RuntimeException, IllegalArgumentException, NoSupportException ) 263cdf0e10cSrcweir { 264cdf0e10cSrcweir if (rLocale.Language.equalsAscii("zh") && 265cdf0e10cSrcweir ( nConversionType == TextConversionType::TO_SCHINESE || 266cdf0e10cSrcweir nConversionType == TextConversionType::TO_TCHINESE) ) { 267cdf0e10cSrcweir 268cdf0e10cSrcweir aLocale=rLocale; 269cdf0e10cSrcweir sal_Bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE; 270cdf0e10cSrcweir 271cdf0e10cSrcweir if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER) { 272cdf0e10cSrcweir offset.realloc(0); 273cdf0e10cSrcweir // char to char dictionary 274cdf0e10cSrcweir return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions); 275cdf0e10cSrcweir } else { 276cdf0e10cSrcweir if (offset.getLength() < 2*nLength) 277cdf0e10cSrcweir offset.realloc(2*nLength); 278cdf0e10cSrcweir // word to word dictionary 279cdf0e10cSrcweir return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset); 280cdf0e10cSrcweir } 281cdf0e10cSrcweir } else 282cdf0e10cSrcweir throw NoSupportException(); // Conversion type is not supported in this service. 283cdf0e10cSrcweir } 284cdf0e10cSrcweir 285cdf0e10cSrcweir sal_Bool SAL_CALL 286cdf0e10cSrcweir TextConversion_zh::interactiveConversion( const Locale& /*rLocale*/, sal_Int16 /*nTextConversionType*/, sal_Int32 /*nTextConversionOptions*/ ) 287cdf0e10cSrcweir throw( RuntimeException, IllegalArgumentException, NoSupportException ) 288cdf0e10cSrcweir { 289cdf0e10cSrcweir return sal_False; 290cdf0e10cSrcweir } 291cdf0e10cSrcweir 292cdf0e10cSrcweir } } } } 293