1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_vcl.hxx" 30*cdf0e10cSrcweir #include "salcvt.hxx" 31*cdf0e10cSrcweir 32*cdf0e10cSrcweir 33*cdf0e10cSrcweir SalConverterCache::SalConverterCache() 34*cdf0e10cSrcweir { 35*cdf0e10cSrcweir } 36*cdf0e10cSrcweir 37*cdf0e10cSrcweir SalConverterCache* 38*cdf0e10cSrcweir SalConverterCache::GetInstance () 39*cdf0e10cSrcweir { 40*cdf0e10cSrcweir static SalConverterCache* pCvt = NULL; 41*cdf0e10cSrcweir if (pCvt == NULL) 42*cdf0e10cSrcweir pCvt = new SalConverterCache; 43*cdf0e10cSrcweir 44*cdf0e10cSrcweir return pCvt; 45*cdf0e10cSrcweir } 46*cdf0e10cSrcweir 47*cdf0e10cSrcweir SalConverterCache::~SalConverterCache() 48*cdf0e10cSrcweir { 49*cdf0e10cSrcweir } 50*cdf0e10cSrcweir 51*cdf0e10cSrcweir // ---> FIXME 52*cdf0e10cSrcweir #include <stdio.h> 53*cdf0e10cSrcweir // <--- 54*cdf0e10cSrcweir 55*cdf0e10cSrcweir rtl_UnicodeToTextConverter 56*cdf0e10cSrcweir SalConverterCache::GetU2TConverter( rtl_TextEncoding nEncoding ) 57*cdf0e10cSrcweir { 58*cdf0e10cSrcweir if( rtl_isOctetTextEncoding( nEncoding ) ) 59*cdf0e10cSrcweir { 60*cdf0e10cSrcweir ConverterT& rConverter( m_aConverters[ nEncoding ] ); 61*cdf0e10cSrcweir if ( rConverter.mpU2T == NULL ) 62*cdf0e10cSrcweir { 63*cdf0e10cSrcweir rConverter.mpU2T = 64*cdf0e10cSrcweir rtl_createUnicodeToTextConverter( nEncoding ); 65*cdf0e10cSrcweir // ---> FIXME 66*cdf0e10cSrcweir if ( rConverter.mpU2T == NULL ) 67*cdf0e10cSrcweir fprintf( stderr, "failed to create Unicode -> %i converter\n", nEncoding); 68*cdf0e10cSrcweir // <--- 69*cdf0e10cSrcweir } 70*cdf0e10cSrcweir return rConverter.mpU2T; 71*cdf0e10cSrcweir } 72*cdf0e10cSrcweir return NULL; 73*cdf0e10cSrcweir } 74*cdf0e10cSrcweir 75*cdf0e10cSrcweir rtl_TextToUnicodeConverter 76*cdf0e10cSrcweir SalConverterCache::GetT2UConverter( rtl_TextEncoding nEncoding ) 77*cdf0e10cSrcweir { 78*cdf0e10cSrcweir if( rtl_isOctetTextEncoding( nEncoding ) ) 79*cdf0e10cSrcweir { 80*cdf0e10cSrcweir ConverterT& rConverter( m_aConverters[ nEncoding ] ); 81*cdf0e10cSrcweir if ( rConverter.mpT2U == NULL ) 82*cdf0e10cSrcweir { 83*cdf0e10cSrcweir rConverter.mpT2U = 84*cdf0e10cSrcweir rtl_createTextToUnicodeConverter( nEncoding ); 85*cdf0e10cSrcweir // ---> FIXME 86*cdf0e10cSrcweir if ( rConverter.mpT2U == NULL ) 87*cdf0e10cSrcweir fprintf( stderr, "failed to create %i -> Unicode converter\n", nEncoding ); 88*cdf0e10cSrcweir // <--- 89*cdf0e10cSrcweir } 90*cdf0e10cSrcweir return rConverter.mpT2U; 91*cdf0e10cSrcweir } 92*cdf0e10cSrcweir return NULL; 93*cdf0e10cSrcweir } 94*cdf0e10cSrcweir 95*cdf0e10cSrcweir Bool 96*cdf0e10cSrcweir SalConverterCache::IsSingleByteEncoding( rtl_TextEncoding nEncoding ) 97*cdf0e10cSrcweir { 98*cdf0e10cSrcweir if( rtl_isOctetTextEncoding( nEncoding ) ) 99*cdf0e10cSrcweir { 100*cdf0e10cSrcweir ConverterT& rConverter( m_aConverters[ nEncoding ] ); 101*cdf0e10cSrcweir if ( ! rConverter.mbValid ) 102*cdf0e10cSrcweir { 103*cdf0e10cSrcweir rConverter.mbValid = True; 104*cdf0e10cSrcweir 105*cdf0e10cSrcweir rtl_TextEncodingInfo aTextEncInfo; 106*cdf0e10cSrcweir aTextEncInfo.StructSize = sizeof( aTextEncInfo ); 107*cdf0e10cSrcweir rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ); 108*cdf0e10cSrcweir 109*cdf0e10cSrcweir if ( aTextEncInfo.MinimumCharSize == aTextEncInfo.MaximumCharSize 110*cdf0e10cSrcweir && aTextEncInfo.MinimumCharSize == 1) 111*cdf0e10cSrcweir rConverter.mbSingleByteEncoding = True; 112*cdf0e10cSrcweir else 113*cdf0e10cSrcweir rConverter.mbSingleByteEncoding = False; 114*cdf0e10cSrcweir } 115*cdf0e10cSrcweir 116*cdf0e10cSrcweir return rConverter.mbSingleByteEncoding; 117*cdf0e10cSrcweir } 118*cdf0e10cSrcweir return False; 119*cdf0e10cSrcweir } 120*cdf0e10cSrcweir 121*cdf0e10cSrcweir // check whether the character set nEncoding contains the unicode 122*cdf0e10cSrcweir // code point nChar. This list has been compiled from the according 123*cdf0e10cSrcweir // ttmap files in /usr/openwin/lib/X11/fonts/TrueType/ttmap/ 124*cdf0e10cSrcweir Bool 125*cdf0e10cSrcweir SalConverterCache::EncodingHasChar( rtl_TextEncoding nEncoding, 126*cdf0e10cSrcweir sal_Unicode nChar ) 127*cdf0e10cSrcweir { 128*cdf0e10cSrcweir Bool bMatch = False; 129*cdf0e10cSrcweir 130*cdf0e10cSrcweir switch ( nEncoding ) 131*cdf0e10cSrcweir { 132*cdf0e10cSrcweir case RTL_TEXTENCODING_DONTKNOW: 133*cdf0e10cSrcweir bMatch = False; 134*cdf0e10cSrcweir break; 135*cdf0e10cSrcweir 136*cdf0e10cSrcweir case RTL_TEXTENCODING_MS_1252: 137*cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_1: 138*cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_15: 139*cdf0e10cSrcweir // handle iso8859-15 and iso8859-1 the same (and both with euro) 140*cdf0e10cSrcweir // handle them also like ms1252 141*cdf0e10cSrcweir // this is due to the fact that so many X fonts say they are iso8859-1 142*cdf0e10cSrcweir // but have the other glyphs anyway because they are really ms1252 143*cdf0e10cSrcweir bMatch = ( /*nChar >= 0x0000 &&*/ nChar <= 0x00ff ) 144*cdf0e10cSrcweir || ( nChar == 0x20ac ) 145*cdf0e10cSrcweir || ( nChar == 0x201a ) 146*cdf0e10cSrcweir || ( nChar == 0x0192 ) 147*cdf0e10cSrcweir || ( nChar == 0x201e ) 148*cdf0e10cSrcweir || ( nChar == 0x2026 ) 149*cdf0e10cSrcweir || ( nChar == 0x2020 ) 150*cdf0e10cSrcweir || ( nChar == 0x2021 ) 151*cdf0e10cSrcweir || ( nChar == 0x02c6 ) 152*cdf0e10cSrcweir || ( nChar == 0x2030 ) 153*cdf0e10cSrcweir || ( nChar == 0x0160 ) 154*cdf0e10cSrcweir || ( nChar == 0x2039 ) 155*cdf0e10cSrcweir || ( nChar == 0x0152 ) 156*cdf0e10cSrcweir || ( nChar == 0x017d ) 157*cdf0e10cSrcweir || ( nChar == 0x2018 ) 158*cdf0e10cSrcweir || ( nChar == 0x2019 ) 159*cdf0e10cSrcweir || ( nChar == 0x201c ) 160*cdf0e10cSrcweir || ( nChar == 0x201d ) 161*cdf0e10cSrcweir || ( nChar == 0x2022 ) 162*cdf0e10cSrcweir || ( nChar == 0x2013 ) 163*cdf0e10cSrcweir || ( nChar == 0x2014 ) 164*cdf0e10cSrcweir || ( nChar == 0x02dc ) 165*cdf0e10cSrcweir || ( nChar == 0x2122 ) 166*cdf0e10cSrcweir || ( nChar == 0x0161 ) 167*cdf0e10cSrcweir || ( nChar == 0x203a ) 168*cdf0e10cSrcweir || ( nChar == 0x0153 ) 169*cdf0e10cSrcweir || ( nChar == 0x017e ) 170*cdf0e10cSrcweir || ( nChar == 0x0178 ) 171*cdf0e10cSrcweir ; 172*cdf0e10cSrcweir break; 173*cdf0e10cSrcweir 174*cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_2: 175*cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 176*cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x017e ) 177*cdf0e10cSrcweir || ( nChar >= 0x02c7 && nChar <= 0x02dd ); 178*cdf0e10cSrcweir break; 179*cdf0e10cSrcweir 180*cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_4: 181*cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 182*cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x017e ) 183*cdf0e10cSrcweir || ( nChar >= 0x02c7 && nChar <= 0x02db ); 184*cdf0e10cSrcweir break; 185*cdf0e10cSrcweir 186*cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_5: 187*cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 188*cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x00ad ) 189*cdf0e10cSrcweir || ( nChar >= 0x0401 && nChar <= 0x045f ) 190*cdf0e10cSrcweir || ( nChar == 0x2116 ); 191*cdf0e10cSrcweir break; 192*cdf0e10cSrcweir 193*cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_6: 194*cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 195*cdf0e10cSrcweir || ( nChar >= 0x0600 && nChar <= 0x06ff ) 196*cdf0e10cSrcweir || ( nChar >= 0xfb50 && nChar <= 0xfffe ); 197*cdf0e10cSrcweir break; 198*cdf0e10cSrcweir 199*cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_7: 200*cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 201*cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x00bd ) 202*cdf0e10cSrcweir || ( nChar == 0x02bd ) 203*cdf0e10cSrcweir || ( nChar >= 0x0384 && nChar <= 0x03ce ) 204*cdf0e10cSrcweir || ( nChar >= 0x2014 && nChar <= 0x2019 ); 205*cdf0e10cSrcweir break; 206*cdf0e10cSrcweir 207*cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_8: 208*cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 209*cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x00f7 ) 210*cdf0e10cSrcweir || ( nChar >= 0x05d0 && nChar <= 0x05ea ) 211*cdf0e10cSrcweir || ( nChar == 0x2017 ); 212*cdf0e10cSrcweir break; 213*cdf0e10cSrcweir 214*cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_9: 215*cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 216*cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x015f ); 217*cdf0e10cSrcweir break; 218*cdf0e10cSrcweir 219*cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_13: 220*cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 221*cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x017e ) 222*cdf0e10cSrcweir || ( nChar >= 0x2019 && nChar <= 0x201e ); 223*cdf0e10cSrcweir break; 224*cdf0e10cSrcweir 225*cdf0e10cSrcweir /* real case for RTL_TEXTENCODING_ISO_8859_15 226*cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_15: 227*cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 228*cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x00ff ) 229*cdf0e10cSrcweir || ( nChar >= 0x0152 && nChar <= 0x017e ) 230*cdf0e10cSrcweir || ( nChar == 0x20ac ); 231*cdf0e10cSrcweir break; 232*cdf0e10cSrcweir */ 233*cdf0e10cSrcweir 234*cdf0e10cSrcweir case RTL_TEXTENCODING_JIS_X_0201: 235*cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 236*cdf0e10cSrcweir || ( nChar >= 0xff61 && nChar <= 0xff9f ); 237*cdf0e10cSrcweir break; 238*cdf0e10cSrcweir 239*cdf0e10cSrcweir case RTL_TEXTENCODING_MS_1251: 240*cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 241*cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x00bb ) 242*cdf0e10cSrcweir || ( nChar >= 0x0401 && nChar <= 0x045f ) 243*cdf0e10cSrcweir || ( nChar >= 0x0490 && nChar <= 0x0491 ) 244*cdf0e10cSrcweir || ( nChar >= 0x2013 && nChar <= 0x203a ) 245*cdf0e10cSrcweir || ( nChar >= 0x2116 && nChar <= 0x2122 ); 246*cdf0e10cSrcweir break; 247*cdf0e10cSrcweir 248*cdf0e10cSrcweir case RTL_TEXTENCODING_KOI8_R: 249*cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 250*cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x00b7 ) 251*cdf0e10cSrcweir || ( nChar == 0x00f7 ) 252*cdf0e10cSrcweir || ( nChar >= 0x0401 && nChar <= 0x0451 ) 253*cdf0e10cSrcweir || ( nChar >= 0x2219 && nChar <= 0x221a ) 254*cdf0e10cSrcweir || ( nChar >= 0x2248 && nChar <= 0x2265 ) 255*cdf0e10cSrcweir || ( nChar >= 0x2320 && nChar <= 0x2321 ) 256*cdf0e10cSrcweir || ( nChar >= 0x2500 && nChar <= 0x25a0 ); 257*cdf0e10cSrcweir break; 258*cdf0e10cSrcweir 259*cdf0e10cSrcweir case RTL_TEXTENCODING_UNICODE: 260*cdf0e10cSrcweir bMatch = True; 261*cdf0e10cSrcweir break; 262*cdf0e10cSrcweir 263*cdf0e10cSrcweir case RTL_TEXTENCODING_EUC_KR: 264*cdf0e10cSrcweir case RTL_TEXTENCODING_BIG5: 265*cdf0e10cSrcweir case RTL_TEXTENCODING_GBK: 266*cdf0e10cSrcweir case RTL_TEXTENCODING_GB_2312: 267*cdf0e10cSrcweir case RTL_TEXTENCODING_MS_1361: 268*cdf0e10cSrcweir case RTL_TEXTENCODING_JIS_X_0208: 269*cdf0e10cSrcweir 270*cdf0e10cSrcweir // XXX Big5 and Korean EUC contain Ascii chars, but Solaris 271*cdf0e10cSrcweir // *-big5-1 and *-ksc5601.1992-3 fonts dont, in general CJK fonts 272*cdf0e10cSrcweir // are monospaced, so dont trust them for latin chars 273*cdf0e10cSrcweir if (nChar <= 0xFF) 274*cdf0e10cSrcweir { 275*cdf0e10cSrcweir bMatch = False; 276*cdf0e10cSrcweir break; 277*cdf0e10cSrcweir } 278*cdf0e10cSrcweir 279*cdf0e10cSrcweir default: 280*cdf0e10cSrcweir // XXX really convert the unicode char into the encoding 281*cdf0e10cSrcweir // and check for conversion errors, this is expensive ! 282*cdf0e10cSrcweir rtl_UnicodeToTextConverter aConverter; 283*cdf0e10cSrcweir rtl_UnicodeToTextContext aContext; 284*cdf0e10cSrcweir 285*cdf0e10cSrcweir aConverter = GetU2TConverter(nEncoding); 286*cdf0e10cSrcweir aContext = rtl_createUnicodeToTextContext( aConverter ); 287*cdf0e10cSrcweir 288*cdf0e10cSrcweir // ---> FIXME 289*cdf0e10cSrcweir if ( aConverter == NULL ) 290*cdf0e10cSrcweir return False; 291*cdf0e10cSrcweir // <--- 292*cdf0e10cSrcweir 293*cdf0e10cSrcweir sal_Char pConversionBuffer[ 32 ]; 294*cdf0e10cSrcweir sal_uInt32 nConversionInfo; 295*cdf0e10cSrcweir sal_Size nConvertedChars; 296*cdf0e10cSrcweir sal_Size nSize; 297*cdf0e10cSrcweir 298*cdf0e10cSrcweir nSize = rtl_convertUnicodeToText( aConverter, aContext, 299*cdf0e10cSrcweir &nChar, 1, pConversionBuffer, sizeof(pConversionBuffer), 300*cdf0e10cSrcweir RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR 301*cdf0e10cSrcweir | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR, 302*cdf0e10cSrcweir &nConversionInfo, &nConvertedChars ); 303*cdf0e10cSrcweir 304*cdf0e10cSrcweir rtl_destroyUnicodeToTextContext( aConverter, aContext ); 305*cdf0e10cSrcweir 306*cdf0e10cSrcweir bMatch = (nConvertedChars == 1) 307*cdf0e10cSrcweir && (nSize == 1 || nSize == 2) // XXX Fix me this is a hack 308*cdf0e10cSrcweir && ((nConversionInfo & RTL_UNICODETOTEXT_INFO_ERROR) == 0); 309*cdf0e10cSrcweir break; 310*cdf0e10cSrcweir } 311*cdf0e10cSrcweir 312*cdf0e10cSrcweir return bMatch; 313*cdf0e10cSrcweir } 314*cdf0e10cSrcweir 315*cdf0e10cSrcweir // wrapper for rtl_convertUnicodeToText that handles the usual cases for 316*cdf0e10cSrcweir // textconversion in drawtext and gettextwidth routines 317*cdf0e10cSrcweir sal_Size 318*cdf0e10cSrcweir SalConverterCache::ConvertStringUTF16( const sal_Unicode *pText, int nTextLen, 319*cdf0e10cSrcweir sal_Char *pBuffer, sal_Size nBufferSize, rtl_TextEncoding nEncoding ) 320*cdf0e10cSrcweir { 321*cdf0e10cSrcweir rtl_UnicodeToTextConverter aConverter = GetU2TConverter(nEncoding); 322*cdf0e10cSrcweir 323*cdf0e10cSrcweir const sal_uInt32 nCvtFlags = 324*cdf0e10cSrcweir RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE 325*cdf0e10cSrcweir | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK 326*cdf0e10cSrcweir | RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK ; 327*cdf0e10cSrcweir sal_uInt32 nCvtInfo; 328*cdf0e10cSrcweir sal_Size nCvtChars; 329*cdf0e10cSrcweir 330*cdf0e10cSrcweir rtl_UnicodeToTextContext aContext = 331*cdf0e10cSrcweir rtl_createUnicodeToTextContext( aConverter ); 332*cdf0e10cSrcweir 333*cdf0e10cSrcweir sal_Size nSize = rtl_convertUnicodeToText( aConverter, aContext, 334*cdf0e10cSrcweir pText, nTextLen, pBuffer, nBufferSize, 335*cdf0e10cSrcweir nCvtFlags, &nCvtInfo, &nCvtChars ); 336*cdf0e10cSrcweir 337*cdf0e10cSrcweir rtl_destroyUnicodeToTextContext( aConverter, aContext ); 338*cdf0e10cSrcweir 339*cdf0e10cSrcweir return nSize; 340*cdf0e10cSrcweir } 341*cdf0e10cSrcweir 342