1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_vcl.hxx" 30 #include "salcvt.hxx" 31 32 33 SalConverterCache::SalConverterCache() 34 { 35 } 36 37 SalConverterCache* 38 SalConverterCache::GetInstance () 39 { 40 static SalConverterCache* pCvt = NULL; 41 if (pCvt == NULL) 42 pCvt = new SalConverterCache; 43 44 return pCvt; 45 } 46 47 SalConverterCache::~SalConverterCache() 48 { 49 } 50 51 // ---> FIXME 52 #include <stdio.h> 53 // <--- 54 55 rtl_UnicodeToTextConverter 56 SalConverterCache::GetU2TConverter( rtl_TextEncoding nEncoding ) 57 { 58 if( rtl_isOctetTextEncoding( nEncoding ) ) 59 { 60 ConverterT& rConverter( m_aConverters[ nEncoding ] ); 61 if ( rConverter.mpU2T == NULL ) 62 { 63 rConverter.mpU2T = 64 rtl_createUnicodeToTextConverter( nEncoding ); 65 // ---> FIXME 66 if ( rConverter.mpU2T == NULL ) 67 fprintf( stderr, "failed to create Unicode -> %i converter\n", nEncoding); 68 // <--- 69 } 70 return rConverter.mpU2T; 71 } 72 return NULL; 73 } 74 75 rtl_TextToUnicodeConverter 76 SalConverterCache::GetT2UConverter( rtl_TextEncoding nEncoding ) 77 { 78 if( rtl_isOctetTextEncoding( nEncoding ) ) 79 { 80 ConverterT& rConverter( m_aConverters[ nEncoding ] ); 81 if ( rConverter.mpT2U == NULL ) 82 { 83 rConverter.mpT2U = 84 rtl_createTextToUnicodeConverter( nEncoding ); 85 // ---> FIXME 86 if ( rConverter.mpT2U == NULL ) 87 fprintf( stderr, "failed to create %i -> Unicode converter\n", nEncoding ); 88 // <--- 89 } 90 return rConverter.mpT2U; 91 } 92 return NULL; 93 } 94 95 Bool 96 SalConverterCache::IsSingleByteEncoding( rtl_TextEncoding nEncoding ) 97 { 98 if( rtl_isOctetTextEncoding( nEncoding ) ) 99 { 100 ConverterT& rConverter( m_aConverters[ nEncoding ] ); 101 if ( ! rConverter.mbValid ) 102 { 103 rConverter.mbValid = True; 104 105 rtl_TextEncodingInfo aTextEncInfo; 106 aTextEncInfo.StructSize = sizeof( aTextEncInfo ); 107 rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ); 108 109 if ( aTextEncInfo.MinimumCharSize == aTextEncInfo.MaximumCharSize 110 && aTextEncInfo.MinimumCharSize == 1) 111 rConverter.mbSingleByteEncoding = True; 112 else 113 rConverter.mbSingleByteEncoding = False; 114 } 115 116 return rConverter.mbSingleByteEncoding; 117 } 118 return False; 119 } 120 121 // check whether the character set nEncoding contains the unicode 122 // code point nChar. This list has been compiled from the according 123 // ttmap files in /usr/openwin/lib/X11/fonts/TrueType/ttmap/ 124 Bool 125 SalConverterCache::EncodingHasChar( rtl_TextEncoding nEncoding, 126 sal_Unicode nChar ) 127 { 128 Bool bMatch = False; 129 130 switch ( nEncoding ) 131 { 132 case RTL_TEXTENCODING_DONTKNOW: 133 bMatch = False; 134 break; 135 136 case RTL_TEXTENCODING_MS_1252: 137 case RTL_TEXTENCODING_ISO_8859_1: 138 case RTL_TEXTENCODING_ISO_8859_15: 139 // handle iso8859-15 and iso8859-1 the same (and both with euro) 140 // handle them also like ms1252 141 // this is due to the fact that so many X fonts say they are iso8859-1 142 // but have the other glyphs anyway because they are really ms1252 143 bMatch = ( /*nChar >= 0x0000 &&*/ nChar <= 0x00ff ) 144 || ( nChar == 0x20ac ) 145 || ( nChar == 0x201a ) 146 || ( nChar == 0x0192 ) 147 || ( nChar == 0x201e ) 148 || ( nChar == 0x2026 ) 149 || ( nChar == 0x2020 ) 150 || ( nChar == 0x2021 ) 151 || ( nChar == 0x02c6 ) 152 || ( nChar == 0x2030 ) 153 || ( nChar == 0x0160 ) 154 || ( nChar == 0x2039 ) 155 || ( nChar == 0x0152 ) 156 || ( nChar == 0x017d ) 157 || ( nChar == 0x2018 ) 158 || ( nChar == 0x2019 ) 159 || ( nChar == 0x201c ) 160 || ( nChar == 0x201d ) 161 || ( nChar == 0x2022 ) 162 || ( nChar == 0x2013 ) 163 || ( nChar == 0x2014 ) 164 || ( nChar == 0x02dc ) 165 || ( nChar == 0x2122 ) 166 || ( nChar == 0x0161 ) 167 || ( nChar == 0x203a ) 168 || ( nChar == 0x0153 ) 169 || ( nChar == 0x017e ) 170 || ( nChar == 0x0178 ) 171 ; 172 break; 173 174 case RTL_TEXTENCODING_ISO_8859_2: 175 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 176 || ( nChar >= 0x00a0 && nChar <= 0x017e ) 177 || ( nChar >= 0x02c7 && nChar <= 0x02dd ); 178 break; 179 180 case RTL_TEXTENCODING_ISO_8859_4: 181 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 182 || ( nChar >= 0x00a0 && nChar <= 0x017e ) 183 || ( nChar >= 0x02c7 && nChar <= 0x02db ); 184 break; 185 186 case RTL_TEXTENCODING_ISO_8859_5: 187 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 188 || ( nChar >= 0x00a0 && nChar <= 0x00ad ) 189 || ( nChar >= 0x0401 && nChar <= 0x045f ) 190 || ( nChar == 0x2116 ); 191 break; 192 193 case RTL_TEXTENCODING_ISO_8859_6: 194 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 195 || ( nChar >= 0x0600 && nChar <= 0x06ff ) 196 || ( nChar >= 0xfb50 && nChar <= 0xfffe ); 197 break; 198 199 case RTL_TEXTENCODING_ISO_8859_7: 200 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 201 || ( nChar >= 0x00a0 && nChar <= 0x00bd ) 202 || ( nChar == 0x02bd ) 203 || ( nChar >= 0x0384 && nChar <= 0x03ce ) 204 || ( nChar >= 0x2014 && nChar <= 0x2019 ); 205 break; 206 207 case RTL_TEXTENCODING_ISO_8859_8: 208 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 209 || ( nChar >= 0x00a0 && nChar <= 0x00f7 ) 210 || ( nChar >= 0x05d0 && nChar <= 0x05ea ) 211 || ( nChar == 0x2017 ); 212 break; 213 214 case RTL_TEXTENCODING_ISO_8859_9: 215 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 216 || ( nChar >= 0x00a0 && nChar <= 0x015f ); 217 break; 218 219 case RTL_TEXTENCODING_ISO_8859_13: 220 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 221 || ( nChar >= 0x00a0 && nChar <= 0x017e ) 222 || ( nChar >= 0x2019 && nChar <= 0x201e ); 223 break; 224 225 /* real case for RTL_TEXTENCODING_ISO_8859_15 226 case RTL_TEXTENCODING_ISO_8859_15: 227 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 228 || ( nChar >= 0x00a0 && nChar <= 0x00ff ) 229 || ( nChar >= 0x0152 && nChar <= 0x017e ) 230 || ( nChar == 0x20ac ); 231 break; 232 */ 233 234 case RTL_TEXTENCODING_JIS_X_0201: 235 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 236 || ( nChar >= 0xff61 && nChar <= 0xff9f ); 237 break; 238 239 case RTL_TEXTENCODING_MS_1251: 240 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 241 || ( nChar >= 0x00a0 && nChar <= 0x00bb ) 242 || ( nChar >= 0x0401 && nChar <= 0x045f ) 243 || ( nChar >= 0x0490 && nChar <= 0x0491 ) 244 || ( nChar >= 0x2013 && nChar <= 0x203a ) 245 || ( nChar >= 0x2116 && nChar <= 0x2122 ); 246 break; 247 248 case RTL_TEXTENCODING_KOI8_R: 249 bMatch = ( nChar >= 0x0020 && nChar <= 0x007e ) 250 || ( nChar >= 0x00a0 && nChar <= 0x00b7 ) 251 || ( nChar == 0x00f7 ) 252 || ( nChar >= 0x0401 && nChar <= 0x0451 ) 253 || ( nChar >= 0x2219 && nChar <= 0x221a ) 254 || ( nChar >= 0x2248 && nChar <= 0x2265 ) 255 || ( nChar >= 0x2320 && nChar <= 0x2321 ) 256 || ( nChar >= 0x2500 && nChar <= 0x25a0 ); 257 break; 258 259 case RTL_TEXTENCODING_UNICODE: 260 bMatch = True; 261 break; 262 263 case RTL_TEXTENCODING_EUC_KR: 264 case RTL_TEXTENCODING_BIG5: 265 case RTL_TEXTENCODING_GBK: 266 case RTL_TEXTENCODING_GB_2312: 267 case RTL_TEXTENCODING_MS_1361: 268 case RTL_TEXTENCODING_JIS_X_0208: 269 270 // XXX Big5 and Korean EUC contain Ascii chars, but Solaris 271 // *-big5-1 and *-ksc5601.1992-3 fonts dont, in general CJK fonts 272 // are monospaced, so dont trust them for latin chars 273 if (nChar <= 0xFF) 274 { 275 bMatch = False; 276 break; 277 } 278 279 default: 280 // XXX really convert the unicode char into the encoding 281 // and check for conversion errors, this is expensive ! 282 rtl_UnicodeToTextConverter aConverter; 283 rtl_UnicodeToTextContext aContext; 284 285 aConverter = GetU2TConverter(nEncoding); 286 aContext = rtl_createUnicodeToTextContext( aConverter ); 287 288 // ---> FIXME 289 if ( aConverter == NULL ) 290 return False; 291 // <--- 292 293 sal_Char pConversionBuffer[ 32 ]; 294 sal_uInt32 nConversionInfo; 295 sal_Size nConvertedChars; 296 sal_Size nSize; 297 298 nSize = rtl_convertUnicodeToText( aConverter, aContext, 299 &nChar, 1, pConversionBuffer, sizeof(pConversionBuffer), 300 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR 301 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR, 302 &nConversionInfo, &nConvertedChars ); 303 304 rtl_destroyUnicodeToTextContext( aConverter, aContext ); 305 306 bMatch = (nConvertedChars == 1) 307 && (nSize == 1 || nSize == 2) // XXX Fix me this is a hack 308 && ((nConversionInfo & RTL_UNICODETOTEXT_INFO_ERROR) == 0); 309 break; 310 } 311 312 return bMatch; 313 } 314 315 // wrapper for rtl_convertUnicodeToText that handles the usual cases for 316 // textconversion in drawtext and gettextwidth routines 317 sal_Size 318 SalConverterCache::ConvertStringUTF16( const sal_Unicode *pText, int nTextLen, 319 sal_Char *pBuffer, sal_Size nBufferSize, rtl_TextEncoding nEncoding ) 320 { 321 rtl_UnicodeToTextConverter aConverter = GetU2TConverter(nEncoding); 322 323 const sal_uInt32 nCvtFlags = 324 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE 325 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK 326 | RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK ; 327 sal_uInt32 nCvtInfo; 328 sal_Size nCvtChars; 329 330 rtl_UnicodeToTextContext aContext = 331 rtl_createUnicodeToTextContext( aConverter ); 332 333 sal_Size nSize = rtl_convertUnicodeToText( aConverter, aContext, 334 pText, nTextLen, pBuffer, nBufferSize, 335 nCvtFlags, &nCvtInfo, &nCvtChars ); 336 337 rtl_destroyUnicodeToTextContext( aConverter, aContext ); 338 339 return nSize; 340 } 341 342