1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #define INCL_WIN 29 #include "svpm.h" 30 31 #include <osl/nlsupport.h> 32 #include <osl/diagnose.h> 33 #include <osl/process.h> 34 #include <rtl/memory.h> 35 36 #include <string.h> 37 38 /***************************************************************************** 39 typedefs 40 *****************************************************************************/ 41 42 43 typedef struct { 44 const char *key; 45 const rtl_TextEncoding value; 46 } _pair; 47 48 49 /***************************************************************************** 50 compare function for binary search 51 *****************************************************************************/ 52 53 static int 54 _pair_compare (const char *key, const _pair *pair) 55 { 56 int result = rtl_str_compareIgnoreAsciiCase( key, pair->key ); 57 return result; 58 } 59 60 /***************************************************************************** 61 binary search on encoding tables 62 *****************************************************************************/ 63 64 static const _pair* 65 _pair_search (const char *key, const _pair *base, unsigned int member ) 66 { 67 unsigned int lower = 0; 68 unsigned int upper = member; 69 unsigned int current; 70 int comparison; 71 72 /* check for validity of input */ 73 if ( (key == NULL) || (base == NULL) || (member == 0) ) 74 return NULL; 75 76 /* binary search */ 77 while ( lower < upper ) 78 { 79 current = (lower + upper) / 2; 80 comparison = _pair_compare( key, base + current ); 81 if (comparison < 0) 82 upper = current; 83 else if (comparison > 0) 84 lower = current + 1; 85 else 86 return base + current; 87 } 88 89 return NULL; 90 } 91 92 93 /***************************************************************************** 94 convert rtl_Locale to locale string 95 *****************************************************************************/ 96 97 static char * _compose_locale( rtl_Locale * pLocale, char * buffer, size_t n ) 98 { 99 /* check if a valid locale is specified */ 100 if( pLocale && pLocale->Language && (pLocale->Language->length == 2) ) 101 { 102 size_t offset = 0; 103 104 /* convert language code to ascii */ 105 { 106 rtl_String *pLanguage = NULL; 107 108 rtl_uString2String( &pLanguage, 109 pLocale->Language->buffer, pLocale->Language->length, 110 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS ); 111 112 if( pLanguage->length < n ) 113 { 114 strcpy( buffer, pLanguage->buffer ); 115 offset = pLanguage->length; 116 } 117 118 rtl_string_release( pLanguage ); 119 } 120 121 /* convert country code to ascii */ 122 if( pLocale->Country && (pLocale->Country->length == 2) ) 123 { 124 rtl_String *pCountry = NULL; 125 126 rtl_uString2String( &pCountry, 127 pLocale->Country->buffer, pLocale->Country->length, 128 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS ); 129 130 if( offset + pCountry->length + 1 < n ) 131 { 132 strcpy( buffer + offset++, "_" ); 133 strcpy( buffer + offset, pCountry->buffer ); 134 offset += pCountry->length; 135 } 136 137 rtl_string_release( pCountry ); 138 } 139 140 /* convert variant to ascii - check if there is enough space for the variant string */ 141 if( pLocale->Variant && pLocale->Variant->length && 142 ( pLocale->Variant->length < n - 6 ) ) 143 { 144 rtl_String *pVariant = NULL; 145 146 rtl_uString2String( &pVariant, 147 pLocale->Variant->buffer, pLocale->Variant->length, 148 RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS ); 149 150 if( offset + pVariant->length + 1 < n ) 151 { 152 strcpy( buffer + offset, pVariant->buffer ); 153 offset += pVariant->length; 154 } 155 156 rtl_string_release( pVariant ); 157 } 158 159 return buffer; 160 } 161 162 return NULL; 163 } 164 165 /***************************************************************************** 166 convert locale string to rtl_Locale 167 *****************************************************************************/ 168 169 static rtl_Locale * _parse_locale( const char * locale ) 170 { 171 static sal_Unicode c_locale[2] = { (sal_Unicode) 'C', 0 }; 172 173 /* check if locale contains a valid string */ 174 if( locale ) 175 { 176 size_t len = strlen( locale ); 177 178 if( len >= 2 ) 179 { 180 rtl_uString * pLanguage = NULL; 181 rtl_uString * pCountry = NULL; 182 rtl_uString * pVariant = NULL; 183 184 size_t offset = 2; 185 186 /* convert language code to unicode */ 187 rtl_string2UString( &pLanguage, locale, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS ); 188 OSL_ASSERT(pLanguage != NULL); 189 190 /* convert country code to unicode */ 191 if( len >= 5 && '_' == locale[2] ) 192 { 193 rtl_string2UString( &pCountry, locale + 3, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS ); 194 OSL_ASSERT(pCountry != NULL); 195 offset = 5; 196 } 197 198 /* convert variant code to unicode - do not rely on "." as delimiter */ 199 if( len > offset ) { 200 rtl_string2UString( &pVariant, locale + offset, len - offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS ); 201 OSL_ASSERT(pVariant != NULL); 202 } 203 204 rtl_Locale * ret = rtl_locale_register( pLanguage->buffer, pCountry ? pCountry->buffer : c_locale + 1, pVariant ? pVariant->buffer : c_locale + 1 ); 205 206 if (pVariant) rtl_uString_release(pVariant); 207 if (pCountry) rtl_uString_release(pCountry); 208 if (pLanguage) rtl_uString_release(pLanguage); 209 210 return ret; 211 } 212 else 213 return rtl_locale_register( c_locale, c_locale + 1, c_locale + 1 ); 214 } 215 216 return NULL; 217 } 218 219 /* 220 * This implementation of osl_getTextEncodingFromLocale maps 221 * from the ISO language codes. 222 */ 223 224 const _pair _full_locale_list[] = { 225 { "ja_JP.eucJP", RTL_TEXTENCODING_EUC_JP }, 226 { "ja_JP.EUC", RTL_TEXTENCODING_EUC_JP }, 227 { "ko_KR.EUC", RTL_TEXTENCODING_EUC_KR }, 228 { "zh_CN.EUC", RTL_TEXTENCODING_EUC_CN }, 229 { "zh_TW.EUC", RTL_TEXTENCODING_EUC_TW } 230 }; 231 232 const _pair _locale_extension_list[] = { 233 { "big5", RTL_TEXTENCODING_BIG5 }, 234 { "big5hk", RTL_TEXTENCODING_BIG5_HKSCS }, 235 { "gb18030", RTL_TEXTENCODING_GB_18030 }, 236 { "euc", RTL_TEXTENCODING_EUC_JP }, 237 { "iso8859-1", RTL_TEXTENCODING_ISO_8859_1 }, 238 { "iso8859-10", RTL_TEXTENCODING_ISO_8859_10 }, 239 { "iso8859-13", RTL_TEXTENCODING_ISO_8859_13 }, 240 { "iso8859-14", RTL_TEXTENCODING_ISO_8859_14 }, 241 { "iso8859-15", RTL_TEXTENCODING_ISO_8859_15 }, 242 { "iso8859-2", RTL_TEXTENCODING_ISO_8859_2 }, 243 { "iso8859-3", RTL_TEXTENCODING_ISO_8859_3 }, 244 { "iso8859-4", RTL_TEXTENCODING_ISO_8859_4 }, 245 { "iso8859-5", RTL_TEXTENCODING_ISO_8859_5 }, 246 { "iso8859-6", RTL_TEXTENCODING_ISO_8859_6 }, 247 { "iso8859-7", RTL_TEXTENCODING_ISO_8859_7 }, 248 { "iso8859-8", RTL_TEXTENCODING_ISO_8859_8 }, 249 { "iso8859-9", RTL_TEXTENCODING_ISO_8859_9 }, 250 { "koi8-r", RTL_TEXTENCODING_KOI8_R }, 251 { "koi8-u", RTL_TEXTENCODING_KOI8_U }, 252 { "pck", RTL_TEXTENCODING_MS_932 }, 253 #if (0) 254 { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW }, 255 #endif 256 { "utf-16", RTL_TEXTENCODING_UNICODE }, 257 { "utf-7", RTL_TEXTENCODING_UTF7 }, 258 { "utf-8", RTL_TEXTENCODING_UTF8 } 259 }; 260 261 const _pair _iso_language_list[] = { 262 { "af", RTL_TEXTENCODING_ISO_8859_1 }, 263 { "ar", RTL_TEXTENCODING_ISO_8859_6 }, 264 { "az", RTL_TEXTENCODING_ISO_8859_9 }, 265 { "be", RTL_TEXTENCODING_ISO_8859_5 }, 266 { "bg", RTL_TEXTENCODING_ISO_8859_5 }, 267 { "ca", RTL_TEXTENCODING_ISO_8859_1 }, 268 { "cs", RTL_TEXTENCODING_ISO_8859_2 }, 269 { "da", RTL_TEXTENCODING_ISO_8859_1 }, 270 { "de", RTL_TEXTENCODING_ISO_8859_1 }, 271 { "el", RTL_TEXTENCODING_ISO_8859_7 }, 272 { "en", RTL_TEXTENCODING_ISO_8859_1 }, 273 { "es", RTL_TEXTENCODING_ISO_8859_1 }, 274 { "et", RTL_TEXTENCODING_ISO_8859_4 }, 275 { "eu", RTL_TEXTENCODING_ISO_8859_1 }, 276 { "fa", RTL_TEXTENCODING_ISO_8859_6 }, 277 { "fi", RTL_TEXTENCODING_ISO_8859_1 }, 278 { "fo", RTL_TEXTENCODING_ISO_8859_1 }, 279 { "fr", RTL_TEXTENCODING_ISO_8859_1 }, 280 { "gr", RTL_TEXTENCODING_ISO_8859_7 }, 281 { "he", RTL_TEXTENCODING_ISO_8859_8 }, 282 { "hi", RTL_TEXTENCODING_DONTKNOW }, 283 { "hr", RTL_TEXTENCODING_ISO_8859_2 }, 284 { "hu", RTL_TEXTENCODING_ISO_8859_2 }, 285 { "hy", RTL_TEXTENCODING_DONTKNOW }, 286 { "id", RTL_TEXTENCODING_ISO_8859_1 }, 287 { "is", RTL_TEXTENCODING_ISO_8859_1 }, 288 { "it", RTL_TEXTENCODING_ISO_8859_1 }, 289 { "iw", RTL_TEXTENCODING_ISO_8859_8 }, 290 { "ja", RTL_TEXTENCODING_EUC_JP }, 291 { "ka", RTL_TEXTENCODING_DONTKNOW }, 292 { "kk", RTL_TEXTENCODING_ISO_8859_5 }, 293 { "ko", RTL_TEXTENCODING_EUC_KR }, 294 { "lt", RTL_TEXTENCODING_ISO_8859_4 }, 295 { "lv", RTL_TEXTENCODING_ISO_8859_4 }, 296 { "mk", RTL_TEXTENCODING_ISO_8859_5 }, 297 { "mr", RTL_TEXTENCODING_DONTKNOW }, 298 { "ms", RTL_TEXTENCODING_ISO_8859_1 }, 299 { "nl", RTL_TEXTENCODING_ISO_8859_1 }, 300 { "no", RTL_TEXTENCODING_ISO_8859_1 }, 301 { "pl", RTL_TEXTENCODING_ISO_8859_2 }, 302 { "pt", RTL_TEXTENCODING_ISO_8859_1 }, 303 { "ro", RTL_TEXTENCODING_ISO_8859_2 }, 304 { "ru", RTL_TEXTENCODING_ISO_8859_5 }, 305 { "sa", RTL_TEXTENCODING_DONTKNOW }, 306 { "sk", RTL_TEXTENCODING_ISO_8859_2 }, 307 { "sl", RTL_TEXTENCODING_ISO_8859_2 }, 308 { "sq", RTL_TEXTENCODING_ISO_8859_2 }, 309 { "sv", RTL_TEXTENCODING_ISO_8859_1 }, 310 { "sw", RTL_TEXTENCODING_ISO_8859_1 }, 311 { "ta", RTL_TEXTENCODING_DONTKNOW }, 312 { "th", RTL_TEXTENCODING_DONTKNOW }, 313 { "tr", RTL_TEXTENCODING_ISO_8859_9 }, 314 { "tt", RTL_TEXTENCODING_ISO_8859_5 }, 315 { "uk", RTL_TEXTENCODING_ISO_8859_5 }, 316 { "ur", RTL_TEXTENCODING_ISO_8859_6 }, 317 { "uz", RTL_TEXTENCODING_ISO_8859_9 }, 318 { "vi", RTL_TEXTENCODING_DONTKNOW }, 319 { "zh", RTL_TEXTENCODING_BIG5 } 320 }; 321 322 /***************************************************************************** 323 return the text encoding corresponding to the given locale 324 *****************************************************************************/ 325 326 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale ) 327 { 328 const _pair *language = 0; 329 char locale_buf[64] = ""; 330 char *cp; 331 332 WinMessageBox(HWND_DESKTOP,HWND_DESKTOP, 333 "Please contact technical support and report above informations.\n\n", 334 "Critical error: osl_getTextEncodingFromLocale", 335 0, MB_ERROR | MB_OK | MB_MOVEABLE); 336 337 /* default to process locale if pLocale == NULL */ 338 if( NULL == pLocale ) 339 osl_getProcessLocale( &pLocale ); 340 341 /* convert rtl_Locale to locale string */ 342 if( _compose_locale( pLocale, locale_buf, 64 ) ) 343 { 344 /* check special handling list (EUC) first */ 345 const unsigned int members = sizeof( _full_locale_list ) / sizeof( _pair ); 346 language = _pair_search( locale_buf, _full_locale_list, members); 347 348 if( NULL == language ) 349 { 350 /* 351 * check if there is a charset qualifier at the end of the given locale string 352 * e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what 353 * charset to use 354 */ 355 cp = strrchr( locale_buf, '.' ); 356 357 if( NULL != cp ) 358 { 359 const unsigned int members = sizeof( _locale_extension_list ) / sizeof( _pair ); 360 language = _pair_search( cp + 1, _locale_extension_list, members); 361 } 362 } 363 364 /* use iso language code to determine the charset */ 365 if( NULL == language ) 366 { 367 const unsigned int members = sizeof( _iso_language_list ) / sizeof( _pair ); 368 369 /* iso lang codes have 2 charaters */ 370 locale_buf[2] = '\0'; 371 372 language = _pair_search( locale_buf, _iso_language_list, members); 373 } 374 } 375 376 /* a matching item in our list provides a mapping from codeset to 377 * rtl-codeset */ 378 if ( language != NULL ) 379 return language->value; 380 381 return RTL_TEXTENCODING_DONTKNOW; 382 } 383 384 /***************************************************************************** 385 return the current process locale 386 *****************************************************************************/ 387 388 void _imp_getProcessLocale( rtl_Locale ** ppLocale ) 389 { 390 /* simulate behavior off setlocale */ 391 char * locale = getenv( "LC_ALL" ); 392 393 if( NULL == locale ) 394 locale = getenv( "LC_CTYPE" ); 395 396 if( NULL == locale ) 397 locale = getenv( "LANG" ); 398 399 if( NULL == locale ) 400 locale = "C"; 401 402 *ppLocale = _parse_locale( locale ); 403 } 404 405 /***************************************************************************** 406 set the current process locale 407 *****************************************************************************/ 408 409 int _imp_setProcessLocale( rtl_Locale * pLocale ) 410 { 411 char locale_buf[64]; 412 413 /* convert rtl_Locale to locale string */ 414 if( NULL != _compose_locale( pLocale, locale_buf, 64 ) ) 415 { 416 /* only change env vars that exist already */ 417 if( getenv( "LC_ALL" ) ) { 418 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ ) 419 setenv( "LC_ALL", locale_buf, 1); 420 #else 421 setenv( "LC_ALL", locale_buf ); 422 #endif 423 } 424 425 if( getenv( "LC_CTYPE" ) ) { 426 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ ) 427 setenv("LC_CTYPE", locale_buf, 1 ); 428 #else 429 setenv( "LC_CTYPE", locale_buf ); 430 #endif 431 } 432 433 if( getenv( "LANG" ) ) { 434 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX ) || defined( __EMX__ ) 435 setenv("LC_CTYPE", locale_buf, 1 ); 436 #else 437 setenv( "LANG", locale_buf ); 438 #endif 439 } 440 } 441 442 return 0; 443 } 444 445 446