xref: /aoo4110/main/sal/osl/unx/nlsupport.c (revision b1cdbd2c)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #include <osl/nlsupport.h>
25 #include <osl/diagnose.h>
26 #include <osl/process.h>
27 #include <rtl/memory.h>
28 
29 #if defined(LINUX) || defined(SOLARIS) || defined(NETBSD) || defined(FREEBSD) || defined(MACOSX)
30 #include <pthread.h>
31 #ifndef MACOSX
32  #include <locale.h>
33  #include <langinfo.h>
34 #else
35 #include <osl/module.h>
36 #include <osl/thread.h>
37 #endif	/* !MACOSX */
38 #endif	/* LINUX || SOLARIS || NETBSD || MACOSX */
39 
40 #include <string.h>
41 
42 /*****************************************************************************
43  typedefs
44  *****************************************************************************/
45 
46 
47 typedef struct {
48     const char              *key;
49     const rtl_TextEncoding   value;
50 } _pair;
51 
52 
53 /*****************************************************************************
54  compare function for binary search
55  *****************************************************************************/
56 
57 static int
_pair_compare(const char * key,const _pair * pair)58 _pair_compare (const char *key, const _pair *pair)
59 {
60     int result = rtl_str_compareIgnoreAsciiCase( key, pair->key );
61     return result;
62 }
63 
64 /*****************************************************************************
65  binary search on encoding tables
66  *****************************************************************************/
67 
68 static const _pair*
_pair_search(const char * key,const _pair * base,unsigned int member)69 _pair_search (const char *key, const _pair *base, unsigned int member )
70 {
71     unsigned int lower = 0;
72     unsigned int upper = member;
73     unsigned int current;
74     int comparison;
75 
76     /* check for validity of input */
77     if ( (key == NULL) || (base == NULL) || (member == 0) )
78         return NULL;
79 
80     /* binary search */
81     while ( lower < upper )
82     {
83         current = (lower + upper) / 2;
84         comparison = _pair_compare( key, base + current );
85         if (comparison < 0)
86             upper = current;
87         else if (comparison > 0)
88             lower = current + 1;
89         else
90             return base + current;
91     }
92 
93     return NULL;
94 }
95 
96 
97 /*****************************************************************************
98  convert rtl_Locale to locale string
99  *****************************************************************************/
100 
/*
 * Compose a locale name of the form "language[_COUNTRY][variant]" from an
 * rtl_Locale. All parts are converted to ASCII; the variant is appended
 * directly, without a separator of its own.
 *
 * pLocale  locale to convert; its language code must be two or three
 *          characters long
 * buffer   destination for the NUL-terminated result
 * n        size of buffer in bytes
 *
 * Returns buffer on success. Returns NULL, leaving buffer untouched, when
 * pLocale is not a valid locale or when there is no destination
 * (buffer == NULL or n == 0). Parts that do not fit into buffer are left
 * out; the result is NUL-terminated in every successful case.
 */
static char * _compose_locale( rtl_Locale * pLocale, char * buffer, size_t n )
{
    size_t offset = 0;
    rtl_String *pLanguage = NULL;

    /* nothing can be composed without a destination */
    if( buffer == NULL || n == 0 )
        return NULL;

    /* check if a valid locale is specified */
    if( pLocale == NULL || pLocale->Language == NULL ||
            (pLocale->Language->length != 2 && pLocale->Language->length != 3) )
        return NULL;

    /* make sure the caller gets a terminated string even if no part fits */
    buffer[0] = '\0';

    /* convert language code to ascii */
    rtl_uString2String( &pLanguage,
        pLocale->Language->buffer, pLocale->Language->length,
        RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );

    if( pLanguage != NULL )
    {
        if( SAL_INT_CAST(sal_uInt32, pLanguage->length) < n )
        {
            strcpy( buffer, pLanguage->buffer );
            offset = pLanguage->length;
        }

        rtl_string_release( pLanguage );
    }

    /* convert country code to ascii */
    if( pLocale->Country && (pLocale->Country->length == 2) )
    {
        rtl_String *pCountry = NULL;

        rtl_uString2String( &pCountry,
            pLocale->Country->buffer, pLocale->Country->length,
            RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );

        if( pCountry != NULL )
        {
            /* room is needed for '_', the country code and the terminator */
            if( offset + pCountry->length + 1 < n )
            {
                buffer[offset++] = '_';
                strcpy( buffer + offset, pCountry->buffer );
                offset += pCountry->length;
            }

            rtl_string_release( pCountry );
        }
    }

    /* convert variant to ascii - six bytes stay reserved for the other
     * parts; n is tested first so that n - 6 cannot wrap around */
    if( pLocale->Variant && pLocale->Variant->length &&
        ( n > 6 ) &&
        ( SAL_INT_CAST(sal_uInt32, pLocale->Variant->length) < n - 6 ) )
    {
        rtl_String *pVariant = NULL;

        rtl_uString2String( &pVariant,
            pLocale->Variant->buffer, pLocale->Variant->length,
            RTL_TEXTENCODING_ASCII_US, OUSTRING_TO_OSTRING_CVTFLAGS );

        if( pVariant != NULL )
        {
            if( offset + pVariant->length + 1 < n )
                strcpy( buffer + offset, pVariant->buffer );

            rtl_string_release( pVariant );
        }
    }

    return buffer;
}
169 
170 /*****************************************************************************
171  convert locale string to rtl_Locale
172  *****************************************************************************/
173 
_parse_locale(const char * locale)174 static rtl_Locale * _parse_locale( const char * locale )
175 {
176     static sal_Unicode c_locale[2] = { (sal_Unicode) 'C', 0 };
177 
178     /* check if locale contains a valid string */
179     if( locale )
180     {
181         size_t len = strlen( locale );
182 
183         if( len >= 2 )
184         {
185             rtl_uString * pLanguage = NULL;
186             rtl_uString * pCountry  = NULL;
187             rtl_uString * pVariant  = NULL;
188 
189             size_t offset = 2;
190 
191             rtl_Locale * ret;
192 
193             /* language is a two or three letter code */
194             if( (len > 3 && '_' == locale[3]) || (len == 3 && '_' != locale[2]) )
195                 offset = 3;
196 
197             /* convert language code to unicode */
198             rtl_string2UString( &pLanguage, locale, offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
199             OSL_ASSERT(pLanguage != NULL);
200 
201             /* convert country code to unicode */
202             if( len >= offset+3 && '_' == locale[offset] )
203             {
204                 rtl_string2UString( &pCountry, locale + offset + 1, 2, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
205                 OSL_ASSERT(pCountry != NULL);
206                 offset += 3;
207             }
208 
209             /* convert variant code to unicode - do not rely on "." as delimiter */
210             if( len > offset ) {
211                 rtl_string2UString( &pVariant, locale + offset, len - offset, RTL_TEXTENCODING_ASCII_US, OSTRING_TO_OUSTRING_CVTFLAGS );
212                 OSL_ASSERT(pVariant != NULL);
213             }
214 
215             ret =  rtl_locale_register( pLanguage->buffer, pCountry ? pCountry->buffer : c_locale + 1, pVariant ? pVariant->buffer : c_locale + 1 );
216 
217             if (pVariant) rtl_uString_release(pVariant);
218             if (pCountry) rtl_uString_release(pCountry);
219             if (pLanguage) rtl_uString_release(pLanguage);
220 
221 			return ret;
222         }
223         else
224             return rtl_locale_register( c_locale, c_locale + 1, c_locale + 1 );
225     }
226 
227     return NULL;
228 }
229 
230 #if defined(LINUX) || defined(SOLARIS) || defined(NETBSD) || defined(FREEBSD)
231 
232 /*
233  * This implementation of osl_getTextEncodingFromLocale maps
234  * from nl_langinfo(CODESET) to rtl_textencoding defines.
235  * nl_langinfo() is supported only on Linux, Solaris,
236  * >= NetBSD 1.6 and >= FreeBSD 4.4
237  *
238  * This routine is SLOW because of the setlocale call, so
239  * grab the result and cache it.
240  *
241  * XXX this code has the usual mt problems aligned with setlocale() XXX
242  */
243 
/* On Linux, if the headers included so far did not define CODESET, use
 * _NL_CTYPE_CODESET_NAME in its place. */
#if defined(LINUX) && !defined(CODESET)
#define CODESET _NL_CTYPE_CODESET_NAME
#endif
249 
/*
 * _nl_language_list[] is an array of the supported encodings. Because
 * it is searched with a binary search, the list has to be in ascending
 * order. The encodings are compared case-insensitively, so the list has
 * to be completely uppercase or completely lowercase.
 */
256 
257 #if defined(SOLARIS)
258 
259 /* The values in the below list can be obtained with a script like
260  *  #!/bin/sh
261  *  for i in `locale -a`; do
262  *    LC_ALL=$i locale -k code_set_name
263  *  done
264  */
265 const _pair _nl_language_list[] = {
266     { "5601",           RTL_TEXTENCODING_EUC_KR         }, /* ko_KR.EUC */
267     { "646",            RTL_TEXTENCODING_ISO_8859_1     }, /* fake: ASCII_US */
268     { "ANSI-1251",      RTL_TEXTENCODING_MS_1251        }, /* ru_RU.ANSI1251 */
269     { "BIG5",           RTL_TEXTENCODING_BIG5           }, /* zh_CN.BIG5 */
270     { "BIG5-HKSCS",     RTL_TEXTENCODING_BIG5_HKSCS     }, /* zh_CN.BIG5HK */
271     { "CNS11643",       RTL_TEXTENCODING_EUC_TW         }, /* zh_TW.EUC */
272     { "EUCJP",          RTL_TEXTENCODING_EUC_JP         }, /* ja_JP.eucjp */
273     { "GB18030",        RTL_TEXTENCODING_GB_18030       }, /* zh_CN.GB18030 */
274     { "GB2312",         RTL_TEXTENCODING_GB_2312        }, /* zh_CN */
275     { "GBK",            RTL_TEXTENCODING_GBK            }, /* zh_CN.GBK */
276     { "ISO8859-1",      RTL_TEXTENCODING_ISO_8859_1     },
277     { "ISO8859-10",     RTL_TEXTENCODING_ISO_8859_10    },
278     { "ISO8859-13",     RTL_TEXTENCODING_ISO_8859_13    }, /* lt_LT lv_LV */
279     { "ISO8859-14",     RTL_TEXTENCODING_ISO_8859_14    },
280     { "ISO8859-15",     RTL_TEXTENCODING_ISO_8859_15    },
281     { "ISO8859-2",      RTL_TEXTENCODING_ISO_8859_2     },
282     { "ISO8859-3",      RTL_TEXTENCODING_ISO_8859_3     },
283     { "ISO8859-4",      RTL_TEXTENCODING_ISO_8859_4     },
284     { "ISO8859-5",      RTL_TEXTENCODING_ISO_8859_5     },
285     { "ISO8859-6",      RTL_TEXTENCODING_ISO_8859_6     },
286     { "ISO8859-7",      RTL_TEXTENCODING_ISO_8859_7     },
287     { "ISO8859-8",      RTL_TEXTENCODING_ISO_8859_8     },
288     { "ISO8859-9",      RTL_TEXTENCODING_ISO_8859_9     },
289     { "KOI8-R",         RTL_TEXTENCODING_KOI8_R         },
290     { "KOI8-U",         RTL_TEXTENCODING_KOI8_U         },
291     { "PCK",            RTL_TEXTENCODING_MS_932         },
292     { "SUN_EU_GREEK",   RTL_TEXTENCODING_ISO_8859_7     }, /* 8859-7 + Euro */
293     { "TIS620.2533",    RTL_TEXTENCODING_MS_874         }, /* th_TH.TIS620 */
294     { "UTF-8",          RTL_TEXTENCODING_UTF8           }
295 };
296 
297 /* XXX MS-874 is an extension to tis620, so this is not
298  * really equivalent */
299 
300 #elif defined(LINUX) || defined(NETBSD)
301 
302 const _pair _nl_language_list[] = {
303     { "ANSI_X3.110-1983",           RTL_TEXTENCODING_DONTKNOW   },  /* ISO-IR-99 NAPLPS */
304     { "ANSI_X3.4-1968",             RTL_TEXTENCODING_ISO_8859_1 },  /* fake: ASCII_US */
305     { "ASMO_449",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO_9036 ARABIC7 */
306     { "BALTIC",                     RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-179 */
307     { "BIG5",                       RTL_TEXTENCODING_BIG5 },        /* locale: zh_TW */
308     { "BIG5-HKSCS",                 RTL_TEXTENCODING_BIG5_HKSCS },  /* locale: zh_CN.BIG5HK */
309     { "BIG5HKSCS",                  RTL_TEXTENCODING_BIG5_HKSCS },  /* depricated */
310     { "BS_4730",                    RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-4 ISO646-GB */
311     { "BS_VIEWDATA",                RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-47 */
312     { "CP1250",                     RTL_TEXTENCODING_MS_1250 },     /* MS-EE */
313     { "CP1251",                     RTL_TEXTENCODING_MS_1251 },     /* MS-CYRL */
314     { "CP1252",                     RTL_TEXTENCODING_MS_1252 },     /* MS-ANSI */
315     { "CP1253",                     RTL_TEXTENCODING_MS_1253 },     /* MS-GREEK */
316     { "CP1254",                     RTL_TEXTENCODING_MS_1254 },     /* MS-TURK */
317     { "CP1255",                     RTL_TEXTENCODING_MS_1255 },     /* MS-HEBR */
318     { "CP1256",                     RTL_TEXTENCODING_MS_1256 },     /* MS-ARAB */
319     { "CP1257",                     RTL_TEXTENCODING_MS_1257 },     /* WINBALTRIM */
320     { "CSA_Z243.4-1985-1",          RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-121 */
321     { "CSA_Z243.4-1985-2",          RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-122 CSA7-2 */
322     { "CSA_Z243.4-1985-GR",         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-123 */
323     { "CSN_369103",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-139 */
324     { "CWI",                        RTL_TEXTENCODING_DONTKNOW },    /* CWI-2 CP-HU */
325     { "DEC-MCS",                    RTL_TEXTENCODING_DONTKNOW },    /* DEC */
326     { "DIN_66003",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-21 */
327     { "DS_2089",                    RTL_TEXTENCODING_DONTKNOW },    /* DS2089 ISO646-DK */
328     { "EBCDIC-AT-DE",               RTL_TEXTENCODING_DONTKNOW },
329     { "EBCDIC-AT-DE-A",             RTL_TEXTENCODING_DONTKNOW },
330     { "EBCDIC-CA-FR",               RTL_TEXTENCODING_DONTKNOW },
331     { "EBCDIC-DK-NO",               RTL_TEXTENCODING_DONTKNOW },
332     { "EBCDIC-DK-NO-A",             RTL_TEXTENCODING_DONTKNOW },
333     { "EBCDIC-ES",                  RTL_TEXTENCODING_DONTKNOW },
334     { "EBCDIC-ES-A",                RTL_TEXTENCODING_DONTKNOW },
335     { "EBCDIC-ES-S",                RTL_TEXTENCODING_DONTKNOW },
336     { "EBCDIC-FI-SE",               RTL_TEXTENCODING_DONTKNOW },
337     { "EBCDIC-FI-SE-A",             RTL_TEXTENCODING_DONTKNOW },
338     { "EBCDIC-FR",                  RTL_TEXTENCODING_DONTKNOW },
339     { "EBCDIC-IS-FRISS",            RTL_TEXTENCODING_DONTKNOW },    /*  FRISS */
340     { "EBCDIC-IT",                  RTL_TEXTENCODING_DONTKNOW },
341     { "EBCDIC-PT",                  RTL_TEXTENCODING_DONTKNOW },
342     { "EBCDIC-UK",                  RTL_TEXTENCODING_DONTKNOW },
343     { "EBCDIC-US",                  RTL_TEXTENCODING_DONTKNOW },
344     { "ECMA-CYRILLIC",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-111 */
345     { "ES",                         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-17 */
346     { "ES2",                        RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-85 */
347     { "EUC-JP",                     RTL_TEXTENCODING_EUC_JP },      /* locale: ja_JP.eucjp */
348     { "EUC-KR",                     RTL_TEXTENCODING_EUC_KR },      /* locale: ko_KR.euckr */
349     { "EUC-TW",                     RTL_TEXTENCODING_EUC_TW },      /* locale: zh_TW.euctw */
350     { "GB18030",                    RTL_TEXTENCODING_GB_18030 },    /* locale: zh_CN.gb18030 */
351     { "GB2312",                     RTL_TEXTENCODING_GB_2312 },     /* locale: zh_CN */
352     { "GB_1988-80",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-57 */
353     { "GBK",                        RTL_TEXTENCODING_GBK },         /* locale: zh_CN.GBK */
354     { "GOST_19768-74",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-153 */
355     { "GREEK-CCITT",                RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-150 */
356     { "GREEK7",                     RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-88 */
357     { "GREEK7-OLD",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-18 */
358     { "HP-ROMAN8",                  RTL_TEXTENCODING_DONTKNOW },    /* ROMAN8 R8 */
359     { "IBM037",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-[US|CA|WT] */
360     { "IBM038",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-INT CP038 */
361     { "IBM1004",                    RTL_TEXTENCODING_DONTKNOW },    /* CP1004 OS2LATIN1 */
362     { "IBM1026",                    RTL_TEXTENCODING_DONTKNOW },    /* CP1026 1026 */
363     { "IBM1047",                    RTL_TEXTENCODING_DONTKNOW },    /* CP1047 1047 */
364     { "IBM256",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-INT1 */
365     { "IBM273",                     RTL_TEXTENCODING_DONTKNOW },    /* CP273 */
366     { "IBM274",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-BE CP274 */
367     { "IBM275",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-BR CP275 */
368     { "IBM277",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-[DK|NO] */
369     { "IBM278",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-[FISE]*/
370     { "IBM280",                     RTL_TEXTENCODING_DONTKNOW },    /* CP280 EBCDIC-CP-IT*/
371     { "IBM281",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-JP-E CP281 */
372     { "IBM284",                     RTL_TEXTENCODING_DONTKNOW },    /* CP284 EBCDIC-CP-ES */
373     { "IBM285",                     RTL_TEXTENCODING_DONTKNOW },    /* CP285 EBCDIC-CP-GB */
374     { "IBM290",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-JP-KANA */
375     { "IBM297",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-FR */
376     { "IBM420",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-AR1 */
377     { "IBM423",                     RTL_TEXTENCODING_DONTKNOW },    /* CP423 EBCDIC-CP-GR */
378     { "IBM424",                     RTL_TEXTENCODING_DONTKNOW },    /* CP424 EBCDIC-CP-HE */
379     { "IBM437",                     RTL_TEXTENCODING_IBM_437 },     /* CP437 437 */
380     { "IBM500",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CP-[BE|CH] */
381     { "IBM850",                     RTL_TEXTENCODING_IBM_850 },     /* CP850 850 */
382     { "IBM851",                     RTL_TEXTENCODING_DONTKNOW },    /* CP851 851 */
383     { "IBM852",                     RTL_TEXTENCODING_IBM_852 },     /* CP852 852 */
384     { "IBM855",                     RTL_TEXTENCODING_IBM_855 },     /* CP855 855 */
385     { "IBM857",                     RTL_TEXTENCODING_IBM_857 },     /* CP857 857 */
386     { "IBM860",                     RTL_TEXTENCODING_IBM_860 },     /* CP860 860 */
387     { "IBM861",                     RTL_TEXTENCODING_IBM_861 },     /* CP861 861 CP-IS */
388     { "IBM862",                     RTL_TEXTENCODING_IBM_862 },     /* CP862 862 */
389     { "IBM863",                     RTL_TEXTENCODING_IBM_863 },     /* CP863 863 */
390     { "IBM864",                     RTL_TEXTENCODING_IBM_864 },     /* CP864 */
391     { "IBM865",                     RTL_TEXTENCODING_IBM_865 },     /* CP865 865 */
392     { "IBM866",                     RTL_TEXTENCODING_IBM_866 },     /* CP866 866 */
393     { "IBM868",                     RTL_TEXTENCODING_DONTKNOW },    /* CP868 CP-AR */
394     { "IBM869",                     RTL_TEXTENCODING_IBM_869 },     /* CP869 869 CP-GR */
395     { "IBM870",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-[ROECE|YU] */
396     { "IBM871",                     RTL_TEXTENCODING_DONTKNOW },    /* CP871 EBCDIC-CP-IS */
397     { "IBM875",                     RTL_TEXTENCODING_DONTKNOW },    /* CP875 EBCDIC-GREEK */
398     { "IBM880",                     RTL_TEXTENCODING_DONTKNOW },    /* EBCDIC-CYRILLIC */
399     { "IBM891",                     RTL_TEXTENCODING_DONTKNOW },    /* CP891 */
400     { "IBM903",                     RTL_TEXTENCODING_DONTKNOW },    /* CP903 */
401     { "IBM904",                     RTL_TEXTENCODING_DONTKNOW },    /* CP904 904 */
402     { "IBM905",                     RTL_TEXTENCODING_DONTKNOW },    /* CP905 EBCDIC-CP-TR */
403     { "IBM918",                     RTL_TEXTENCODING_DONTKNOW },    /* CP918 EBCDIC-AR2 */
404     { "IEC_P27-1",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-143 */
405     { "INIS",                       RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-49 */
406     { "INIS-8",                     RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-50 */
407     { "INIS-CYRILLIC",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-51 */
408     { "INVARIANT",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-170 */
409     { "ISO-8859-1",                 RTL_TEXTENCODING_ISO_8859_1 },  /* ISO-IR-100 CP819 */
410     { "ISO-8859-10",                RTL_TEXTENCODING_ISO_8859_10 }, /* ISO-IR-157 LATIN6 */
411     { "ISO-8859-13",                RTL_TEXTENCODING_ISO_8859_13 }, /* ISO-IR-179 LATIN7 */
412     { "ISO-8859-14",                RTL_TEXTENCODING_ISO_8859_14 }, /* LATIN8 L8 */
413     { "ISO-8859-15",                RTL_TEXTENCODING_ISO_8859_15 },
414     { "ISO-8859-2",                 RTL_TEXTENCODING_ISO_8859_2 },  /* LATIN2 L2 */
415     { "ISO-8859-3",                 RTL_TEXTENCODING_ISO_8859_3 },  /* LATIN3 L3 */
416     { "ISO-8859-4",                 RTL_TEXTENCODING_ISO_8859_4 },  /* LATIN4 L4 */
417     { "ISO-8859-5",                 RTL_TEXTENCODING_ISO_8859_5 },  /* CYRILLIC */
418     { "ISO-8859-6",                 RTL_TEXTENCODING_ISO_8859_6 },  /* ECMA-114 ARABIC */
419     { "ISO-8859-7",                 RTL_TEXTENCODING_ISO_8859_7 },  /* ECMA-118 GREEK8 */
420     { "ISO-8859-8",                 RTL_TEXTENCODING_ISO_8859_8 },  /* ISO_8859-8 HEBREW */
421     { "ISO-8859-9",                 RTL_TEXTENCODING_ISO_8859_9 },  /* ISO_8859-9 LATIN5 */
422     { "ISO-IR-90",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO_6937-2:1983 */
423     { "ISO_10367-BOX",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-155 */
424     { "ISO_2033-1983",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-98 E13B */
425     { "ISO_5427",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-37 KOI-7 */
426     { "ISO_5427-EXT",               RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-54  */
427     { "ISO_5428",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-55 */
428     { "ISO_646.BASIC",              RTL_TEXTENCODING_ASCII_US },    /* REF */
429     { "ISO_646.IRV",                RTL_TEXTENCODING_ASCII_US },    /* ISO-IR-2 IRV */
430     { "ISO_646.IRV:1983",           RTL_TEXTENCODING_ISO_8859_1 },  /* fake: ASCII_US, used for "C" locale*/
431     { "ISO_6937",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-156 ISO6937*/
432     { "ISO_6937-2-25",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-152 */
433     { "ISO_6937-2-ADD",             RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-142 */
434     { "ISO_8859-SUPP",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-154 */
435     { "IT",                         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-15  */
436     { "JIS_C6220-1969-JP",          RTL_TEXTENCODING_DONTKNOW },    /* KATAKANA X0201-7 */
437     { "JIS_C6220-1969-RO",          RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-14 */
438     { "JIS_C6229-1984-A",           RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-91 */
439     { "JIS_C6229-1984-B",           RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-92 */
440     { "JIS_C6229-1984-B-ADD",       RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-93 */
441     { "JIS_C6229-1984-HAND",        RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-94 */
442     { "JIS_C6229-1984-HAND-ADD",    RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-95 */
443     { "JIS_C6229-1984-KANA",        RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-96 */
444     { "JIS_X0201",                  RTL_TEXTENCODING_DONTKNOW },    /* X0201 */
445     { "JUS_I.B1.002",               RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-141 */
446     { "JUS_I.B1.003-MAC",           RTL_TEXTENCODING_DONTKNOW },    /* MACEDONIAN */
447     { "JUS_I.B1.003-SERB",          RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-146 SERBIAN */
448     { "KOI-8",                      RTL_TEXTENCODING_DONTKNOW },
449     { "KOI8-R",                     RTL_TEXTENCODING_KOI8_R },
450     { "KOI8-U",                     RTL_TEXTENCODING_KOI8_U },
451     { "KSC5636",                    RTL_TEXTENCODING_DONTKNOW },    /* ISO646-KR */
452     { "LATIN-GREEK",                RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-19 */
453     { "LATIN-GREEK-1",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-27 */
454     { "MAC-IS",                     RTL_TEXTENCODING_APPLE_ROMAN },
455     { "MAC-UK",                     RTL_TEXTENCODING_APPLE_ROMAN },
456     { "MACINTOSH",                  RTL_TEXTENCODING_APPLE_ROMAN }, /* MAC */
457     { "MSZ_7795.3",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-86 */
458     { "NATS-DANO",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-9-1 */
459     { "NATS-DANO-ADD",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-9-2 */
460     { "NATS-SEFI",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-8-1 */
461     { "NATS-SEFI-ADD",              RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-8-2 */
462     { "NC_NC00-10",                 RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-151 */
463     { "NEXTSTEP",                   RTL_TEXTENCODING_DONTKNOW },    /* NEXT */
464     { "NF_Z_62-010",                RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-69 */
465     { "NF_Z_62-010_(1973)",         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-25 */
466     { "NS_4551-1",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-60 */
467     { "NS_4551-2",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-61 */
468     { "PT",                         RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-16 */
469     { "PT2",                        RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-84 */
470     { "SAMI",                       RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-158 */
471     { "SEN_850200_B",               RTL_TEXTENCODING_DONTKNOW },    /* ISO646-[FI|SE] */
472     { "SEN_850200_C",               RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-11 */
473     { "T.101-G2",                   RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-128 */
474     { "T.61-7BIT",                  RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-102 */
475     { "T.61-8BIT",                  RTL_TEXTENCODING_DONTKNOW },    /* T.61 ISO-IR-103 */
476     { "TIS-620",                    RTL_TEXTENCODING_MS_874 },     /* locale: th_TH */
477     { "UTF-8",                      RTL_TEXTENCODING_UTF8 },        /* ISO-10646/UTF-8 */
478     { "VIDEOTEX-SUPPL",             RTL_TEXTENCODING_DONTKNOW },    /* ISO-IR-70 */
479     { "WIN-SAMI-2",                 RTL_TEXTENCODING_DONTKNOW }     /* WS2 */
480 };
481 
482 #elif defined(FREEBSD)
483 
484 const _pair _nl_language_list[] = {
485     { "ASCII",         RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
486     { "BIG5",          RTL_TEXTENCODING_BIG5           }, /* China - Traditional Chinese */
487     { "CP1251",        RTL_TEXTENCODING_MS_1251        }, /* MS-CYRL */
488     { "CP866",         RTL_TEXTENCODING_IBM_866        }, /* CP866 866 */
489     { "EUCCN",         RTL_TEXTENCODING_EUC_CN         }, /* China - Simplified Chinese */
490     { "EUCJP",         RTL_TEXTENCODING_EUC_JP         }, /* Japan */
491     { "EUCKR",         RTL_TEXTENCODING_EUC_KR         }, /* Korea */
492     { "ISO8859-1",     RTL_TEXTENCODING_ISO_8859_1     }, /* Western */
493     { "ISO8859-15",    RTL_TEXTENCODING_ISO_8859_15    }, /* Western Updated (w/Euro sign) */
494     { "ISO8859-2",     RTL_TEXTENCODING_ISO_8859_2     }, /* Central European */
495     { "ISO8859-4",     RTL_TEXTENCODING_ISO_8859_4     }, /* LATIN4 L4 */
496     { "ISO8859-5",     RTL_TEXTENCODING_ISO_8859_5     }, /* Cyrillic */
497     { "ISO8859-7",     RTL_TEXTENCODING_ISO_8859_7     }, /* Greek */
498     { "ISO8859-9",     RTL_TEXTENCODING_ISO_8859_9     }, /* Turkish */
499     { "KOI8-R",        RTL_TEXTENCODING_KOI8_R         }, /* KOI8-R */
500     { "KOI8-U",        RTL_TEXTENCODING_KOI8_U         }, /* KOI8-U */
501     { "SJIS",          RTL_TEXTENCODING_SHIFT_JIS      }, /* Japan */
502     { "US-ASCII",      RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
503     { "UTF-8",         RTL_TEXTENCODING_UTF8           }  /* ISO-10646/UTF-8 */
504 };
505 
506 #elif defined(NETBSD)
507 
508 const _pair _nl_language_list[] = {
509     { "ASCII",         RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
510     { "BIG5",          RTL_TEXTENCODING_BIG5           }, /* China - Traditional Chinese */
511     { "CP1251",        RTL_TEXTENCODING_MS_1251        }, /* MS-CYRL */
512     { "CP866",         RTL_TEXTENCODING_IBM_866        }, /* CP866 866 */
513     { "CTEXT",         RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
514     { "EUCCN",         RTL_TEXTENCODING_EUC_CN         }, /* China - Simplified Chinese */
515     { "EUCJP",         RTL_TEXTENCODING_EUC_JP         }, /* Japan */
516     { "EUCKR",         RTL_TEXTENCODING_EUC_KR         }, /* Korea */
517     { "EUCTW",         RTL_TEXTENCODING_EUC_TW         }, /* China - Traditional Chinese */
518     { "ISO-2022-JP",   RTL_TEXTENCODING_DONTKNOW       }, /* */
519     { "ISO-2022-JP-2", RTL_TEXTENCODING_DONTKNOW       }, /* */
520     { "ISO8859-1",     RTL_TEXTENCODING_ISO_8859_1     }, /* Western */
521     { "ISO8859-15",    RTL_TEXTENCODING_ISO_8859_15    }, /* Western Updated (w/Euro sign) */
522     { "ISO8859-2",     RTL_TEXTENCODING_ISO_8859_2     }, /* Central European */
523     { "ISO8859-4",     RTL_TEXTENCODING_ISO_8859_4     }, /* LATIN4 L4 */
524     { "ISO8859-5",     RTL_TEXTENCODING_ISO_8859_5     }, /* Cyrillic */
525     { "ISO8859-7",     RTL_TEXTENCODING_ISO_8859_7     }, /* Greek */
526     { "ISO8859-9",     RTL_TEXTENCODING_ISO_8859_9     }, /* Turkish */
527     { "KOI8-R",        RTL_TEXTENCODING_KOI8_R         }, /* KOI8-R */
528     { "KOI8-U",        RTL_TEXTENCODING_KOI8_U         }, /* KOI8-U */
529     { "SJIS",          RTL_TEXTENCODING_SHIFT_JIS      }, /* Japan */
530     { "US-ASCII",      RTL_TEXTENCODING_ASCII_US       }, /* US-ASCII */
531     { "UTF-8",         RTL_TEXTENCODING_UTF8           }  /* ISO-10646/UTF-8 */
532 };
533 
534 #endif /* ifdef SOLARIS LINUX FREEBSD NETBSD */
535 
536 static pthread_mutex_t aLocalMutex = PTHREAD_MUTEX_INITIALIZER;
537 
538 /*****************************************************************************
539  return the text encoding corresponding to the given locale
540  *****************************************************************************/
541 
osl_getTextEncodingFromLocale(rtl_Locale * pLocale)542 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
543 {
544     const _pair *language=0;
545 
546     char  locale_buf[64] = "";
547     char  codeset_buf[64];
548 
549     char *ctype_locale = 0;
550     char *codeset      = 0;
551 
552     /* default to process locale if pLocale == NULL */
553     if( NULL == pLocale )
554         osl_getProcessLocale( &pLocale );
555 
556     /* convert rtl_Locale to locale string */
557     _compose_locale( pLocale, locale_buf, 64 );
558 
559     /* basic thread safeness */
560     pthread_mutex_lock( &aLocalMutex );
561 
562     /* remember the charset as indicated by the LC_CTYPE locale */
563     ctype_locale = setlocale( LC_CTYPE, NULL );
564 
565     /* set the desired LC_CTYPE locale */
566     if( NULL == setlocale( LC_CTYPE, locale_buf ) )
567     {
568         pthread_mutex_unlock(&aLocalMutex);
569         return RTL_TEXTENCODING_DONTKNOW;
570     }
571 
572     /* get the charset as indicated by the LC_CTYPE locale */
573 #if defined(NETBSD) && !defined(CODESET)
574     codeset = NULL;
575 #else
576     codeset = nl_langinfo( CODESET );
577 #endif
578 
579     if ( codeset != NULL )
580     {
581         /* get codeset into mt save memory */
582         strncpy( codeset_buf, codeset, sizeof(codeset_buf) );
583         codeset_buf[sizeof(codeset_buf) - 1] = 0;
584         codeset = codeset_buf;
585     }
586 
587     /* restore the original value of locale */
588     if ( ctype_locale != NULL )
589         setlocale( LC_CTYPE, ctype_locale );
590 
591     pthread_mutex_unlock( &aLocalMutex );
592 
593     /* search the codeset in our language list */
594     if ( codeset != NULL )
595     {
596         const unsigned int members = sizeof(_nl_language_list) / sizeof(_pair);
597         language = _pair_search (codeset, _nl_language_list, members);
598     }
599 
600     OSL_ASSERT( language && ( RTL_TEXTENCODING_DONTKNOW != language->value ) );
601 
602     /* a matching item in our list provides a mapping from codeset to
603      * rtl-codeset */
604     if ( language != NULL )
605         return language->value;
606 
607     return RTL_TEXTENCODING_DONTKNOW;
608 }
609 
610 /*****************************************************************************
611  return the current process locale
612  *****************************************************************************/
613 
_imp_getProcessLocale(rtl_Locale ** ppLocale)614 void _imp_getProcessLocale( rtl_Locale ** ppLocale )
615 {
616     char * locale;
617 
618     /* basic thread safeness */
619     pthread_mutex_lock( &aLocalMutex );
620 
621     /* set the locale defined by the env vars */
622     locale = setlocale( LC_CTYPE, "" );
623 
624     /* fallback to the current locale */
625     if( NULL == locale )
626         locale = setlocale( LC_CTYPE, NULL );
627 
628     /* return the LC_CTYPE locale */
629     *ppLocale = _parse_locale( locale );
630 
631     pthread_mutex_unlock( &aLocalMutex );
632 }
633 
634 /*****************************************************************************
635  set the current process locale
636  *****************************************************************************/
637 
_imp_setProcessLocale(rtl_Locale * pLocale)638 int _imp_setProcessLocale( rtl_Locale * pLocale )
639 {
640     char  locale_buf[64] = "";
641     int   ret = 0;
642 
643     /* convert rtl_Locale to locale string */
644     _compose_locale( pLocale, locale_buf, 64 );
645 
646     /* basic thread safeness */
647     pthread_mutex_lock( &aLocalMutex );
648 
649     /* try to set LC_ALL locale */
650     if( NULL == setlocale( LC_ALL, locale_buf ) )
651         ret = -1;
652 
653     pthread_mutex_unlock( &aLocalMutex );
654     return ret;
655 }
656 
657 #else /* ifdef LINUX || SOLARIS || MACOSX || NETBSD */
658 
659 /*
660  * This implementation of osl_getTextEncodingFromLocale maps
661  * from the ISO language codes.
662  */
663 
664 const _pair _full_locale_list[] = {
665     { "ja_JP.eucJP",  RTL_TEXTENCODING_EUC_JP      },
666     { "ja_JP.EUC",    RTL_TEXTENCODING_EUC_JP      },
667     { "ko_KR.EUC",    RTL_TEXTENCODING_EUC_KR      },
668     { "zh_CN.EUC",    RTL_TEXTENCODING_EUC_CN      },
669     { "zh_TW.EUC",    RTL_TEXTENCODING_EUC_TW      }
670 };
671 
672 const _pair _locale_extension_list[] = {
673     { "big5",         RTL_TEXTENCODING_BIG5        },
674     { "big5hk",       RTL_TEXTENCODING_BIG5_HKSCS  },
675     { "gb18030",      RTL_TEXTENCODING_GB_18030    },
676     { "euc",          RTL_TEXTENCODING_EUC_JP      },
677     { "iso8859-1",    RTL_TEXTENCODING_ISO_8859_1  },
678     { "iso8859-10",   RTL_TEXTENCODING_ISO_8859_10 },
679     { "iso8859-13",   RTL_TEXTENCODING_ISO_8859_13 },
680     { "iso8859-14",   RTL_TEXTENCODING_ISO_8859_14 },
681     { "iso8859-15",   RTL_TEXTENCODING_ISO_8859_15 },
682     { "iso8859-2",    RTL_TEXTENCODING_ISO_8859_2  },
683     { "iso8859-3",    RTL_TEXTENCODING_ISO_8859_3  },
684     { "iso8859-4",    RTL_TEXTENCODING_ISO_8859_4  },
685     { "iso8859-5",    RTL_TEXTENCODING_ISO_8859_5  },
686     { "iso8859-6",    RTL_TEXTENCODING_ISO_8859_6  },
687     { "iso8859-7",    RTL_TEXTENCODING_ISO_8859_7  },
688     { "iso8859-8",    RTL_TEXTENCODING_ISO_8859_8  },
689     { "iso8859-9",    RTL_TEXTENCODING_ISO_8859_9  },
690     { "koi8-r",       RTL_TEXTENCODING_KOI8_R      },
691     { "koi8-u",       RTL_TEXTENCODING_KOI8_U      },
692     { "pck",          RTL_TEXTENCODING_MS_932      },
693 #if (0)
694     { "sun_eu_greek", RTL_TEXTENCODING_DONTKNOW    },
695 #endif
696     { "utf-16",       RTL_TEXTENCODING_UNICODE     },
697     { "utf-7",        RTL_TEXTENCODING_UTF7        },
698     { "utf-8",        RTL_TEXTENCODING_UTF8        }
699 };
700 
/*
 * Default text encoding per two-letter language code.  Consulted by
 * osl_getTextEncodingFromLocale() only after neither the full locale name
 * nor its charset qualifier produced a match.
 *
 * The keys are in ascending order; keep them that way, since the table is
 * looked up with _pair_search().  Languages mapped to
 * RTL_TEXTENCODING_DONTKNOW have an entry but no default encoding.
 */
const _pair _iso_language_list[] = {
    { "af",  RTL_TEXTENCODING_ISO_8859_1 },
    { "ar",  RTL_TEXTENCODING_ISO_8859_6 },
    { "az",  RTL_TEXTENCODING_ISO_8859_9 },
    { "be",  RTL_TEXTENCODING_ISO_8859_5 },
    { "bg",  RTL_TEXTENCODING_ISO_8859_5 },
    { "ca",  RTL_TEXTENCODING_ISO_8859_1 },
    { "cs",  RTL_TEXTENCODING_ISO_8859_2 },
    { "da",  RTL_TEXTENCODING_ISO_8859_1 },
    { "de",  RTL_TEXTENCODING_ISO_8859_1 },
    { "el",  RTL_TEXTENCODING_ISO_8859_7 },
    { "en",  RTL_TEXTENCODING_ISO_8859_1 },
    { "es",  RTL_TEXTENCODING_ISO_8859_1 },
    { "et",  RTL_TEXTENCODING_ISO_8859_4 },
    { "eu",  RTL_TEXTENCODING_ISO_8859_1 },
    { "fa",  RTL_TEXTENCODING_ISO_8859_6 },
    { "fi",  RTL_TEXTENCODING_ISO_8859_1 },
    { "fo",  RTL_TEXTENCODING_ISO_8859_1 },
    { "fr",  RTL_TEXTENCODING_ISO_8859_1 },
    { "gr",  RTL_TEXTENCODING_ISO_8859_7 },
    { "he",  RTL_TEXTENCODING_ISO_8859_8 },
    { "hi",  RTL_TEXTENCODING_DONTKNOW },
    { "hr",  RTL_TEXTENCODING_ISO_8859_2 },
    { "hu",  RTL_TEXTENCODING_ISO_8859_2 },
    { "hy",  RTL_TEXTENCODING_DONTKNOW },
    { "id",  RTL_TEXTENCODING_ISO_8859_1 },
    { "is",  RTL_TEXTENCODING_ISO_8859_1 },
    { "it",  RTL_TEXTENCODING_ISO_8859_1 },
    { "iw",  RTL_TEXTENCODING_ISO_8859_8 },
    { "ja",  RTL_TEXTENCODING_EUC_JP },
    { "ka",  RTL_TEXTENCODING_DONTKNOW },
    { "kk",  RTL_TEXTENCODING_ISO_8859_5 },
    { "ko",  RTL_TEXTENCODING_EUC_KR },
    { "lt",  RTL_TEXTENCODING_ISO_8859_4 },
    { "lv",  RTL_TEXTENCODING_ISO_8859_4 },
    { "mk",  RTL_TEXTENCODING_ISO_8859_5 },
    { "mr",  RTL_TEXTENCODING_DONTKNOW },
    { "ms",  RTL_TEXTENCODING_ISO_8859_1 },
    { "nl",  RTL_TEXTENCODING_ISO_8859_1 },
    { "no",  RTL_TEXTENCODING_ISO_8859_1 },
    { "pl",  RTL_TEXTENCODING_ISO_8859_2 },
    { "pt",  RTL_TEXTENCODING_ISO_8859_1 },
    { "ro",  RTL_TEXTENCODING_ISO_8859_2 },
    { "ru",  RTL_TEXTENCODING_ISO_8859_5 },
    { "sa",  RTL_TEXTENCODING_DONTKNOW },
    { "sk",  RTL_TEXTENCODING_ISO_8859_2 },
    { "sl",  RTL_TEXTENCODING_ISO_8859_2 },
    { "sq",  RTL_TEXTENCODING_ISO_8859_2 },
    { "sv",  RTL_TEXTENCODING_ISO_8859_1 },
    { "sw",  RTL_TEXTENCODING_ISO_8859_1 },
    { "ta",  RTL_TEXTENCODING_DONTKNOW },
    { "th",  RTL_TEXTENCODING_DONTKNOW },
    { "tr",  RTL_TEXTENCODING_ISO_8859_9 },
    { "tt",  RTL_TEXTENCODING_ISO_8859_5 },
    { "uk",  RTL_TEXTENCODING_ISO_8859_5 },
    { "ur",  RTL_TEXTENCODING_ISO_8859_6 },
    { "uz",  RTL_TEXTENCODING_ISO_8859_9 },
    { "vi",  RTL_TEXTENCODING_DONTKNOW },
    { "zh",  RTL_TEXTENCODING_BIG5 }
};
761 
762 /*****************************************************************************
763  return the text encoding corresponding to the given locale
764  *****************************************************************************/
765 
osl_getTextEncodingFromLocale(rtl_Locale * pLocale)766 rtl_TextEncoding osl_getTextEncodingFromLocale( rtl_Locale * pLocale )
767 {
768     const _pair *language = 0;
769     char locale_buf[64] = "";
770     char *cp;
771 
772     /* default to process locale if pLocale == NULL */
773     if( NULL == pLocale )
774         osl_getProcessLocale( &pLocale );
775 
776     /* convert rtl_Locale to locale string */
777     if( _compose_locale( pLocale, locale_buf, 64 ) )
778     {
779         /* check special handling list (EUC) first */
780         const unsigned int members = sizeof( _full_locale_list ) / sizeof( _pair );
781         language = _pair_search( locale_buf, _full_locale_list, members);
782 
783         if( NULL == language )
784         {
785             /*
786              *  check if there is a charset qualifier at the end of the given locale string
787              *  e.g. de.ISO8859-15 or de.ISO8859-15@euro which strongly indicates what
788              *  charset to use
789              */
790 		    cp = strrchr( locale_buf, '.' );
791 
792             if( NULL != cp )
793             {
794                 const unsigned int members = sizeof( _locale_extension_list ) / sizeof( _pair );
795                 language = _pair_search( cp + 1, _locale_extension_list, members);
796             }
797         }
798 
799         /* use iso language code to determine the charset */
800         if( NULL == language )
801         {
802             const unsigned int members = sizeof( _iso_language_list ) / sizeof( _pair );
803 
804             /* iso lang codes have 2 charaters */
805             locale_buf[2] = '\0';
806 
807             language = _pair_search( locale_buf, _iso_language_list, members);
808         }
809     }
810 
811     /* a matching item in our list provides a mapping from codeset to
812      * rtl-codeset */
813     if ( language != NULL )
814         return language->value;
815 
816     return RTL_TEXTENCODING_DONTKNOW;
817 }
818 
819 #ifdef MACOSX
820 #include "system.h"
821 
/* OS X locale discovery function: a pointer to a function that takes a
 * character buffer and a sal_uInt32 and returns an int.
 * NOTE(review): no use of this pointer is visible nearby - confirm it is
 * still needed. */
int (*pGetOSXLocale)( char *, sal_uInt32 );

/* Forward declaration only; the definition is not in this part of the file. */
oslModule SAL_CALL osl_psz_loadModule(const sal_Char *pszModuleName, sal_Int32 nRtldMode);
/*****************************************************************************
 return the current process locale
 *****************************************************************************/

/* Forward declaration; _imp_getProcessLocale() calls this with a 128 byte
 * buffer and uses the buffer contents as the locale name afterwards.
 * NOTE(review): the meaning of the int result is not visible here. */
int macosx_getLocale(char *locale, sal_uInt32 bufferLen);
831 
/*
 * Append "/usr/local/bin" to PATH unless PATH already contains that string.
 * An unset PATH is treated like an empty one: PATH then becomes just
 * "/usr/local/bin".
 */
static void _append_usr_local_bin_to_path( void )
{
    static const char extra_dir[] = "/usr/local/bin";
    const char *opath = getenv( "PATH" );
    char       *npath;
    size_t      len;

    /* nothing to do if the directory is already mentioned */
    if ( opath != NULL && strstr( opath, extra_dir ) != NULL )
        return;

    len = sizeof( extra_dir );              /* includes the terminating NUL */
    if ( opath != NULL )
        len += strlen( opath ) + strlen( ":" );

    npath = malloc( len );
    if ( npath == NULL )
    {
        fprintf( stderr, "nlsupport.c:  PATH allocation returned NULL!\n" );
        return;
    }

    *npath = '\0';
    if ( opath != NULL ) {
        strcat( npath, opath );
        strcat( npath, ":" );
    }
    strcat( npath, extra_dir ); /* We are adding at the end */

    setenv( "PATH", npath, 1 );
    free( npath );
}

/*
 * Return the current process locale (Mac OS X variant).
 *
 * The locale name is obtained from macosx_getLocale() on the first call and
 * cached in a static buffer for all later calls.  If that buffer cannot be
 * allocated, LC_ALL, LC_CTYPE and LANG are consulted in this order, with "C"
 * as the last resort.
 *
 * The parsed locale is stored in *ppLocale.  As side effects LC_ALL,
 * LC_CTYPE and LANG are overwritten with the locale name, and
 * "/usr/local/bin" is appended to PATH if it is missing.
 *
 * NOTE: this function is not serialized; aLocalMutex is not available in
 * this branch of the file and the static cache is accessed without a lock.
 */
void _imp_getProcessLocale( rtl_Locale ** ppLocale )
{
    static char *locale = NULL;

    /* Only fetch the locale once and cache it */
    if ( NULL == locale )
    {

        locale = (char *)malloc( 128 );
        if ( locale )
            macosx_getLocale( locale, 128 );
        else
            fprintf( stderr, "nlsupport.c:  locale allocation returned NULL!\n" );
    }

    /* handle the case where OS specific method of finding locale fails */
    if ( NULL == locale )
    {
        /* simulate behavior of setlocale */
        locale = getenv( "LC_ALL" );

        if( NULL == locale )
            locale = getenv( "LC_CTYPE" );

        if( NULL == locale )
            locale = getenv( "LANG" );

        if( NULL == locale )
            locale = "C";
    }

    /* return the locale */
    *ppLocale = _parse_locale( locale );

    setenv( "LC_ALL", locale, 1);
    setenv("LC_CTYPE", locale, 1 );
    setenv("LANG", locale, 1 );

    /*
     * This is a hack. We know that we are setting some envvars here
     * and due to https://bz.apache.org/ooo/show_bug.cgi?id=127965
     * we need to update PATH on macOS. Doing it here ensures
     * that it's done but it's not the perfect location to be doing
     * this.
     */
    _append_usr_local_bin_to_path();

#ifdef DEBUG
    fprintf( stderr, "nlsupport.c:  _imp_getProcessLocale() returning %s as current locale.\n", locale );
#endif

}
905 #else
906 /*****************************************************************************
907  return the current process locale
908  *****************************************************************************/
909 
_imp_getProcessLocale(rtl_Locale ** ppLocale)910 void _imp_getProcessLocale( rtl_Locale ** ppLocale )
911 {
912     /* simulate behavior off setlocale */
913     char * locale = getenv( "LC_ALL" );
914 
915     if( NULL == locale )
916         locale = getenv( "LC_CTYPE" );
917 
918     if( NULL == locale )
919         locale = getenv( "LANG" );
920 
921     if( NULL == locale )
922         locale = "C";
923 
924     *ppLocale = _parse_locale( locale );
925 }
926 #endif
927 
928 /*****************************************************************************
929  set the current process locale
930  *****************************************************************************/
931 
_imp_setProcessLocale(rtl_Locale * pLocale)932 int _imp_setProcessLocale( rtl_Locale * pLocale )
933 {
934     char locale_buf[64];
935 
936     /* convert rtl_Locale to locale string */
937     if( NULL != _compose_locale( pLocale, locale_buf, 64 ) )
938     {
939         /* only change env vars that exist already */
940         if( getenv( "LC_ALL" ) ) {
941 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX )
942             setenv( "LC_ALL", locale_buf, 1);
943 #else
944             setenv( "LC_ALL", locale_buf );
945 #endif
946         }
947 
948         if( getenv( "LC_CTYPE" ) ) {
949 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX )
950             setenv("LC_CTYPE", locale_buf, 1 );
951 #else
952             setenv( "LC_CTYPE", locale_buf );
953 #endif
954         }
955 
956         if( getenv( "LANG" ) ) {
957 #if defined( FREEBSD ) || defined( NETBSD ) || defined( MACOSX )
958             setenv("LC_CTYPE", locale_buf, 1 );
959 #else
960             setenv( "LANG", locale_buf );
961 #endif
962         }
963     }
964 
965     return 0;
966 }
967 
968 #endif /* ifdef LINUX || SOLARIS || MACOSX || NETBSD */
969 
970 
971