xref: /trunk/main/sal/rtl/source/ustring.c (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 #if defined(_MSC_VER) && (_MSC_VER >= 1400)
28 #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
29 #endif
30 
31 #include <rtl/memory.h>
32 #include <osl/diagnose.h>
33 #include <osl/interlck.h>
34 #include <rtl/alloc.h>
35 #include <osl/mutex.h>
36 #include <osl/doublecheckedlocking.h>
37 #include <rtl/tencinfo.h>
38 
39 #include <string.h>
40 #include <sal/alloca.h>
41 
42 #include "hash.h"
43 #include "strimp.h"
44 #include "surrogates.h"
45 #include <rtl/ustring.h>
46 
47 #include "rtl/math.h"
48 #include "rtl/tencinfo.h"
49 
50 /* ======================================================================= */
51 
52 /* static data to be referenced by all empty strings
53  * the refCount is predefined to 1 and must never become 0 !
54  */
55 static rtl_uString const aImplEmpty_rtl_uString =
56 {
57     (sal_Int32) (SAL_STRING_INTERN_FLAG|SAL_STRING_STATIC_FLAG|1), /*sal_Int32    refCount; */
58     0,                                               /*sal_Int32    length;   */
59     { 0 }                                            /*sal_Unicode  buffer[1];*/
60 };
61 
62 /* ======================================================================= */
63 
64 #define IMPL_RTL_STRCODE            sal_Unicode
65 #define IMPL_RTL_USTRCODE( c )      (c)
66 #define IMPL_RTL_STRNAME( n )       rtl_ustr_ ## n
67 
68 #define IMPL_RTL_STRINGNAME( n )    rtl_uString_ ## n
69 #define IMPL_RTL_STRINGDATA         rtl_uString
70 #define IMPL_RTL_EMPTYSTRING        aImplEmpty_rtl_uString
71 #define IMPL_RTL_INTERN
72 static void internRelease (rtl_uString *pThis);
73 
74 /* ======================================================================= */
75 
76 /* Include String/UString template code */
77 
78 #include "strtmpl.c"
79 
80 sal_Int32 rtl_ustr_indexOfAscii_WithLength(
81     sal_Unicode const * str, sal_Int32 len,
82     char const * subStr, sal_Int32 subLen)
83 {
84     if (subLen > 0 && subLen <= len) {
85         sal_Int32 i;
86         for (i = 0; i <= len - subLen; ++i) {
87             if (rtl_ustr_asciil_reverseEquals_WithLength(
88                     str + i, subStr, subLen))
89             {
90                 return i;
91             }
92         }
93     }
94     return -1;
95 }
96 
97 sal_Int32 rtl_ustr_lastIndexOfAscii_WithLength(
98     sal_Unicode const * str, sal_Int32 len,
99     char const * subStr, sal_Int32 subLen)
100 {
101     if (subLen > 0 && subLen <= len) {
102         sal_Int32 i;
103         for (i = len - subLen; i >= 0; --i) {
104             if (rtl_ustr_asciil_reverseEquals_WithLength(
105                     str + i, subStr, subLen))
106             {
107                 return i;
108             }
109         }
110     }
111     return -1;
112 }
113 
114 sal_Int32 SAL_CALL rtl_ustr_valueOfFloat(sal_Unicode * pStr, float f)
115 {
116     rtl_uString * pResult = NULL;
117     sal_Int32 nLen;
118     rtl_math_doubleToUString(
119         &pResult, 0, 0, f, rtl_math_StringFormat_G,
120         RTL_USTR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
121         0, sal_True);
122     nLen = pResult->length;
123     OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFFLOAT);
124     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
125     rtl_uString_release(pResult);
126     return nLen;
127 }
128 
129 sal_Int32 SAL_CALL rtl_ustr_valueOfDouble(sal_Unicode * pStr, double d)
130 {
131     rtl_uString * pResult = NULL;
132     sal_Int32 nLen;
133     rtl_math_doubleToUString(
134         &pResult, 0, 0, d, rtl_math_StringFormat_G,
135         RTL_USTR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
136         0, sal_True);
137     nLen = pResult->length;
138     OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFDOUBLE);
139     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
140     rtl_uString_release(pResult);
141     return nLen;
142 }
143 
144 float SAL_CALL rtl_ustr_toFloat(sal_Unicode const * pStr)
145 {
146     return (float) rtl_math_uStringToDouble(pStr,
147                                             pStr + rtl_ustr_getLength(pStr),
148                                             '.', 0, 0, 0);
149 }
150 
151 double SAL_CALL rtl_ustr_toDouble(sal_Unicode const * pStr)
152 {
153     return rtl_math_uStringToDouble(pStr, pStr + rtl_ustr_getLength(pStr), '.',
154                                     0, 0, 0);
155 }
156 
157 /* ======================================================================= */
158 
159 sal_Int32 SAL_CALL rtl_ustr_ascii_compare( const sal_Unicode* pStr1,
160                                            const sal_Char* pStr2 )
161 {
162     sal_Int32 nRet;
163     while ( ((nRet = ((sal_Int32)(*pStr1))-
164                      ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
165             *pStr2 )
166     {
167         pStr1++;
168         pStr2++;
169     }
170 
171     return nRet;
172 }
173 
174 /* ----------------------------------------------------------------------- */
175 
176 sal_Int32 SAL_CALL rtl_ustr_ascii_compare_WithLength( const sal_Unicode* pStr1,
177                                                       sal_Int32 nStr1Len,
178                                                       const sal_Char* pStr2 )
179 {
180 	sal_Int32 nRet = 0;
181     while( ((nRet = (nStr1Len ? (sal_Int32)(*pStr1) : 0)-
182                     ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
183            nStr1Len && *pStr2 )
184     {
185         pStr1++;
186         pStr2++;
187         nStr1Len--;
188     }
189 
190     return nRet;
191 }
192 
193 /* ----------------------------------------------------------------------- */
194 
195 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompare_WithLength( const sal_Unicode* pStr1,
196                                                                sal_Int32 nStr1Len,
197                                                                const sal_Char* pStr2,
198                                                                sal_Int32 nShortenedLength )
199 {
200     const sal_Unicode*  pStr1End = pStr1 + nStr1Len;
201     sal_Int32           nRet;
202     while ( (nShortenedLength > 0) &&
203             (pStr1 < pStr1End) && *pStr2 )
204     {
205         /* Check ASCII range */
206         OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127");
207 
208         nRet = ((sal_Int32)*pStr1)-
209                ((sal_Int32)(unsigned char)*pStr2);
210         if ( nRet != 0 )
211             return nRet;
212 
213         nShortenedLength--;
214         pStr1++;
215         pStr2++;
216     }
217 
218     if ( nShortenedLength <= 0 )
219         return 0;
220 
221     if ( *pStr2 )
222     {
223         OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
224         // first is a substring of the second string => less (negative value)
225         nRet = -1;
226     }
227     else
228     {
229         // greater or equal
230         nRet = pStr1End - pStr1;
231     }
232 
233     return nRet;
234 }
235 
236 /* ----------------------------------------------------------------------- */
237 
238 sal_Int32 SAL_CALL rtl_ustr_asciil_reverseCompare_WithLength( const sal_Unicode* pStr1,
239                                                               sal_Int32 nStr1Len,
240                                                               const sal_Char* pStr2,
241                                                               sal_Int32 nStr2Len )
242 {
243     const sal_Unicode*  pStr1Run = pStr1+nStr1Len;
244     const sal_Char*     pStr2Run = pStr2+nStr2Len;
245     sal_Int32           nRet;
246     while ( (pStr1 < pStr1Run) && (pStr2 < pStr2Run) )
247     {
248         pStr1Run--;
249         pStr2Run--;
250         nRet = ((sal_Int32)*pStr1Run)-((sal_Int32)*pStr2Run);
251         if ( nRet )
252             return nRet;
253     }
254 
255     return nStr1Len - nStr2Len;
256 }
257 
258 /* ----------------------------------------------------------------------- */
259 
260 sal_Bool SAL_CALL rtl_ustr_asciil_reverseEquals_WithLength( const sal_Unicode* pStr1,
261                                                               const sal_Char* pStr2,
262                                                               sal_Int32 nStrLen )
263 {
264     const sal_Unicode*  pStr1Run = pStr1+nStrLen;
265     const sal_Char*     pStr2Run = pStr2+nStrLen;
266     while ( pStr1 < pStr1Run )
267     {
268         pStr1Run--;
269         pStr2Run--;
270 		if( *pStr1Run != (sal_Unicode)*pStr2Run )
271 			return sal_False;
272     }
273 
274     return sal_True;
275 }
276 
277 /* ----------------------------------------------------------------------- */
278 
279 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase( const sal_Unicode* pStr1,
280                                                           const sal_Char* pStr2 )
281 {
282     sal_Int32   nRet;
283     sal_Int32   c1;
284     sal_Int32   c2;
285     do
286     {
287         /* If character between 'A' and 'Z', than convert it to lowercase */
288         c1 = (sal_Int32)*pStr1;
289         c2 = (sal_Int32)((unsigned char)*pStr2);
290         if ( (c1 >= 65) && (c1 <= 90) )
291             c1 += 32;
292         if ( (c2 >= 65) && (c2 <= 90) )
293             c2 += 32;
294         nRet = c1-c2;
295         if ( nRet != 0 )
296             return nRet;
297 
298         pStr1++;
299         pStr2++;
300     }
301     while ( c2 );
302 
303     return 0;
304 }
305 
306 /* ----------------------------------------------------------------------- */
307 
308 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
309                                                                      sal_Int32 nStr1Len,
310                                                                      const sal_Char* pStr2 )
311 {
312     sal_Int32   nRet;
313     sal_Int32   c1;
314     sal_Int32   c2;
315     do
316     {
317         if ( !nStr1Len )
318             return *pStr2 == '\0' ? 0 : -1;
319 
320         /* If character between 'A' and 'Z', than convert it to lowercase */
321         c1 = (sal_Int32)*pStr1;
322         c2 = (sal_Int32)((unsigned char)*pStr2);
323         if ( (c1 >= 65) && (c1 <= 90) )
324             c1 += 32;
325         if ( (c2 >= 65) && (c2 <= 90) )
326             c2 += 32;
327         nRet = c1-c2;
328         if ( nRet != 0 )
329             return nRet;
330 
331         pStr1++;
332         pStr2++;
333         nStr1Len--;
334     }
335     while( c2 );
336 
337     return 0;
338 }
339 
340 sal_Int32 rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths(
341     sal_Unicode const * first, sal_Int32 firstLen,
342     char const * second, sal_Int32 secondLen)
343 {
344     sal_Int32 i;
345     sal_Int32 len = firstLen < secondLen ? firstLen : secondLen;
346     for (i = 0; i < len; ++i) {
347         sal_Int32 c1 = *first++;
348         sal_Int32 c2 = (unsigned char) *second++;
349         sal_Int32 d;
350         if (c1 >= 65 && c1 <= 90) {
351             c1 += 32;
352         }
353         if (c2 >= 65 && c2 <= 90) {
354             c2 += 32;
355         }
356         d = c1 - c2;
357         if (d != 0) {
358             return d;
359         }
360     }
361     return firstLen - secondLen;
362 }
363 
364 /* ----------------------------------------------------------------------- */
365 
366 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
367                                                                               sal_Int32 nStr1Len,
368                                                                               const sal_Char* pStr2,
369                                                                               sal_Int32 nShortenedLength )
370 {
371     const sal_Unicode*  pStr1End = pStr1 + nStr1Len;
372     sal_Int32           nRet;
373     sal_Int32           c1;
374     sal_Int32           c2;
375     while ( (nShortenedLength > 0) &&
376             (pStr1 < pStr1End) && *pStr2 )
377     {
378         /* Check ASCII range */
379         OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127");
380 
381         /* If character between 'A' and 'Z', than convert it to lowercase */
382         c1 = (sal_Int32)*pStr1;
383         c2 = (sal_Int32)((unsigned char)*pStr2);
384         if ( (c1 >= 65) && (c1 <= 90) )
385             c1 += 32;
386         if ( (c2 >= 65) && (c2 <= 90) )
387             c2 += 32;
388         nRet = c1-c2;
389         if ( nRet != 0 )
390             return nRet;
391 
392         nShortenedLength--;
393         pStr1++;
394         pStr2++;
395     }
396 
397     if ( nShortenedLength <= 0 )
398         return 0;
399 
400     if ( *pStr2 )
401     {
402         OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
403         // first is a substring of the second string => less (negative value)
404         nRet = -1;
405     }
406     else
407     {
408         // greater or equal
409         nRet = pStr1End - pStr1;
410     }
411 
412     return nRet;
413 }
414 
415 /* ----------------------------------------------------------------------- */
416 
417 void SAL_CALL rtl_uString_newFromAscii( rtl_uString** ppThis,
418                                         const sal_Char* pCharStr )
419 {
420     sal_Int32 nLen;
421 
422     if ( pCharStr )
423     {
424         const sal_Char* pTempStr = pCharStr;
425         while( *pTempStr )
426             pTempStr++;
427         nLen = pTempStr-pCharStr;
428     }
429     else
430         nLen = 0;
431 
432     if ( !nLen )
433     {
434         IMPL_RTL_STRINGNAME( new )( ppThis );
435         return;
436     }
437 
438     if ( *ppThis )
439         IMPL_RTL_STRINGNAME( release )( *ppThis );
440 
441     *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
442     OSL_ASSERT(*ppThis != NULL);
443     if ( (*ppThis) )
444     {
445         IMPL_RTL_STRCODE* pBuffer = (*ppThis)->buffer;
446         do
447         {
448             /* Check ASCII range */
449             OSL_ENSURE( ((unsigned char)*pCharStr) <= 127,
450                         "rtl_uString_newFromAscii() - Found ASCII char > 127" );
451 
452             *pBuffer = *pCharStr;
453             pBuffer++;
454             pCharStr++;
455         }
456         while ( *pCharStr );
457     }
458 }
459 
460 void SAL_CALL rtl_uString_newFromCodePoints(
461     rtl_uString ** newString, sal_uInt32 const * codePoints,
462     sal_Int32 codePointCount)
463 {
464     sal_Int32 n;
465     sal_Int32 i;
466     sal_Unicode * p;
467     OSL_ASSERT(
468         newString != NULL &&
469         (codePoints != NULL || codePointCount == 0) &&
470         codePointCount >= 0);
471     if (codePointCount == 0) {
472         rtl_uString_new(newString);
473         return;
474     }
475     if (*newString != NULL) {
476         rtl_uString_release(*newString);
477     }
478     n = codePointCount;
479     for (i = 0; i < codePointCount; ++i) {
480         OSL_ASSERT(codePoints[i] <= 0x10FFFF);
481         if (codePoints[i] >= 0x10000) {
482             ++n;
483         }
484     }
485     /* Builds on the assumption that sal_Int32 uses 32 bit two's complement
486        representation with wrap around (the necessary number of UTF-16 code
487        units will be no larger than 2 * SAL_MAX_INT32, represented as
488        sal_Int32 -2): */
489     if (n < 0) {
490         *newString = NULL;
491         return;
492     }
493     *newString = rtl_uString_ImplAlloc(n);
494     if (*newString == NULL) {
495         return;
496     }
497     p = (*newString)->buffer;
498     for (i = 0; i < codePointCount; ++i) {
499         sal_uInt32 c = codePoints[i];
500         if (c < 0x10000) {
501             *p++ = (sal_Unicode) c;
502         } else {
503             c -= 0x10000;
504             *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE);
505             *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE);
506         }
507     }
508 }
509 
510 /* ======================================================================= */
511 
512 static int rtl_ImplGetFastUTF8UnicodeLen( const sal_Char* pStr, sal_Int32 nLen )
513 {
514     int             n;
515     sal_uChar       c;
516     const sal_Char* pEndStr;
517 
518     n = 0;
519     pEndStr  = pStr+nLen;
520     while ( pStr < pEndStr )
521     {
522         c = (sal_uChar)*pStr;
523 
524         if ( !(c & 0x80) )
525             pStr++;
526         else if ( (c & 0xE0) == 0xC0 )
527             pStr += 2;
528         else if ( (c & 0xF0) == 0xE0 )
529             pStr += 3;
530         else if ( (c & 0xF8) == 0xF0 )
531             pStr += 4;
532         else if ( (c & 0xFC) == 0xF8 )
533             pStr += 5;
534         else if ( (c & 0xFE) == 0xFC )
535             pStr += 6;
536         else
537             pStr++;
538 
539         n++;
540     }
541 
542     return n;
543 }
544 
545 /* ----------------------------------------------------------------------- */
546 
547 static void rtl_string2UString_status( rtl_uString** ppThis,
548                                        const sal_Char* pStr,
549                                        sal_Int32 nLen,
550                                        rtl_TextEncoding eTextEncoding,
551                                        sal_uInt32 nCvtFlags,
552                                        sal_uInt32 *pInfo )
553 {
554     OSL_ENSURE(rtl_isOctetTextEncoding(eTextEncoding),
555                "rtl_string2UString_status() - Wrong TextEncoding" );
556 
557     if ( !nLen )
558     {
559         rtl_uString_new( ppThis );
560         if (pInfo != NULL) {
561             *pInfo = 0;
562         }
563     }
564     else
565     {
566         if ( *ppThis )
567             IMPL_RTL_STRINGNAME( release )( *ppThis );
568 
569         /* Optimization for US-ASCII */
570         if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
571         {
572             IMPL_RTL_STRCODE* pBuffer;
573             *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
574             if (*ppThis == NULL) {
575                 if (pInfo != NULL) {
576                     *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
577                         RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
578                 }
579                 return;
580             }
581             pBuffer = (*ppThis)->buffer;
582             do
583             {
584                 /* Check ASCII range */
585                 OSL_ENSURE( ((unsigned char)*pStr) <= 127,
586                             "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
587 
588                 *pBuffer = *pStr;
589                 pBuffer++;
590                 pStr++;
591                 nLen--;
592             }
593             while ( nLen );
594             if (pInfo != NULL) {
595                 *pInfo = 0;
596             }
597         }
598         else
599         {
600             rtl_uString*                pTemp;
601             rtl_uString*                pTemp2 = NULL;
602             rtl_TextToUnicodeConverter  hConverter;
603             sal_uInt32                  nInfo;
604             sal_Size                    nSrcBytes;
605             sal_Size                    nDestChars;
606             sal_Size                    nNewLen;
607 
608             /* Optimization for UTF-8 - we try to calculate the exact length */
609             /* For all other encoding we try the maximum - and reallocate
610                the buffer if needed */
611             if ( eTextEncoding == RTL_TEXTENCODING_UTF8 )
612             {
613                 nNewLen = rtl_ImplGetFastUTF8UnicodeLen( pStr, nLen );
614                 /* Includes the string only ASCII, then we could copy
615                    the buffer faster */
616                 if ( nNewLen == (sal_Size)nLen )
617                 {
618                     IMPL_RTL_STRCODE* pBuffer;
619                     *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
620                     if (*ppThis == NULL)
621                     {
622                         if (pInfo != NULL) {
623                             *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
624                                 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
625                         }
626                         return;
627                     }
628                     pBuffer = (*ppThis)->buffer;
629                     do
630                     {
631                         /* Check ASCII range */
632                         OSL_ENSURE( ((unsigned char)*pStr) <= 127,
633                                     "rtl_string2UString_status() - UTF8 test encoding is wrong" );
634 
635                         *pBuffer = *pStr;
636                         pBuffer++;
637                         pStr++;
638                         nLen--;
639                     }
640                     while ( nLen );
641                     if (pInfo != NULL) {
642                         *pInfo = 0;
643                     }
644                     return;
645                 }
646             }
647             else
648                 nNewLen = nLen;
649 
650             nCvtFlags |= RTL_TEXTTOUNICODE_FLAGS_FLUSH;
651             hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
652 
653             pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
654             if (pTemp == NULL) {
655                 if (pInfo != NULL) {
656                     *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
657                         RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
658                 }
659                 return;
660             }
661             nDestChars = rtl_convertTextToUnicode( hConverter, 0,
662                                                    pStr, nLen,
663                                                    pTemp->buffer, nNewLen,
664                                                    nCvtFlags,
665                                                    &nInfo, &nSrcBytes );
666 
667             /* Buffer not big enough, try again with enough space */
668             /* Shouldn't be the case, but if we get textencoding which
669                could results in more unicode characters we have this
670                code here. Could be the case for apple encodings */
671             while ( nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL )
672             {
673                 rtl_freeMemory( pTemp );
674                 nNewLen += 8;
675                 pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
676                 if (pTemp == NULL) {
677                     if (pInfo != NULL) {
678                         *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
679                             RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
680                     }
681                     return;
682                 }
683                 nDestChars = rtl_convertTextToUnicode( hConverter, 0,
684                                                        pStr, nLen,
685                                                        pTemp->buffer, nNewLen,
686                                                        nCvtFlags,
687                                                        &nInfo, &nSrcBytes );
688             }
689 
690             if (pInfo)
691                 *pInfo = nInfo;
692 
693             /* Set the buffer to the correct size or if there is too
694                much overhead, reallocate to the correct size */
695             if ( nNewLen > nDestChars+8 )
696             {
697                 pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestChars );
698             }
699             if (pTemp2 != NULL)
700             {
701                 rtl_str_ImplCopy(pTemp2->buffer, pTemp->buffer, nDestChars);
702                 rtl_freeMemory(pTemp);
703                 pTemp = pTemp2;
704             }
705             else
706             {
707                 pTemp->length = nDestChars;
708                 pTemp->buffer[nDestChars] = 0;
709             }
710 
711             rtl_destroyTextToUnicodeConverter( hConverter );
712             *ppThis = pTemp;
713 
714             /* Results the conversion in an empty buffer -
715                create an empty string */
716             if ( pTemp && !nDestChars )
717                 rtl_uString_new( ppThis );
718         }
719     }
720 }
721 
722 void SAL_CALL rtl_string2UString( rtl_uString** ppThis,
723                                   const sal_Char* pStr,
724                                   sal_Int32 nLen,
725                                   rtl_TextEncoding eTextEncoding,
726                                   sal_uInt32 nCvtFlags )
727 {
728     rtl_string2UString_status( ppThis, pStr, nLen, eTextEncoding,
729                                nCvtFlags, NULL );
730 }
731 
732 /* ----------------------------------------------------------------------- */
733 
/* Passed through to rtl_str_hash_intern when a string is interned.
   With CAN_RETURN the caller frees the passed-in string itself whenever
   the pool hands back a different one; with CANNOT_RETURN the passed-in
   string is never freed by the interning code in this file. */
typedef enum {
    CANNOT_RETURN = 0,
    CAN_RETURN    = 1
} StrLifecycle;
738 
739 static oslMutex
740 getInternMutex()
741 {
742     static oslMutex pPoolGuard = NULL;
743     if( !pPoolGuard )
744     {
745         oslMutex pGlobalGuard;
746         pGlobalGuard = *osl_getGlobalMutex();
747         osl_acquireMutex( pGlobalGuard );
748         if( !pPoolGuard )
749         {
750             oslMutex p = osl_createMutex();
751             OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
752             pPoolGuard = p;
753         }
754         osl_releaseMutex( pGlobalGuard );
755     }
756     else
757     {
758         OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
759     }
760 
761     return pPoolGuard;
762 }
763 
764 /* returns true if we found a dup in the pool */
765 static void rtl_ustring_intern_internal( rtl_uString ** newStr,
766                                          rtl_uString  * str,
767                                          StrLifecycle   can_return )
768 {
769     oslMutex pPoolMutex;
770 
771     pPoolMutex = getInternMutex();
772 
773     osl_acquireMutex( pPoolMutex );
774 
775     *newStr = rtl_str_hash_intern (str, can_return);
776 
777     osl_releaseMutex( pPoolMutex );
778 
779     if( can_return && *newStr != str )
780     { /* we dupped, then found a match */
781         rtl_freeMemory( str );
782     }
783 }
784 
785 void SAL_CALL rtl_uString_intern( rtl_uString ** newStr,
786                                   rtl_uString  * str)
787 {
788     if (SAL_STRING_IS_INTERN(str))
789     {
790         IMPL_RTL_AQUIRE( str );
791         *newStr = str;
792     }
793     else
794     {
795         rtl_uString *pOrg = *newStr;
796         *newStr = NULL;
797         rtl_ustring_intern_internal( newStr, str, CANNOT_RETURN );
798         if (pOrg)
799             rtl_uString_release (pOrg);
800     }
801 }
802 
803 void SAL_CALL rtl_uString_internConvert( rtl_uString   ** newStr,
804                                          const sal_Char * str,
805                                          sal_Int32        len,
806                                          rtl_TextEncoding eTextEncoding,
807                                          sal_uInt32       convertFlags,
808                                          sal_uInt32     * pInfo )
809 {
810     rtl_uString *scratch;
811 
812     if (*newStr)
813     {
814         rtl_uString_release (*newStr);
815         *newStr = NULL;
816     }
817 
818     if ( len < 256 )
819     { // try various optimisations
820         if ( len < 0 )
821             len = strlen( str );
822         if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
823         {
824             int i;
825             rtl_uString *pScratch;
826             pScratch = alloca( sizeof( rtl_uString )
827                                + len * sizeof (IMPL_RTL_STRCODE ) );
828             for (i = 0; i < len; i++)
829             {
830                 /* Check ASCII range */
831                 OSL_ENSURE( ((unsigned char)str[i]) <= 127,
832                             "rtl_ustring_internConvert() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
833                 pScratch->buffer[i] = str[i];
834             }
835             pScratch->length = len;
836             rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
837             return;
838         }
839         /* FIXME: we want a nice UTF-8 / alloca shortcut here */
840     }
841 
842     scratch = NULL;
843     rtl_string2UString_status( &scratch, str, len, eTextEncoding, convertFlags,
844                                pInfo );
845     if (!scratch) {
846         return;
847     }
848     rtl_ustring_intern_internal( newStr, scratch, CAN_RETURN );
849 }
850 
851 static void
852 internRelease (rtl_uString *pThis)
853 {
854     oslMutex pPoolMutex;
855 
856     rtl_uString *pFree = NULL;
857     if ( SAL_STRING_REFCOUNT(
858              osl_decrementInterlockedCount( &(pThis->refCount) ) ) == 0)
859     {
860         pPoolMutex = getInternMutex();
861         osl_acquireMutex( pPoolMutex );
862 
863         rtl_str_hash_remove (pThis);
864 
865         /* May have been separately acquired */
866         if ( SAL_STRING_REFCOUNT(
867                  osl_incrementInterlockedCount( &(pThis->refCount) ) ) == 1 )
868         {
869             /* we got the last ref */
870             pFree = pThis;
871         }
872         else /* very unusual */
873         {
874             internRelease (pThis);
875         }
876 
877         osl_releaseMutex( pPoolMutex );
878     }
879     if (pFree)
880         rtl_freeMemory (pFree);
881 }
882 
883 sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
884     rtl_uString const * string, sal_Int32 * indexUtf16,
885     sal_Int32 incrementCodePoints)
886 {
887     sal_Int32 n;
888     sal_Unicode cu;
889     sal_uInt32 cp;
890     OSL_ASSERT(string != NULL && indexUtf16 != NULL);
891     n = *indexUtf16;
892     OSL_ASSERT(n >= 0 && n <= string->length);
893     while (incrementCodePoints < 0) {
894         OSL_ASSERT(n > 0);
895         cu = string->buffer[--n];
896         if (SAL_RTL_IS_LOW_SURROGATE(cu) && n != 0 &&
897             SAL_RTL_IS_HIGH_SURROGATE(string->buffer[n - 1]))
898         {
899             --n;
900         }
901         ++incrementCodePoints;
902     }
903     OSL_ASSERT(n >= 0 && n < string->length);
904     cu = string->buffer[n];
905     if (SAL_RTL_IS_HIGH_SURROGATE(cu) && string->length - n >= 2 &&
906         SAL_RTL_IS_LOW_SURROGATE(string->buffer[n + 1]))
907     {
908         cp = SAL_RTL_COMBINE_SURROGATES(cu, string->buffer[n + 1]);
909     } else {
910         cp = cu;
911     }
912     while (incrementCodePoints > 0) {
913         OSL_ASSERT(n < string->length);
914         cu = string->buffer[n++];
915         if (SAL_RTL_IS_HIGH_SURROGATE(cu) && n != string->length &&
916             SAL_RTL_IS_LOW_SURROGATE(string->buffer[n]))
917         {
918             ++n;
919         }
920         --incrementCodePoints;
921     }
922     OSL_ASSERT(n >= 0 && n <= string->length);
923     *indexUtf16 = n;
924     return cp;
925 }
926 
927 sal_Bool rtl_convertStringToUString(
928     rtl_uString ** target, char const * source, sal_Int32 length,
929     rtl_TextEncoding encoding, sal_uInt32 flags) SAL_THROW_EXTERN_C()
930 {
931     sal_uInt32 info;
932     rtl_string2UString_status(target, source, length, encoding, flags, &info);
933     return (sal_Bool) ((info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0);
934 }
935