xref: /aoo41x/main/sal/rtl/source/ustring.c (revision 647f063d)
1*647f063dSAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*647f063dSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*647f063dSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*647f063dSAndrew Rist  * distributed with this work for additional information
6*647f063dSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*647f063dSAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*647f063dSAndrew Rist  * "License"); you may not use this file except in compliance
9*647f063dSAndrew Rist  * with the License.  You may obtain a copy of the License at
10*647f063dSAndrew Rist  *
11*647f063dSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*647f063dSAndrew Rist  *
13*647f063dSAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*647f063dSAndrew Rist  * software distributed under the License is distributed on an
15*647f063dSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*647f063dSAndrew Rist  * KIND, either express or implied.  See the License for the
17*647f063dSAndrew Rist  * specific language governing permissions and limitations
18*647f063dSAndrew Rist  * under the License.
19*647f063dSAndrew Rist  *
20*647f063dSAndrew Rist  *************************************************************/
21*647f063dSAndrew Rist 
22*647f063dSAndrew Rist 
23cdf0e10cSrcweir #if defined(_MSC_VER) && (_MSC_VER >= 1400)
24cdf0e10cSrcweir #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
25cdf0e10cSrcweir #endif
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include <rtl/memory.h>
28cdf0e10cSrcweir #include <osl/diagnose.h>
29cdf0e10cSrcweir #include <osl/interlck.h>
30cdf0e10cSrcweir #include <rtl/alloc.h>
31cdf0e10cSrcweir #include <osl/mutex.h>
32cdf0e10cSrcweir #include <osl/doublecheckedlocking.h>
33cdf0e10cSrcweir #include <rtl/tencinfo.h>
34cdf0e10cSrcweir 
35cdf0e10cSrcweir #include <string.h>
36cdf0e10cSrcweir #include <sal/alloca.h>
37cdf0e10cSrcweir 
38cdf0e10cSrcweir #include "hash.h"
39cdf0e10cSrcweir #include "strimp.h"
40cdf0e10cSrcweir #include "surrogates.h"
41cdf0e10cSrcweir #include <rtl/ustring.h>
42cdf0e10cSrcweir 
43cdf0e10cSrcweir #include "rtl/math.h"
44cdf0e10cSrcweir #include "rtl/tencinfo.h"
45cdf0e10cSrcweir 
46cdf0e10cSrcweir /* ======================================================================= */
47cdf0e10cSrcweir 
48cdf0e10cSrcweir /* static data to be referenced by all empty strings
49cdf0e10cSrcweir  * the refCount is predefined to 1 and must never become 0 !
50cdf0e10cSrcweir  */
51cdf0e10cSrcweir static rtl_uString const aImplEmpty_rtl_uString =
52cdf0e10cSrcweir {
53cdf0e10cSrcweir     (sal_Int32) (SAL_STRING_INTERN_FLAG|SAL_STRING_STATIC_FLAG|1), /*sal_Int32    refCount; */
54cdf0e10cSrcweir     0,                                               /*sal_Int32    length;   */
55cdf0e10cSrcweir     { 0 }                                            /*sal_Unicode  buffer[1];*/
56cdf0e10cSrcweir };
57cdf0e10cSrcweir 
58cdf0e10cSrcweir /* ======================================================================= */
59cdf0e10cSrcweir 
60cdf0e10cSrcweir #define IMPL_RTL_STRCODE            sal_Unicode
61cdf0e10cSrcweir #define IMPL_RTL_USTRCODE( c )      (c)
62cdf0e10cSrcweir #define IMPL_RTL_STRNAME( n )       rtl_ustr_ ## n
63cdf0e10cSrcweir 
64cdf0e10cSrcweir #define IMPL_RTL_STRINGNAME( n )    rtl_uString_ ## n
65cdf0e10cSrcweir #define IMPL_RTL_STRINGDATA         rtl_uString
66cdf0e10cSrcweir #define IMPL_RTL_EMPTYSTRING        aImplEmpty_rtl_uString
67cdf0e10cSrcweir #define IMPL_RTL_INTERN
68cdf0e10cSrcweir static void internRelease (rtl_uString *pThis);
69cdf0e10cSrcweir 
70cdf0e10cSrcweir /* ======================================================================= */
71cdf0e10cSrcweir 
72cdf0e10cSrcweir /* Include String/UString template code */
73cdf0e10cSrcweir 
74cdf0e10cSrcweir #include "strtmpl.c"
75cdf0e10cSrcweir 
/* Searches a UTF-16 buffer for the first occurrence of an 8-bit substring.
 *
 * str/len       buffer to search and its length in code units
 * subStr/subLen substring to look for and its length in characters
 *
 * Returns the smallest index in [0, len - subLen] at which
 * rtl_ustr_asciil_reverseEquals_WithLength() reports a match, or -1 when
 * there is no match, when subLen is not positive, or when subLen exceeds len.
 */
sal_Int32 rtl_ustr_indexOfAscii_WithLength(
    sal_Unicode const * str, sal_Int32 len,
    char const * subStr, sal_Int32 subLen)
{
    sal_Int32 nLastStart;
    sal_Int32 nPos;

    /* An empty or over-long needle can never be found. */
    if (subLen <= 0 || subLen > len) {
        return -1;
    }

    nLastStart = len - subLen;
    nPos = 0;
    while (nPos <= nLastStart) {
        if (rtl_ustr_asciil_reverseEquals_WithLength(
                str + nPos, subStr, subLen))
        {
            return nPos;
        }
        ++nPos;
    }
    return -1;
}
92cdf0e10cSrcweir 
/* Searches a UTF-16 buffer for the last occurrence of an 8-bit substring.
 *
 * str/len       buffer to search and its length in code units
 * subStr/subLen substring to look for and its length in characters
 *
 * Candidate start positions are tried from len - subLen down to 0; the
 * first one for which rtl_ustr_asciil_reverseEquals_WithLength() reports a
 * match is returned.  Returns -1 when there is no match, when subLen is not
 * positive, or when subLen exceeds len.
 */
sal_Int32 rtl_ustr_lastIndexOfAscii_WithLength(
    sal_Unicode const * str, sal_Int32 len,
    char const * subStr, sal_Int32 subLen)
{
    sal_Int32 nPos;

    /* An empty or over-long needle can never be found. */
    if (subLen <= 0 || subLen > len) {
        return -1;
    }

    nPos = len - subLen;
    while (nPos >= 0) {
        if (rtl_ustr_asciil_reverseEquals_WithLength(
                str + nPos, subStr, subLen))
        {
            return nPos;
        }
        --nPos;
    }
    return -1;
}
109cdf0e10cSrcweir 
rtl_ustr_valueOfFloat(sal_Unicode * pStr,float f)110cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_valueOfFloat(sal_Unicode * pStr, float f)
111cdf0e10cSrcweir {
112cdf0e10cSrcweir     rtl_uString * pResult = NULL;
113cdf0e10cSrcweir     sal_Int32 nLen;
114cdf0e10cSrcweir     rtl_math_doubleToUString(
115cdf0e10cSrcweir         &pResult, 0, 0, f, rtl_math_StringFormat_G,
116cdf0e10cSrcweir         RTL_USTR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
117cdf0e10cSrcweir         0, sal_True);
118cdf0e10cSrcweir     nLen = pResult->length;
119cdf0e10cSrcweir     OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFFLOAT);
120cdf0e10cSrcweir     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
121cdf0e10cSrcweir     rtl_uString_release(pResult);
122cdf0e10cSrcweir     return nLen;
123cdf0e10cSrcweir }
124cdf0e10cSrcweir 
rtl_ustr_valueOfDouble(sal_Unicode * pStr,double d)125cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_valueOfDouble(sal_Unicode * pStr, double d)
126cdf0e10cSrcweir {
127cdf0e10cSrcweir     rtl_uString * pResult = NULL;
128cdf0e10cSrcweir     sal_Int32 nLen;
129cdf0e10cSrcweir     rtl_math_doubleToUString(
130cdf0e10cSrcweir         &pResult, 0, 0, d, rtl_math_StringFormat_G,
131cdf0e10cSrcweir         RTL_USTR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
132cdf0e10cSrcweir         0, sal_True);
133cdf0e10cSrcweir     nLen = pResult->length;
134cdf0e10cSrcweir     OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFDOUBLE);
135cdf0e10cSrcweir     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
136cdf0e10cSrcweir     rtl_uString_release(pResult);
137cdf0e10cSrcweir     return nLen;
138cdf0e10cSrcweir }
139cdf0e10cSrcweir 
rtl_ustr_toFloat(sal_Unicode const * pStr)140cdf0e10cSrcweir float SAL_CALL rtl_ustr_toFloat(sal_Unicode const * pStr)
141cdf0e10cSrcweir {
142cdf0e10cSrcweir     return (float) rtl_math_uStringToDouble(pStr,
143cdf0e10cSrcweir                                             pStr + rtl_ustr_getLength(pStr),
144cdf0e10cSrcweir                                             '.', 0, 0, 0);
145cdf0e10cSrcweir }
146cdf0e10cSrcweir 
rtl_ustr_toDouble(sal_Unicode const * pStr)147cdf0e10cSrcweir double SAL_CALL rtl_ustr_toDouble(sal_Unicode const * pStr)
148cdf0e10cSrcweir {
149cdf0e10cSrcweir     return rtl_math_uStringToDouble(pStr, pStr + rtl_ustr_getLength(pStr), '.',
150cdf0e10cSrcweir                                     0, 0, 0);
151cdf0e10cSrcweir }
152cdf0e10cSrcweir 
153cdf0e10cSrcweir /* ======================================================================= */
154cdf0e10cSrcweir 
rtl_ustr_ascii_compare(const sal_Unicode * pStr1,const sal_Char * pStr2)155cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_compare( const sal_Unicode* pStr1,
156cdf0e10cSrcweir                                            const sal_Char* pStr2 )
157cdf0e10cSrcweir {
158cdf0e10cSrcweir     sal_Int32 nRet;
159cdf0e10cSrcweir     while ( ((nRet = ((sal_Int32)(*pStr1))-
160cdf0e10cSrcweir                      ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
161cdf0e10cSrcweir             *pStr2 )
162cdf0e10cSrcweir     {
163cdf0e10cSrcweir         pStr1++;
164cdf0e10cSrcweir         pStr2++;
165cdf0e10cSrcweir     }
166cdf0e10cSrcweir 
167cdf0e10cSrcweir     return nRet;
168cdf0e10cSrcweir }
169cdf0e10cSrcweir 
170cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
171cdf0e10cSrcweir 
rtl_ustr_ascii_compare_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2)172cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_compare_WithLength( const sal_Unicode* pStr1,
173cdf0e10cSrcweir                                                       sal_Int32 nStr1Len,
174cdf0e10cSrcweir                                                       const sal_Char* pStr2 )
175cdf0e10cSrcweir {
176cdf0e10cSrcweir 	sal_Int32 nRet = 0;
177cdf0e10cSrcweir     while( ((nRet = (nStr1Len ? (sal_Int32)(*pStr1) : 0)-
178cdf0e10cSrcweir                     ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
179cdf0e10cSrcweir            nStr1Len && *pStr2 )
180cdf0e10cSrcweir     {
181cdf0e10cSrcweir         pStr1++;
182cdf0e10cSrcweir         pStr2++;
183cdf0e10cSrcweir         nStr1Len--;
184cdf0e10cSrcweir     }
185cdf0e10cSrcweir 
186cdf0e10cSrcweir     return nRet;
187cdf0e10cSrcweir }
188cdf0e10cSrcweir 
189cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
190cdf0e10cSrcweir 
rtl_ustr_ascii_shortenedCompare_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2,sal_Int32 nShortenedLength)191cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompare_WithLength( const sal_Unicode* pStr1,
192cdf0e10cSrcweir                                                                sal_Int32 nStr1Len,
193cdf0e10cSrcweir                                                                const sal_Char* pStr2,
194cdf0e10cSrcweir                                                                sal_Int32 nShortenedLength )
195cdf0e10cSrcweir {
196cdf0e10cSrcweir     const sal_Unicode*  pStr1End = pStr1 + nStr1Len;
197cdf0e10cSrcweir     sal_Int32           nRet;
198cdf0e10cSrcweir     while ( (nShortenedLength > 0) &&
199cdf0e10cSrcweir             (pStr1 < pStr1End) && *pStr2 )
200cdf0e10cSrcweir     {
201cdf0e10cSrcweir         /* Check ASCII range */
202cdf0e10cSrcweir         OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127");
203cdf0e10cSrcweir 
204cdf0e10cSrcweir         nRet = ((sal_Int32)*pStr1)-
205cdf0e10cSrcweir                ((sal_Int32)(unsigned char)*pStr2);
206cdf0e10cSrcweir         if ( nRet != 0 )
207cdf0e10cSrcweir             return nRet;
208cdf0e10cSrcweir 
209cdf0e10cSrcweir         nShortenedLength--;
210cdf0e10cSrcweir         pStr1++;
211cdf0e10cSrcweir         pStr2++;
212cdf0e10cSrcweir     }
213cdf0e10cSrcweir 
214cdf0e10cSrcweir     if ( nShortenedLength <= 0 )
215cdf0e10cSrcweir         return 0;
216cdf0e10cSrcweir 
217cdf0e10cSrcweir     if ( *pStr2 )
218cdf0e10cSrcweir     {
219cdf0e10cSrcweir         OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
220cdf0e10cSrcweir         // first is a substring of the second string => less (negative value)
221cdf0e10cSrcweir         nRet = -1;
222cdf0e10cSrcweir     }
223cdf0e10cSrcweir     else
224cdf0e10cSrcweir     {
225cdf0e10cSrcweir         // greater or equal
226cdf0e10cSrcweir         nRet = pStr1End - pStr1;
227cdf0e10cSrcweir     }
228cdf0e10cSrcweir 
229cdf0e10cSrcweir     return nRet;
230cdf0e10cSrcweir }
231cdf0e10cSrcweir 
232cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
233cdf0e10cSrcweir 
rtl_ustr_asciil_reverseCompare_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2,sal_Int32 nStr2Len)234cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_asciil_reverseCompare_WithLength( const sal_Unicode* pStr1,
235cdf0e10cSrcweir                                                               sal_Int32 nStr1Len,
236cdf0e10cSrcweir                                                               const sal_Char* pStr2,
237cdf0e10cSrcweir                                                               sal_Int32 nStr2Len )
238cdf0e10cSrcweir {
239cdf0e10cSrcweir     const sal_Unicode*  pStr1Run = pStr1+nStr1Len;
240cdf0e10cSrcweir     const sal_Char*     pStr2Run = pStr2+nStr2Len;
241cdf0e10cSrcweir     sal_Int32           nRet;
242cdf0e10cSrcweir     while ( (pStr1 < pStr1Run) && (pStr2 < pStr2Run) )
243cdf0e10cSrcweir     {
244cdf0e10cSrcweir         pStr1Run--;
245cdf0e10cSrcweir         pStr2Run--;
246cdf0e10cSrcweir         nRet = ((sal_Int32)*pStr1Run)-((sal_Int32)*pStr2Run);
247cdf0e10cSrcweir         if ( nRet )
248cdf0e10cSrcweir             return nRet;
249cdf0e10cSrcweir     }
250cdf0e10cSrcweir 
251cdf0e10cSrcweir     return nStr1Len - nStr2Len;
252cdf0e10cSrcweir }
253cdf0e10cSrcweir 
254cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
255cdf0e10cSrcweir 
rtl_ustr_asciil_reverseEquals_WithLength(const sal_Unicode * pStr1,const sal_Char * pStr2,sal_Int32 nStrLen)256cdf0e10cSrcweir sal_Bool SAL_CALL rtl_ustr_asciil_reverseEquals_WithLength( const sal_Unicode* pStr1,
257cdf0e10cSrcweir                                                               const sal_Char* pStr2,
258cdf0e10cSrcweir                                                               sal_Int32 nStrLen )
259cdf0e10cSrcweir {
260cdf0e10cSrcweir     const sal_Unicode*  pStr1Run = pStr1+nStrLen;
261cdf0e10cSrcweir     const sal_Char*     pStr2Run = pStr2+nStrLen;
262cdf0e10cSrcweir     while ( pStr1 < pStr1Run )
263cdf0e10cSrcweir     {
264cdf0e10cSrcweir         pStr1Run--;
265cdf0e10cSrcweir         pStr2Run--;
266cdf0e10cSrcweir 		if( *pStr1Run != (sal_Unicode)*pStr2Run )
267cdf0e10cSrcweir 			return sal_False;
268cdf0e10cSrcweir     }
269cdf0e10cSrcweir 
270cdf0e10cSrcweir     return sal_True;
271cdf0e10cSrcweir }
272cdf0e10cSrcweir 
273cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
274cdf0e10cSrcweir 
rtl_ustr_ascii_compareIgnoreAsciiCase(const sal_Unicode * pStr1,const sal_Char * pStr2)275cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase( const sal_Unicode* pStr1,
276cdf0e10cSrcweir                                                           const sal_Char* pStr2 )
277cdf0e10cSrcweir {
278cdf0e10cSrcweir     sal_Int32   nRet;
279cdf0e10cSrcweir     sal_Int32   c1;
280cdf0e10cSrcweir     sal_Int32   c2;
281cdf0e10cSrcweir     do
282cdf0e10cSrcweir     {
283cdf0e10cSrcweir         /* If character between 'A' and 'Z', than convert it to lowercase */
284cdf0e10cSrcweir         c1 = (sal_Int32)*pStr1;
285cdf0e10cSrcweir         c2 = (sal_Int32)((unsigned char)*pStr2);
286cdf0e10cSrcweir         if ( (c1 >= 65) && (c1 <= 90) )
287cdf0e10cSrcweir             c1 += 32;
288cdf0e10cSrcweir         if ( (c2 >= 65) && (c2 <= 90) )
289cdf0e10cSrcweir             c2 += 32;
290cdf0e10cSrcweir         nRet = c1-c2;
291cdf0e10cSrcweir         if ( nRet != 0 )
292cdf0e10cSrcweir             return nRet;
293cdf0e10cSrcweir 
294cdf0e10cSrcweir         pStr1++;
295cdf0e10cSrcweir         pStr2++;
296cdf0e10cSrcweir     }
297cdf0e10cSrcweir     while ( c2 );
298cdf0e10cSrcweir 
299cdf0e10cSrcweir     return 0;
300cdf0e10cSrcweir }
301cdf0e10cSrcweir 
302cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
303cdf0e10cSrcweir 
rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2)304cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
305cdf0e10cSrcweir                                                                      sal_Int32 nStr1Len,
306cdf0e10cSrcweir                                                                      const sal_Char* pStr2 )
307cdf0e10cSrcweir {
308cdf0e10cSrcweir     sal_Int32   nRet;
309cdf0e10cSrcweir     sal_Int32   c1;
310cdf0e10cSrcweir     sal_Int32   c2;
311cdf0e10cSrcweir     do
312cdf0e10cSrcweir     {
313cdf0e10cSrcweir         if ( !nStr1Len )
314cdf0e10cSrcweir             return *pStr2 == '\0' ? 0 : -1;
315cdf0e10cSrcweir 
316cdf0e10cSrcweir         /* If character between 'A' and 'Z', than convert it to lowercase */
317cdf0e10cSrcweir         c1 = (sal_Int32)*pStr1;
318cdf0e10cSrcweir         c2 = (sal_Int32)((unsigned char)*pStr2);
319cdf0e10cSrcweir         if ( (c1 >= 65) && (c1 <= 90) )
320cdf0e10cSrcweir             c1 += 32;
321cdf0e10cSrcweir         if ( (c2 >= 65) && (c2 <= 90) )
322cdf0e10cSrcweir             c2 += 32;
323cdf0e10cSrcweir         nRet = c1-c2;
324cdf0e10cSrcweir         if ( nRet != 0 )
325cdf0e10cSrcweir             return nRet;
326cdf0e10cSrcweir 
327cdf0e10cSrcweir         pStr1++;
328cdf0e10cSrcweir         pStr2++;
329cdf0e10cSrcweir         nStr1Len--;
330cdf0e10cSrcweir     }
331cdf0e10cSrcweir     while( c2 );
332cdf0e10cSrcweir 
333cdf0e10cSrcweir     return 0;
334cdf0e10cSrcweir }
335cdf0e10cSrcweir 
/* Compares a UTF-16 string with an 8-bit string, both length-delimited,
 * treating the ASCII letters 'A'..'Z' as equal to 'a'..'z'.
 *
 * Only the first min(firstLen, secondLen) positions are compared.  Returns
 * the difference of the (case-folded) values at the first mismatch, or
 * firstLen - secondLen when the common prefix matches.
 */
sal_Int32 rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths(
    sal_Unicode const * first, sal_Int32 firstLen,
    char const * second, sal_Int32 secondLen)
{
    sal_Int32 const nCommon = firstLen < secondLen ? firstLen : secondLen;
    sal_Int32 nPos = 0;

    while (nPos < nCommon) {
        sal_Int32 nLeft = first[nPos];
        sal_Int32 nRight = (unsigned char) second[nPos];

        /* Fold 'A'..'Z' (0x41..0x5A) to lowercase by adding 0x20. */
        if (nLeft >= 0x41 && nLeft <= 0x5A) {
            nLeft += 0x20;
        }
        if (nRight >= 0x41 && nRight <= 0x5A) {
            nRight += 0x20;
        }

        if (nLeft != nRight) {
            return nLeft - nRight;
        }
        ++nPos;
    }

    /* Common prefix is equal: the shorter string sorts first. */
    return firstLen - secondLen;
}
359cdf0e10cSrcweir 
360cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
361cdf0e10cSrcweir 
rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2,sal_Int32 nShortenedLength)362cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
363cdf0e10cSrcweir                                                                               sal_Int32 nStr1Len,
364cdf0e10cSrcweir                                                                               const sal_Char* pStr2,
365cdf0e10cSrcweir                                                                               sal_Int32 nShortenedLength )
366cdf0e10cSrcweir {
367cdf0e10cSrcweir     const sal_Unicode*  pStr1End = pStr1 + nStr1Len;
368cdf0e10cSrcweir     sal_Int32           nRet;
369cdf0e10cSrcweir     sal_Int32           c1;
370cdf0e10cSrcweir     sal_Int32           c2;
371cdf0e10cSrcweir     while ( (nShortenedLength > 0) &&
372cdf0e10cSrcweir             (pStr1 < pStr1End) && *pStr2 )
373cdf0e10cSrcweir     {
374cdf0e10cSrcweir         /* Check ASCII range */
375cdf0e10cSrcweir         OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127");
376cdf0e10cSrcweir 
377cdf0e10cSrcweir         /* If character between 'A' and 'Z', than convert it to lowercase */
378cdf0e10cSrcweir         c1 = (sal_Int32)*pStr1;
379cdf0e10cSrcweir         c2 = (sal_Int32)((unsigned char)*pStr2);
380cdf0e10cSrcweir         if ( (c1 >= 65) && (c1 <= 90) )
381cdf0e10cSrcweir             c1 += 32;
382cdf0e10cSrcweir         if ( (c2 >= 65) && (c2 <= 90) )
383cdf0e10cSrcweir             c2 += 32;
384cdf0e10cSrcweir         nRet = c1-c2;
385cdf0e10cSrcweir         if ( nRet != 0 )
386cdf0e10cSrcweir             return nRet;
387cdf0e10cSrcweir 
388cdf0e10cSrcweir         nShortenedLength--;
389cdf0e10cSrcweir         pStr1++;
390cdf0e10cSrcweir         pStr2++;
391cdf0e10cSrcweir     }
392cdf0e10cSrcweir 
393cdf0e10cSrcweir     if ( nShortenedLength <= 0 )
394cdf0e10cSrcweir         return 0;
395cdf0e10cSrcweir 
396cdf0e10cSrcweir     if ( *pStr2 )
397cdf0e10cSrcweir     {
398cdf0e10cSrcweir         OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
399cdf0e10cSrcweir         // first is a substring of the second string => less (negative value)
400cdf0e10cSrcweir         nRet = -1;
401cdf0e10cSrcweir     }
402cdf0e10cSrcweir     else
403cdf0e10cSrcweir     {
404cdf0e10cSrcweir         // greater or equal
405cdf0e10cSrcweir         nRet = pStr1End - pStr1;
406cdf0e10cSrcweir     }
407cdf0e10cSrcweir 
408cdf0e10cSrcweir     return nRet;
409cdf0e10cSrcweir }
410cdf0e10cSrcweir 
411cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
412cdf0e10cSrcweir 
rtl_uString_newFromAscii(rtl_uString ** ppThis,const sal_Char * pCharStr)413cdf0e10cSrcweir void SAL_CALL rtl_uString_newFromAscii( rtl_uString** ppThis,
414cdf0e10cSrcweir                                         const sal_Char* pCharStr )
415cdf0e10cSrcweir {
416cdf0e10cSrcweir     sal_Int32 nLen;
417cdf0e10cSrcweir 
418cdf0e10cSrcweir     if ( pCharStr )
419cdf0e10cSrcweir     {
420cdf0e10cSrcweir         const sal_Char* pTempStr = pCharStr;
421cdf0e10cSrcweir         while( *pTempStr )
422cdf0e10cSrcweir             pTempStr++;
423cdf0e10cSrcweir         nLen = pTempStr-pCharStr;
424cdf0e10cSrcweir     }
425cdf0e10cSrcweir     else
426cdf0e10cSrcweir         nLen = 0;
427cdf0e10cSrcweir 
428cdf0e10cSrcweir     if ( !nLen )
429cdf0e10cSrcweir     {
430cdf0e10cSrcweir         IMPL_RTL_STRINGNAME( new )( ppThis );
431cdf0e10cSrcweir         return;
432cdf0e10cSrcweir     }
433cdf0e10cSrcweir 
434cdf0e10cSrcweir     if ( *ppThis )
435cdf0e10cSrcweir         IMPL_RTL_STRINGNAME( release )( *ppThis );
436cdf0e10cSrcweir 
437cdf0e10cSrcweir     *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
438cdf0e10cSrcweir     OSL_ASSERT(*ppThis != NULL);
439cdf0e10cSrcweir     if ( (*ppThis) )
440cdf0e10cSrcweir     {
441cdf0e10cSrcweir         IMPL_RTL_STRCODE* pBuffer = (*ppThis)->buffer;
442cdf0e10cSrcweir         do
443cdf0e10cSrcweir         {
444cdf0e10cSrcweir             /* Check ASCII range */
445cdf0e10cSrcweir             OSL_ENSURE( ((unsigned char)*pCharStr) <= 127,
446cdf0e10cSrcweir                         "rtl_uString_newFromAscii() - Found ASCII char > 127" );
447cdf0e10cSrcweir 
448cdf0e10cSrcweir             *pBuffer = *pCharStr;
449cdf0e10cSrcweir             pBuffer++;
450cdf0e10cSrcweir             pCharStr++;
451cdf0e10cSrcweir         }
452cdf0e10cSrcweir         while ( *pCharStr );
453cdf0e10cSrcweir     }
454cdf0e10cSrcweir }
455cdf0e10cSrcweir 
rtl_uString_newFromCodePoints(rtl_uString ** newString,sal_uInt32 const * codePoints,sal_Int32 codePointCount)456cdf0e10cSrcweir void SAL_CALL rtl_uString_newFromCodePoints(
457cdf0e10cSrcweir     rtl_uString ** newString, sal_uInt32 const * codePoints,
458cdf0e10cSrcweir     sal_Int32 codePointCount)
459cdf0e10cSrcweir {
460cdf0e10cSrcweir     sal_Int32 n;
461cdf0e10cSrcweir     sal_Int32 i;
462cdf0e10cSrcweir     sal_Unicode * p;
463cdf0e10cSrcweir     OSL_ASSERT(
464cdf0e10cSrcweir         newString != NULL &&
465cdf0e10cSrcweir         (codePoints != NULL || codePointCount == 0) &&
466cdf0e10cSrcweir         codePointCount >= 0);
467cdf0e10cSrcweir     if (codePointCount == 0) {
468cdf0e10cSrcweir         rtl_uString_new(newString);
469cdf0e10cSrcweir         return;
470cdf0e10cSrcweir     }
471cdf0e10cSrcweir     if (*newString != NULL) {
472cdf0e10cSrcweir         rtl_uString_release(*newString);
473cdf0e10cSrcweir     }
474cdf0e10cSrcweir     n = codePointCount;
475cdf0e10cSrcweir     for (i = 0; i < codePointCount; ++i) {
476cdf0e10cSrcweir         OSL_ASSERT(codePoints[i] <= 0x10FFFF);
477cdf0e10cSrcweir         if (codePoints[i] >= 0x10000) {
478cdf0e10cSrcweir             ++n;
479cdf0e10cSrcweir         }
480cdf0e10cSrcweir     }
481cdf0e10cSrcweir     /* Builds on the assumption that sal_Int32 uses 32 bit two's complement
482cdf0e10cSrcweir        representation with wrap around (the necessary number of UTF-16 code
483cdf0e10cSrcweir        units will be no larger than 2 * SAL_MAX_INT32, represented as
484cdf0e10cSrcweir        sal_Int32 -2): */
485cdf0e10cSrcweir     if (n < 0) {
486cdf0e10cSrcweir         *newString = NULL;
487cdf0e10cSrcweir         return;
488cdf0e10cSrcweir     }
489cdf0e10cSrcweir     *newString = rtl_uString_ImplAlloc(n);
490cdf0e10cSrcweir     if (*newString == NULL) {
491cdf0e10cSrcweir         return;
492cdf0e10cSrcweir     }
493cdf0e10cSrcweir     p = (*newString)->buffer;
494cdf0e10cSrcweir     for (i = 0; i < codePointCount; ++i) {
495cdf0e10cSrcweir         sal_uInt32 c = codePoints[i];
496cdf0e10cSrcweir         if (c < 0x10000) {
497cdf0e10cSrcweir             *p++ = (sal_Unicode) c;
498cdf0e10cSrcweir         } else {
499cdf0e10cSrcweir             c -= 0x10000;
500cdf0e10cSrcweir             *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE);
501cdf0e10cSrcweir             *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE);
502cdf0e10cSrcweir         }
503cdf0e10cSrcweir     }
504cdf0e10cSrcweir }
505cdf0e10cSrcweir 
506cdf0e10cSrcweir /* ======================================================================= */
507cdf0e10cSrcweir 
/* Quickly estimates how many characters a UTF-8 byte sequence encodes.
 *
 * pStr/nLen  byte sequence and its length in bytes
 *
 * Each lead byte counts as one character and skips the number of bytes its
 * bit pattern announces (1 to 6); bytes that are not valid lead bytes count
 * as one character and skip one byte.  The sequence is not validated.
 *
 * The caller treats "result == nLen" as "input is pure 7-bit ASCII" and then
 * copies the bytes without conversion.  Stray continuation bytes, 0xFE/0xFF,
 * or a lead byte truncated at the very end each consume a single byte, so
 * the plain count could equal nLen for non-ASCII input.  To keep the
 * caller's test reliable, the result is bumped by one in that case, so it
 * equals nLen only if every byte is below 0x80.
 * NOTE(review): this relies on the caller tolerating an estimate that is
 * larger than the real length, as it appears to do for non-UTF-8 encodings
 * where it starts from nLen - confirm against rtl_string2UString_status().
 */
static int rtl_ImplGetFastUTF8UnicodeLen( const sal_Char* pStr, sal_Int32 nLen )
{
    int             n;
    int             bNonAscii;
    sal_uChar       c;
    sal_Int32       nSkip;
    const sal_Char* pEndStr;

    n = 0;
    bNonAscii = 0;
    pEndStr  = pStr+nLen;
    while ( pStr < pEndStr )
    {
        c = (sal_uChar)*pStr;

        if ( !(c & 0x80) )
            nSkip = 1;
        else
        {
            bNonAscii = 1;
            if ( (c & 0xE0) == 0xC0 )
                nSkip = 2;
            else if ( (c & 0xF0) == 0xE0 )
                nSkip = 3;
            else if ( (c & 0xF8) == 0xF0 )
                nSkip = 4;
            else if ( (c & 0xFC) == 0xF8 )
                nSkip = 5;
            else if ( (c & 0xFE) == 0xFC )
                nSkip = 6;
            else
                nSkip = 1;
        }

        /* Never move the pointer beyond the end of the buffer, even when a
           multi-byte sequence is cut off. */
        if ( nSkip > pEndStr - pStr )
            nSkip = (sal_Int32)(pEndStr - pStr);
        pStr += nSkip;

        n++;
    }

    /* Keep "n == nLen" exclusive to pure ASCII input (see above). */
    if ( bNonAscii && (n == nLen) )
        n++;

    return n;
}
540cdf0e10cSrcweir 
541cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
542cdf0e10cSrcweir 
rtl_string2UString_status(rtl_uString ** ppThis,const sal_Char * pStr,sal_Int32 nLen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags,sal_uInt32 * pInfo)543cdf0e10cSrcweir static void rtl_string2UString_status( rtl_uString** ppThis,
544cdf0e10cSrcweir                                        const sal_Char* pStr,
545cdf0e10cSrcweir                                        sal_Int32 nLen,
546cdf0e10cSrcweir                                        rtl_TextEncoding eTextEncoding,
547cdf0e10cSrcweir                                        sal_uInt32 nCvtFlags,
548cdf0e10cSrcweir                                        sal_uInt32 *pInfo )
549cdf0e10cSrcweir {
550cdf0e10cSrcweir     OSL_ENSURE(rtl_isOctetTextEncoding(eTextEncoding),
551cdf0e10cSrcweir                "rtl_string2UString_status() - Wrong TextEncoding" );
552cdf0e10cSrcweir 
553cdf0e10cSrcweir     if ( !nLen )
554cdf0e10cSrcweir     {
555cdf0e10cSrcweir         rtl_uString_new( ppThis );
556cdf0e10cSrcweir         if (pInfo != NULL) {
557cdf0e10cSrcweir             *pInfo = 0;
558cdf0e10cSrcweir         }
559cdf0e10cSrcweir     }
560cdf0e10cSrcweir     else
561cdf0e10cSrcweir     {
562cdf0e10cSrcweir         if ( *ppThis )
563cdf0e10cSrcweir             IMPL_RTL_STRINGNAME( release )( *ppThis );
564cdf0e10cSrcweir 
565cdf0e10cSrcweir         /* Optimization for US-ASCII */
566cdf0e10cSrcweir         if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
567cdf0e10cSrcweir         {
568cdf0e10cSrcweir             IMPL_RTL_STRCODE* pBuffer;
569cdf0e10cSrcweir             *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
570cdf0e10cSrcweir             if (*ppThis == NULL) {
571cdf0e10cSrcweir                 if (pInfo != NULL) {
572cdf0e10cSrcweir                     *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
573cdf0e10cSrcweir                         RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
574cdf0e10cSrcweir                 }
575cdf0e10cSrcweir                 return;
576cdf0e10cSrcweir             }
577cdf0e10cSrcweir             pBuffer = (*ppThis)->buffer;
578cdf0e10cSrcweir             do
579cdf0e10cSrcweir             {
580cdf0e10cSrcweir                 /* Check ASCII range */
581cdf0e10cSrcweir                 OSL_ENSURE( ((unsigned char)*pStr) <= 127,
582cdf0e10cSrcweir                             "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
583cdf0e10cSrcweir 
584cdf0e10cSrcweir                 *pBuffer = *pStr;
585cdf0e10cSrcweir                 pBuffer++;
586cdf0e10cSrcweir                 pStr++;
587cdf0e10cSrcweir                 nLen--;
588cdf0e10cSrcweir             }
589cdf0e10cSrcweir             while ( nLen );
590cdf0e10cSrcweir             if (pInfo != NULL) {
591cdf0e10cSrcweir                 *pInfo = 0;
592cdf0e10cSrcweir             }
593cdf0e10cSrcweir         }
594cdf0e10cSrcweir         else
595cdf0e10cSrcweir         {
596cdf0e10cSrcweir             rtl_uString*                pTemp;
597cdf0e10cSrcweir             rtl_uString*                pTemp2 = NULL;
598cdf0e10cSrcweir             rtl_TextToUnicodeConverter  hConverter;
599cdf0e10cSrcweir             sal_uInt32                  nInfo;
600cdf0e10cSrcweir             sal_Size                    nSrcBytes;
601cdf0e10cSrcweir             sal_Size                    nDestChars;
602cdf0e10cSrcweir             sal_Size                    nNewLen;
603cdf0e10cSrcweir 
604cdf0e10cSrcweir             /* Optimization for UTF-8 - we try to calculate the exact length */
605cdf0e10cSrcweir             /* For all other encoding we try the maximum - and reallocate
606cdf0e10cSrcweir                the buffer if needed */
607cdf0e10cSrcweir             if ( eTextEncoding == RTL_TEXTENCODING_UTF8 )
608cdf0e10cSrcweir             {
609cdf0e10cSrcweir                 nNewLen = rtl_ImplGetFastUTF8UnicodeLen( pStr, nLen );
610cdf0e10cSrcweir                 /* Includes the string only ASCII, then we could copy
611cdf0e10cSrcweir                    the buffer faster */
612cdf0e10cSrcweir                 if ( nNewLen == (sal_Size)nLen )
613cdf0e10cSrcweir                 {
614cdf0e10cSrcweir                     IMPL_RTL_STRCODE* pBuffer;
615cdf0e10cSrcweir                     *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
616cdf0e10cSrcweir                     if (*ppThis == NULL)
617cdf0e10cSrcweir                     {
618cdf0e10cSrcweir                         if (pInfo != NULL) {
619cdf0e10cSrcweir                             *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
620cdf0e10cSrcweir                                 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
621cdf0e10cSrcweir                         }
622cdf0e10cSrcweir                         return;
623cdf0e10cSrcweir                     }
624cdf0e10cSrcweir                     pBuffer = (*ppThis)->buffer;
625cdf0e10cSrcweir                     do
626cdf0e10cSrcweir                     {
627cdf0e10cSrcweir                         /* Check ASCII range */
628cdf0e10cSrcweir                         OSL_ENSURE( ((unsigned char)*pStr) <= 127,
629cdf0e10cSrcweir                                     "rtl_string2UString_status() - UTF8 test encoding is wrong" );
630cdf0e10cSrcweir 
631cdf0e10cSrcweir                         *pBuffer = *pStr;
632cdf0e10cSrcweir                         pBuffer++;
633cdf0e10cSrcweir                         pStr++;
634cdf0e10cSrcweir                         nLen--;
635cdf0e10cSrcweir                     }
636cdf0e10cSrcweir                     while ( nLen );
637cdf0e10cSrcweir                     if (pInfo != NULL) {
638cdf0e10cSrcweir                         *pInfo = 0;
639cdf0e10cSrcweir                     }
640cdf0e10cSrcweir                     return;
641cdf0e10cSrcweir                 }
642cdf0e10cSrcweir             }
643cdf0e10cSrcweir             else
644cdf0e10cSrcweir                 nNewLen = nLen;
645cdf0e10cSrcweir 
646cdf0e10cSrcweir             nCvtFlags |= RTL_TEXTTOUNICODE_FLAGS_FLUSH;
647cdf0e10cSrcweir             hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
648cdf0e10cSrcweir 
649cdf0e10cSrcweir             pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
650cdf0e10cSrcweir             if (pTemp == NULL) {
651cdf0e10cSrcweir                 if (pInfo != NULL) {
652cdf0e10cSrcweir                     *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
653cdf0e10cSrcweir                         RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
654cdf0e10cSrcweir                 }
655cdf0e10cSrcweir                 return;
656cdf0e10cSrcweir             }
657cdf0e10cSrcweir             nDestChars = rtl_convertTextToUnicode( hConverter, 0,
658cdf0e10cSrcweir                                                    pStr, nLen,
659cdf0e10cSrcweir                                                    pTemp->buffer, nNewLen,
660cdf0e10cSrcweir                                                    nCvtFlags,
661cdf0e10cSrcweir                                                    &nInfo, &nSrcBytes );
662cdf0e10cSrcweir 
663cdf0e10cSrcweir             /* Buffer not big enough, try again with enough space */
664cdf0e10cSrcweir             /* Shouldn't be the case, but if we get textencoding which
665cdf0e10cSrcweir                could results in more unicode characters we have this
666cdf0e10cSrcweir                code here. Could be the case for apple encodings */
667cdf0e10cSrcweir             while ( nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL )
668cdf0e10cSrcweir             {
669cdf0e10cSrcweir                 rtl_freeMemory( pTemp );
670cdf0e10cSrcweir                 nNewLen += 8;
671cdf0e10cSrcweir                 pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
672cdf0e10cSrcweir                 if (pTemp == NULL) {
673cdf0e10cSrcweir                     if (pInfo != NULL) {
674cdf0e10cSrcweir                         *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
675cdf0e10cSrcweir                             RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
676cdf0e10cSrcweir                     }
677cdf0e10cSrcweir                     return;
678cdf0e10cSrcweir                 }
679cdf0e10cSrcweir                 nDestChars = rtl_convertTextToUnicode( hConverter, 0,
680cdf0e10cSrcweir                                                        pStr, nLen,
681cdf0e10cSrcweir                                                        pTemp->buffer, nNewLen,
682cdf0e10cSrcweir                                                        nCvtFlags,
683cdf0e10cSrcweir                                                        &nInfo, &nSrcBytes );
684cdf0e10cSrcweir             }
685cdf0e10cSrcweir 
686cdf0e10cSrcweir             if (pInfo)
687cdf0e10cSrcweir                 *pInfo = nInfo;
688cdf0e10cSrcweir 
689cdf0e10cSrcweir             /* Set the buffer to the correct size or if there is too
690cdf0e10cSrcweir                much overhead, reallocate to the correct size */
691cdf0e10cSrcweir             if ( nNewLen > nDestChars+8 )
692cdf0e10cSrcweir             {
693cdf0e10cSrcweir                 pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestChars );
694cdf0e10cSrcweir             }
695cdf0e10cSrcweir             if (pTemp2 != NULL)
696cdf0e10cSrcweir             {
697cdf0e10cSrcweir                 rtl_str_ImplCopy(pTemp2->buffer, pTemp->buffer, nDestChars);
698cdf0e10cSrcweir                 rtl_freeMemory(pTemp);
699cdf0e10cSrcweir                 pTemp = pTemp2;
700cdf0e10cSrcweir             }
701cdf0e10cSrcweir             else
702cdf0e10cSrcweir             {
703cdf0e10cSrcweir                 pTemp->length = nDestChars;
704cdf0e10cSrcweir                 pTemp->buffer[nDestChars] = 0;
705cdf0e10cSrcweir             }
706cdf0e10cSrcweir 
707cdf0e10cSrcweir             rtl_destroyTextToUnicodeConverter( hConverter );
708cdf0e10cSrcweir             *ppThis = pTemp;
709cdf0e10cSrcweir 
710cdf0e10cSrcweir             /* Results the conversion in an empty buffer -
711cdf0e10cSrcweir                create an empty string */
712cdf0e10cSrcweir             if ( pTemp && !nDestChars )
713cdf0e10cSrcweir                 rtl_uString_new( ppThis );
714cdf0e10cSrcweir         }
715cdf0e10cSrcweir     }
716cdf0e10cSrcweir }
717cdf0e10cSrcweir 
rtl_string2UString(rtl_uString ** ppThis,const sal_Char * pStr,sal_Int32 nLen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)718cdf0e10cSrcweir void SAL_CALL rtl_string2UString( rtl_uString** ppThis,
719cdf0e10cSrcweir                                   const sal_Char* pStr,
720cdf0e10cSrcweir                                   sal_Int32 nLen,
721cdf0e10cSrcweir                                   rtl_TextEncoding eTextEncoding,
722cdf0e10cSrcweir                                   sal_uInt32 nCvtFlags )
723cdf0e10cSrcweir {
724cdf0e10cSrcweir     rtl_string2UString_status( ppThis, pStr, nLen, eTextEncoding,
725cdf0e10cSrcweir                                nCvtFlags, NULL );
726cdf0e10cSrcweir }
727cdf0e10cSrcweir 
728cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
729cdf0e10cSrcweir 
/*
 * Tells rtl_ustring_intern_internal() how it may treat the string it is
 * handed.  The value is also forwarded to rtl_str_hash_intern().
 */
typedef enum {
    /* the input string is never freed by the intern helper */
    CANNOT_RETURN = 0,
    /* the input string is freed by the intern helper when the pool
       hands back a different instance */
    CAN_RETURN    = 1
} StrLifecycle;
734cdf0e10cSrcweir 
735cdf0e10cSrcweir static oslMutex
getInternMutex()736cdf0e10cSrcweir getInternMutex()
737cdf0e10cSrcweir {
738cdf0e10cSrcweir     static oslMutex pPoolGuard = NULL;
739cdf0e10cSrcweir     if( !pPoolGuard )
740cdf0e10cSrcweir     {
741cdf0e10cSrcweir         oslMutex pGlobalGuard;
742cdf0e10cSrcweir         pGlobalGuard = *osl_getGlobalMutex();
743cdf0e10cSrcweir         osl_acquireMutex( pGlobalGuard );
744cdf0e10cSrcweir         if( !pPoolGuard )
745cdf0e10cSrcweir         {
746cdf0e10cSrcweir             oslMutex p = osl_createMutex();
747cdf0e10cSrcweir             OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
748cdf0e10cSrcweir             pPoolGuard = p;
749cdf0e10cSrcweir         }
750cdf0e10cSrcweir         osl_releaseMutex( pGlobalGuard );
751cdf0e10cSrcweir     }
752cdf0e10cSrcweir     else
753cdf0e10cSrcweir     {
754cdf0e10cSrcweir         OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
755cdf0e10cSrcweir     }
756cdf0e10cSrcweir 
757cdf0e10cSrcweir     return pPoolGuard;
758cdf0e10cSrcweir }
759cdf0e10cSrcweir 
760cdf0e10cSrcweir /* returns true if we found a dup in the pool */
rtl_ustring_intern_internal(rtl_uString ** newStr,rtl_uString * str,StrLifecycle can_return)761cdf0e10cSrcweir static void rtl_ustring_intern_internal( rtl_uString ** newStr,
762cdf0e10cSrcweir                                          rtl_uString  * str,
763cdf0e10cSrcweir                                          StrLifecycle   can_return )
764cdf0e10cSrcweir {
765cdf0e10cSrcweir     oslMutex pPoolMutex;
766cdf0e10cSrcweir 
767cdf0e10cSrcweir     pPoolMutex = getInternMutex();
768cdf0e10cSrcweir 
769cdf0e10cSrcweir     osl_acquireMutex( pPoolMutex );
770cdf0e10cSrcweir 
771cdf0e10cSrcweir     *newStr = rtl_str_hash_intern (str, can_return);
772cdf0e10cSrcweir 
773cdf0e10cSrcweir     osl_releaseMutex( pPoolMutex );
774cdf0e10cSrcweir 
775cdf0e10cSrcweir     if( can_return && *newStr != str )
776cdf0e10cSrcweir     { /* we dupped, then found a match */
777cdf0e10cSrcweir         rtl_freeMemory( str );
778cdf0e10cSrcweir     }
779cdf0e10cSrcweir }
780cdf0e10cSrcweir 
rtl_uString_intern(rtl_uString ** newStr,rtl_uString * str)781cdf0e10cSrcweir void SAL_CALL rtl_uString_intern( rtl_uString ** newStr,
782cdf0e10cSrcweir                                   rtl_uString  * str)
783cdf0e10cSrcweir {
784cdf0e10cSrcweir     if (SAL_STRING_IS_INTERN(str))
785cdf0e10cSrcweir     {
786cdf0e10cSrcweir         IMPL_RTL_AQUIRE( str );
787cdf0e10cSrcweir         *newStr = str;
788cdf0e10cSrcweir     }
789cdf0e10cSrcweir     else
790cdf0e10cSrcweir     {
791cdf0e10cSrcweir         rtl_uString *pOrg = *newStr;
792cdf0e10cSrcweir         *newStr = NULL;
793cdf0e10cSrcweir         rtl_ustring_intern_internal( newStr, str, CANNOT_RETURN );
794cdf0e10cSrcweir         if (pOrg)
795cdf0e10cSrcweir             rtl_uString_release (pOrg);
796cdf0e10cSrcweir     }
797cdf0e10cSrcweir }
798cdf0e10cSrcweir 
rtl_uString_internConvert(rtl_uString ** newStr,const sal_Char * str,sal_Int32 len,rtl_TextEncoding eTextEncoding,sal_uInt32 convertFlags,sal_uInt32 * pInfo)799cdf0e10cSrcweir void SAL_CALL rtl_uString_internConvert( rtl_uString   ** newStr,
800cdf0e10cSrcweir                                          const sal_Char * str,
801cdf0e10cSrcweir                                          sal_Int32        len,
802cdf0e10cSrcweir                                          rtl_TextEncoding eTextEncoding,
803cdf0e10cSrcweir                                          sal_uInt32       convertFlags,
804cdf0e10cSrcweir                                          sal_uInt32     * pInfo )
805cdf0e10cSrcweir {
806cdf0e10cSrcweir     rtl_uString *scratch;
807cdf0e10cSrcweir 
808cdf0e10cSrcweir     if (*newStr)
809cdf0e10cSrcweir     {
810cdf0e10cSrcweir         rtl_uString_release (*newStr);
811cdf0e10cSrcweir         *newStr = NULL;
812cdf0e10cSrcweir     }
813cdf0e10cSrcweir 
814cdf0e10cSrcweir     if ( len < 256 )
815cdf0e10cSrcweir     { // try various optimisations
816cdf0e10cSrcweir         if ( len < 0 )
817cdf0e10cSrcweir             len = strlen( str );
818cdf0e10cSrcweir         if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
819cdf0e10cSrcweir         {
820cdf0e10cSrcweir             int i;
821cdf0e10cSrcweir             rtl_uString *pScratch;
822cdf0e10cSrcweir             pScratch = alloca( sizeof( rtl_uString )
823cdf0e10cSrcweir                                + len * sizeof (IMPL_RTL_STRCODE ) );
824cdf0e10cSrcweir             for (i = 0; i < len; i++)
825cdf0e10cSrcweir             {
826cdf0e10cSrcweir                 /* Check ASCII range */
827cdf0e10cSrcweir                 OSL_ENSURE( ((unsigned char)str[i]) <= 127,
828cdf0e10cSrcweir                             "rtl_ustring_internConvert() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
829cdf0e10cSrcweir                 pScratch->buffer[i] = str[i];
830cdf0e10cSrcweir             }
831cdf0e10cSrcweir             pScratch->length = len;
832cdf0e10cSrcweir             rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
833cdf0e10cSrcweir             return;
834cdf0e10cSrcweir         }
835cdf0e10cSrcweir         /* FIXME: we want a nice UTF-8 / alloca shortcut here */
836cdf0e10cSrcweir     }
837cdf0e10cSrcweir 
838cdf0e10cSrcweir     scratch = NULL;
839cdf0e10cSrcweir     rtl_string2UString_status( &scratch, str, len, eTextEncoding, convertFlags,
840cdf0e10cSrcweir                                pInfo );
841cdf0e10cSrcweir     if (!scratch) {
842cdf0e10cSrcweir         return;
843cdf0e10cSrcweir     }
844cdf0e10cSrcweir     rtl_ustring_intern_internal( newStr, scratch, CAN_RETURN );
845cdf0e10cSrcweir }
846cdf0e10cSrcweir 
847cdf0e10cSrcweir static void
internRelease(rtl_uString * pThis)848cdf0e10cSrcweir internRelease (rtl_uString *pThis)
849cdf0e10cSrcweir {
850cdf0e10cSrcweir     oslMutex pPoolMutex;
851cdf0e10cSrcweir 
852cdf0e10cSrcweir     rtl_uString *pFree = NULL;
853cdf0e10cSrcweir     if ( SAL_STRING_REFCOUNT(
854cdf0e10cSrcweir              osl_decrementInterlockedCount( &(pThis->refCount) ) ) == 0)
855cdf0e10cSrcweir     {
856cdf0e10cSrcweir         pPoolMutex = getInternMutex();
857cdf0e10cSrcweir         osl_acquireMutex( pPoolMutex );
858cdf0e10cSrcweir 
859cdf0e10cSrcweir         rtl_str_hash_remove (pThis);
860cdf0e10cSrcweir 
861cdf0e10cSrcweir         /* May have been separately acquired */
862cdf0e10cSrcweir         if ( SAL_STRING_REFCOUNT(
863cdf0e10cSrcweir                  osl_incrementInterlockedCount( &(pThis->refCount) ) ) == 1 )
864cdf0e10cSrcweir         {
865cdf0e10cSrcweir             /* we got the last ref */
866cdf0e10cSrcweir             pFree = pThis;
867cdf0e10cSrcweir         }
868cdf0e10cSrcweir         else /* very unusual */
869cdf0e10cSrcweir         {
870cdf0e10cSrcweir             internRelease (pThis);
871cdf0e10cSrcweir         }
872cdf0e10cSrcweir 
873cdf0e10cSrcweir         osl_releaseMutex( pPoolMutex );
874cdf0e10cSrcweir     }
875cdf0e10cSrcweir     if (pFree)
876cdf0e10cSrcweir         rtl_freeMemory (pFree);
877cdf0e10cSrcweir }
878cdf0e10cSrcweir 
rtl_uString_iterateCodePoints(rtl_uString const * string,sal_Int32 * indexUtf16,sal_Int32 incrementCodePoints)879cdf0e10cSrcweir sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
880cdf0e10cSrcweir     rtl_uString const * string, sal_Int32 * indexUtf16,
881cdf0e10cSrcweir     sal_Int32 incrementCodePoints)
882cdf0e10cSrcweir {
883cdf0e10cSrcweir     sal_Int32 n;
884cdf0e10cSrcweir     sal_Unicode cu;
885cdf0e10cSrcweir     sal_uInt32 cp;
886cdf0e10cSrcweir     OSL_ASSERT(string != NULL && indexUtf16 != NULL);
887cdf0e10cSrcweir     n = *indexUtf16;
888cdf0e10cSrcweir     OSL_ASSERT(n >= 0 && n <= string->length);
889cdf0e10cSrcweir     while (incrementCodePoints < 0) {
890cdf0e10cSrcweir         OSL_ASSERT(n > 0);
891cdf0e10cSrcweir         cu = string->buffer[--n];
892cdf0e10cSrcweir         if (SAL_RTL_IS_LOW_SURROGATE(cu) && n != 0 &&
893cdf0e10cSrcweir             SAL_RTL_IS_HIGH_SURROGATE(string->buffer[n - 1]))
894cdf0e10cSrcweir         {
895cdf0e10cSrcweir             --n;
896cdf0e10cSrcweir         }
897cdf0e10cSrcweir         ++incrementCodePoints;
898cdf0e10cSrcweir     }
899cdf0e10cSrcweir     OSL_ASSERT(n >= 0 && n < string->length);
900cdf0e10cSrcweir     cu = string->buffer[n];
901cdf0e10cSrcweir     if (SAL_RTL_IS_HIGH_SURROGATE(cu) && string->length - n >= 2 &&
902cdf0e10cSrcweir         SAL_RTL_IS_LOW_SURROGATE(string->buffer[n + 1]))
903cdf0e10cSrcweir     {
904cdf0e10cSrcweir         cp = SAL_RTL_COMBINE_SURROGATES(cu, string->buffer[n + 1]);
905cdf0e10cSrcweir     } else {
906cdf0e10cSrcweir         cp = cu;
907cdf0e10cSrcweir     }
908cdf0e10cSrcweir     while (incrementCodePoints > 0) {
909cdf0e10cSrcweir         OSL_ASSERT(n < string->length);
910cdf0e10cSrcweir         cu = string->buffer[n++];
911cdf0e10cSrcweir         if (SAL_RTL_IS_HIGH_SURROGATE(cu) && n != string->length &&
912cdf0e10cSrcweir             SAL_RTL_IS_LOW_SURROGATE(string->buffer[n]))
913cdf0e10cSrcweir         {
914cdf0e10cSrcweir             ++n;
915cdf0e10cSrcweir         }
916cdf0e10cSrcweir         --incrementCodePoints;
917cdf0e10cSrcweir     }
918cdf0e10cSrcweir     OSL_ASSERT(n >= 0 && n <= string->length);
919cdf0e10cSrcweir     *indexUtf16 = n;
920cdf0e10cSrcweir     return cp;
921cdf0e10cSrcweir }
922cdf0e10cSrcweir 
rtl_convertStringToUString(rtl_uString ** target,char const * source,sal_Int32 length,rtl_TextEncoding encoding,sal_uInt32 flags)923cdf0e10cSrcweir sal_Bool rtl_convertStringToUString(
924cdf0e10cSrcweir     rtl_uString ** target, char const * source, sal_Int32 length,
925cdf0e10cSrcweir     rtl_TextEncoding encoding, sal_uInt32 flags) SAL_THROW_EXTERN_C()
926cdf0e10cSrcweir {
927cdf0e10cSrcweir     sal_uInt32 info;
928cdf0e10cSrcweir     rtl_string2UString_status(target, source, length, encoding, flags, &info);
929cdf0e10cSrcweir     return (sal_Bool) ((info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0);
930cdf0e10cSrcweir }
931