1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23 #if defined(_MSC_VER) && (_MSC_VER >= 1400)
24 #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
25 #endif
26
27 #include <rtl/memory.h>
28 #include <osl/diagnose.h>
29 #include <osl/interlck.h>
30 #include <rtl/alloc.h>
31 #include <osl/mutex.h>
32 #include <osl/doublecheckedlocking.h>
33 #include <rtl/tencinfo.h>
34
35 #include <string.h>
36 #include <sal/alloca.h>
37
38 #include "hash.h"
39 #include "strimp.h"
40 #include "surrogates.h"
41 #include <rtl/ustring.h>
42
43 #include "rtl/math.h"
44 #include "rtl/tencinfo.h"
45
46 /* ======================================================================= */
47
/* Static data referenced by all empty strings.
 * The refCount word carries SAL_STRING_INTERN_FLAG and SAL_STRING_STATIC_FLAG
 * together with a predefined count of 1; that count must never become 0.
 * The length is 0 and the one-element buffer holds only the terminating 0.
 */
static rtl_uString const aImplEmpty_rtl_uString =
{
    (sal_Int32) (SAL_STRING_INTERN_FLAG|SAL_STRING_STATIC_FLAG|1), /*sal_Int32 refCount; */
    0, /*sal_Int32 length; */
    { 0 } /*sal_Unicode buffer[1];*/
};
57
58 /* ======================================================================= */
59
/* Parameters for the String/UString template code (strtmpl.c, included
 * further down): the code-unit type is sal_Unicode and the generated
 * functions get the rtl_ustr_ / rtl_uString_ name prefixes. */
#define IMPL_RTL_STRCODE sal_Unicode
#define IMPL_RTL_USTRCODE( c ) (c)
#define IMPL_RTL_STRNAME( n ) rtl_ustr_ ## n

#define IMPL_RTL_STRINGNAME( n ) rtl_uString_ ## n
#define IMPL_RTL_STRINGDATA rtl_uString
#define IMPL_RTL_EMPTYSTRING aImplEmpty_rtl_uString
/* NOTE(review): IMPL_RTL_INTERN presumably switches on the interning support
 * inside the template, which would be why internRelease() is declared before
 * the template is included - confirm against strtmpl.c. */
#define IMPL_RTL_INTERN
static void internRelease (rtl_uString *pThis);
69
70 /* ======================================================================= */
71
72 /* Include String/UString template code */
73
74 #include "strtmpl.c"
75
/* Searches the first len code units of str for the first occurrence of the
 * subLen characters at subStr.
 *
 * Returns the smallest matching start index, or -1 if there is no match,
 * subLen is not positive, or subLen is larger than len.
 */
sal_Int32 rtl_ustr_indexOfAscii_WithLength(
    sal_Unicode const * str, sal_Int32 len,
    char const * subStr, sal_Int32 subLen)
{
    sal_Int32 lastStart;
    sal_Int32 pos;

    if (subLen <= 0 || subLen > len) {
        return -1;
    }

    /* Highest start index at which the pattern still fits into str. */
    lastStart = len - subLen;
    pos = 0;
    while (pos <= lastStart) {
        if (rtl_ustr_asciil_reverseEquals_WithLength(str + pos, subStr, subLen)) {
            return pos;
        }
        ++pos;
    }
    return -1;
}
92
/* Searches the first len code units of str for the last occurrence of the
 * subLen characters at subStr.
 *
 * Returns the largest matching start index, or -1 if there is no match,
 * subLen is not positive, or subLen is larger than len.
 */
sal_Int32 rtl_ustr_lastIndexOfAscii_WithLength(
    sal_Unicode const * str, sal_Int32 len,
    char const * subStr, sal_Int32 subLen)
{
    sal_Int32 pos;

    if (subLen <= 0 || subLen > len) {
        return -1;
    }

    /* Start at the last position where the pattern fits and walk backwards. */
    pos = len - subLen;
    while (pos >= 0) {
        if (rtl_ustr_asciil_reverseEquals_WithLength(str + pos, subStr, subLen)) {
            return pos;
        }
        --pos;
    }
    return -1;
}
109
rtl_ustr_valueOfFloat(sal_Unicode * pStr,float f)110 sal_Int32 SAL_CALL rtl_ustr_valueOfFloat(sal_Unicode * pStr, float f)
111 {
112 rtl_uString * pResult = NULL;
113 sal_Int32 nLen;
114 rtl_math_doubleToUString(
115 &pResult, 0, 0, f, rtl_math_StringFormat_G,
116 RTL_USTR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
117 0, sal_True);
118 nLen = pResult->length;
119 OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFFLOAT);
120 rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
121 rtl_uString_release(pResult);
122 return nLen;
123 }
124
rtl_ustr_valueOfDouble(sal_Unicode * pStr,double d)125 sal_Int32 SAL_CALL rtl_ustr_valueOfDouble(sal_Unicode * pStr, double d)
126 {
127 rtl_uString * pResult = NULL;
128 sal_Int32 nLen;
129 rtl_math_doubleToUString(
130 &pResult, 0, 0, d, rtl_math_StringFormat_G,
131 RTL_USTR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
132 0, sal_True);
133 nLen = pResult->length;
134 OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFDOUBLE);
135 rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode));
136 rtl_uString_release(pResult);
137 return nLen;
138 }
139
rtl_ustr_toFloat(sal_Unicode const * pStr)140 float SAL_CALL rtl_ustr_toFloat(sal_Unicode const * pStr)
141 {
142 return (float) rtl_math_uStringToDouble(pStr,
143 pStr + rtl_ustr_getLength(pStr),
144 '.', 0, 0, 0);
145 }
146
rtl_ustr_toDouble(sal_Unicode const * pStr)147 double SAL_CALL rtl_ustr_toDouble(sal_Unicode const * pStr)
148 {
149 return rtl_math_uStringToDouble(pStr, pStr + rtl_ustr_getLength(pStr), '.',
150 0, 0, 0);
151 }
152
153 /* ======================================================================= */
154
rtl_ustr_ascii_compare(const sal_Unicode * pStr1,const sal_Char * pStr2)155 sal_Int32 SAL_CALL rtl_ustr_ascii_compare( const sal_Unicode* pStr1,
156 const sal_Char* pStr2 )
157 {
158 sal_Int32 nRet;
159 while ( ((nRet = ((sal_Int32)(*pStr1))-
160 ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
161 *pStr2 )
162 {
163 pStr1++;
164 pStr2++;
165 }
166
167 return nRet;
168 }
169
170 /* ----------------------------------------------------------------------- */
171
rtl_ustr_ascii_compare_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2)172 sal_Int32 SAL_CALL rtl_ustr_ascii_compare_WithLength( const sal_Unicode* pStr1,
173 sal_Int32 nStr1Len,
174 const sal_Char* pStr2 )
175 {
176 sal_Int32 nRet = 0;
177 while( ((nRet = (nStr1Len ? (sal_Int32)(*pStr1) : 0)-
178 ((sal_Int32)((unsigned char)(*pStr2)))) == 0) &&
179 nStr1Len && *pStr2 )
180 {
181 pStr1++;
182 pStr2++;
183 nStr1Len--;
184 }
185
186 return nRet;
187 }
188
189 /* ----------------------------------------------------------------------- */
190
rtl_ustr_ascii_shortenedCompare_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2,sal_Int32 nShortenedLength)191 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompare_WithLength( const sal_Unicode* pStr1,
192 sal_Int32 nStr1Len,
193 const sal_Char* pStr2,
194 sal_Int32 nShortenedLength )
195 {
196 const sal_Unicode* pStr1End = pStr1 + nStr1Len;
197 sal_Int32 nRet;
198 while ( (nShortenedLength > 0) &&
199 (pStr1 < pStr1End) && *pStr2 )
200 {
201 /* Check ASCII range */
202 OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127");
203
204 nRet = ((sal_Int32)*pStr1)-
205 ((sal_Int32)(unsigned char)*pStr2);
206 if ( nRet != 0 )
207 return nRet;
208
209 nShortenedLength--;
210 pStr1++;
211 pStr2++;
212 }
213
214 if ( nShortenedLength <= 0 )
215 return 0;
216
217 if ( *pStr2 )
218 {
219 OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
220 // first is a substring of the second string => less (negative value)
221 nRet = -1;
222 }
223 else
224 {
225 // greater or equal
226 nRet = pStr1End - pStr1;
227 }
228
229 return nRet;
230 }
231
232 /* ----------------------------------------------------------------------- */
233
rtl_ustr_asciil_reverseCompare_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2,sal_Int32 nStr2Len)234 sal_Int32 SAL_CALL rtl_ustr_asciil_reverseCompare_WithLength( const sal_Unicode* pStr1,
235 sal_Int32 nStr1Len,
236 const sal_Char* pStr2,
237 sal_Int32 nStr2Len )
238 {
239 const sal_Unicode* pStr1Run = pStr1+nStr1Len;
240 const sal_Char* pStr2Run = pStr2+nStr2Len;
241 sal_Int32 nRet;
242 while ( (pStr1 < pStr1Run) && (pStr2 < pStr2Run) )
243 {
244 pStr1Run--;
245 pStr2Run--;
246 nRet = ((sal_Int32)*pStr1Run)-((sal_Int32)*pStr2Run);
247 if ( nRet )
248 return nRet;
249 }
250
251 return nStr1Len - nStr2Len;
252 }
253
254 /* ----------------------------------------------------------------------- */
255
rtl_ustr_asciil_reverseEquals_WithLength(const sal_Unicode * pStr1,const sal_Char * pStr2,sal_Int32 nStrLen)256 sal_Bool SAL_CALL rtl_ustr_asciil_reverseEquals_WithLength( const sal_Unicode* pStr1,
257 const sal_Char* pStr2,
258 sal_Int32 nStrLen )
259 {
260 const sal_Unicode* pStr1Run = pStr1+nStrLen;
261 const sal_Char* pStr2Run = pStr2+nStrLen;
262 while ( pStr1 < pStr1Run )
263 {
264 pStr1Run--;
265 pStr2Run--;
266 if( *pStr1Run != (sal_Unicode)*pStr2Run )
267 return sal_False;
268 }
269
270 return sal_True;
271 }
272
273 /* ----------------------------------------------------------------------- */
274
rtl_ustr_ascii_compareIgnoreAsciiCase(const sal_Unicode * pStr1,const sal_Char * pStr2)275 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase( const sal_Unicode* pStr1,
276 const sal_Char* pStr2 )
277 {
278 sal_Int32 nRet;
279 sal_Int32 c1;
280 sal_Int32 c2;
281 do
282 {
283 /* If character between 'A' and 'Z', than convert it to lowercase */
284 c1 = (sal_Int32)*pStr1;
285 c2 = (sal_Int32)((unsigned char)*pStr2);
286 if ( (c1 >= 65) && (c1 <= 90) )
287 c1 += 32;
288 if ( (c2 >= 65) && (c2 <= 90) )
289 c2 += 32;
290 nRet = c1-c2;
291 if ( nRet != 0 )
292 return nRet;
293
294 pStr1++;
295 pStr2++;
296 }
297 while ( c2 );
298
299 return 0;
300 }
301
302 /* ----------------------------------------------------------------------- */
303
rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2)304 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
305 sal_Int32 nStr1Len,
306 const sal_Char* pStr2 )
307 {
308 sal_Int32 nRet;
309 sal_Int32 c1;
310 sal_Int32 c2;
311 do
312 {
313 if ( !nStr1Len )
314 return *pStr2 == '\0' ? 0 : -1;
315
316 /* If character between 'A' and 'Z', than convert it to lowercase */
317 c1 = (sal_Int32)*pStr1;
318 c2 = (sal_Int32)((unsigned char)*pStr2);
319 if ( (c1 >= 65) && (c1 <= 90) )
320 c1 += 32;
321 if ( (c2 >= 65) && (c2 <= 90) )
322 c2 += 32;
323 nRet = c1-c2;
324 if ( nRet != 0 )
325 return nRet;
326
327 pStr1++;
328 pStr2++;
329 nStr1Len--;
330 }
331 while( c2 );
332
333 return 0;
334 }
335
/* Compares the firstLen code units at first with the secondLen bytes at
 * second, treating the letters 'A'..'Z' (65..90) as lowercase on both sides.
 *
 * Only the common prefix (the shorter of both lengths) is compared character
 * by character. If it is equal the result is firstLen - secondLen; otherwise
 * it is the difference of the first differing (folded) pair.
 */
sal_Int32 rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths(
    sal_Unicode const * first, sal_Int32 firstLen,
    char const * second, sal_Int32 secondLen)
{
    sal_Int32 remaining = (firstLen < secondLen) ? firstLen : secondLen;

    while (remaining > 0) {
        sal_Int32 left = *first;
        sal_Int32 right = (unsigned char) *second;

        if (left >= 65 && left <= 90) {
            left += 32;
        }
        if (right >= 65 && right <= 90) {
            right += 32;
        }
        if (left != right) {
            return left - right;
        }

        ++first;
        ++second;
        --remaining;
    }
    return firstLen - secondLen;
}
359
360 /* ----------------------------------------------------------------------- */
361
rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength(const sal_Unicode * pStr1,sal_Int32 nStr1Len,const sal_Char * pStr2,sal_Int32 nShortenedLength)362 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1,
363 sal_Int32 nStr1Len,
364 const sal_Char* pStr2,
365 sal_Int32 nShortenedLength )
366 {
367 const sal_Unicode* pStr1End = pStr1 + nStr1Len;
368 sal_Int32 nRet;
369 sal_Int32 c1;
370 sal_Int32 c2;
371 while ( (nShortenedLength > 0) &&
372 (pStr1 < pStr1End) && *pStr2 )
373 {
374 /* Check ASCII range */
375 OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127");
376
377 /* If character between 'A' and 'Z', than convert it to lowercase */
378 c1 = (sal_Int32)*pStr1;
379 c2 = (sal_Int32)((unsigned char)*pStr2);
380 if ( (c1 >= 65) && (c1 <= 90) )
381 c1 += 32;
382 if ( (c2 >= 65) && (c2 <= 90) )
383 c2 += 32;
384 nRet = c1-c2;
385 if ( nRet != 0 )
386 return nRet;
387
388 nShortenedLength--;
389 pStr1++;
390 pStr2++;
391 }
392
393 if ( nShortenedLength <= 0 )
394 return 0;
395
396 if ( *pStr2 )
397 {
398 OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" );
399 // first is a substring of the second string => less (negative value)
400 nRet = -1;
401 }
402 else
403 {
404 // greater or equal
405 nRet = pStr1End - pStr1;
406 }
407
408 return nRet;
409 }
410
411 /* ----------------------------------------------------------------------- */
412
rtl_uString_newFromAscii(rtl_uString ** ppThis,const sal_Char * pCharStr)413 void SAL_CALL rtl_uString_newFromAscii( rtl_uString** ppThis,
414 const sal_Char* pCharStr )
415 {
416 sal_Int32 nLen;
417
418 if ( pCharStr )
419 {
420 const sal_Char* pTempStr = pCharStr;
421 while( *pTempStr )
422 pTempStr++;
423 nLen = pTempStr-pCharStr;
424 }
425 else
426 nLen = 0;
427
428 if ( !nLen )
429 {
430 IMPL_RTL_STRINGNAME( new )( ppThis );
431 return;
432 }
433
434 if ( *ppThis )
435 IMPL_RTL_STRINGNAME( release )( *ppThis );
436
437 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
438 OSL_ASSERT(*ppThis != NULL);
439 if ( (*ppThis) )
440 {
441 IMPL_RTL_STRCODE* pBuffer = (*ppThis)->buffer;
442 do
443 {
444 /* Check ASCII range */
445 OSL_ENSURE( ((unsigned char)*pCharStr) <= 127,
446 "rtl_uString_newFromAscii() - Found ASCII char > 127" );
447
448 *pBuffer = *pCharStr;
449 pBuffer++;
450 pCharStr++;
451 }
452 while ( *pCharStr );
453 }
454 }
455
rtl_uString_newFromCodePoints(rtl_uString ** newString,sal_uInt32 const * codePoints,sal_Int32 codePointCount)456 void SAL_CALL rtl_uString_newFromCodePoints(
457 rtl_uString ** newString, sal_uInt32 const * codePoints,
458 sal_Int32 codePointCount)
459 {
460 sal_Int32 n;
461 sal_Int32 i;
462 sal_Unicode * p;
463 OSL_ASSERT(
464 newString != NULL &&
465 (codePoints != NULL || codePointCount == 0) &&
466 codePointCount >= 0);
467 if (codePointCount == 0) {
468 rtl_uString_new(newString);
469 return;
470 }
471 if (*newString != NULL) {
472 rtl_uString_release(*newString);
473 }
474 n = codePointCount;
475 for (i = 0; i < codePointCount; ++i) {
476 OSL_ASSERT(codePoints[i] <= 0x10FFFF);
477 if (codePoints[i] >= 0x10000) {
478 ++n;
479 }
480 }
481 /* Builds on the assumption that sal_Int32 uses 32 bit two's complement
482 representation with wrap around (the necessary number of UTF-16 code
483 units will be no larger than 2 * SAL_MAX_INT32, represented as
484 sal_Int32 -2): */
485 if (n < 0) {
486 *newString = NULL;
487 return;
488 }
489 *newString = rtl_uString_ImplAlloc(n);
490 if (*newString == NULL) {
491 return;
492 }
493 p = (*newString)->buffer;
494 for (i = 0; i < codePointCount; ++i) {
495 sal_uInt32 c = codePoints[i];
496 if (c < 0x10000) {
497 *p++ = (sal_Unicode) c;
498 } else {
499 c -= 0x10000;
500 *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE);
501 *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE);
502 }
503 }
504 }
505
506 /* ======================================================================= */
507
/* Quick estimate of the number of characters encoded by the nLen bytes at
 * pStr, judging only by the lead byte of each sequence.
 *
 * Each lead byte advances by the sequence length it announces (1 to 6 bytes)
 * and counts as one character; a byte that is no valid lead byte advances by
 * one byte and counts as one character as well. No trail bytes are checked,
 * and the last step may move past the end of the input.
 */
static int rtl_ImplGetFastUTF8UnicodeLen( const sal_Char* pStr, sal_Int32 nLen )
{
    const sal_Char* const pEnd = pStr+nLen;
    int nCount = 0;

    for ( ; pStr < pEnd; nCount++ )
    {
        sal_uChar cLead = (sal_uChar)*pStr;
        int nBytes;

        if ( (cLead & 0x80) == 0 )
            nBytes = 1;
        else if ( (cLead & 0xE0) == 0xC0 )
            nBytes = 2;
        else if ( (cLead & 0xF0) == 0xE0 )
            nBytes = 3;
        else if ( (cLead & 0xF8) == 0xF0 )
            nBytes = 4;
        else if ( (cLead & 0xFC) == 0xF8 )
            nBytes = 5;
        else if ( (cLead & 0xFE) == 0xFC )
            nBytes = 6;
        else
            nBytes = 1;

        pStr += nBytes;
    }

    return nCount;
}
540
541 /* ----------------------------------------------------------------------- */
542
/* Returns 1 if none of the nLen bytes at pStr has its high bit set, else 0. */
static int rtl_ImplString2UStringIsAscii( const sal_Char* pStr, sal_Int32 nLen )
{
    const sal_Char* pEnd = pStr+nLen;
    while ( pStr < pEnd )
    {
        if ( ((unsigned char)*pStr) & 0x80 )
            return 0;
        pStr++;
    }
    return 1;
}

/* Converts the nLen bytes at pStr from eTextEncoding to UTF-16 and stores the
 * resulting string in *ppThis (a previous *ppThis is released).
 *
 * ppThis         in/out string; must be NULL or a valid string on entry.
 * pStr, nLen     source bytes; nLen == 0 yields rtl_uString_new().
 * eTextEncoding  octet text encoding of the source.
 * nCvtFlags      RTL_TEXTTOUNICODE_FLAGS_* for the converter
 *                (RTL_TEXTTOUNICODE_FLAGS_FLUSH is always added).
 * pInfo          optional out parameter. It receives 0 on the direct copy
 *                paths, the converter's info bits on the converter path, and
 *                RTL_TEXTTOUNICODE_INFO_ERROR |
 *                RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL if an allocation
 *                failed.
 *
 * On allocation failure *ppThis is NULL on return. The previous string has
 * already been released at that point, so the pointer is cleared right after
 * the release instead of being left dangling.
 */
static void rtl_string2UString_status( rtl_uString** ppThis,
                                       const sal_Char* pStr,
                                       sal_Int32 nLen,
                                       rtl_TextEncoding eTextEncoding,
                                       sal_uInt32 nCvtFlags,
                                       sal_uInt32 *pInfo )
{
    OSL_ENSURE(rtl_isOctetTextEncoding(eTextEncoding),
               "rtl_string2UString_status() - Wrong TextEncoding" );

    if ( !nLen )
    {
        rtl_uString_new( ppThis );
        if (pInfo != NULL) {
            *pInfo = 0;
        }
    }
    else
    {
        if ( *ppThis )
        {
            IMPL_RTL_STRINGNAME( release )( *ppThis );
            /* Never hand a released string back on a failure path */
            *ppThis = NULL;
        }

        /* Optimization for US-ASCII */
        if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
        {
            IMPL_RTL_STRCODE* pBuffer;
            *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
            if (*ppThis == NULL) {
                if (pInfo != NULL) {
                    *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
                        RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                }
                return;
            }
            pBuffer = (*ppThis)->buffer;
            do
            {
                /* Check ASCII range */
                OSL_ENSURE( ((unsigned char)*pStr) <= 127,
                            "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );

                *pBuffer = *pStr;
                pBuffer++;
                pStr++;
                nLen--;
            }
            while ( nLen );
            if (pInfo != NULL) {
                *pInfo = 0;
            }
        }
        else
        {
            rtl_uString* pTemp;
            rtl_uString* pTemp2 = NULL;
            rtl_TextToUnicodeConverter hConverter;
            sal_uInt32 nInfo;
            sal_Size nSrcBytes;
            sal_Size nDestChars;
            sal_Size nNewLen;

            /* Optimization for UTF-8 - we try to calculate the exact length */
            /* For all other encodings we try the maximum - and reallocate
               the buffer if needed */
            if ( eTextEncoding == RTL_TEXTENCODING_UTF8 )
            {
                nNewLen = rtl_ImplGetFastUTF8UnicodeLen( pStr, nLen );
                /* If the string contains only ASCII we can copy the buffer
                   directly. The estimate alone does not prove that: stray
                   trail bytes or a truncated sequence at the end also give
                   nNewLen == nLen, so the bytes are checked explicitly and
                   anything else goes through the converter, which honours
                   nCvtFlags. */
                if ( nNewLen == (sal_Size)nLen &&
                     rtl_ImplString2UStringIsAscii( pStr, nLen ) )
                {
                    IMPL_RTL_STRCODE* pBuffer;
                    *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen );
                    if (*ppThis == NULL)
                    {
                        if (pInfo != NULL) {
                            *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
                                RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                        }
                        return;
                    }
                    pBuffer = (*ppThis)->buffer;
                    do
                    {
                        /* Check ASCII range */
                        OSL_ENSURE( ((unsigned char)*pStr) <= 127,
                                    "rtl_string2UString_status() - UTF8 test encoding is wrong" );

                        *pBuffer = *pStr;
                        pBuffer++;
                        pStr++;
                        nLen--;
                    }
                    while ( nLen );
                    if (pInfo != NULL) {
                        *pInfo = 0;
                    }
                    return;
                }
            }
            else
                nNewLen = nLen;

            nCvtFlags |= RTL_TEXTTOUNICODE_FLAGS_FLUSH;
            hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );

            pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
            if (pTemp == NULL) {
                /* Do not leak the converter on the error path */
                rtl_destroyTextToUnicodeConverter( hConverter );
                if (pInfo != NULL) {
                    *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
                        RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                }
                return;
            }
            nDestChars = rtl_convertTextToUnicode( hConverter, 0,
                                                   pStr, nLen,
                                                   pTemp->buffer, nNewLen,
                                                   nCvtFlags,
                                                   &nInfo, &nSrcBytes );

            /* Buffer not big enough, try again with enough space */
            /* Shouldn't be the case, but if we get a text encoding which
               could result in more unicode characters we have this
               code here. Could be the case for apple encodings */
            while ( nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL )
            {
                rtl_freeMemory( pTemp );
                nNewLen += 8;
                pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
                if (pTemp == NULL) {
                    /* Do not leak the converter on the error path */
                    rtl_destroyTextToUnicodeConverter( hConverter );
                    if (pInfo != NULL) {
                        *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR |
                            RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                    }
                    return;
                }
                nDestChars = rtl_convertTextToUnicode( hConverter, 0,
                                                       pStr, nLen,
                                                       pTemp->buffer, nNewLen,
                                                       nCvtFlags,
                                                       &nInfo, &nSrcBytes );
            }

            if (pInfo)
                *pInfo = nInfo;

            /* Set the buffer to the correct size or if there is too
               much overhead, reallocate to the correct size */
            if ( nNewLen > nDestChars+8 )
            {
                pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestChars );
            }
            if (pTemp2 != NULL)
            {
                rtl_str_ImplCopy(pTemp2->buffer, pTemp->buffer, nDestChars);
                rtl_freeMemory(pTemp);
                pTemp = pTemp2;
            }
            else
            {
                /* Keep the larger allocation and just shorten the string */
                pTemp->length = nDestChars;
                pTemp->buffer[nDestChars] = 0;
            }

            rtl_destroyTextToUnicodeConverter( hConverter );
            *ppThis = pTemp;

            /* If the conversion results in an empty buffer -
               create an empty string */
            if ( pTemp && !nDestChars )
                rtl_uString_new( ppThis );
        }
    }
}
717
rtl_string2UString(rtl_uString ** ppThis,const sal_Char * pStr,sal_Int32 nLen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)718 void SAL_CALL rtl_string2UString( rtl_uString** ppThis,
719 const sal_Char* pStr,
720 sal_Int32 nLen,
721 rtl_TextEncoding eTextEncoding,
722 sal_uInt32 nCvtFlags )
723 {
724 rtl_string2UString_status( ppThis, pStr, nLen, eTextEncoding,
725 nCvtFlags, NULL );
726 }
727
728 /* ----------------------------------------------------------------------- */
729
/* Tells the intern helpers how the string handed to the pool may be used.
 * In this file CANNOT_RETURN is passed for caller-owned and stack scratch
 * strings, CAN_RETURN for a freshly converted heap string.
 * NOTE(review): presumably CAN_RETURN lets the pool adopt and return the
 * passed string itself - confirm against rtl_str_hash_intern(). */
typedef enum {
    CANNOT_RETURN = 0,
    CAN_RETURN = 1
} StrLifecycle;
734
735 static oslMutex
getInternMutex()736 getInternMutex()
737 {
738 static oslMutex pPoolGuard = NULL;
739 if( !pPoolGuard )
740 {
741 oslMutex pGlobalGuard;
742 pGlobalGuard = *osl_getGlobalMutex();
743 osl_acquireMutex( pGlobalGuard );
744 if( !pPoolGuard )
745 {
746 oslMutex p = osl_createMutex();
747 OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
748 pPoolGuard = p;
749 }
750 osl_releaseMutex( pGlobalGuard );
751 }
752 else
753 {
754 OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER();
755 }
756
757 return pPoolGuard;
758 }
759
760 /* returns true if we found a dup in the pool */
rtl_ustring_intern_internal(rtl_uString ** newStr,rtl_uString * str,StrLifecycle can_return)761 static void rtl_ustring_intern_internal( rtl_uString ** newStr,
762 rtl_uString * str,
763 StrLifecycle can_return )
764 {
765 oslMutex pPoolMutex;
766
767 pPoolMutex = getInternMutex();
768
769 osl_acquireMutex( pPoolMutex );
770
771 *newStr = rtl_str_hash_intern (str, can_return);
772
773 osl_releaseMutex( pPoolMutex );
774
775 if( can_return && *newStr != str )
776 { /* we dupped, then found a match */
777 rtl_freeMemory( str );
778 }
779 }
780
rtl_uString_intern(rtl_uString ** newStr,rtl_uString * str)781 void SAL_CALL rtl_uString_intern( rtl_uString ** newStr,
782 rtl_uString * str)
783 {
784 if (SAL_STRING_IS_INTERN(str))
785 {
786 IMPL_RTL_AQUIRE( str );
787 *newStr = str;
788 }
789 else
790 {
791 rtl_uString *pOrg = *newStr;
792 *newStr = NULL;
793 rtl_ustring_intern_internal( newStr, str, CANNOT_RETURN );
794 if (pOrg)
795 rtl_uString_release (pOrg);
796 }
797 }
798
rtl_uString_internConvert(rtl_uString ** newStr,const sal_Char * str,sal_Int32 len,rtl_TextEncoding eTextEncoding,sal_uInt32 convertFlags,sal_uInt32 * pInfo)799 void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
800 const sal_Char * str,
801 sal_Int32 len,
802 rtl_TextEncoding eTextEncoding,
803 sal_uInt32 convertFlags,
804 sal_uInt32 * pInfo )
805 {
806 rtl_uString *scratch;
807
808 if (*newStr)
809 {
810 rtl_uString_release (*newStr);
811 *newStr = NULL;
812 }
813
814 if ( len < 256 )
815 { // try various optimisations
816 if ( len < 0 )
817 len = strlen( str );
818 if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
819 {
820 int i;
821 rtl_uString *pScratch;
822 pScratch = alloca( sizeof( rtl_uString )
823 + len * sizeof (IMPL_RTL_STRCODE ) );
824 for (i = 0; i < len; i++)
825 {
826 /* Check ASCII range */
827 OSL_ENSURE( ((unsigned char)str[i]) <= 127,
828 "rtl_ustring_internConvert() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" );
829 pScratch->buffer[i] = str[i];
830 }
831 pScratch->length = len;
832 rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
833 return;
834 }
835 /* FIXME: we want a nice UTF-8 / alloca shortcut here */
836 }
837
838 scratch = NULL;
839 rtl_string2UString_status( &scratch, str, len, eTextEncoding, convertFlags,
840 pInfo );
841 if (!scratch) {
842 return;
843 }
844 rtl_ustring_intern_internal( newStr, scratch, CAN_RETURN );
845 }
846
847 static void
internRelease(rtl_uString * pThis)848 internRelease (rtl_uString *pThis)
849 {
850 oslMutex pPoolMutex;
851
852 rtl_uString *pFree = NULL;
853 if ( SAL_STRING_REFCOUNT(
854 osl_decrementInterlockedCount( &(pThis->refCount) ) ) == 0)
855 {
856 pPoolMutex = getInternMutex();
857 osl_acquireMutex( pPoolMutex );
858
859 rtl_str_hash_remove (pThis);
860
861 /* May have been separately acquired */
862 if ( SAL_STRING_REFCOUNT(
863 osl_incrementInterlockedCount( &(pThis->refCount) ) ) == 1 )
864 {
865 /* we got the last ref */
866 pFree = pThis;
867 }
868 else /* very unusual */
869 {
870 internRelease (pThis);
871 }
872
873 osl_releaseMutex( pPoolMutex );
874 }
875 if (pFree)
876 rtl_freeMemory (pFree);
877 }
878
rtl_uString_iterateCodePoints(rtl_uString const * string,sal_Int32 * indexUtf16,sal_Int32 incrementCodePoints)879 sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
880 rtl_uString const * string, sal_Int32 * indexUtf16,
881 sal_Int32 incrementCodePoints)
882 {
883 sal_Int32 n;
884 sal_Unicode cu;
885 sal_uInt32 cp;
886 OSL_ASSERT(string != NULL && indexUtf16 != NULL);
887 n = *indexUtf16;
888 OSL_ASSERT(n >= 0 && n <= string->length);
889 while (incrementCodePoints < 0) {
890 OSL_ASSERT(n > 0);
891 cu = string->buffer[--n];
892 if (SAL_RTL_IS_LOW_SURROGATE(cu) && n != 0 &&
893 SAL_RTL_IS_HIGH_SURROGATE(string->buffer[n - 1]))
894 {
895 --n;
896 }
897 ++incrementCodePoints;
898 }
899 OSL_ASSERT(n >= 0 && n < string->length);
900 cu = string->buffer[n];
901 if (SAL_RTL_IS_HIGH_SURROGATE(cu) && string->length - n >= 2 &&
902 SAL_RTL_IS_LOW_SURROGATE(string->buffer[n + 1]))
903 {
904 cp = SAL_RTL_COMBINE_SURROGATES(cu, string->buffer[n + 1]);
905 } else {
906 cp = cu;
907 }
908 while (incrementCodePoints > 0) {
909 OSL_ASSERT(n < string->length);
910 cu = string->buffer[n++];
911 if (SAL_RTL_IS_HIGH_SURROGATE(cu) && n != string->length &&
912 SAL_RTL_IS_LOW_SURROGATE(string->buffer[n]))
913 {
914 ++n;
915 }
916 --incrementCodePoints;
917 }
918 OSL_ASSERT(n >= 0 && n <= string->length);
919 *indexUtf16 = n;
920 return cp;
921 }
922
rtl_convertStringToUString(rtl_uString ** target,char const * source,sal_Int32 length,rtl_TextEncoding encoding,sal_uInt32 flags)923 sal_Bool rtl_convertStringToUString(
924 rtl_uString ** target, char const * source, sal_Int32 length,
925 rtl_TextEncoding encoding, sal_uInt32 flags) SAL_THROW_EXTERN_C()
926 {
927 sal_uInt32 info;
928 rtl_string2UString_status(target, source, length, encoding, flags, &info);
929 return (sal_Bool) ((info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0);
930 }
931