1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 #if defined(_MSC_VER) && (_MSC_VER >= 1400) 28 #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance 29 #endif 30 31 #include <rtl/memory.h> 32 #include <osl/diagnose.h> 33 #include <osl/interlck.h> 34 #include <rtl/alloc.h> 35 #include <osl/mutex.h> 36 #include <osl/doublecheckedlocking.h> 37 #include <rtl/tencinfo.h> 38 39 #include <string.h> 40 #include <sal/alloca.h> 41 42 #include "hash.h" 43 #include "strimp.h" 44 #include "surrogates.h" 45 #include <rtl/ustring.h> 46 47 #include "rtl/math.h" 48 #include "rtl/tencinfo.h" 49 50 /* ======================================================================= */ 51 52 /* static data to be referenced by all empty strings 53 * the refCount is predefined to 1 and must never become 0 ! 54 */ 55 static rtl_uString const aImplEmpty_rtl_uString = 56 { 57 (sal_Int32) (SAL_STRING_INTERN_FLAG|SAL_STRING_STATIC_FLAG|1), /*sal_Int32 refCount; */ 58 0, /*sal_Int32 length; */ 59 { 0 } /*sal_Unicode buffer[1];*/ 60 }; 61 62 /* ======================================================================= */ 63 64 #define IMPL_RTL_STRCODE sal_Unicode 65 #define IMPL_RTL_USTRCODE( c ) (c) 66 #define IMPL_RTL_STRNAME( n ) rtl_ustr_ ## n 67 68 #define IMPL_RTL_STRINGNAME( n ) rtl_uString_ ## n 69 #define IMPL_RTL_STRINGDATA rtl_uString 70 #define IMPL_RTL_EMPTYSTRING aImplEmpty_rtl_uString 71 #define IMPL_RTL_INTERN 72 static void internRelease (rtl_uString *pThis); 73 74 /* ======================================================================= */ 75 76 /* Include String/UString template code */ 77 78 #include "strtmpl.c" 79 80 sal_Int32 rtl_ustr_indexOfAscii_WithLength( 81 sal_Unicode const * str, sal_Int32 len, 82 char const * subStr, sal_Int32 subLen) 83 { 84 if (subLen > 0 && subLen <= len) { 85 sal_Int32 i; 86 for (i = 0; i <= len - subLen; ++i) { 87 if (rtl_ustr_asciil_reverseEquals_WithLength( 88 str + i, subStr, subLen)) 89 { 90 return i; 91 } 92 } 93 } 94 return -1; 95 } 96 97 sal_Int32 rtl_ustr_lastIndexOfAscii_WithLength( 98 sal_Unicode const * str, sal_Int32 len, 99 char const * subStr, sal_Int32 subLen) 100 { 101 if (subLen > 0 && subLen <= len) { 102 sal_Int32 i; 103 for (i = len - subLen; i >= 0; --i) { 104 if (rtl_ustr_asciil_reverseEquals_WithLength( 105 str + i, subStr, subLen)) 106 { 107 return i; 108 } 109 } 110 } 111 return -1; 112 } 113 114 sal_Int32 SAL_CALL rtl_ustr_valueOfFloat(sal_Unicode * pStr, float f) 115 { 116 rtl_uString * pResult = NULL; 117 sal_Int32 nLen; 118 rtl_math_doubleToUString( 119 &pResult, 0, 0, f, rtl_math_StringFormat_G, 120 RTL_USTR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 121 0, sal_True); 122 nLen = pResult->length; 123 OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFFLOAT); 124 rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode)); 125 rtl_uString_release(pResult); 126 return nLen; 127 } 128 129 sal_Int32 SAL_CALL rtl_ustr_valueOfDouble(sal_Unicode * pStr, double d) 130 { 131 rtl_uString * pResult = NULL; 132 sal_Int32 nLen; 133 rtl_math_doubleToUString( 134 &pResult, 0, 0, d, rtl_math_StringFormat_G, 135 RTL_USTR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 136 0, sal_True); 137 nLen = pResult->length; 138 OSL_ASSERT(nLen < RTL_USTR_MAX_VALUEOFDOUBLE); 139 rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Unicode)); 140 rtl_uString_release(pResult); 141 return nLen; 142 } 143 144 float SAL_CALL rtl_ustr_toFloat(sal_Unicode const * pStr) 145 { 146 return (float) rtl_math_uStringToDouble(pStr, 147 pStr + rtl_ustr_getLength(pStr), 148 '.', 0, 0, 0); 149 } 150 151 double SAL_CALL rtl_ustr_toDouble(sal_Unicode const * pStr) 152 { 153 return rtl_math_uStringToDouble(pStr, pStr + rtl_ustr_getLength(pStr), '.', 154 0, 0, 0); 155 } 156 157 /* ======================================================================= */ 158 159 sal_Int32 SAL_CALL rtl_ustr_ascii_compare( const sal_Unicode* pStr1, 160 const sal_Char* pStr2 ) 161 { 162 sal_Int32 nRet; 163 while ( ((nRet = ((sal_Int32)(*pStr1))- 164 ((sal_Int32)((unsigned char)(*pStr2)))) == 0) && 165 *pStr2 ) 166 { 167 pStr1++; 168 pStr2++; 169 } 170 171 return nRet; 172 } 173 174 /* ----------------------------------------------------------------------- */ 175 176 sal_Int32 SAL_CALL rtl_ustr_ascii_compare_WithLength( const sal_Unicode* pStr1, 177 sal_Int32 nStr1Len, 178 const sal_Char* pStr2 ) 179 { 180 sal_Int32 nRet = 0; 181 while( ((nRet = (nStr1Len ? (sal_Int32)(*pStr1) : 0)- 182 ((sal_Int32)((unsigned char)(*pStr2)))) == 0) && 183 nStr1Len && *pStr2 ) 184 { 185 pStr1++; 186 pStr2++; 187 nStr1Len--; 188 } 189 190 return nRet; 191 } 192 193 /* ----------------------------------------------------------------------- */ 194 195 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompare_WithLength( const sal_Unicode* pStr1, 196 sal_Int32 nStr1Len, 197 const sal_Char* pStr2, 198 sal_Int32 nShortenedLength ) 199 { 200 const sal_Unicode* pStr1End = pStr1 + nStr1Len; 201 sal_Int32 nRet; 202 while ( (nShortenedLength > 0) && 203 (pStr1 < pStr1End) && *pStr2 ) 204 { 205 /* Check ASCII range */ 206 OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127"); 207 208 nRet = ((sal_Int32)*pStr1)- 209 ((sal_Int32)(unsigned char)*pStr2); 210 if ( nRet != 0 ) 211 return nRet; 212 213 nShortenedLength--; 214 pStr1++; 215 pStr2++; 216 } 217 218 if ( nShortenedLength <= 0 ) 219 return 0; 220 221 if ( *pStr2 ) 222 { 223 OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" ); 224 // first is a substring of the second string => less (negative value) 225 nRet = -1; 226 } 227 else 228 { 229 // greater or equal 230 nRet = pStr1End - pStr1; 231 } 232 233 return nRet; 234 } 235 236 /* ----------------------------------------------------------------------- */ 237 238 sal_Int32 SAL_CALL rtl_ustr_asciil_reverseCompare_WithLength( const sal_Unicode* pStr1, 239 sal_Int32 nStr1Len, 240 const sal_Char* pStr2, 241 sal_Int32 nStr2Len ) 242 { 243 const sal_Unicode* pStr1Run = pStr1+nStr1Len; 244 const sal_Char* pStr2Run = pStr2+nStr2Len; 245 sal_Int32 nRet; 246 while ( (pStr1 < pStr1Run) && (pStr2 < pStr2Run) ) 247 { 248 pStr1Run--; 249 pStr2Run--; 250 nRet = ((sal_Int32)*pStr1Run)-((sal_Int32)*pStr2Run); 251 if ( nRet ) 252 return nRet; 253 } 254 255 return nStr1Len - nStr2Len; 256 } 257 258 /* ----------------------------------------------------------------------- */ 259 260 sal_Bool SAL_CALL rtl_ustr_asciil_reverseEquals_WithLength( const sal_Unicode* pStr1, 261 const sal_Char* pStr2, 262 sal_Int32 nStrLen ) 263 { 264 const sal_Unicode* pStr1Run = pStr1+nStrLen; 265 const sal_Char* pStr2Run = pStr2+nStrLen; 266 while ( pStr1 < pStr1Run ) 267 { 268 pStr1Run--; 269 pStr2Run--; 270 if( *pStr1Run != (sal_Unicode)*pStr2Run ) 271 return sal_False; 272 } 273 274 return sal_True; 275 } 276 277 /* ----------------------------------------------------------------------- */ 278 279 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase( const sal_Unicode* pStr1, 280 const sal_Char* pStr2 ) 281 { 282 sal_Int32 nRet; 283 sal_Int32 c1; 284 sal_Int32 c2; 285 do 286 { 287 /* If character between 'A' and 'Z', than convert it to lowercase */ 288 c1 = (sal_Int32)*pStr1; 289 c2 = (sal_Int32)((unsigned char)*pStr2); 290 if ( (c1 >= 65) && (c1 <= 90) ) 291 c1 += 32; 292 if ( (c2 >= 65) && (c2 <= 90) ) 293 c2 += 32; 294 nRet = c1-c2; 295 if ( nRet != 0 ) 296 return nRet; 297 298 pStr1++; 299 pStr2++; 300 } 301 while ( c2 ); 302 303 return 0; 304 } 305 306 /* ----------------------------------------------------------------------- */ 307 308 sal_Int32 SAL_CALL rtl_ustr_ascii_compareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1, 309 sal_Int32 nStr1Len, 310 const sal_Char* pStr2 ) 311 { 312 sal_Int32 nRet; 313 sal_Int32 c1; 314 sal_Int32 c2; 315 do 316 { 317 if ( !nStr1Len ) 318 return *pStr2 == '\0' ? 0 : -1; 319 320 /* If character between 'A' and 'Z', than convert it to lowercase */ 321 c1 = (sal_Int32)*pStr1; 322 c2 = (sal_Int32)((unsigned char)*pStr2); 323 if ( (c1 >= 65) && (c1 <= 90) ) 324 c1 += 32; 325 if ( (c2 >= 65) && (c2 <= 90) ) 326 c2 += 32; 327 nRet = c1-c2; 328 if ( nRet != 0 ) 329 return nRet; 330 331 pStr1++; 332 pStr2++; 333 nStr1Len--; 334 } 335 while( c2 ); 336 337 return 0; 338 } 339 340 sal_Int32 rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths( 341 sal_Unicode const * first, sal_Int32 firstLen, 342 char const * second, sal_Int32 secondLen) 343 { 344 sal_Int32 i; 345 sal_Int32 len = firstLen < secondLen ? firstLen : secondLen; 346 for (i = 0; i < len; ++i) { 347 sal_Int32 c1 = *first++; 348 sal_Int32 c2 = (unsigned char) *second++; 349 sal_Int32 d; 350 if (c1 >= 65 && c1 <= 90) { 351 c1 += 32; 352 } 353 if (c2 >= 65 && c2 <= 90) { 354 c2 += 32; 355 } 356 d = c1 - c2; 357 if (d != 0) { 358 return d; 359 } 360 } 361 return firstLen - secondLen; 362 } 363 364 /* ----------------------------------------------------------------------- */ 365 366 sal_Int32 SAL_CALL rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( const sal_Unicode* pStr1, 367 sal_Int32 nStr1Len, 368 const sal_Char* pStr2, 369 sal_Int32 nShortenedLength ) 370 { 371 const sal_Unicode* pStr1End = pStr1 + nStr1Len; 372 sal_Int32 nRet; 373 sal_Int32 c1; 374 sal_Int32 c2; 375 while ( (nShortenedLength > 0) && 376 (pStr1 < pStr1End) && *pStr2 ) 377 { 378 /* Check ASCII range */ 379 OSL_ENSURE( (*pStr2 & 0x80) == 0, "Found ASCII char > 127"); 380 381 /* If character between 'A' and 'Z', than convert it to lowercase */ 382 c1 = (sal_Int32)*pStr1; 383 c2 = (sal_Int32)((unsigned char)*pStr2); 384 if ( (c1 >= 65) && (c1 <= 90) ) 385 c1 += 32; 386 if ( (c2 >= 65) && (c2 <= 90) ) 387 c2 += 32; 388 nRet = c1-c2; 389 if ( nRet != 0 ) 390 return nRet; 391 392 nShortenedLength--; 393 pStr1++; 394 pStr2++; 395 } 396 397 if ( nShortenedLength <= 0 ) 398 return 0; 399 400 if ( *pStr2 ) 401 { 402 OSL_ENSURE( pStr1 == pStr1End, "pStr1 == pStr1End failed" ); 403 // first is a substring of the second string => less (negative value) 404 nRet = -1; 405 } 406 else 407 { 408 // greater or equal 409 nRet = pStr1End - pStr1; 410 } 411 412 return nRet; 413 } 414 415 /* ----------------------------------------------------------------------- */ 416 417 void SAL_CALL rtl_uString_newFromAscii( rtl_uString** ppThis, 418 const sal_Char* pCharStr ) 419 { 420 sal_Int32 nLen; 421 422 if ( pCharStr ) 423 { 424 const sal_Char* pTempStr = pCharStr; 425 while( *pTempStr ) 426 pTempStr++; 427 nLen = pTempStr-pCharStr; 428 } 429 else 430 nLen = 0; 431 432 if ( !nLen ) 433 { 434 IMPL_RTL_STRINGNAME( new )( ppThis ); 435 return; 436 } 437 438 if ( *ppThis ) 439 IMPL_RTL_STRINGNAME( release )( *ppThis ); 440 441 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen ); 442 OSL_ASSERT(*ppThis != NULL); 443 if ( (*ppThis) ) 444 { 445 IMPL_RTL_STRCODE* pBuffer = (*ppThis)->buffer; 446 do 447 { 448 /* Check ASCII range */ 449 OSL_ENSURE( ((unsigned char)*pCharStr) <= 127, 450 "rtl_uString_newFromAscii() - Found ASCII char > 127" ); 451 452 *pBuffer = *pCharStr; 453 pBuffer++; 454 pCharStr++; 455 } 456 while ( *pCharStr ); 457 } 458 } 459 460 void SAL_CALL rtl_uString_newFromCodePoints( 461 rtl_uString ** newString, sal_uInt32 const * codePoints, 462 sal_Int32 codePointCount) 463 { 464 sal_Int32 n; 465 sal_Int32 i; 466 sal_Unicode * p; 467 OSL_ASSERT( 468 newString != NULL && 469 (codePoints != NULL || codePointCount == 0) && 470 codePointCount >= 0); 471 if (codePointCount == 0) { 472 rtl_uString_new(newString); 473 return; 474 } 475 if (*newString != NULL) { 476 rtl_uString_release(*newString); 477 } 478 n = codePointCount; 479 for (i = 0; i < codePointCount; ++i) { 480 OSL_ASSERT(codePoints[i] <= 0x10FFFF); 481 if (codePoints[i] >= 0x10000) { 482 ++n; 483 } 484 } 485 /* Builds on the assumption that sal_Int32 uses 32 bit two's complement 486 representation with wrap around (the necessary number of UTF-16 code 487 units will be no larger than 2 * SAL_MAX_INT32, represented as 488 sal_Int32 -2): */ 489 if (n < 0) { 490 *newString = NULL; 491 return; 492 } 493 *newString = rtl_uString_ImplAlloc(n); 494 if (*newString == NULL) { 495 return; 496 } 497 p = (*newString)->buffer; 498 for (i = 0; i < codePointCount; ++i) { 499 sal_uInt32 c = codePoints[i]; 500 if (c < 0x10000) { 501 *p++ = (sal_Unicode) c; 502 } else { 503 c -= 0x10000; 504 *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE); 505 *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE); 506 } 507 } 508 } 509 510 /* ======================================================================= */ 511 512 static int rtl_ImplGetFastUTF8UnicodeLen( const sal_Char* pStr, sal_Int32 nLen ) 513 { 514 int n; 515 sal_uChar c; 516 const sal_Char* pEndStr; 517 518 n = 0; 519 pEndStr = pStr+nLen; 520 while ( pStr < pEndStr ) 521 { 522 c = (sal_uChar)*pStr; 523 524 if ( !(c & 0x80) ) 525 pStr++; 526 else if ( (c & 0xE0) == 0xC0 ) 527 pStr += 2; 528 else if ( (c & 0xF0) == 0xE0 ) 529 pStr += 3; 530 else if ( (c & 0xF8) == 0xF0 ) 531 pStr += 4; 532 else if ( (c & 0xFC) == 0xF8 ) 533 pStr += 5; 534 else if ( (c & 0xFE) == 0xFC ) 535 pStr += 6; 536 else 537 pStr++; 538 539 n++; 540 } 541 542 return n; 543 } 544 545 /* ----------------------------------------------------------------------- */ 546 547 static void rtl_string2UString_status( rtl_uString** ppThis, 548 const sal_Char* pStr, 549 sal_Int32 nLen, 550 rtl_TextEncoding eTextEncoding, 551 sal_uInt32 nCvtFlags, 552 sal_uInt32 *pInfo ) 553 { 554 OSL_ENSURE(rtl_isOctetTextEncoding(eTextEncoding), 555 "rtl_string2UString_status() - Wrong TextEncoding" ); 556 557 if ( !nLen ) 558 { 559 rtl_uString_new( ppThis ); 560 if (pInfo != NULL) { 561 *pInfo = 0; 562 } 563 } 564 else 565 { 566 if ( *ppThis ) 567 IMPL_RTL_STRINGNAME( release )( *ppThis ); 568 569 /* Optimization for US-ASCII */ 570 if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US ) 571 { 572 IMPL_RTL_STRCODE* pBuffer; 573 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen ); 574 if (*ppThis == NULL) { 575 if (pInfo != NULL) { 576 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | 577 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 578 } 579 return; 580 } 581 pBuffer = (*ppThis)->buffer; 582 do 583 { 584 /* Check ASCII range */ 585 OSL_ENSURE( ((unsigned char)*pStr) <= 127, 586 "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" ); 587 588 *pBuffer = *pStr; 589 pBuffer++; 590 pStr++; 591 nLen--; 592 } 593 while ( nLen ); 594 if (pInfo != NULL) { 595 *pInfo = 0; 596 } 597 } 598 else 599 { 600 rtl_uString* pTemp; 601 rtl_uString* pTemp2 = NULL; 602 rtl_TextToUnicodeConverter hConverter; 603 sal_uInt32 nInfo; 604 sal_Size nSrcBytes; 605 sal_Size nDestChars; 606 sal_Size nNewLen; 607 608 /* Optimization for UTF-8 - we try to calculate the exact length */ 609 /* For all other encoding we try the maximum - and reallocate 610 the buffer if needed */ 611 if ( eTextEncoding == RTL_TEXTENCODING_UTF8 ) 612 { 613 nNewLen = rtl_ImplGetFastUTF8UnicodeLen( pStr, nLen ); 614 /* Includes the string only ASCII, then we could copy 615 the buffer faster */ 616 if ( nNewLen == (sal_Size)nLen ) 617 { 618 IMPL_RTL_STRCODE* pBuffer; 619 *ppThis = IMPL_RTL_STRINGNAME( ImplAlloc )( nLen ); 620 if (*ppThis == NULL) 621 { 622 if (pInfo != NULL) { 623 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | 624 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 625 } 626 return; 627 } 628 pBuffer = (*ppThis)->buffer; 629 do 630 { 631 /* Check ASCII range */ 632 OSL_ENSURE( ((unsigned char)*pStr) <= 127, 633 "rtl_string2UString_status() - UTF8 test encoding is wrong" ); 634 635 *pBuffer = *pStr; 636 pBuffer++; 637 pStr++; 638 nLen--; 639 } 640 while ( nLen ); 641 if (pInfo != NULL) { 642 *pInfo = 0; 643 } 644 return; 645 } 646 } 647 else 648 nNewLen = nLen; 649 650 nCvtFlags |= RTL_TEXTTOUNICODE_FLAGS_FLUSH; 651 hConverter = rtl_createTextToUnicodeConverter( eTextEncoding ); 652 653 pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen ); 654 if (pTemp == NULL) { 655 if (pInfo != NULL) { 656 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | 657 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 658 } 659 return; 660 } 661 nDestChars = rtl_convertTextToUnicode( hConverter, 0, 662 pStr, nLen, 663 pTemp->buffer, nNewLen, 664 nCvtFlags, 665 &nInfo, &nSrcBytes ); 666 667 /* Buffer not big enough, try again with enough space */ 668 /* Shouldn't be the case, but if we get textencoding which 669 could results in more unicode characters we have this 670 code here. Could be the case for apple encodings */ 671 while ( nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL ) 672 { 673 rtl_freeMemory( pTemp ); 674 nNewLen += 8; 675 pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen ); 676 if (pTemp == NULL) { 677 if (pInfo != NULL) { 678 *pInfo = RTL_TEXTTOUNICODE_INFO_ERROR | 679 RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 680 } 681 return; 682 } 683 nDestChars = rtl_convertTextToUnicode( hConverter, 0, 684 pStr, nLen, 685 pTemp->buffer, nNewLen, 686 nCvtFlags, 687 &nInfo, &nSrcBytes ); 688 } 689 690 if (pInfo) 691 *pInfo = nInfo; 692 693 /* Set the buffer to the correct size or if there is too 694 much overhead, reallocate to the correct size */ 695 if ( nNewLen > nDestChars+8 ) 696 { 697 pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestChars ); 698 } 699 if (pTemp2 != NULL) 700 { 701 rtl_str_ImplCopy(pTemp2->buffer, pTemp->buffer, nDestChars); 702 rtl_freeMemory(pTemp); 703 pTemp = pTemp2; 704 } 705 else 706 { 707 pTemp->length = nDestChars; 708 pTemp->buffer[nDestChars] = 0; 709 } 710 711 rtl_destroyTextToUnicodeConverter( hConverter ); 712 *ppThis = pTemp; 713 714 /* Results the conversion in an empty buffer - 715 create an empty string */ 716 if ( pTemp && !nDestChars ) 717 rtl_uString_new( ppThis ); 718 } 719 } 720 } 721 722 void SAL_CALL rtl_string2UString( rtl_uString** ppThis, 723 const sal_Char* pStr, 724 sal_Int32 nLen, 725 rtl_TextEncoding eTextEncoding, 726 sal_uInt32 nCvtFlags ) 727 { 728 rtl_string2UString_status( ppThis, pStr, nLen, eTextEncoding, 729 nCvtFlags, NULL ); 730 } 731 732 /* ----------------------------------------------------------------------- */ 733 734 typedef enum { 735 CANNOT_RETURN, 736 CAN_RETURN = 1 737 } StrLifecycle; 738 739 static oslMutex 740 getInternMutex() 741 { 742 static oslMutex pPoolGuard = NULL; 743 if( !pPoolGuard ) 744 { 745 oslMutex pGlobalGuard; 746 pGlobalGuard = *osl_getGlobalMutex(); 747 osl_acquireMutex( pGlobalGuard ); 748 if( !pPoolGuard ) 749 { 750 oslMutex p = osl_createMutex(); 751 OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); 752 pPoolGuard = p; 753 } 754 osl_releaseMutex( pGlobalGuard ); 755 } 756 else 757 { 758 OSL_DOUBLE_CHECKED_LOCKING_MEMORY_BARRIER(); 759 } 760 761 return pPoolGuard; 762 } 763 764 /* returns true if we found a dup in the pool */ 765 static void rtl_ustring_intern_internal( rtl_uString ** newStr, 766 rtl_uString * str, 767 StrLifecycle can_return ) 768 { 769 oslMutex pPoolMutex; 770 771 pPoolMutex = getInternMutex(); 772 773 osl_acquireMutex( pPoolMutex ); 774 775 *newStr = rtl_str_hash_intern (str, can_return); 776 777 osl_releaseMutex( pPoolMutex ); 778 779 if( can_return && *newStr != str ) 780 { /* we dupped, then found a match */ 781 rtl_freeMemory( str ); 782 } 783 } 784 785 void SAL_CALL rtl_uString_intern( rtl_uString ** newStr, 786 rtl_uString * str) 787 { 788 if (SAL_STRING_IS_INTERN(str)) 789 { 790 IMPL_RTL_AQUIRE( str ); 791 *newStr = str; 792 } 793 else 794 { 795 rtl_uString *pOrg = *newStr; 796 *newStr = NULL; 797 rtl_ustring_intern_internal( newStr, str, CANNOT_RETURN ); 798 if (pOrg) 799 rtl_uString_release (pOrg); 800 } 801 } 802 803 void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr, 804 const sal_Char * str, 805 sal_Int32 len, 806 rtl_TextEncoding eTextEncoding, 807 sal_uInt32 convertFlags, 808 sal_uInt32 * pInfo ) 809 { 810 rtl_uString *scratch; 811 812 if (*newStr) 813 { 814 rtl_uString_release (*newStr); 815 *newStr = NULL; 816 } 817 818 if ( len < 256 ) 819 { // try various optimisations 820 if ( len < 0 ) 821 len = strlen( str ); 822 if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US ) 823 { 824 int i; 825 rtl_uString *pScratch; 826 pScratch = alloca( sizeof( rtl_uString ) 827 + len * sizeof (IMPL_RTL_STRCODE ) ); 828 for (i = 0; i < len; i++) 829 { 830 /* Check ASCII range */ 831 OSL_ENSURE( ((unsigned char)str[i]) <= 127, 832 "rtl_ustring_internConvert() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" ); 833 pScratch->buffer[i] = str[i]; 834 } 835 pScratch->length = len; 836 rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN ); 837 return; 838 } 839 /* FIXME: we want a nice UTF-8 / alloca shortcut here */ 840 } 841 842 scratch = NULL; 843 rtl_string2UString_status( &scratch, str, len, eTextEncoding, convertFlags, 844 pInfo ); 845 if (!scratch) { 846 return; 847 } 848 rtl_ustring_intern_internal( newStr, scratch, CAN_RETURN ); 849 } 850 851 static void 852 internRelease (rtl_uString *pThis) 853 { 854 oslMutex pPoolMutex; 855 856 rtl_uString *pFree = NULL; 857 if ( SAL_STRING_REFCOUNT( 858 osl_decrementInterlockedCount( &(pThis->refCount) ) ) == 0) 859 { 860 pPoolMutex = getInternMutex(); 861 osl_acquireMutex( pPoolMutex ); 862 863 rtl_str_hash_remove (pThis); 864 865 /* May have been separately acquired */ 866 if ( SAL_STRING_REFCOUNT( 867 osl_incrementInterlockedCount( &(pThis->refCount) ) ) == 1 ) 868 { 869 /* we got the last ref */ 870 pFree = pThis; 871 } 872 else /* very unusual */ 873 { 874 internRelease (pThis); 875 } 876 877 osl_releaseMutex( pPoolMutex ); 878 } 879 if (pFree) 880 rtl_freeMemory (pFree); 881 } 882 883 sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints( 884 rtl_uString const * string, sal_Int32 * indexUtf16, 885 sal_Int32 incrementCodePoints) 886 { 887 sal_Int32 n; 888 sal_Unicode cu; 889 sal_uInt32 cp; 890 OSL_ASSERT(string != NULL && indexUtf16 != NULL); 891 n = *indexUtf16; 892 OSL_ASSERT(n >= 0 && n <= string->length); 893 while (incrementCodePoints < 0) { 894 OSL_ASSERT(n > 0); 895 cu = string->buffer[--n]; 896 if (SAL_RTL_IS_LOW_SURROGATE(cu) && n != 0 && 897 SAL_RTL_IS_HIGH_SURROGATE(string->buffer[n - 1])) 898 { 899 --n; 900 } 901 ++incrementCodePoints; 902 } 903 OSL_ASSERT(n >= 0 && n < string->length); 904 cu = string->buffer[n]; 905 if (SAL_RTL_IS_HIGH_SURROGATE(cu) && string->length - n >= 2 && 906 SAL_RTL_IS_LOW_SURROGATE(string->buffer[n + 1])) 907 { 908 cp = SAL_RTL_COMBINE_SURROGATES(cu, string->buffer[n + 1]); 909 } else { 910 cp = cu; 911 } 912 while (incrementCodePoints > 0) { 913 OSL_ASSERT(n < string->length); 914 cu = string->buffer[n++]; 915 if (SAL_RTL_IS_HIGH_SURROGATE(cu) && n != string->length && 916 SAL_RTL_IS_LOW_SURROGATE(string->buffer[n])) 917 { 918 ++n; 919 } 920 --incrementCodePoints; 921 } 922 OSL_ASSERT(n >= 0 && n <= string->length); 923 *indexUtf16 = n; 924 return cp; 925 } 926 927 sal_Bool rtl_convertStringToUString( 928 rtl_uString ** target, char const * source, sal_Int32 length, 929 rtl_TextEncoding encoding, sal_uInt32 flags) SAL_THROW_EXTERN_C() 930 { 931 sal_uInt32 info; 932 rtl_string2UString_status(target, source, length, encoding, flags, &info); 933 return (sal_Bool) ((info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0); 934 } 935