1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 #if defined(_MSC_VER) && (_MSC_VER >= 1400) 28 #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance 29 #endif 30 31 #include <rtl/memory.h> 32 #include <osl/interlck.h> 33 #include <rtl/alloc.h> 34 #include <osl/diagnose.h> 35 #include <rtl/tencinfo.h> 36 37 #include "strimp.h" 38 #include "surrogates.h" 39 #include <rtl/string.h> 40 41 #include "rtl/math.h" 42 #include "rtl/tencinfo.h" 43 44 /* ======================================================================= */ 45 46 /* static data to be referenced by all empty strings 47 * the refCount is predefined to 1 and must never become 0 ! 48 */ 49 static rtl_String const aImplEmpty_rtl_String = 50 { 51 SAL_STRING_STATIC_FLAG|1, 52 /* sal_Int32 refCount; */ 53 0, /* sal_Int32 length; */ 54 { 0 } /* sal_Char buffer[1]; */ 55 }; 56 57 /* ======================================================================= */ 58 59 #define IMPL_RTL_STRCODE sal_Char 60 #define IMPL_RTL_USTRCODE( c ) ((unsigned char)c) 61 #define IMPL_RTL_STRNAME( n ) rtl_str_ ## n 62 63 #define IMPL_RTL_STRINGNAME( n ) rtl_string_ ## n 64 #define IMPL_RTL_STRINGDATA rtl_String 65 #define IMPL_RTL_EMPTYSTRING aImplEmpty_rtl_String 66 67 /* ======================================================================= */ 68 69 /* Include String/UString template code */ 70 71 #include "strtmpl.c" 72 73 sal_Int32 SAL_CALL rtl_str_valueOfFloat(sal_Char * pStr, float f) 74 { 75 rtl_String * pResult = NULL; 76 sal_Int32 nLen; 77 rtl_math_doubleToString( 78 &pResult, 0, 0, f, rtl_math_StringFormat_G, 79 RTL_STR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 0, 80 sal_True); 81 nLen = pResult->length; 82 OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFFLOAT); 83 rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char)); 84 rtl_string_release(pResult); 85 return nLen; 86 } 87 88 sal_Int32 SAL_CALL rtl_str_valueOfDouble(sal_Char * pStr, double d) 89 { 90 rtl_String * pResult = NULL; 91 sal_Int32 nLen; 92 rtl_math_doubleToString( 93 &pResult, 0, 0, d, rtl_math_StringFormat_G, 94 RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 95 0, sal_True); 96 nLen = pResult->length; 97 OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFDOUBLE); 98 rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char)); 99 rtl_string_release(pResult); 100 return nLen; 101 } 102 103 float SAL_CALL rtl_str_toFloat(sal_Char const * pStr) 104 { 105 return (float) rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), 106 '.', 0, 0, 0); 107 } 108 109 double SAL_CALL rtl_str_toDouble(sal_Char const * pStr) 110 { 111 return rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), '.', 0, 112 0, 0); 113 } 114 115 /* ======================================================================= */ 116 117 static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen ) 118 { 119 int n; 120 sal_Unicode c; 121 sal_uInt32 nUCS4Char; 122 const sal_Unicode* pEndStr; 123 124 n = 0; 125 pEndStr = pStr+nLen; 126 while ( pStr < pEndStr ) 127 { 128 c = *pStr; 129 130 if ( c < 0x80 ) 131 n++; 132 else if ( c < 0x800 ) 133 n += 2; 134 else 135 { 136 if ( !SAL_RTL_IS_HIGH_SURROGATE(c) ) 137 n += 3; 138 else 139 { 140 nUCS4Char = c; 141 142 if ( pStr+1 < pEndStr ) 143 { 144 c = *(pStr+1); 145 if ( SAL_RTL_IS_LOW_SURROGATE(c) ) 146 { 147 nUCS4Char = SAL_RTL_COMBINE_SURROGATES(nUCS4Char, c); 148 pStr++; 149 } 150 } 151 152 if ( nUCS4Char < 0x10000 ) 153 n += 3; 154 else if ( nUCS4Char < 0x200000 ) 155 n += 4; 156 else if ( nUCS4Char < 0x4000000 ) 157 n += 5; 158 else 159 n += 6; 160 } 161 } 162 163 pStr++; 164 } 165 166 return n; 167 } 168 169 /* ----------------------------------------------------------------------- */ 170 171 sal_Bool SAL_CALL rtl_impl_convertUStringToString(rtl_String ** pTarget, 172 sal_Unicode const * pSource, 173 sal_Int32 nLength, 174 rtl_TextEncoding nEncoding, 175 sal_uInt32 nFlags, 176 sal_Bool bCheckErrors) 177 { 178 OSL_ASSERT(pTarget != NULL 179 && (pSource != NULL || nLength == 0) 180 && nLength >= 0 181 && rtl_isOctetTextEncoding(nEncoding)); 182 183 if ( !nLength ) 184 rtl_string_new( pTarget ); 185 else 186 { 187 rtl_String* pTemp; 188 rtl_UnicodeToTextConverter hConverter; 189 sal_uInt32 nInfo; 190 sal_Size nSrcChars; 191 sal_Size nDestBytes; 192 sal_Size nNewLen; 193 sal_Size nNotConvertedChars; 194 sal_Size nMaxCharLen; 195 196 /* Optimization for UTF-8 - we try to calculate the exact length */ 197 /* For all other encoding we try an good estimation */ 198 if ( nEncoding == RTL_TEXTENCODING_UTF8 ) 199 { 200 nNewLen = rtl_ImplGetFastUTF8ByteLen( pSource, nLength ); 201 /* Includes the string only ASCII, then we could copy 202 the buffer faster */ 203 if ( nNewLen == (sal_Size)nLength ) 204 { 205 IMPL_RTL_STRCODE* pBuffer; 206 if ( *pTarget ) 207 IMPL_RTL_STRINGNAME( release )( *pTarget ); 208 *pTarget = IMPL_RTL_STRINGNAME( ImplAlloc )( nLength ); 209 OSL_ASSERT(*pTarget != NULL); 210 pBuffer = (*pTarget)->buffer; 211 do 212 { 213 /* Check ASCII range */ 214 OSL_ENSURE( *pSource <= 127, 215 "rtl_uString2String() - UTF8 test is encoding is wrong" ); 216 217 *pBuffer = (IMPL_RTL_STRCODE)(unsigned char)*pSource; 218 pBuffer++; 219 pSource++; 220 nLength--; 221 } 222 while ( nLength ); 223 return sal_True; 224 } 225 226 nMaxCharLen = 4; 227 } 228 else 229 { 230 rtl_TextEncodingInfo aTextEncInfo; 231 aTextEncInfo.StructSize = sizeof( aTextEncInfo ); 232 if ( !rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ) ) 233 { 234 aTextEncInfo.AverageCharSize = 1; 235 aTextEncInfo.MaximumCharSize = 8; 236 } 237 238 nNewLen = nLength*aTextEncInfo.AverageCharSize; 239 nMaxCharLen = aTextEncInfo.MaximumCharSize; 240 } 241 242 nFlags |= RTL_UNICODETOTEXT_FLAGS_FLUSH; 243 hConverter = rtl_createUnicodeToTextConverter( nEncoding ); 244 245 for (;;) 246 { 247 pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen ); 248 OSL_ASSERT(pTemp != NULL); 249 nDestBytes = rtl_convertUnicodeToText( hConverter, 0, 250 pSource, nLength, 251 pTemp->buffer, nNewLen, 252 nFlags, 253 &nInfo, &nSrcChars ); 254 if (bCheckErrors && (nInfo & RTL_UNICODETOTEXT_INFO_ERROR) != 0) 255 { 256 rtl_freeMemory(pTemp); 257 rtl_destroyUnicodeToTextConverter(hConverter); 258 return sal_False; 259 } 260 261 if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0) 262 break; 263 264 /* Buffer not big enough, try again with enough space */ 265 rtl_freeMemory( pTemp ); 266 267 /* Try with the max. count of characters with 268 additional overhead for replacing functionality */ 269 nNotConvertedChars = nLength-nSrcChars; 270 nNewLen = nDestBytes+(nNotConvertedChars*nMaxCharLen)+nNotConvertedChars+4; 271 } 272 273 /* Set the buffer to the correct size or is there to 274 much overhead, reallocate to the correct size */ 275 if ( nNewLen > nDestBytes+8 ) 276 { 277 rtl_String* pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestBytes ); 278 OSL_ASSERT(pTemp2 != NULL); 279 rtl_str_ImplCopy( pTemp2->buffer, pTemp->buffer, nDestBytes ); 280 rtl_freeMemory( pTemp ); 281 pTemp = pTemp2; 282 } 283 else 284 { 285 pTemp->length = nDestBytes; 286 pTemp->buffer[nDestBytes] = 0; 287 } 288 289 rtl_destroyUnicodeToTextConverter( hConverter ); 290 if ( *pTarget ) 291 IMPL_RTL_STRINGNAME( release )( *pTarget ); 292 *pTarget = pTemp; 293 294 /* Results the conversion in an empty buffer - 295 create an empty string */ 296 if ( pTemp && !nDestBytes ) 297 rtl_string_new( pTarget ); 298 } 299 return sal_True; 300 } 301 302 void SAL_CALL rtl_uString2String( rtl_String** ppThis, 303 const sal_Unicode* pUStr, 304 sal_Int32 nULen, 305 rtl_TextEncoding eTextEncoding, 306 sal_uInt32 nCvtFlags ) 307 { 308 rtl_impl_convertUStringToString(ppThis, pUStr, nULen, eTextEncoding, 309 nCvtFlags, sal_False); 310 } 311 312 sal_Bool SAL_CALL rtl_convertUStringToString(rtl_String ** pTarget, 313 sal_Unicode const * pSource, 314 sal_Int32 nLength, 315 rtl_TextEncoding nEncoding, 316 sal_uInt32 nFlags) 317 { 318 return rtl_impl_convertUStringToString(pTarget, pSource, nLength, nEncoding, 319 nFlags, sal_True); 320 } 321