1*647f063dSAndrew Rist /**************************************************************
2cdf0e10cSrcweir *
3*647f063dSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*647f063dSAndrew Rist * or more contributor license agreements. See the NOTICE file
5*647f063dSAndrew Rist * distributed with this work for additional information
6*647f063dSAndrew Rist * regarding copyright ownership. The ASF licenses this file
7*647f063dSAndrew Rist * to you under the Apache License, Version 2.0 (the
8*647f063dSAndrew Rist * "License"); you may not use this file except in compliance
9*647f063dSAndrew Rist * with the License. You may obtain a copy of the License at
10*647f063dSAndrew Rist *
11*647f063dSAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0
12*647f063dSAndrew Rist *
13*647f063dSAndrew Rist * Unless required by applicable law or agreed to in writing,
14*647f063dSAndrew Rist * software distributed under the License is distributed on an
15*647f063dSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*647f063dSAndrew Rist * KIND, either express or implied. See the License for the
17*647f063dSAndrew Rist * specific language governing permissions and limitations
18*647f063dSAndrew Rist * under the License.
19*647f063dSAndrew Rist *
20*647f063dSAndrew Rist *************************************************************/
21*647f063dSAndrew Rist
22*647f063dSAndrew Rist
23cdf0e10cSrcweir #if defined(_MSC_VER) && (_MSC_VER >= 1400)
24cdf0e10cSrcweir #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
25cdf0e10cSrcweir #endif
26cdf0e10cSrcweir
27cdf0e10cSrcweir #include <rtl/memory.h>
28cdf0e10cSrcweir #include <osl/interlck.h>
29cdf0e10cSrcweir #include <rtl/alloc.h>
30cdf0e10cSrcweir #include <osl/diagnose.h>
31cdf0e10cSrcweir #include <rtl/tencinfo.h>
32cdf0e10cSrcweir
33cdf0e10cSrcweir #include "strimp.h"
34cdf0e10cSrcweir #include "surrogates.h"
35cdf0e10cSrcweir #include <rtl/string.h>
36cdf0e10cSrcweir
37cdf0e10cSrcweir #include "rtl/math.h"
38cdf0e10cSrcweir #include "rtl/tencinfo.h"
39cdf0e10cSrcweir
40cdf0e10cSrcweir /* ======================================================================= */
41cdf0e10cSrcweir
42cdf0e10cSrcweir /* static data to be referenced by all empty strings
43cdf0e10cSrcweir * the refCount is predefined to 1 and must never become 0 !
44cdf0e10cSrcweir */
45cdf0e10cSrcweir static rtl_String const aImplEmpty_rtl_String =
46cdf0e10cSrcweir {
47cdf0e10cSrcweir SAL_STRING_STATIC_FLAG|1,
48cdf0e10cSrcweir /* sal_Int32 refCount; */
49cdf0e10cSrcweir 0, /* sal_Int32 length; */
50cdf0e10cSrcweir { 0 } /* sal_Char buffer[1]; */
51cdf0e10cSrcweir };
52cdf0e10cSrcweir
53cdf0e10cSrcweir /* ======================================================================= */
54cdf0e10cSrcweir
/* Parameters for the shared string template code: 8-bit character flavour. */
#define IMPL_RTL_STRCODE            sal_Char
/* The argument is parenthesized so that the cast applies to the whole
   expression passed in, not just to its first operand. */
#define IMPL_RTL_USTRCODE( c )      ((unsigned char)(c))
#define IMPL_RTL_STRNAME( n )       rtl_str_ ## n

#define IMPL_RTL_STRINGNAME( n )    rtl_string_ ## n
#define IMPL_RTL_STRINGDATA         rtl_String
#define IMPL_RTL_EMPTYSTRING        aImplEmpty_rtl_String
62cdf0e10cSrcweir
63cdf0e10cSrcweir /* ======================================================================= */
64cdf0e10cSrcweir
65cdf0e10cSrcweir /* Include String/UString template code */
66cdf0e10cSrcweir
67cdf0e10cSrcweir #include "strtmpl.c"
68cdf0e10cSrcweir
rtl_str_valueOfFloat(sal_Char * pStr,float f)69cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_str_valueOfFloat(sal_Char * pStr, float f)
70cdf0e10cSrcweir {
71cdf0e10cSrcweir rtl_String * pResult = NULL;
72cdf0e10cSrcweir sal_Int32 nLen;
73cdf0e10cSrcweir rtl_math_doubleToString(
74cdf0e10cSrcweir &pResult, 0, 0, f, rtl_math_StringFormat_G,
75cdf0e10cSrcweir RTL_STR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 0,
76cdf0e10cSrcweir sal_True);
77cdf0e10cSrcweir nLen = pResult->length;
78cdf0e10cSrcweir OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFFLOAT);
79cdf0e10cSrcweir rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
80cdf0e10cSrcweir rtl_string_release(pResult);
81cdf0e10cSrcweir return nLen;
82cdf0e10cSrcweir }
83cdf0e10cSrcweir
rtl_str_valueOfDouble(sal_Char * pStr,double d)84cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_str_valueOfDouble(sal_Char * pStr, double d)
85cdf0e10cSrcweir {
86cdf0e10cSrcweir rtl_String * pResult = NULL;
87cdf0e10cSrcweir sal_Int32 nLen;
88cdf0e10cSrcweir rtl_math_doubleToString(
89cdf0e10cSrcweir &pResult, 0, 0, d, rtl_math_StringFormat_G,
90cdf0e10cSrcweir RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
91cdf0e10cSrcweir 0, sal_True);
92cdf0e10cSrcweir nLen = pResult->length;
93cdf0e10cSrcweir OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFDOUBLE);
94cdf0e10cSrcweir rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
95cdf0e10cSrcweir rtl_string_release(pResult);
96cdf0e10cSrcweir return nLen;
97cdf0e10cSrcweir }
98cdf0e10cSrcweir
rtl_str_toFloat(sal_Char const * pStr)99cdf0e10cSrcweir float SAL_CALL rtl_str_toFloat(sal_Char const * pStr)
100cdf0e10cSrcweir {
101cdf0e10cSrcweir return (float) rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr),
102cdf0e10cSrcweir '.', 0, 0, 0);
103cdf0e10cSrcweir }
104cdf0e10cSrcweir
rtl_str_toDouble(sal_Char const * pStr)105cdf0e10cSrcweir double SAL_CALL rtl_str_toDouble(sal_Char const * pStr)
106cdf0e10cSrcweir {
107cdf0e10cSrcweir return rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), '.', 0,
108cdf0e10cSrcweir 0, 0);
109cdf0e10cSrcweir }
110cdf0e10cSrcweir
111cdf0e10cSrcweir /* ======================================================================= */
112cdf0e10cSrcweir
/* Counts the bytes needed to encode nLen UTF-16 code units as UTF-8.
 *
 * Code units below 0x80 count as 1 byte, below 0x800 as 2 bytes, and any
 * other unit that is not a high surrogate as 3 bytes.  A high surrogate
 * directly followed by a low surrogate is combined into one code point
 * (consuming both units) and sized by its value; a high surrogate without
 * a following low surrogate is sized by its own value.
 */
static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
{
    const sal_Unicode* const    pLimit = pStr + nLen;
    int                         nBytes = 0;

    for ( ; pStr < pLimit; ++pStr )
    {
        const sal_Unicode   cUnit = *pStr;
        sal_uInt32          nCodePoint;

        if ( cUnit < 0x80 )
        {
            nBytes += 1;
            continue;
        }
        if ( cUnit < 0x800 )
        {
            nBytes += 2;
            continue;
        }
        if ( !SAL_RTL_IS_HIGH_SURROGATE(cUnit) )
        {
            nBytes += 3;
            continue;
        }

        /* High surrogate: try to pair it with the following unit */
        nCodePoint = cUnit;
        if ( pStr+1 < pLimit )
        {
            const sal_Unicode cNext = *(pStr+1);
            if ( SAL_RTL_IS_LOW_SURROGATE(cNext) )
            {
                nCodePoint = SAL_RTL_COMBINE_SURROGATES(nCodePoint, cNext);
                /* skip the low surrogate that was just consumed */
                ++pStr;
            }
        }

        if ( nCodePoint < 0x10000 )
            nBytes += 3;
        else if ( nCodePoint < 0x200000 )
            nBytes += 4;
        else if ( nCodePoint < 0x4000000 )
            nBytes += 5;
        else
            nBytes += 6;
    }

    return nBytes;
}
164cdf0e10cSrcweir
165cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
166cdf0e10cSrcweir
rtl_impl_convertUStringToString(rtl_String ** pTarget,sal_Unicode const * pSource,sal_Int32 nLength,rtl_TextEncoding nEncoding,sal_uInt32 nFlags,sal_Bool bCheckErrors)167cdf0e10cSrcweir sal_Bool SAL_CALL rtl_impl_convertUStringToString(rtl_String ** pTarget,
168cdf0e10cSrcweir sal_Unicode const * pSource,
169cdf0e10cSrcweir sal_Int32 nLength,
170cdf0e10cSrcweir rtl_TextEncoding nEncoding,
171cdf0e10cSrcweir sal_uInt32 nFlags,
172cdf0e10cSrcweir sal_Bool bCheckErrors)
173cdf0e10cSrcweir {
174cdf0e10cSrcweir OSL_ASSERT(pTarget != NULL
175cdf0e10cSrcweir && (pSource != NULL || nLength == 0)
176cdf0e10cSrcweir && nLength >= 0
177cdf0e10cSrcweir && rtl_isOctetTextEncoding(nEncoding));
178cdf0e10cSrcweir
179cdf0e10cSrcweir if ( !nLength )
180cdf0e10cSrcweir rtl_string_new( pTarget );
181cdf0e10cSrcweir else
182cdf0e10cSrcweir {
183cdf0e10cSrcweir rtl_String* pTemp;
184cdf0e10cSrcweir rtl_UnicodeToTextConverter hConverter;
185cdf0e10cSrcweir sal_uInt32 nInfo;
186cdf0e10cSrcweir sal_Size nSrcChars;
187cdf0e10cSrcweir sal_Size nDestBytes;
188cdf0e10cSrcweir sal_Size nNewLen;
189cdf0e10cSrcweir sal_Size nNotConvertedChars;
190cdf0e10cSrcweir sal_Size nMaxCharLen;
191cdf0e10cSrcweir
192cdf0e10cSrcweir /* Optimization for UTF-8 - we try to calculate the exact length */
193cdf0e10cSrcweir /* For all other encoding we try an good estimation */
194cdf0e10cSrcweir if ( nEncoding == RTL_TEXTENCODING_UTF8 )
195cdf0e10cSrcweir {
196cdf0e10cSrcweir nNewLen = rtl_ImplGetFastUTF8ByteLen( pSource, nLength );
197cdf0e10cSrcweir /* Includes the string only ASCII, then we could copy
198cdf0e10cSrcweir the buffer faster */
199cdf0e10cSrcweir if ( nNewLen == (sal_Size)nLength )
200cdf0e10cSrcweir {
201cdf0e10cSrcweir IMPL_RTL_STRCODE* pBuffer;
202cdf0e10cSrcweir if ( *pTarget )
203cdf0e10cSrcweir IMPL_RTL_STRINGNAME( release )( *pTarget );
204cdf0e10cSrcweir *pTarget = IMPL_RTL_STRINGNAME( ImplAlloc )( nLength );
205cdf0e10cSrcweir OSL_ASSERT(*pTarget != NULL);
206cdf0e10cSrcweir pBuffer = (*pTarget)->buffer;
207cdf0e10cSrcweir do
208cdf0e10cSrcweir {
209cdf0e10cSrcweir /* Check ASCII range */
210cdf0e10cSrcweir OSL_ENSURE( *pSource <= 127,
211cdf0e10cSrcweir "rtl_uString2String() - UTF8 test is encoding is wrong" );
212cdf0e10cSrcweir
213cdf0e10cSrcweir *pBuffer = (IMPL_RTL_STRCODE)(unsigned char)*pSource;
214cdf0e10cSrcweir pBuffer++;
215cdf0e10cSrcweir pSource++;
216cdf0e10cSrcweir nLength--;
217cdf0e10cSrcweir }
218cdf0e10cSrcweir while ( nLength );
219cdf0e10cSrcweir return sal_True;
220cdf0e10cSrcweir }
221cdf0e10cSrcweir
222cdf0e10cSrcweir nMaxCharLen = 4;
223cdf0e10cSrcweir }
224cdf0e10cSrcweir else
225cdf0e10cSrcweir {
226cdf0e10cSrcweir rtl_TextEncodingInfo aTextEncInfo;
227cdf0e10cSrcweir aTextEncInfo.StructSize = sizeof( aTextEncInfo );
228cdf0e10cSrcweir if ( !rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ) )
229cdf0e10cSrcweir {
230cdf0e10cSrcweir aTextEncInfo.AverageCharSize = 1;
231cdf0e10cSrcweir aTextEncInfo.MaximumCharSize = 8;
232cdf0e10cSrcweir }
233cdf0e10cSrcweir
234cdf0e10cSrcweir nNewLen = nLength*aTextEncInfo.AverageCharSize;
235cdf0e10cSrcweir nMaxCharLen = aTextEncInfo.MaximumCharSize;
236cdf0e10cSrcweir }
237cdf0e10cSrcweir
238cdf0e10cSrcweir nFlags |= RTL_UNICODETOTEXT_FLAGS_FLUSH;
239cdf0e10cSrcweir hConverter = rtl_createUnicodeToTextConverter( nEncoding );
240cdf0e10cSrcweir
241cdf0e10cSrcweir for (;;)
242cdf0e10cSrcweir {
243cdf0e10cSrcweir pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
244cdf0e10cSrcweir OSL_ASSERT(pTemp != NULL);
245cdf0e10cSrcweir nDestBytes = rtl_convertUnicodeToText( hConverter, 0,
246cdf0e10cSrcweir pSource, nLength,
247cdf0e10cSrcweir pTemp->buffer, nNewLen,
248cdf0e10cSrcweir nFlags,
249cdf0e10cSrcweir &nInfo, &nSrcChars );
250cdf0e10cSrcweir if (bCheckErrors && (nInfo & RTL_UNICODETOTEXT_INFO_ERROR) != 0)
251cdf0e10cSrcweir {
252cdf0e10cSrcweir rtl_freeMemory(pTemp);
253cdf0e10cSrcweir rtl_destroyUnicodeToTextConverter(hConverter);
254cdf0e10cSrcweir return sal_False;
255cdf0e10cSrcweir }
256cdf0e10cSrcweir
257cdf0e10cSrcweir if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0)
258cdf0e10cSrcweir break;
259cdf0e10cSrcweir
260cdf0e10cSrcweir /* Buffer not big enough, try again with enough space */
261cdf0e10cSrcweir rtl_freeMemory( pTemp );
262cdf0e10cSrcweir
263cdf0e10cSrcweir /* Try with the max. count of characters with
264cdf0e10cSrcweir additional overhead for replacing functionality */
265cdf0e10cSrcweir nNotConvertedChars = nLength-nSrcChars;
266cdf0e10cSrcweir nNewLen = nDestBytes+(nNotConvertedChars*nMaxCharLen)+nNotConvertedChars+4;
267cdf0e10cSrcweir }
268cdf0e10cSrcweir
269cdf0e10cSrcweir /* Set the buffer to the correct size or is there to
270cdf0e10cSrcweir much overhead, reallocate to the correct size */
271cdf0e10cSrcweir if ( nNewLen > nDestBytes+8 )
272cdf0e10cSrcweir {
273cdf0e10cSrcweir rtl_String* pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestBytes );
274cdf0e10cSrcweir OSL_ASSERT(pTemp2 != NULL);
275cdf0e10cSrcweir rtl_str_ImplCopy( pTemp2->buffer, pTemp->buffer, nDestBytes );
276cdf0e10cSrcweir rtl_freeMemory( pTemp );
277cdf0e10cSrcweir pTemp = pTemp2;
278cdf0e10cSrcweir }
279cdf0e10cSrcweir else
280cdf0e10cSrcweir {
281cdf0e10cSrcweir pTemp->length = nDestBytes;
282cdf0e10cSrcweir pTemp->buffer[nDestBytes] = 0;
283cdf0e10cSrcweir }
284cdf0e10cSrcweir
285cdf0e10cSrcweir rtl_destroyUnicodeToTextConverter( hConverter );
286cdf0e10cSrcweir if ( *pTarget )
287cdf0e10cSrcweir IMPL_RTL_STRINGNAME( release )( *pTarget );
288cdf0e10cSrcweir *pTarget = pTemp;
289cdf0e10cSrcweir
290cdf0e10cSrcweir /* Results the conversion in an empty buffer -
291cdf0e10cSrcweir create an empty string */
292cdf0e10cSrcweir if ( pTemp && !nDestBytes )
293cdf0e10cSrcweir rtl_string_new( pTarget );
294cdf0e10cSrcweir }
295cdf0e10cSrcweir return sal_True;
296cdf0e10cSrcweir }
297cdf0e10cSrcweir
rtl_uString2String(rtl_String ** ppThis,const sal_Unicode * pUStr,sal_Int32 nULen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)298cdf0e10cSrcweir void SAL_CALL rtl_uString2String( rtl_String** ppThis,
299cdf0e10cSrcweir const sal_Unicode* pUStr,
300cdf0e10cSrcweir sal_Int32 nULen,
301cdf0e10cSrcweir rtl_TextEncoding eTextEncoding,
302cdf0e10cSrcweir sal_uInt32 nCvtFlags )
303cdf0e10cSrcweir {
304cdf0e10cSrcweir rtl_impl_convertUStringToString(ppThis, pUStr, nULen, eTextEncoding,
305cdf0e10cSrcweir nCvtFlags, sal_False);
306cdf0e10cSrcweir }
307cdf0e10cSrcweir
rtl_convertUStringToString(rtl_String ** pTarget,sal_Unicode const * pSource,sal_Int32 nLength,rtl_TextEncoding nEncoding,sal_uInt32 nFlags)308cdf0e10cSrcweir sal_Bool SAL_CALL rtl_convertUStringToString(rtl_String ** pTarget,
309cdf0e10cSrcweir sal_Unicode const * pSource,
310cdf0e10cSrcweir sal_Int32 nLength,
311cdf0e10cSrcweir rtl_TextEncoding nEncoding,
312cdf0e10cSrcweir sal_uInt32 nFlags)
313cdf0e10cSrcweir {
314cdf0e10cSrcweir return rtl_impl_convertUStringToString(pTarget, pSource, nLength, nEncoding,
315cdf0e10cSrcweir nFlags, sal_True);
316cdf0e10cSrcweir }
317