xref: /trunk/main/sal/rtl/source/string.c (revision 647f063d)
/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
21*647f063dSAndrew Rist 
22*647f063dSAndrew Rist 
23cdf0e10cSrcweir #if defined(_MSC_VER) && (_MSC_VER >= 1400)
24cdf0e10cSrcweir #pragma warning(disable:4738) // storing 32-bit float result in memory, possible loss of performance
25cdf0e10cSrcweir #endif
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include <rtl/memory.h>
28cdf0e10cSrcweir #include <osl/interlck.h>
29cdf0e10cSrcweir #include <rtl/alloc.h>
30cdf0e10cSrcweir #include <osl/diagnose.h>
31cdf0e10cSrcweir #include <rtl/tencinfo.h>
32cdf0e10cSrcweir 
33cdf0e10cSrcweir #include "strimp.h"
34cdf0e10cSrcweir #include "surrogates.h"
35cdf0e10cSrcweir #include <rtl/string.h>
36cdf0e10cSrcweir 
37cdf0e10cSrcweir #include "rtl/math.h"
38cdf0e10cSrcweir #include "rtl/tencinfo.h"
39cdf0e10cSrcweir 
40cdf0e10cSrcweir /* ======================================================================= */
41cdf0e10cSrcweir 
42cdf0e10cSrcweir /* static data to be referenced by all empty strings
43cdf0e10cSrcweir  * the refCount is predefined to 1 and must never become 0 !
44cdf0e10cSrcweir  */
45cdf0e10cSrcweir static rtl_String const aImplEmpty_rtl_String =
46cdf0e10cSrcweir {
47cdf0e10cSrcweir     SAL_STRING_STATIC_FLAG|1,
48cdf0e10cSrcweir             /* sal_Int32    refCount;   */
49cdf0e10cSrcweir     0,      /* sal_Int32    length;     */
50cdf0e10cSrcweir     { 0 }   /* sal_Char     buffer[1];  */
51cdf0e10cSrcweir };
52cdf0e10cSrcweir 
53cdf0e10cSrcweir /* ======================================================================= */
54cdf0e10cSrcweir 
/* Parameters for the String/UString template code: they select the
 * 8-bit (sal_Char / rtl_String) flavour of the shared implementation.
 */

/* Character type of the string flavour implemented by this file. */
#define IMPL_RTL_STRCODE            sal_Char
/* Unsigned view of one character. The argument is parenthesised so that
 * the cast applies to the whole expression passed in, not only to its
 * first operand. */
#define IMPL_RTL_USTRCODE( c )      ((unsigned char)(c))
/* Builds the name of a function working on raw character arrays. */
#define IMPL_RTL_STRNAME( n )       rtl_str_ ## n

/* Builds the name of a function working on string data objects. */
#define IMPL_RTL_STRINGNAME( n )    rtl_string_ ## n
/* String data structure of this flavour. */
#define IMPL_RTL_STRINGDATA         rtl_String
/* Static object standing in for every empty string of this flavour. */
#define IMPL_RTL_EMPTYSTRING        aImplEmpty_rtl_String
62cdf0e10cSrcweir 
63cdf0e10cSrcweir /* ======================================================================= */
64cdf0e10cSrcweir 
65cdf0e10cSrcweir /* Include String/UString template code */
66cdf0e10cSrcweir 
67cdf0e10cSrcweir #include "strtmpl.c"
68cdf0e10cSrcweir 
rtl_str_valueOfFloat(sal_Char * pStr,float f)69cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_str_valueOfFloat(sal_Char * pStr, float f)
70cdf0e10cSrcweir {
71cdf0e10cSrcweir     rtl_String * pResult = NULL;
72cdf0e10cSrcweir     sal_Int32 nLen;
73cdf0e10cSrcweir     rtl_math_doubleToString(
74cdf0e10cSrcweir         &pResult, 0, 0, f, rtl_math_StringFormat_G,
75cdf0e10cSrcweir         RTL_STR_MAX_VALUEOFFLOAT - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0, 0,
76cdf0e10cSrcweir         sal_True);
77cdf0e10cSrcweir     nLen = pResult->length;
78cdf0e10cSrcweir     OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFFLOAT);
79cdf0e10cSrcweir     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
80cdf0e10cSrcweir     rtl_string_release(pResult);
81cdf0e10cSrcweir     return nLen;
82cdf0e10cSrcweir }
83cdf0e10cSrcweir 
rtl_str_valueOfDouble(sal_Char * pStr,double d)84cdf0e10cSrcweir sal_Int32 SAL_CALL rtl_str_valueOfDouble(sal_Char * pStr, double d)
85cdf0e10cSrcweir {
86cdf0e10cSrcweir     rtl_String * pResult = NULL;
87cdf0e10cSrcweir     sal_Int32 nLen;
88cdf0e10cSrcweir     rtl_math_doubleToString(
89cdf0e10cSrcweir         &pResult, 0, 0, d, rtl_math_StringFormat_G,
90cdf0e10cSrcweir         RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
91cdf0e10cSrcweir         0, sal_True);
92cdf0e10cSrcweir     nLen = pResult->length;
93cdf0e10cSrcweir     OSL_ASSERT(nLen < RTL_STR_MAX_VALUEOFDOUBLE);
94cdf0e10cSrcweir     rtl_copyMemory(pStr, pResult->buffer, (nLen + 1) * sizeof(sal_Char));
95cdf0e10cSrcweir     rtl_string_release(pResult);
96cdf0e10cSrcweir     return nLen;
97cdf0e10cSrcweir }
98cdf0e10cSrcweir 
rtl_str_toFloat(sal_Char const * pStr)99cdf0e10cSrcweir float SAL_CALL rtl_str_toFloat(sal_Char const * pStr)
100cdf0e10cSrcweir {
101cdf0e10cSrcweir     return (float) rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr),
102cdf0e10cSrcweir                                            '.', 0, 0, 0);
103cdf0e10cSrcweir }
104cdf0e10cSrcweir 
rtl_str_toDouble(sal_Char const * pStr)105cdf0e10cSrcweir double SAL_CALL rtl_str_toDouble(sal_Char const * pStr)
106cdf0e10cSrcweir {
107cdf0e10cSrcweir     return rtl_math_stringToDouble(pStr, pStr + rtl_str_getLength(pStr), '.', 0,
108cdf0e10cSrcweir                                    0, 0);
109cdf0e10cSrcweir }
110cdf0e10cSrcweir 
111cdf0e10cSrcweir /* ======================================================================= */
112cdf0e10cSrcweir 
/**
 * Computes how many bytes a UTF-16 sequence occupies once encoded as UTF-8.
 *
 * Per UTF-16 code unit:
 *   - below 0x80:                                   1 byte
 *   - below 0x800:                                  2 bytes
 *   - high surrogate directly followed by a low
 *     surrogate (both consumed as one character):   4 bytes
 *   - everything else, including unpaired
 *     surrogates:                                   3 bytes
 *
 * A combined surrogate pair always denotes a code point in the range
 * U+10000..U+10FFFF, so lengths of 5 or 6 bytes can never occur and are
 * not considered.
 *
 * @param pStr  first UTF-16 code unit
 * @param nLen  number of UTF-16 code units to examine
 * @return      number of bytes of the UTF-8 form
 */
static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
{
    const sal_Unicode*  pEndStr = pStr+nLen;
    int                 n = 0;

    while ( pStr < pEndStr )
    {
        sal_Unicode c = *pStr;
        pStr++;

        if ( c < 0x80 )
            n++;
        else if ( c < 0x800 )
            n += 2;
        else
        {
            /* BMP character or unpaired surrogate */
            n += 3;

            if ( SAL_RTL_IS_HIGH_SURROGATE(c) && (pStr < pEndStr) )
            {
                c = *pStr;
                if ( SAL_RTL_IS_LOW_SURROGATE(c) )
                {
                    /* complete pair: skip the low half, 4 bytes in total */
                    pStr++;
                    n++;
                }
            }
        }
    }

    return n;
}
164cdf0e10cSrcweir 
165cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
166cdf0e10cSrcweir 
rtl_impl_convertUStringToString(rtl_String ** pTarget,sal_Unicode const * pSource,sal_Int32 nLength,rtl_TextEncoding nEncoding,sal_uInt32 nFlags,sal_Bool bCheckErrors)167cdf0e10cSrcweir sal_Bool SAL_CALL rtl_impl_convertUStringToString(rtl_String ** pTarget,
168cdf0e10cSrcweir                                                   sal_Unicode const * pSource,
169cdf0e10cSrcweir                                                   sal_Int32 nLength,
170cdf0e10cSrcweir                                                   rtl_TextEncoding nEncoding,
171cdf0e10cSrcweir                                                   sal_uInt32 nFlags,
172cdf0e10cSrcweir                                                   sal_Bool bCheckErrors)
173cdf0e10cSrcweir {
174cdf0e10cSrcweir     OSL_ASSERT(pTarget != NULL
175cdf0e10cSrcweir                && (pSource != NULL || nLength == 0)
176cdf0e10cSrcweir                && nLength >= 0
177cdf0e10cSrcweir                && rtl_isOctetTextEncoding(nEncoding));
178cdf0e10cSrcweir 
179cdf0e10cSrcweir     if ( !nLength )
180cdf0e10cSrcweir         rtl_string_new( pTarget );
181cdf0e10cSrcweir     else
182cdf0e10cSrcweir     {
183cdf0e10cSrcweir         rtl_String*                 pTemp;
184cdf0e10cSrcweir         rtl_UnicodeToTextConverter  hConverter;
185cdf0e10cSrcweir         sal_uInt32                  nInfo;
186cdf0e10cSrcweir         sal_Size                    nSrcChars;
187cdf0e10cSrcweir         sal_Size                    nDestBytes;
188cdf0e10cSrcweir         sal_Size                    nNewLen;
189cdf0e10cSrcweir         sal_Size                    nNotConvertedChars;
190cdf0e10cSrcweir         sal_Size                    nMaxCharLen;
191cdf0e10cSrcweir 
192cdf0e10cSrcweir         /* Optimization for UTF-8 - we try to calculate the exact length */
193cdf0e10cSrcweir         /* For all other encoding we try an good estimation */
194cdf0e10cSrcweir         if ( nEncoding == RTL_TEXTENCODING_UTF8 )
195cdf0e10cSrcweir         {
196cdf0e10cSrcweir             nNewLen = rtl_ImplGetFastUTF8ByteLen( pSource, nLength );
197cdf0e10cSrcweir             /* Includes the string only ASCII, then we could copy
198cdf0e10cSrcweir                the buffer faster */
199cdf0e10cSrcweir             if ( nNewLen == (sal_Size)nLength )
200cdf0e10cSrcweir             {
201cdf0e10cSrcweir                 IMPL_RTL_STRCODE* pBuffer;
202cdf0e10cSrcweir                 if ( *pTarget )
203cdf0e10cSrcweir                     IMPL_RTL_STRINGNAME( release )( *pTarget );
204cdf0e10cSrcweir                 *pTarget = IMPL_RTL_STRINGNAME( ImplAlloc )( nLength );
205cdf0e10cSrcweir                 OSL_ASSERT(*pTarget != NULL);
206cdf0e10cSrcweir                 pBuffer = (*pTarget)->buffer;
207cdf0e10cSrcweir                 do
208cdf0e10cSrcweir                 {
209cdf0e10cSrcweir                     /* Check ASCII range */
210cdf0e10cSrcweir                     OSL_ENSURE( *pSource <= 127,
211cdf0e10cSrcweir                                 "rtl_uString2String() - UTF8 test is encoding is wrong" );
212cdf0e10cSrcweir 
213cdf0e10cSrcweir                     *pBuffer = (IMPL_RTL_STRCODE)(unsigned char)*pSource;
214cdf0e10cSrcweir                     pBuffer++;
215cdf0e10cSrcweir                     pSource++;
216cdf0e10cSrcweir                     nLength--;
217cdf0e10cSrcweir                 }
218cdf0e10cSrcweir                 while ( nLength );
219cdf0e10cSrcweir                 return sal_True;
220cdf0e10cSrcweir             }
221cdf0e10cSrcweir 
222cdf0e10cSrcweir             nMaxCharLen = 4;
223cdf0e10cSrcweir         }
224cdf0e10cSrcweir         else
225cdf0e10cSrcweir         {
226cdf0e10cSrcweir             rtl_TextEncodingInfo aTextEncInfo;
227cdf0e10cSrcweir             aTextEncInfo.StructSize = sizeof( aTextEncInfo );
228cdf0e10cSrcweir             if ( !rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo ) )
229cdf0e10cSrcweir             {
230cdf0e10cSrcweir                 aTextEncInfo.AverageCharSize    = 1;
231cdf0e10cSrcweir                 aTextEncInfo.MaximumCharSize    = 8;
232cdf0e10cSrcweir             }
233cdf0e10cSrcweir 
234cdf0e10cSrcweir             nNewLen = nLength*aTextEncInfo.AverageCharSize;
235cdf0e10cSrcweir             nMaxCharLen = aTextEncInfo.MaximumCharSize;
236cdf0e10cSrcweir         }
237cdf0e10cSrcweir 
238cdf0e10cSrcweir         nFlags |= RTL_UNICODETOTEXT_FLAGS_FLUSH;
239cdf0e10cSrcweir         hConverter = rtl_createUnicodeToTextConverter( nEncoding );
240cdf0e10cSrcweir 
241cdf0e10cSrcweir         for (;;)
242cdf0e10cSrcweir         {
243cdf0e10cSrcweir             pTemp = IMPL_RTL_STRINGNAME( ImplAlloc )( nNewLen );
244cdf0e10cSrcweir             OSL_ASSERT(pTemp != NULL);
245cdf0e10cSrcweir             nDestBytes = rtl_convertUnicodeToText( hConverter, 0,
246cdf0e10cSrcweir                                                    pSource, nLength,
247cdf0e10cSrcweir                                                    pTemp->buffer, nNewLen,
248cdf0e10cSrcweir                                                    nFlags,
249cdf0e10cSrcweir                                                    &nInfo, &nSrcChars );
250cdf0e10cSrcweir             if (bCheckErrors && (nInfo & RTL_UNICODETOTEXT_INFO_ERROR) != 0)
251cdf0e10cSrcweir             {
252cdf0e10cSrcweir                 rtl_freeMemory(pTemp);
253cdf0e10cSrcweir                 rtl_destroyUnicodeToTextConverter(hConverter);
254cdf0e10cSrcweir                 return sal_False;
255cdf0e10cSrcweir             }
256cdf0e10cSrcweir 
257cdf0e10cSrcweir             if ((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0)
258cdf0e10cSrcweir                 break;
259cdf0e10cSrcweir 
260cdf0e10cSrcweir             /* Buffer not big enough, try again with enough space */
261cdf0e10cSrcweir             rtl_freeMemory( pTemp );
262cdf0e10cSrcweir 
263cdf0e10cSrcweir             /* Try with the max. count of characters with
264cdf0e10cSrcweir                additional overhead for replacing functionality */
265cdf0e10cSrcweir             nNotConvertedChars = nLength-nSrcChars;
266cdf0e10cSrcweir             nNewLen = nDestBytes+(nNotConvertedChars*nMaxCharLen)+nNotConvertedChars+4;
267cdf0e10cSrcweir         }
268cdf0e10cSrcweir 
269cdf0e10cSrcweir         /* Set the buffer to the correct size or is there to
270cdf0e10cSrcweir            much overhead, reallocate to the correct size */
271cdf0e10cSrcweir         if ( nNewLen > nDestBytes+8 )
272cdf0e10cSrcweir         {
273cdf0e10cSrcweir             rtl_String* pTemp2 = IMPL_RTL_STRINGNAME( ImplAlloc )( nDestBytes );
274cdf0e10cSrcweir             OSL_ASSERT(pTemp2 != NULL);
275cdf0e10cSrcweir             rtl_str_ImplCopy( pTemp2->buffer, pTemp->buffer, nDestBytes );
276cdf0e10cSrcweir             rtl_freeMemory( pTemp );
277cdf0e10cSrcweir             pTemp = pTemp2;
278cdf0e10cSrcweir         }
279cdf0e10cSrcweir         else
280cdf0e10cSrcweir         {
281cdf0e10cSrcweir             pTemp->length = nDestBytes;
282cdf0e10cSrcweir             pTemp->buffer[nDestBytes] = 0;
283cdf0e10cSrcweir         }
284cdf0e10cSrcweir 
285cdf0e10cSrcweir         rtl_destroyUnicodeToTextConverter( hConverter );
286cdf0e10cSrcweir         if ( *pTarget )
287cdf0e10cSrcweir             IMPL_RTL_STRINGNAME( release )( *pTarget );
288cdf0e10cSrcweir         *pTarget = pTemp;
289cdf0e10cSrcweir 
290cdf0e10cSrcweir         /* Results the conversion in an empty buffer -
291cdf0e10cSrcweir            create an empty string */
292cdf0e10cSrcweir         if ( pTemp && !nDestBytes )
293cdf0e10cSrcweir             rtl_string_new( pTarget );
294cdf0e10cSrcweir     }
295cdf0e10cSrcweir     return sal_True;
296cdf0e10cSrcweir }
297cdf0e10cSrcweir 
rtl_uString2String(rtl_String ** ppThis,const sal_Unicode * pUStr,sal_Int32 nULen,rtl_TextEncoding eTextEncoding,sal_uInt32 nCvtFlags)298cdf0e10cSrcweir void SAL_CALL rtl_uString2String( rtl_String** ppThis,
299cdf0e10cSrcweir                                   const sal_Unicode* pUStr,
300cdf0e10cSrcweir                                   sal_Int32 nULen,
301cdf0e10cSrcweir                                   rtl_TextEncoding eTextEncoding,
302cdf0e10cSrcweir                                   sal_uInt32 nCvtFlags )
303cdf0e10cSrcweir {
304cdf0e10cSrcweir     rtl_impl_convertUStringToString(ppThis, pUStr, nULen, eTextEncoding,
305cdf0e10cSrcweir                                     nCvtFlags, sal_False);
306cdf0e10cSrcweir }
307cdf0e10cSrcweir 
rtl_convertUStringToString(rtl_String ** pTarget,sal_Unicode const * pSource,sal_Int32 nLength,rtl_TextEncoding nEncoding,sal_uInt32 nFlags)308cdf0e10cSrcweir sal_Bool SAL_CALL rtl_convertUStringToString(rtl_String ** pTarget,
309cdf0e10cSrcweir                                              sal_Unicode const * pSource,
310cdf0e10cSrcweir                                              sal_Int32 nLength,
311cdf0e10cSrcweir                                              rtl_TextEncoding nEncoding,
312cdf0e10cSrcweir                                              sal_uInt32 nFlags)
313cdf0e10cSrcweir {
314cdf0e10cSrcweir     return rtl_impl_convertUStringToString(pTarget, pSource, nLength, nEncoding,
315cdf0e10cSrcweir                                            nFlags, sal_True);
316cdf0e10cSrcweir }
317