xref: /aoo41x/main/sal/rtl/source/uri.cxx (revision 87d2adbc)
1*87d2adbcSAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
3*87d2adbcSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*87d2adbcSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*87d2adbcSAndrew Rist  * distributed with this work for additional information
6*87d2adbcSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*87d2adbcSAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*87d2adbcSAndrew Rist  * "License"); you may not use this file except in compliance
9*87d2adbcSAndrew Rist  * with the License.  You may obtain a copy of the License at
10*87d2adbcSAndrew Rist  *
11*87d2adbcSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*87d2adbcSAndrew Rist  *
13*87d2adbcSAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*87d2adbcSAndrew Rist  * software distributed under the License is distributed on an
15*87d2adbcSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*87d2adbcSAndrew Rist  * KIND, either express or implied.  See the License for the
17*87d2adbcSAndrew Rist  * specific language governing permissions and limitations
18*87d2adbcSAndrew Rist  * under the License.
19*87d2adbcSAndrew Rist  *
20*87d2adbcSAndrew Rist  *************************************************************/
21*87d2adbcSAndrew Rist 
22*87d2adbcSAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_sal.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include "rtl/uri.h"
28cdf0e10cSrcweir 
29cdf0e10cSrcweir #include "surrogates.h"
30cdf0e10cSrcweir 
31cdf0e10cSrcweir #include "osl/diagnose.h"
32cdf0e10cSrcweir #include "rtl/strbuf.hxx"
33cdf0e10cSrcweir #include "rtl/textenc.h"
34cdf0e10cSrcweir #include "rtl/textcvt.h"
35cdf0e10cSrcweir #include "rtl/uri.h"
36cdf0e10cSrcweir #include "rtl/ustrbuf.h"
37cdf0e10cSrcweir #include "rtl/ustrbuf.hxx"
38cdf0e10cSrcweir #include "rtl/ustring.h"
39cdf0e10cSrcweir #include "rtl/ustring.hxx"
40cdf0e10cSrcweir #include "sal/types.h"
41cdf0e10cSrcweir 
42cdf0e10cSrcweir #include <cstddef>
43cdf0e10cSrcweir 
44cdf0e10cSrcweir namespace {
45cdf0e10cSrcweir 
46cdf0e10cSrcweir std::size_t const nCharClassSize = 128;
47cdf0e10cSrcweir 
48cdf0e10cSrcweir sal_Unicode const cEscapePrefix = 0x25; // '%'
49cdf0e10cSrcweir 
isDigit(sal_uInt32 nUtf32)50cdf0e10cSrcweir inline bool isDigit(sal_uInt32 nUtf32)
51cdf0e10cSrcweir {
52cdf0e10cSrcweir     return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9'
53cdf0e10cSrcweir }
54cdf0e10cSrcweir 
isAlpha(sal_uInt32 nUtf32)55cdf0e10cSrcweir inline bool isAlpha(sal_uInt32 nUtf32)
56cdf0e10cSrcweir {
57cdf0e10cSrcweir     // 'A'--'Z', 'a'--'z'
58cdf0e10cSrcweir     return (
59cdf0e10cSrcweir             (nUtf32 >= 0x41 && nUtf32 <= 0x5A) ||
60cdf0e10cSrcweir             (nUtf32 >= 0x61 && nUtf32 <= 0x7A)
61cdf0e10cSrcweir            );
62cdf0e10cSrcweir }
63cdf0e10cSrcweir 
isHighSurrogate(sal_uInt32 nUtf16)64cdf0e10cSrcweir inline bool isHighSurrogate(sal_uInt32 nUtf16)
65cdf0e10cSrcweir {
66cdf0e10cSrcweir     return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
67cdf0e10cSrcweir }
68cdf0e10cSrcweir 
isLowSurrogate(sal_uInt32 nUtf16)69cdf0e10cSrcweir inline bool isLowSurrogate(sal_uInt32 nUtf16)
70cdf0e10cSrcweir {
71cdf0e10cSrcweir     return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
72cdf0e10cSrcweir }
73cdf0e10cSrcweir 
combineSurrogates(sal_uInt32 high,sal_uInt32 low)74cdf0e10cSrcweir inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
75cdf0e10cSrcweir {
76cdf0e10cSrcweir     return SAL_RTL_COMBINE_SURROGATES(high, low);
77cdf0e10cSrcweir }
78cdf0e10cSrcweir 
getHexWeight(sal_uInt32 nUtf32)79cdf0e10cSrcweir inline int getHexWeight(sal_uInt32 nUtf32)
80cdf0e10cSrcweir {
81cdf0e10cSrcweir     return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
82cdf0e10cSrcweir                static_cast< int >(nUtf32 - 0x30) :
83cdf0e10cSrcweir            nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
84cdf0e10cSrcweir                static_cast< int >(nUtf32 - 0x41 + 10) :
85cdf0e10cSrcweir            nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
86cdf0e10cSrcweir                static_cast< int >(nUtf32 - 0x61 + 10) :
87cdf0e10cSrcweir                -1; // not a hex digit
88cdf0e10cSrcweir }
89cdf0e10cSrcweir 
isValid(sal_Bool const * pCharClass,sal_uInt32 nUtf32)90cdf0e10cSrcweir inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
91cdf0e10cSrcweir {
92cdf0e10cSrcweir     return nUtf32 < nCharClassSize && pCharClass[nUtf32];
93cdf0e10cSrcweir }
94cdf0e10cSrcweir 
writeUnicode(rtl_uString ** pBuffer,sal_Int32 * pCapacity,sal_Unicode cChar)95cdf0e10cSrcweir inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
96cdf0e10cSrcweir                          sal_Unicode cChar)
97cdf0e10cSrcweir {
98cdf0e10cSrcweir     rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
99cdf0e10cSrcweir }
100cdf0e10cSrcweir 
// Classification of what readUcs4 consumed from its input.
enum EscapeType
{
    // The character was read literally (no escape sequence was decoded).
    EscapeNo = 0,
    // One or more "%XX" escape sequences were decoded into a character.
    EscapeChar = 1,
    // A single "%XX" escape sequence was read whose octet could not be
    // decoded as (part of) a character; the returned value is the raw octet.
    EscapeOctet = 2
};
107cdf0e10cSrcweir 
108cdf0e10cSrcweir /* Read any of the following:
109cdf0e10cSrcweir 
110cdf0e10cSrcweir    - sequence of escape sequences representing character from eCharset,
111cdf0e10cSrcweir      translated to single UCS4 character; or
112cdf0e10cSrcweir 
113cdf0e10cSrcweir    - pair of UTF-16 surrogates, translated to single UCS4 character; or
114cdf0e10cSrcweir 
115cdf0e10cSrcweir    _ single UTF-16 character, extended to UCS4 character.
116cdf0e10cSrcweir  */
readUcs4(sal_Unicode const ** pBegin,sal_Unicode const * pEnd,bool bEncoded,rtl_TextEncoding eCharset,EscapeType * pType)117cdf0e10cSrcweir sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
118cdf0e10cSrcweir                     bool bEncoded, rtl_TextEncoding eCharset,
119cdf0e10cSrcweir                     EscapeType * pType)
120cdf0e10cSrcweir {
121cdf0e10cSrcweir     sal_uInt32 nChar = *(*pBegin)++;
122cdf0e10cSrcweir     int nWeight1;
123cdf0e10cSrcweir     int nWeight2;
124cdf0e10cSrcweir     if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
125cdf0e10cSrcweir         && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
126cdf0e10cSrcweir         && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
127cdf0e10cSrcweir     {
128cdf0e10cSrcweir         *pBegin += 2;
129cdf0e10cSrcweir         nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
130cdf0e10cSrcweir         if (nChar <= 0x7F)
131cdf0e10cSrcweir             *pType = EscapeChar;
132cdf0e10cSrcweir         else if (eCharset == RTL_TEXTENCODING_UTF8)
133cdf0e10cSrcweir         {
134cdf0e10cSrcweir             if (nChar >= 0xC0 && nChar <= 0xF4)
135cdf0e10cSrcweir             {
136cdf0e10cSrcweir                 sal_uInt32 nEncoded;
137cdf0e10cSrcweir                 int nShift;
138cdf0e10cSrcweir                 sal_uInt32 nMin;
139cdf0e10cSrcweir                 if (nChar <= 0xDF)
140cdf0e10cSrcweir                 {
141cdf0e10cSrcweir                     nEncoded = (nChar & 0x1F) << 6;
142cdf0e10cSrcweir                     nShift = 0;
143cdf0e10cSrcweir                     nMin = 0x80;
144cdf0e10cSrcweir                 }
145cdf0e10cSrcweir                 else if (nChar <= 0xEF)
146cdf0e10cSrcweir                 {
147cdf0e10cSrcweir                     nEncoded = (nChar & 0x0F) << 12;
148cdf0e10cSrcweir                     nShift = 6;
149cdf0e10cSrcweir                     nMin = 0x800;
150cdf0e10cSrcweir                 }
151cdf0e10cSrcweir                 else
152cdf0e10cSrcweir                 {
153cdf0e10cSrcweir                     nEncoded = (nChar & 0x07) << 18;
154cdf0e10cSrcweir                     nShift = 12;
155cdf0e10cSrcweir                     nMin = 0x10000;
156cdf0e10cSrcweir                 }
157cdf0e10cSrcweir                 sal_Unicode const * p = *pBegin;
158cdf0e10cSrcweir                 bool bUTF8 = true;
159cdf0e10cSrcweir                 for (; nShift >= 0; nShift -= 6)
160cdf0e10cSrcweir                 {
161cdf0e10cSrcweir                     if (pEnd - p < 3 || p[0] != cEscapePrefix
162cdf0e10cSrcweir                         || (nWeight1 = getHexWeight(p[1])) < 8
163cdf0e10cSrcweir                         || nWeight1 > 11
164cdf0e10cSrcweir                         || (nWeight2 = getHexWeight(p[2])) < 0)
165cdf0e10cSrcweir                     {
166cdf0e10cSrcweir                         bUTF8 = sal_False;
167cdf0e10cSrcweir                         break;
168cdf0e10cSrcweir                     }
169cdf0e10cSrcweir                     p += 3;
170cdf0e10cSrcweir                     nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
171cdf0e10cSrcweir                 }
172cdf0e10cSrcweir                 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
173cdf0e10cSrcweir                     && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
174cdf0e10cSrcweir                 {
175cdf0e10cSrcweir                     *pBegin = p;
176cdf0e10cSrcweir                     *pType = EscapeChar;
177cdf0e10cSrcweir                     return nEncoded;
178cdf0e10cSrcweir                 }
179cdf0e10cSrcweir             }
180cdf0e10cSrcweir             *pType = EscapeOctet;
181cdf0e10cSrcweir         }
182cdf0e10cSrcweir         else
183cdf0e10cSrcweir         {
184cdf0e10cSrcweir             rtl::OStringBuffer aBuf;
185cdf0e10cSrcweir             aBuf.append(static_cast< char >(nChar));
186cdf0e10cSrcweir             rtl_TextToUnicodeConverter aConverter
187cdf0e10cSrcweir                 = rtl_createTextToUnicodeConverter(eCharset);
188cdf0e10cSrcweir             sal_Unicode const * p = *pBegin;
189cdf0e10cSrcweir             for (;;)
190cdf0e10cSrcweir             {
191cdf0e10cSrcweir                 sal_Unicode aDst[2];
192cdf0e10cSrcweir                 sal_uInt32 nInfo;
193cdf0e10cSrcweir                 sal_Size nConverted;
194cdf0e10cSrcweir                 sal_Size nDstSize = rtl_convertTextToUnicode(
195cdf0e10cSrcweir                     aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
196cdf0e10cSrcweir                     sizeof aDst / sizeof aDst[0],
197cdf0e10cSrcweir                     (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
198cdf0e10cSrcweir                      | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
199cdf0e10cSrcweir                      | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
200cdf0e10cSrcweir                     &nInfo, &nConverted);
201cdf0e10cSrcweir                 if (nInfo == 0)
202cdf0e10cSrcweir                 {
203cdf0e10cSrcweir                     OSL_ASSERT(
204cdf0e10cSrcweir                         nConverted
205cdf0e10cSrcweir                         == sal::static_int_cast< sal_uInt32 >(
206cdf0e10cSrcweir                             aBuf.getLength()));
207cdf0e10cSrcweir                     rtl_destroyTextToUnicodeConverter(aConverter);
208cdf0e10cSrcweir                     *pBegin = p;
209cdf0e10cSrcweir                     *pType = EscapeChar;
210cdf0e10cSrcweir                     OSL_ASSERT(
211cdf0e10cSrcweir                         nDstSize == 1
212cdf0e10cSrcweir                         || (nDstSize == 2 && isHighSurrogate(aDst[0])
213cdf0e10cSrcweir                             && isLowSurrogate(aDst[1])));
214cdf0e10cSrcweir                     return nDstSize == 1
215cdf0e10cSrcweir                         ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
216cdf0e10cSrcweir                 }
217cdf0e10cSrcweir                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
218cdf0e10cSrcweir                          && pEnd - p >= 3 && p[0] == cEscapePrefix
219cdf0e10cSrcweir                          && (nWeight1 = getHexWeight(p[1])) >= 0
220cdf0e10cSrcweir                          && (nWeight2 = getHexWeight(p[2])) >= 0)
221cdf0e10cSrcweir                 {
222cdf0e10cSrcweir                     p += 3;
223cdf0e10cSrcweir                     aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
224cdf0e10cSrcweir                 }
225cdf0e10cSrcweir                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
226cdf0e10cSrcweir                          && p != pEnd && *p <= 0x7F)
227cdf0e10cSrcweir                 {
228cdf0e10cSrcweir                     aBuf.append(static_cast< char >(*p++));
229cdf0e10cSrcweir                 }
230cdf0e10cSrcweir                 else
231cdf0e10cSrcweir                 {
232cdf0e10cSrcweir                     OSL_ASSERT(
233cdf0e10cSrcweir                         (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
234cdf0e10cSrcweir                         == 0);
235cdf0e10cSrcweir                     break;
236cdf0e10cSrcweir                 }
237cdf0e10cSrcweir             }
238cdf0e10cSrcweir             rtl_destroyTextToUnicodeConverter(aConverter);
239cdf0e10cSrcweir             *pType = EscapeOctet;
240cdf0e10cSrcweir         }
241cdf0e10cSrcweir         return nChar;
242cdf0e10cSrcweir     }
243cdf0e10cSrcweir     else
244cdf0e10cSrcweir     {
245cdf0e10cSrcweir         *pType = EscapeNo;
246cdf0e10cSrcweir         return isHighSurrogate(nChar) && *pBegin < pEnd
247cdf0e10cSrcweir                && isLowSurrogate(**pBegin) ?
248cdf0e10cSrcweir                    combineSurrogates(nChar, *(*pBegin)++) : nChar;
249cdf0e10cSrcweir     }
250cdf0e10cSrcweir }
251cdf0e10cSrcweir 
writeUcs4(rtl_uString ** pBuffer,sal_Int32 * pCapacity,sal_uInt32 nUtf32)252cdf0e10cSrcweir void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
253cdf0e10cSrcweir {
254cdf0e10cSrcweir     OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
255cdf0e10cSrcweir     if (nUtf32 <= 0xFFFF) {
256cdf0e10cSrcweir         writeUnicode(
257cdf0e10cSrcweir             pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
258cdf0e10cSrcweir     } else {
259cdf0e10cSrcweir         nUtf32 -= 0x10000;
260cdf0e10cSrcweir         writeUnicode(
261cdf0e10cSrcweir             pBuffer, pCapacity,
262cdf0e10cSrcweir             static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
263cdf0e10cSrcweir         writeUnicode(
264cdf0e10cSrcweir             pBuffer, pCapacity,
265cdf0e10cSrcweir             static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
266cdf0e10cSrcweir     }
267cdf0e10cSrcweir }
268cdf0e10cSrcweir 
writeEscapeOctet(rtl_uString ** pBuffer,sal_Int32 * pCapacity,sal_uInt32 nOctet)269cdf0e10cSrcweir void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
270cdf0e10cSrcweir                       sal_uInt32 nOctet)
271cdf0e10cSrcweir {
272cdf0e10cSrcweir     OSL_ENSURE(nOctet <= 0xFF, "bad octet");
273cdf0e10cSrcweir 
274cdf0e10cSrcweir     static sal_Unicode const aHex[16]
275cdf0e10cSrcweir         = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
276cdf0e10cSrcweir             0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
277cdf0e10cSrcweir 
278cdf0e10cSrcweir     writeUnicode(pBuffer, pCapacity, cEscapePrefix);
279cdf0e10cSrcweir     writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
280cdf0e10cSrcweir     writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
281cdf0e10cSrcweir }
282cdf0e10cSrcweir 
writeEscapeChar(rtl_uString ** pBuffer,sal_Int32 * pCapacity,sal_uInt32 nUtf32,rtl_TextEncoding eCharset,bool bStrict)283cdf0e10cSrcweir bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
284cdf0e10cSrcweir                      sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
285cdf0e10cSrcweir {
286cdf0e10cSrcweir     OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
287cdf0e10cSrcweir     if (eCharset == RTL_TEXTENCODING_UTF8) {
288cdf0e10cSrcweir         if (nUtf32 < 0x80)
289cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, nUtf32);
290cdf0e10cSrcweir         else if (nUtf32 < 0x800)
291cdf0e10cSrcweir         {
292cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
293cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
294cdf0e10cSrcweir         }
295cdf0e10cSrcweir         else if (nUtf32 < 0x10000)
296cdf0e10cSrcweir         {
297cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
298cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
299cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
300cdf0e10cSrcweir         }
301cdf0e10cSrcweir         else
302cdf0e10cSrcweir         {
303cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
304cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
305cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
306cdf0e10cSrcweir             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
307cdf0e10cSrcweir         }
308cdf0e10cSrcweir     } else {
309cdf0e10cSrcweir         rtl_UnicodeToTextConverter aConverter
310cdf0e10cSrcweir             = rtl_createUnicodeToTextConverter(eCharset);
311cdf0e10cSrcweir         sal_Unicode aSrc[2];
312cdf0e10cSrcweir         sal_Size nSrcSize;
313cdf0e10cSrcweir         if (nUtf32 <= 0xFFFF)
314cdf0e10cSrcweir         {
315cdf0e10cSrcweir             aSrc[0] = static_cast< sal_Unicode >(nUtf32);
316cdf0e10cSrcweir             nSrcSize = 1;
317cdf0e10cSrcweir         }
318cdf0e10cSrcweir         else
319cdf0e10cSrcweir         {
320cdf0e10cSrcweir             aSrc[0] = static_cast< sal_Unicode >(
321cdf0e10cSrcweir                 ((nUtf32 - 0x10000) >> 10) | 0xD800);
322cdf0e10cSrcweir             aSrc[1] = static_cast< sal_Unicode >(
323cdf0e10cSrcweir                 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
324cdf0e10cSrcweir             nSrcSize = 2;
325cdf0e10cSrcweir         }
326cdf0e10cSrcweir         sal_Char aDst[32]; // FIXME  random value
327cdf0e10cSrcweir         sal_uInt32 nInfo;
328cdf0e10cSrcweir         sal_Size nConverted;
329cdf0e10cSrcweir         sal_Size nDstSize = rtl_convertUnicodeToText(
330cdf0e10cSrcweir             aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
331cdf0e10cSrcweir             RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
332cdf0e10cSrcweir             | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
333cdf0e10cSrcweir             | RTL_UNICODETOTEXT_FLAGS_FLUSH,
334cdf0e10cSrcweir             &nInfo, &nConverted);
335cdf0e10cSrcweir         OSL_ASSERT((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
336cdf0e10cSrcweir         rtl_destroyUnicodeToTextConverter(aConverter);
337cdf0e10cSrcweir         if (nInfo == 0) {
338cdf0e10cSrcweir             OSL_ENSURE(nConverted == nSrcSize, "bad rtl_convertUnicodeToText");
339cdf0e10cSrcweir             for (sal_Size i = 0; i < nDstSize; ++i)
340cdf0e10cSrcweir                 writeEscapeOctet(pBuffer, pCapacity,
341cdf0e10cSrcweir                                  static_cast< unsigned char >(aDst[i]));
342cdf0e10cSrcweir                     // FIXME  all octets are escaped, even if there is no need
343cdf0e10cSrcweir         } else {
344cdf0e10cSrcweir             if (bStrict) {
345cdf0e10cSrcweir                 return false;
346cdf0e10cSrcweir             } else {
347cdf0e10cSrcweir                 writeUcs4(pBuffer, pCapacity, nUtf32);
348cdf0e10cSrcweir             }
349cdf0e10cSrcweir         }
350cdf0e10cSrcweir     }
351cdf0e10cSrcweir     return true;
352cdf0e10cSrcweir }
353cdf0e10cSrcweir 
354cdf0e10cSrcweir struct Component
355cdf0e10cSrcweir {
356cdf0e10cSrcweir     sal_Unicode const * pBegin;
357cdf0e10cSrcweir     sal_Unicode const * pEnd;
358cdf0e10cSrcweir 
Component__anon4ac86d970111::Component359cdf0e10cSrcweir     inline Component(): pBegin(0) {}
360cdf0e10cSrcweir 
isPresent__anon4ac86d970111::Component361cdf0e10cSrcweir     inline bool isPresent() const { return pBegin != 0; }
362cdf0e10cSrcweir 
363cdf0e10cSrcweir     inline sal_Int32 getLength() const;
364cdf0e10cSrcweir };
365cdf0e10cSrcweir 
getLength() const366cdf0e10cSrcweir inline sal_Int32 Component::getLength() const
367cdf0e10cSrcweir {
368cdf0e10cSrcweir     OSL_ENSURE(isPresent(), "taking length of non-present component");
369cdf0e10cSrcweir     return static_cast< sal_Int32 >(pEnd - pBegin);
370cdf0e10cSrcweir }
371cdf0e10cSrcweir 
372cdf0e10cSrcweir struct Components
373cdf0e10cSrcweir {
374cdf0e10cSrcweir     Component aScheme;
375cdf0e10cSrcweir     Component aAuthority;
376cdf0e10cSrcweir     Component aPath;
377cdf0e10cSrcweir     Component aQuery;
378cdf0e10cSrcweir     Component aFragment;
379cdf0e10cSrcweir };
380cdf0e10cSrcweir 
parseUriRef(rtl_uString const * pUriRef,Components * pComponents)381cdf0e10cSrcweir void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
382cdf0e10cSrcweir {
383cdf0e10cSrcweir     // This algorithm is liberal and accepts various forms of illegal input.
384cdf0e10cSrcweir 
385cdf0e10cSrcweir     sal_Unicode const * pBegin = pUriRef->buffer;
386cdf0e10cSrcweir     sal_Unicode const * pEnd = pBegin + pUriRef->length;
387cdf0e10cSrcweir     sal_Unicode const * pPos = pBegin;
388cdf0e10cSrcweir 
389cdf0e10cSrcweir     if (pPos != pEnd && isAlpha(*pPos))
390cdf0e10cSrcweir         for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
391cdf0e10cSrcweir             if (*p == ':')
392cdf0e10cSrcweir             {
393cdf0e10cSrcweir                 pComponents->aScheme.pBegin = pBegin;
394cdf0e10cSrcweir                 pComponents->aScheme.pEnd = ++p;
395cdf0e10cSrcweir                 pPos = p;
396cdf0e10cSrcweir                 break;
397cdf0e10cSrcweir             }
398cdf0e10cSrcweir             else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-'
399cdf0e10cSrcweir                      && *p != '.')
400cdf0e10cSrcweir                 break;
401cdf0e10cSrcweir 
402cdf0e10cSrcweir     if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
403cdf0e10cSrcweir     {
404cdf0e10cSrcweir         pComponents->aAuthority.pBegin = pPos;
405cdf0e10cSrcweir         pPos += 2;
406cdf0e10cSrcweir         while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
407cdf0e10cSrcweir             ++pPos;
408cdf0e10cSrcweir         pComponents->aAuthority.pEnd = pPos;
409cdf0e10cSrcweir     }
410cdf0e10cSrcweir 
411cdf0e10cSrcweir     pComponents->aPath.pBegin = pPos;
412cdf0e10cSrcweir     while (pPos != pEnd && *pPos != '?' && * pPos != '#')
413cdf0e10cSrcweir         ++pPos;
414cdf0e10cSrcweir     pComponents->aPath.pEnd = pPos;
415cdf0e10cSrcweir 
416cdf0e10cSrcweir     if (pPos != pEnd && *pPos == '?')
417cdf0e10cSrcweir     {
418cdf0e10cSrcweir         pComponents->aQuery.pBegin = pPos++;
419cdf0e10cSrcweir         while (pPos != pEnd && * pPos != '#')
420cdf0e10cSrcweir             ++pPos;
421cdf0e10cSrcweir         pComponents->aQuery.pEnd = pPos;
422cdf0e10cSrcweir     }
423cdf0e10cSrcweir 
424cdf0e10cSrcweir     if (pPos != pEnd)
425cdf0e10cSrcweir     {
426cdf0e10cSrcweir         OSL_ASSERT(*pPos == '#');
427cdf0e10cSrcweir         pComponents->aFragment.pBegin = pPos;
428cdf0e10cSrcweir         pComponents->aFragment.pEnd = pEnd;
429cdf0e10cSrcweir     }
430cdf0e10cSrcweir }
431cdf0e10cSrcweir 
joinPaths(Component const & rBasePath,Component const & rRelPath)432cdf0e10cSrcweir rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath)
433cdf0e10cSrcweir {
434cdf0e10cSrcweir     OSL_ASSERT(rBasePath.isPresent() && *rBasePath.pBegin == '/');
435cdf0e10cSrcweir     OSL_ASSERT(rRelPath.isPresent());
436cdf0e10cSrcweir 
437cdf0e10cSrcweir     // The invariant of aBuffer is that it always starts and ends with a slash
438cdf0e10cSrcweir     // (until probably right at the end of the algorithm, when the last segment
439cdf0e10cSrcweir     // of rRelPath is added, which does not necessarily end in a slash):
440cdf0e10cSrcweir     rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
441cdf0e10cSrcweir         // XXX  numeric overflow
442cdf0e10cSrcweir 
443cdf0e10cSrcweir     // Segments "." and ".." within rBasePath are not conisdered special (but
444cdf0e10cSrcweir     // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
445cdf0e10cSrcweir     // bit unclear about this point:
446cdf0e10cSrcweir     sal_Int32 nFixed = 1;
447cdf0e10cSrcweir     sal_Unicode const * p = rBasePath.pBegin + 1;
448cdf0e10cSrcweir     for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
449cdf0e10cSrcweir         if (*q == '/')
450cdf0e10cSrcweir         {
451cdf0e10cSrcweir             if (
452cdf0e10cSrcweir                 (q - p == 1 && p[0] == '.') ||
453cdf0e10cSrcweir                 (q - p == 2 && p[0] == '.' && p[1] == '.')
454cdf0e10cSrcweir                )
455cdf0e10cSrcweir             {
456cdf0e10cSrcweir                 nFixed = q + 1 - rBasePath.pBegin;
457cdf0e10cSrcweir             }
458cdf0e10cSrcweir             p = q + 1;
459cdf0e10cSrcweir         }
460cdf0e10cSrcweir     aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin);
461cdf0e10cSrcweir 
462cdf0e10cSrcweir     p = rRelPath.pBegin;
463cdf0e10cSrcweir     if (p != rRelPath.pEnd)
464cdf0e10cSrcweir         for (;;)
465cdf0e10cSrcweir         {
466cdf0e10cSrcweir             sal_Unicode const * q = p;
467cdf0e10cSrcweir             sal_Unicode const * r;
468cdf0e10cSrcweir             for (;;)
469cdf0e10cSrcweir             {
470cdf0e10cSrcweir                 if (q == rRelPath.pEnd)
471cdf0e10cSrcweir                 {
472cdf0e10cSrcweir                     r = q;
473cdf0e10cSrcweir                     break;
474cdf0e10cSrcweir                 }
475cdf0e10cSrcweir                 if (*q == '/')
476cdf0e10cSrcweir                 {
477cdf0e10cSrcweir                     r = q + 1;
478cdf0e10cSrcweir                     break;
479cdf0e10cSrcweir                 }
480cdf0e10cSrcweir                 ++q;
481cdf0e10cSrcweir             }
482cdf0e10cSrcweir             if (q - p == 2 && p[0] == '.' && p[1] == '.')
483cdf0e10cSrcweir             {
484cdf0e10cSrcweir                 // Erroneous excess segments ".." within rRelPath are left
485cdf0e10cSrcweir                 // intact, as the examples in RFC 2396, section C.2, suggest:
486cdf0e10cSrcweir                 sal_Int32 i = aBuffer.getLength() - 1;
487cdf0e10cSrcweir                 if (i < nFixed)
488cdf0e10cSrcweir                 {
489cdf0e10cSrcweir                     aBuffer.append(p, r - p);
490cdf0e10cSrcweir                     nFixed += 3;
491cdf0e10cSrcweir                 }
492cdf0e10cSrcweir                 else
493cdf0e10cSrcweir                 {
494cdf0e10cSrcweir                     while (aBuffer.charAt(i - 1) != '/')
495cdf0e10cSrcweir                         --i;
496cdf0e10cSrcweir                     aBuffer.setLength(i);
497cdf0e10cSrcweir                 }
498cdf0e10cSrcweir             }
499cdf0e10cSrcweir             else if (q - p != 1 || *p != '.')
500cdf0e10cSrcweir                 aBuffer.append(p, r - p);
501cdf0e10cSrcweir             if (q == rRelPath.pEnd)
502cdf0e10cSrcweir                 break;
503cdf0e10cSrcweir             p = q + 1;
504cdf0e10cSrcweir         }
505cdf0e10cSrcweir 
506cdf0e10cSrcweir     return aBuffer.makeStringAndClear();
507cdf0e10cSrcweir }
508cdf0e10cSrcweir 
509cdf0e10cSrcweir }
510cdf0e10cSrcweir 
rtl_getUriCharClass(rtl_UriCharClass eCharClass)511cdf0e10cSrcweir sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
512cdf0e10cSrcweir     SAL_THROW_EXTERN_C()
513cdf0e10cSrcweir {
514cdf0e10cSrcweir     static sal_Bool const aCharClass[][nCharClassSize]
515cdf0e10cSrcweir     = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
516cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
518cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
519cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
520cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
521cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
522cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /*pqrstuvwxyz{|}~ */
523cdf0e10cSrcweir        },
524cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
525cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
527cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
528cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
529cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
530cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
531cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
532cdf0e10cSrcweir        },
533cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
534cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
536cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
537cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
538cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
539cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
540cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
541cdf0e10cSrcweir        },
542cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
543cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
544cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
545cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
546cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
547cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
548cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
549cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
550cdf0e10cSrcweir        },
551cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
552cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
553cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
554cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
555cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
556cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
557cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
558cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
559cdf0e10cSrcweir        },
560cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
561cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
563cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
564cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
565cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
566cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
567cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
568cdf0e10cSrcweir        },
569cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
570cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
572cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
573cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
574cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
575cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
576cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
577cdf0e10cSrcweir        },
578cdf0e10cSrcweir        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
579cdf0e10cSrcweir          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
580cdf0e10cSrcweir          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
581cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
582cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
583cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
584cdf0e10cSrcweir          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
585cdf0e10cSrcweir          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
586cdf0e10cSrcweir        }};
587cdf0e10cSrcweir     OSL_ENSURE(
588cdf0e10cSrcweir         (eCharClass >= 0
589cdf0e10cSrcweir          && (sal::static_int_cast< std::size_t >(eCharClass)
590cdf0e10cSrcweir              < sizeof aCharClass / sizeof aCharClass[0])),
591cdf0e10cSrcweir         "bad eCharClass");
592cdf0e10cSrcweir     return aCharClass[eCharClass];
593cdf0e10cSrcweir }
594cdf0e10cSrcweir 
rtl_uriEncode(rtl_uString * pText,sal_Bool const * pCharClass,rtl_UriEncodeMechanism eMechanism,rtl_TextEncoding eCharset,rtl_uString ** pResult)595cdf0e10cSrcweir void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
596cdf0e10cSrcweir                             rtl_UriEncodeMechanism eMechanism,
597cdf0e10cSrcweir                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
598cdf0e10cSrcweir     SAL_THROW_EXTERN_C()
599cdf0e10cSrcweir {
600cdf0e10cSrcweir     OSL_ENSURE(!pCharClass[0x25], "bad pCharClass");
601cdf0e10cSrcweir         // make sure the percent sign is encoded...
602cdf0e10cSrcweir 
603cdf0e10cSrcweir     sal_Unicode const * p = pText->buffer;
604cdf0e10cSrcweir     sal_Unicode const * pEnd = p + pText->length;
605cdf0e10cSrcweir     sal_Int32 nCapacity = 0;
606cdf0e10cSrcweir     rtl_uString_new(pResult);
607cdf0e10cSrcweir     while (p < pEnd)
608cdf0e10cSrcweir     {
609cdf0e10cSrcweir         EscapeType eType;
610cdf0e10cSrcweir         sal_uInt32 nUtf32 = readUcs4(
611cdf0e10cSrcweir             &p, pEnd,
612cdf0e10cSrcweir             (eMechanism == rtl_UriEncodeKeepEscapes
613cdf0e10cSrcweir              || eMechanism == rtl_UriEncodeCheckEscapes
614cdf0e10cSrcweir              || eMechanism == rtl_UriEncodeStrictKeepEscapes),
615cdf0e10cSrcweir             eCharset, &eType);
616cdf0e10cSrcweir         switch (eType)
617cdf0e10cSrcweir         {
618cdf0e10cSrcweir         case EscapeNo:
619cdf0e10cSrcweir             if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
620cdf0e10cSrcweir                 writeUnicode(pResult, &nCapacity,
621cdf0e10cSrcweir                              static_cast< sal_Unicode >(nUtf32));
622cdf0e10cSrcweir             else if (!writeEscapeChar(
623cdf0e10cSrcweir                          pResult, &nCapacity, nUtf32, eCharset,
624cdf0e10cSrcweir                          (eMechanism == rtl_UriEncodeStrict
625cdf0e10cSrcweir                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
626cdf0e10cSrcweir             {
627cdf0e10cSrcweir                 rtl_uString_new(pResult);
628cdf0e10cSrcweir                 return;
629cdf0e10cSrcweir             }
630cdf0e10cSrcweir             break;
631cdf0e10cSrcweir 
632cdf0e10cSrcweir         case EscapeChar:
633cdf0e10cSrcweir             if (eMechanism == rtl_UriEncodeCheckEscapes
634cdf0e10cSrcweir                 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
635cdf0e10cSrcweir                 writeUnicode(pResult, &nCapacity,
636cdf0e10cSrcweir                              static_cast< sal_Unicode >(nUtf32));
637cdf0e10cSrcweir             else if (!writeEscapeChar(
638cdf0e10cSrcweir                          pResult, &nCapacity, nUtf32, eCharset,
639cdf0e10cSrcweir                          (eMechanism == rtl_UriEncodeStrict
640cdf0e10cSrcweir                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
641cdf0e10cSrcweir             {
642cdf0e10cSrcweir                 rtl_uString_new(pResult);
643cdf0e10cSrcweir                 return;
644cdf0e10cSrcweir             }
645cdf0e10cSrcweir             break;
646cdf0e10cSrcweir 
647cdf0e10cSrcweir         case EscapeOctet:
648cdf0e10cSrcweir             writeEscapeOctet(pResult, &nCapacity, nUtf32);
649cdf0e10cSrcweir             break;
650cdf0e10cSrcweir         }
651cdf0e10cSrcweir     }
652cdf0e10cSrcweir }
653cdf0e10cSrcweir 
rtl_uriDecode(rtl_uString * pText,rtl_UriDecodeMechanism eMechanism,rtl_TextEncoding eCharset,rtl_uString ** pResult)654cdf0e10cSrcweir void SAL_CALL rtl_uriDecode(rtl_uString * pText,
655cdf0e10cSrcweir                             rtl_UriDecodeMechanism eMechanism,
656cdf0e10cSrcweir                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
657cdf0e10cSrcweir     SAL_THROW_EXTERN_C()
658cdf0e10cSrcweir {
659cdf0e10cSrcweir     switch (eMechanism)
660cdf0e10cSrcweir     {
661cdf0e10cSrcweir     case rtl_UriDecodeNone:
662cdf0e10cSrcweir         rtl_uString_assign(pResult, pText);
663cdf0e10cSrcweir         break;
664cdf0e10cSrcweir 
665cdf0e10cSrcweir     case rtl_UriDecodeToIuri:
666cdf0e10cSrcweir         eCharset = RTL_TEXTENCODING_UTF8;
667cdf0e10cSrcweir     default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
668cdf0e10cSrcweir         {
669cdf0e10cSrcweir             sal_Unicode const * p = pText->buffer;
670cdf0e10cSrcweir             sal_Unicode const * pEnd = p + pText->length;
671cdf0e10cSrcweir             sal_Int32 nCapacity = 0;
672cdf0e10cSrcweir             rtl_uString_new(pResult);
673cdf0e10cSrcweir             while (p < pEnd)
674cdf0e10cSrcweir             {
675cdf0e10cSrcweir                 EscapeType eType;
676cdf0e10cSrcweir                 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
677cdf0e10cSrcweir                 switch (eType)
678cdf0e10cSrcweir                 {
679cdf0e10cSrcweir                 case EscapeChar:
680cdf0e10cSrcweir                     if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
681cdf0e10cSrcweir                     {
682cdf0e10cSrcweir                         writeEscapeOctet(pResult, &nCapacity, nUtf32);
683cdf0e10cSrcweir                         break;
684cdf0e10cSrcweir                     }
685cdf0e10cSrcweir                 case EscapeNo:
686cdf0e10cSrcweir                     writeUcs4(pResult, &nCapacity, nUtf32);
687cdf0e10cSrcweir                     break;
688cdf0e10cSrcweir 
689cdf0e10cSrcweir                 case EscapeOctet:
690cdf0e10cSrcweir                     if (eMechanism == rtl_UriDecodeStrict) {
691cdf0e10cSrcweir                         rtl_uString_new(pResult);
692cdf0e10cSrcweir                         return;
693cdf0e10cSrcweir                     }
694cdf0e10cSrcweir                     writeEscapeOctet(pResult, &nCapacity, nUtf32);
695cdf0e10cSrcweir                     break;
696cdf0e10cSrcweir                 }
697cdf0e10cSrcweir             }
698cdf0e10cSrcweir         }
699cdf0e10cSrcweir         break;
700cdf0e10cSrcweir     }
701cdf0e10cSrcweir }
702cdf0e10cSrcweir 
rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,rtl_uString * pRelUriRef,rtl_uString ** pResult,rtl_uString ** pException)703cdf0e10cSrcweir sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
704cdf0e10cSrcweir                                          rtl_uString * pRelUriRef,
705cdf0e10cSrcweir                                          rtl_uString ** pResult,
706cdf0e10cSrcweir                                          rtl_uString ** pException)
707cdf0e10cSrcweir     SAL_THROW_EXTERN_C()
708cdf0e10cSrcweir {
709cdf0e10cSrcweir     // If pRelUriRef starts with a scheme component it is an absolute URI
710cdf0e10cSrcweir     // reference, and we are done (i.e., this algorithm does not support
711cdf0e10cSrcweir     // backwards-compatible relative URIs starting with a scheme component, see
712cdf0e10cSrcweir     // RFC 2396, section 5.2, step 3):
713cdf0e10cSrcweir     Components aRelComponents;
714cdf0e10cSrcweir     parseUriRef(pRelUriRef, &aRelComponents);
715cdf0e10cSrcweir     if (aRelComponents.aScheme.isPresent())
716cdf0e10cSrcweir     {
717cdf0e10cSrcweir         rtl_uString_assign(pResult, pRelUriRef);
718cdf0e10cSrcweir         return true;
719cdf0e10cSrcweir     }
720cdf0e10cSrcweir 
721cdf0e10cSrcweir     // Parse pBaseUriRef; if the scheme component is not present or not valid,
722cdf0e10cSrcweir     // or the path component is not empty and starts with anything but a slash,
723cdf0e10cSrcweir     // an exception is raised:
724cdf0e10cSrcweir     Components aBaseComponents;
725cdf0e10cSrcweir     parseUriRef(pBaseUriRef, &aBaseComponents);
726cdf0e10cSrcweir     if (!aBaseComponents.aScheme.isPresent())
727cdf0e10cSrcweir     {
728cdf0e10cSrcweir         rtl::OUString aMessage(pBaseUriRef);
729cdf0e10cSrcweir         aMessage += rtl::OUString(
730cdf0e10cSrcweir                         RTL_CONSTASCII_USTRINGPARAM(
731cdf0e10cSrcweir                             " does not start with a scheme component"));
732cdf0e10cSrcweir         rtl_uString_assign(pException,
733cdf0e10cSrcweir                            const_cast< rtl::OUString & >(aMessage).pData);
734cdf0e10cSrcweir         return false;
735cdf0e10cSrcweir     }
736cdf0e10cSrcweir     if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
737cdf0e10cSrcweir         && *aBaseComponents.aPath.pBegin != '/')
738cdf0e10cSrcweir     {
739cdf0e10cSrcweir         rtl::OUString aMessage(pBaseUriRef);
740cdf0e10cSrcweir         aMessage += rtl::OUString(
741cdf0e10cSrcweir                         RTL_CONSTASCII_USTRINGPARAM(
742cdf0e10cSrcweir                             "path component does not start with slash"));
743cdf0e10cSrcweir         rtl_uString_assign(pException, aMessage.pData);
744cdf0e10cSrcweir         return false;
745cdf0e10cSrcweir     }
746cdf0e10cSrcweir 
747cdf0e10cSrcweir     // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
748cdf0e10cSrcweir     // into an absolute one (if the relative URI is a reference to the "current
749cdf0e10cSrcweir     // document," the "current document" is here taken to be the base URI):
750cdf0e10cSrcweir     rtl::OUStringBuffer aBuffer;
751cdf0e10cSrcweir     aBuffer.append(aBaseComponents.aScheme.pBegin,
752cdf0e10cSrcweir                    aBaseComponents.aScheme.getLength());
753cdf0e10cSrcweir     if (aRelComponents.aAuthority.isPresent())
754cdf0e10cSrcweir     {
755cdf0e10cSrcweir         aBuffer.append(aRelComponents.aAuthority.pBegin,
756cdf0e10cSrcweir                        aRelComponents.aAuthority.getLength());
757cdf0e10cSrcweir         aBuffer.append(aRelComponents.aPath.pBegin,
758cdf0e10cSrcweir                        aRelComponents.aPath.getLength());
759cdf0e10cSrcweir         if (aRelComponents.aQuery.isPresent())
760cdf0e10cSrcweir             aBuffer.append(aRelComponents.aQuery.pBegin,
761cdf0e10cSrcweir                            aRelComponents.aQuery.getLength());
762cdf0e10cSrcweir     }
763cdf0e10cSrcweir     else
764cdf0e10cSrcweir     {
765cdf0e10cSrcweir         if (aBaseComponents.aAuthority.isPresent())
766cdf0e10cSrcweir             aBuffer.append(aBaseComponents.aAuthority.pBegin,
767cdf0e10cSrcweir                            aBaseComponents.aAuthority.getLength());
768cdf0e10cSrcweir         if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
769cdf0e10cSrcweir             && !aRelComponents.aQuery.isPresent())
770cdf0e10cSrcweir         {
771cdf0e10cSrcweir             aBuffer.append(aBaseComponents.aPath.pBegin,
772cdf0e10cSrcweir                            aBaseComponents.aPath.getLength());
773cdf0e10cSrcweir             if (aBaseComponents.aQuery.isPresent())
774cdf0e10cSrcweir                 aBuffer.append(aBaseComponents.aQuery.pBegin,
775cdf0e10cSrcweir                                aBaseComponents.aQuery.getLength());
776cdf0e10cSrcweir         }
777cdf0e10cSrcweir         else
778cdf0e10cSrcweir         {
779cdf0e10cSrcweir             if (*aRelComponents.aPath.pBegin == '/')
780cdf0e10cSrcweir                 aBuffer.append(aRelComponents.aPath.pBegin,
781cdf0e10cSrcweir                                aRelComponents.aPath.getLength());
782cdf0e10cSrcweir             else
783cdf0e10cSrcweir                 aBuffer.append(joinPaths(aBaseComponents.aPath,
784cdf0e10cSrcweir                                          aRelComponents.aPath));
785cdf0e10cSrcweir             if (aRelComponents.aQuery.isPresent())
786cdf0e10cSrcweir                 aBuffer.append(aRelComponents.aQuery.pBegin,
787cdf0e10cSrcweir                                aRelComponents.aQuery.getLength());
788cdf0e10cSrcweir         }
789cdf0e10cSrcweir     }
790cdf0e10cSrcweir     if (aRelComponents.aFragment.isPresent())
791cdf0e10cSrcweir         aBuffer.append(aRelComponents.aFragment.pBegin,
792cdf0e10cSrcweir                        aRelComponents.aFragment.getLength());
793cdf0e10cSrcweir     rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
794cdf0e10cSrcweir     return true;
795cdf0e10cSrcweir }
796