/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
21*87d2adbcSAndrew Rist
22*87d2adbcSAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_sal.hxx"
26cdf0e10cSrcweir
27cdf0e10cSrcweir #include "rtl/uri.h"
28cdf0e10cSrcweir
29cdf0e10cSrcweir #include "surrogates.h"
30cdf0e10cSrcweir
31cdf0e10cSrcweir #include "osl/diagnose.h"
32cdf0e10cSrcweir #include "rtl/strbuf.hxx"
33cdf0e10cSrcweir #include "rtl/textenc.h"
34cdf0e10cSrcweir #include "rtl/textcvt.h"
35cdf0e10cSrcweir #include "rtl/uri.h"
36cdf0e10cSrcweir #include "rtl/ustrbuf.h"
37cdf0e10cSrcweir #include "rtl/ustrbuf.hxx"
38cdf0e10cSrcweir #include "rtl/ustring.h"
39cdf0e10cSrcweir #include "rtl/ustring.hxx"
40cdf0e10cSrcweir #include "sal/types.h"
41cdf0e10cSrcweir
42cdf0e10cSrcweir #include <cstddef>
43cdf0e10cSrcweir
44cdf0e10cSrcweir namespace {
45cdf0e10cSrcweir
46cdf0e10cSrcweir std::size_t const nCharClassSize = 128;
47cdf0e10cSrcweir
48cdf0e10cSrcweir sal_Unicode const cEscapePrefix = 0x25; // '%'
49cdf0e10cSrcweir
isDigit(sal_uInt32 nUtf32)50cdf0e10cSrcweir inline bool isDigit(sal_uInt32 nUtf32)
51cdf0e10cSrcweir {
52cdf0e10cSrcweir return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9'
53cdf0e10cSrcweir }
54cdf0e10cSrcweir
isAlpha(sal_uInt32 nUtf32)55cdf0e10cSrcweir inline bool isAlpha(sal_uInt32 nUtf32)
56cdf0e10cSrcweir {
57cdf0e10cSrcweir // 'A'--'Z', 'a'--'z'
58cdf0e10cSrcweir return (
59cdf0e10cSrcweir (nUtf32 >= 0x41 && nUtf32 <= 0x5A) ||
60cdf0e10cSrcweir (nUtf32 >= 0x61 && nUtf32 <= 0x7A)
61cdf0e10cSrcweir );
62cdf0e10cSrcweir }
63cdf0e10cSrcweir
isHighSurrogate(sal_uInt32 nUtf16)64cdf0e10cSrcweir inline bool isHighSurrogate(sal_uInt32 nUtf16)
65cdf0e10cSrcweir {
66cdf0e10cSrcweir return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
67cdf0e10cSrcweir }
68cdf0e10cSrcweir
isLowSurrogate(sal_uInt32 nUtf16)69cdf0e10cSrcweir inline bool isLowSurrogate(sal_uInt32 nUtf16)
70cdf0e10cSrcweir {
71cdf0e10cSrcweir return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
72cdf0e10cSrcweir }
73cdf0e10cSrcweir
combineSurrogates(sal_uInt32 high,sal_uInt32 low)74cdf0e10cSrcweir inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
75cdf0e10cSrcweir {
76cdf0e10cSrcweir return SAL_RTL_COMBINE_SURROGATES(high, low);
77cdf0e10cSrcweir }
78cdf0e10cSrcweir
getHexWeight(sal_uInt32 nUtf32)79cdf0e10cSrcweir inline int getHexWeight(sal_uInt32 nUtf32)
80cdf0e10cSrcweir {
81cdf0e10cSrcweir return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
82cdf0e10cSrcweir static_cast< int >(nUtf32 - 0x30) :
83cdf0e10cSrcweir nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
84cdf0e10cSrcweir static_cast< int >(nUtf32 - 0x41 + 10) :
85cdf0e10cSrcweir nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
86cdf0e10cSrcweir static_cast< int >(nUtf32 - 0x61 + 10) :
87cdf0e10cSrcweir -1; // not a hex digit
88cdf0e10cSrcweir }
89cdf0e10cSrcweir
isValid(sal_Bool const * pCharClass,sal_uInt32 nUtf32)90cdf0e10cSrcweir inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
91cdf0e10cSrcweir {
92cdf0e10cSrcweir return nUtf32 < nCharClassSize && pCharClass[nUtf32];
93cdf0e10cSrcweir }
94cdf0e10cSrcweir
writeUnicode(rtl_uString ** pBuffer,sal_Int32 * pCapacity,sal_Unicode cChar)95cdf0e10cSrcweir inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
96cdf0e10cSrcweir sal_Unicode cChar)
97cdf0e10cSrcweir {
98cdf0e10cSrcweir rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
99cdf0e10cSrcweir }
100cdf0e10cSrcweir
// Tells how readUcs4 obtained the value it returned.
enum EscapeType
{
    EscapeNo = 0,   // read literally, not from an escape sequence
    EscapeChar = 1, // decoded from one or more escape sequences
    EscapeOctet = 2 // a single escaped octet that could not be decoded
                    // into a character
};
107cdf0e10cSrcweir
108cdf0e10cSrcweir /* Read any of the following:
109cdf0e10cSrcweir
110cdf0e10cSrcweir - sequence of escape sequences representing character from eCharset,
111cdf0e10cSrcweir translated to single UCS4 character; or
112cdf0e10cSrcweir
113cdf0e10cSrcweir - pair of UTF-16 surrogates, translated to single UCS4 character; or
114cdf0e10cSrcweir
115cdf0e10cSrcweir _ single UTF-16 character, extended to UCS4 character.
116cdf0e10cSrcweir */
readUcs4(sal_Unicode const ** pBegin,sal_Unicode const * pEnd,bool bEncoded,rtl_TextEncoding eCharset,EscapeType * pType)117cdf0e10cSrcweir sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
118cdf0e10cSrcweir bool bEncoded, rtl_TextEncoding eCharset,
119cdf0e10cSrcweir EscapeType * pType)
120cdf0e10cSrcweir {
121cdf0e10cSrcweir sal_uInt32 nChar = *(*pBegin)++;
122cdf0e10cSrcweir int nWeight1;
123cdf0e10cSrcweir int nWeight2;
124cdf0e10cSrcweir if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
125cdf0e10cSrcweir && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
126cdf0e10cSrcweir && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
127cdf0e10cSrcweir {
128cdf0e10cSrcweir *pBegin += 2;
129cdf0e10cSrcweir nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
130cdf0e10cSrcweir if (nChar <= 0x7F)
131cdf0e10cSrcweir *pType = EscapeChar;
132cdf0e10cSrcweir else if (eCharset == RTL_TEXTENCODING_UTF8)
133cdf0e10cSrcweir {
134cdf0e10cSrcweir if (nChar >= 0xC0 && nChar <= 0xF4)
135cdf0e10cSrcweir {
136cdf0e10cSrcweir sal_uInt32 nEncoded;
137cdf0e10cSrcweir int nShift;
138cdf0e10cSrcweir sal_uInt32 nMin;
139cdf0e10cSrcweir if (nChar <= 0xDF)
140cdf0e10cSrcweir {
141cdf0e10cSrcweir nEncoded = (nChar & 0x1F) << 6;
142cdf0e10cSrcweir nShift = 0;
143cdf0e10cSrcweir nMin = 0x80;
144cdf0e10cSrcweir }
145cdf0e10cSrcweir else if (nChar <= 0xEF)
146cdf0e10cSrcweir {
147cdf0e10cSrcweir nEncoded = (nChar & 0x0F) << 12;
148cdf0e10cSrcweir nShift = 6;
149cdf0e10cSrcweir nMin = 0x800;
150cdf0e10cSrcweir }
151cdf0e10cSrcweir else
152cdf0e10cSrcweir {
153cdf0e10cSrcweir nEncoded = (nChar & 0x07) << 18;
154cdf0e10cSrcweir nShift = 12;
155cdf0e10cSrcweir nMin = 0x10000;
156cdf0e10cSrcweir }
157cdf0e10cSrcweir sal_Unicode const * p = *pBegin;
158cdf0e10cSrcweir bool bUTF8 = true;
159cdf0e10cSrcweir for (; nShift >= 0; nShift -= 6)
160cdf0e10cSrcweir {
161cdf0e10cSrcweir if (pEnd - p < 3 || p[0] != cEscapePrefix
162cdf0e10cSrcweir || (nWeight1 = getHexWeight(p[1])) < 8
163cdf0e10cSrcweir || nWeight1 > 11
164cdf0e10cSrcweir || (nWeight2 = getHexWeight(p[2])) < 0)
165cdf0e10cSrcweir {
166cdf0e10cSrcweir bUTF8 = sal_False;
167cdf0e10cSrcweir break;
168cdf0e10cSrcweir }
169cdf0e10cSrcweir p += 3;
170cdf0e10cSrcweir nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
171cdf0e10cSrcweir }
172cdf0e10cSrcweir if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
173cdf0e10cSrcweir && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
174cdf0e10cSrcweir {
175cdf0e10cSrcweir *pBegin = p;
176cdf0e10cSrcweir *pType = EscapeChar;
177cdf0e10cSrcweir return nEncoded;
178cdf0e10cSrcweir }
179cdf0e10cSrcweir }
180cdf0e10cSrcweir *pType = EscapeOctet;
181cdf0e10cSrcweir }
182cdf0e10cSrcweir else
183cdf0e10cSrcweir {
184cdf0e10cSrcweir rtl::OStringBuffer aBuf;
185cdf0e10cSrcweir aBuf.append(static_cast< char >(nChar));
186cdf0e10cSrcweir rtl_TextToUnicodeConverter aConverter
187cdf0e10cSrcweir = rtl_createTextToUnicodeConverter(eCharset);
188cdf0e10cSrcweir sal_Unicode const * p = *pBegin;
189cdf0e10cSrcweir for (;;)
190cdf0e10cSrcweir {
191cdf0e10cSrcweir sal_Unicode aDst[2];
192cdf0e10cSrcweir sal_uInt32 nInfo;
193cdf0e10cSrcweir sal_Size nConverted;
194cdf0e10cSrcweir sal_Size nDstSize = rtl_convertTextToUnicode(
195cdf0e10cSrcweir aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
196cdf0e10cSrcweir sizeof aDst / sizeof aDst[0],
197cdf0e10cSrcweir (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
198cdf0e10cSrcweir | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
199cdf0e10cSrcweir | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
200cdf0e10cSrcweir &nInfo, &nConverted);
201cdf0e10cSrcweir if (nInfo == 0)
202cdf0e10cSrcweir {
203cdf0e10cSrcweir OSL_ASSERT(
204cdf0e10cSrcweir nConverted
205cdf0e10cSrcweir == sal::static_int_cast< sal_uInt32 >(
206cdf0e10cSrcweir aBuf.getLength()));
207cdf0e10cSrcweir rtl_destroyTextToUnicodeConverter(aConverter);
208cdf0e10cSrcweir *pBegin = p;
209cdf0e10cSrcweir *pType = EscapeChar;
210cdf0e10cSrcweir OSL_ASSERT(
211cdf0e10cSrcweir nDstSize == 1
212cdf0e10cSrcweir || (nDstSize == 2 && isHighSurrogate(aDst[0])
213cdf0e10cSrcweir && isLowSurrogate(aDst[1])));
214cdf0e10cSrcweir return nDstSize == 1
215cdf0e10cSrcweir ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
216cdf0e10cSrcweir }
217cdf0e10cSrcweir else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
218cdf0e10cSrcweir && pEnd - p >= 3 && p[0] == cEscapePrefix
219cdf0e10cSrcweir && (nWeight1 = getHexWeight(p[1])) >= 0
220cdf0e10cSrcweir && (nWeight2 = getHexWeight(p[2])) >= 0)
221cdf0e10cSrcweir {
222cdf0e10cSrcweir p += 3;
223cdf0e10cSrcweir aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
224cdf0e10cSrcweir }
225cdf0e10cSrcweir else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
226cdf0e10cSrcweir && p != pEnd && *p <= 0x7F)
227cdf0e10cSrcweir {
228cdf0e10cSrcweir aBuf.append(static_cast< char >(*p++));
229cdf0e10cSrcweir }
230cdf0e10cSrcweir else
231cdf0e10cSrcweir {
232cdf0e10cSrcweir OSL_ASSERT(
233cdf0e10cSrcweir (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
234cdf0e10cSrcweir == 0);
235cdf0e10cSrcweir break;
236cdf0e10cSrcweir }
237cdf0e10cSrcweir }
238cdf0e10cSrcweir rtl_destroyTextToUnicodeConverter(aConverter);
239cdf0e10cSrcweir *pType = EscapeOctet;
240cdf0e10cSrcweir }
241cdf0e10cSrcweir return nChar;
242cdf0e10cSrcweir }
243cdf0e10cSrcweir else
244cdf0e10cSrcweir {
245cdf0e10cSrcweir *pType = EscapeNo;
246cdf0e10cSrcweir return isHighSurrogate(nChar) && *pBegin < pEnd
247cdf0e10cSrcweir && isLowSurrogate(**pBegin) ?
248cdf0e10cSrcweir combineSurrogates(nChar, *(*pBegin)++) : nChar;
249cdf0e10cSrcweir }
250cdf0e10cSrcweir }
251cdf0e10cSrcweir
writeUcs4(rtl_uString ** pBuffer,sal_Int32 * pCapacity,sal_uInt32 nUtf32)252cdf0e10cSrcweir void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
253cdf0e10cSrcweir {
254cdf0e10cSrcweir OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
255cdf0e10cSrcweir if (nUtf32 <= 0xFFFF) {
256cdf0e10cSrcweir writeUnicode(
257cdf0e10cSrcweir pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
258cdf0e10cSrcweir } else {
259cdf0e10cSrcweir nUtf32 -= 0x10000;
260cdf0e10cSrcweir writeUnicode(
261cdf0e10cSrcweir pBuffer, pCapacity,
262cdf0e10cSrcweir static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
263cdf0e10cSrcweir writeUnicode(
264cdf0e10cSrcweir pBuffer, pCapacity,
265cdf0e10cSrcweir static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
266cdf0e10cSrcweir }
267cdf0e10cSrcweir }
268cdf0e10cSrcweir
writeEscapeOctet(rtl_uString ** pBuffer,sal_Int32 * pCapacity,sal_uInt32 nOctet)269cdf0e10cSrcweir void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
270cdf0e10cSrcweir sal_uInt32 nOctet)
271cdf0e10cSrcweir {
272cdf0e10cSrcweir OSL_ENSURE(nOctet <= 0xFF, "bad octet");
273cdf0e10cSrcweir
274cdf0e10cSrcweir static sal_Unicode const aHex[16]
275cdf0e10cSrcweir = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
276cdf0e10cSrcweir 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
277cdf0e10cSrcweir
278cdf0e10cSrcweir writeUnicode(pBuffer, pCapacity, cEscapePrefix);
279cdf0e10cSrcweir writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
280cdf0e10cSrcweir writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
281cdf0e10cSrcweir }
282cdf0e10cSrcweir
writeEscapeChar(rtl_uString ** pBuffer,sal_Int32 * pCapacity,sal_uInt32 nUtf32,rtl_TextEncoding eCharset,bool bStrict)283cdf0e10cSrcweir bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
284cdf0e10cSrcweir sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
285cdf0e10cSrcweir {
286cdf0e10cSrcweir OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
287cdf0e10cSrcweir if (eCharset == RTL_TEXTENCODING_UTF8) {
288cdf0e10cSrcweir if (nUtf32 < 0x80)
289cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, nUtf32);
290cdf0e10cSrcweir else if (nUtf32 < 0x800)
291cdf0e10cSrcweir {
292cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
293cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
294cdf0e10cSrcweir }
295cdf0e10cSrcweir else if (nUtf32 < 0x10000)
296cdf0e10cSrcweir {
297cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
298cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
299cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
300cdf0e10cSrcweir }
301cdf0e10cSrcweir else
302cdf0e10cSrcweir {
303cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
304cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
305cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
306cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
307cdf0e10cSrcweir }
308cdf0e10cSrcweir } else {
309cdf0e10cSrcweir rtl_UnicodeToTextConverter aConverter
310cdf0e10cSrcweir = rtl_createUnicodeToTextConverter(eCharset);
311cdf0e10cSrcweir sal_Unicode aSrc[2];
312cdf0e10cSrcweir sal_Size nSrcSize;
313cdf0e10cSrcweir if (nUtf32 <= 0xFFFF)
314cdf0e10cSrcweir {
315cdf0e10cSrcweir aSrc[0] = static_cast< sal_Unicode >(nUtf32);
316cdf0e10cSrcweir nSrcSize = 1;
317cdf0e10cSrcweir }
318cdf0e10cSrcweir else
319cdf0e10cSrcweir {
320cdf0e10cSrcweir aSrc[0] = static_cast< sal_Unicode >(
321cdf0e10cSrcweir ((nUtf32 - 0x10000) >> 10) | 0xD800);
322cdf0e10cSrcweir aSrc[1] = static_cast< sal_Unicode >(
323cdf0e10cSrcweir ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
324cdf0e10cSrcweir nSrcSize = 2;
325cdf0e10cSrcweir }
326cdf0e10cSrcweir sal_Char aDst[32]; // FIXME random value
327cdf0e10cSrcweir sal_uInt32 nInfo;
328cdf0e10cSrcweir sal_Size nConverted;
329cdf0e10cSrcweir sal_Size nDstSize = rtl_convertUnicodeToText(
330cdf0e10cSrcweir aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
331cdf0e10cSrcweir RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
332cdf0e10cSrcweir | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
333cdf0e10cSrcweir | RTL_UNICODETOTEXT_FLAGS_FLUSH,
334cdf0e10cSrcweir &nInfo, &nConverted);
335cdf0e10cSrcweir OSL_ASSERT((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
336cdf0e10cSrcweir rtl_destroyUnicodeToTextConverter(aConverter);
337cdf0e10cSrcweir if (nInfo == 0) {
338cdf0e10cSrcweir OSL_ENSURE(nConverted == nSrcSize, "bad rtl_convertUnicodeToText");
339cdf0e10cSrcweir for (sal_Size i = 0; i < nDstSize; ++i)
340cdf0e10cSrcweir writeEscapeOctet(pBuffer, pCapacity,
341cdf0e10cSrcweir static_cast< unsigned char >(aDst[i]));
342cdf0e10cSrcweir // FIXME all octets are escaped, even if there is no need
343cdf0e10cSrcweir } else {
344cdf0e10cSrcweir if (bStrict) {
345cdf0e10cSrcweir return false;
346cdf0e10cSrcweir } else {
347cdf0e10cSrcweir writeUcs4(pBuffer, pCapacity, nUtf32);
348cdf0e10cSrcweir }
349cdf0e10cSrcweir }
350cdf0e10cSrcweir }
351cdf0e10cSrcweir return true;
352cdf0e10cSrcweir }
353cdf0e10cSrcweir
354cdf0e10cSrcweir struct Component
355cdf0e10cSrcweir {
356cdf0e10cSrcweir sal_Unicode const * pBegin;
357cdf0e10cSrcweir sal_Unicode const * pEnd;
358cdf0e10cSrcweir
Component__anon4ac86d970111::Component359cdf0e10cSrcweir inline Component(): pBegin(0) {}
360cdf0e10cSrcweir
isPresent__anon4ac86d970111::Component361cdf0e10cSrcweir inline bool isPresent() const { return pBegin != 0; }
362cdf0e10cSrcweir
363cdf0e10cSrcweir inline sal_Int32 getLength() const;
364cdf0e10cSrcweir };
365cdf0e10cSrcweir
getLength() const366cdf0e10cSrcweir inline sal_Int32 Component::getLength() const
367cdf0e10cSrcweir {
368cdf0e10cSrcweir OSL_ENSURE(isPresent(), "taking length of non-present component");
369cdf0e10cSrcweir return static_cast< sal_Int32 >(pEnd - pBegin);
370cdf0e10cSrcweir }
371cdf0e10cSrcweir
372cdf0e10cSrcweir struct Components
373cdf0e10cSrcweir {
374cdf0e10cSrcweir Component aScheme;
375cdf0e10cSrcweir Component aAuthority;
376cdf0e10cSrcweir Component aPath;
377cdf0e10cSrcweir Component aQuery;
378cdf0e10cSrcweir Component aFragment;
379cdf0e10cSrcweir };
380cdf0e10cSrcweir
parseUriRef(rtl_uString const * pUriRef,Components * pComponents)381cdf0e10cSrcweir void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
382cdf0e10cSrcweir {
383cdf0e10cSrcweir // This algorithm is liberal and accepts various forms of illegal input.
384cdf0e10cSrcweir
385cdf0e10cSrcweir sal_Unicode const * pBegin = pUriRef->buffer;
386cdf0e10cSrcweir sal_Unicode const * pEnd = pBegin + pUriRef->length;
387cdf0e10cSrcweir sal_Unicode const * pPos = pBegin;
388cdf0e10cSrcweir
389cdf0e10cSrcweir if (pPos != pEnd && isAlpha(*pPos))
390cdf0e10cSrcweir for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
391cdf0e10cSrcweir if (*p == ':')
392cdf0e10cSrcweir {
393cdf0e10cSrcweir pComponents->aScheme.pBegin = pBegin;
394cdf0e10cSrcweir pComponents->aScheme.pEnd = ++p;
395cdf0e10cSrcweir pPos = p;
396cdf0e10cSrcweir break;
397cdf0e10cSrcweir }
398cdf0e10cSrcweir else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-'
399cdf0e10cSrcweir && *p != '.')
400cdf0e10cSrcweir break;
401cdf0e10cSrcweir
402cdf0e10cSrcweir if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
403cdf0e10cSrcweir {
404cdf0e10cSrcweir pComponents->aAuthority.pBegin = pPos;
405cdf0e10cSrcweir pPos += 2;
406cdf0e10cSrcweir while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
407cdf0e10cSrcweir ++pPos;
408cdf0e10cSrcweir pComponents->aAuthority.pEnd = pPos;
409cdf0e10cSrcweir }
410cdf0e10cSrcweir
411cdf0e10cSrcweir pComponents->aPath.pBegin = pPos;
412cdf0e10cSrcweir while (pPos != pEnd && *pPos != '?' && * pPos != '#')
413cdf0e10cSrcweir ++pPos;
414cdf0e10cSrcweir pComponents->aPath.pEnd = pPos;
415cdf0e10cSrcweir
416cdf0e10cSrcweir if (pPos != pEnd && *pPos == '?')
417cdf0e10cSrcweir {
418cdf0e10cSrcweir pComponents->aQuery.pBegin = pPos++;
419cdf0e10cSrcweir while (pPos != pEnd && * pPos != '#')
420cdf0e10cSrcweir ++pPos;
421cdf0e10cSrcweir pComponents->aQuery.pEnd = pPos;
422cdf0e10cSrcweir }
423cdf0e10cSrcweir
424cdf0e10cSrcweir if (pPos != pEnd)
425cdf0e10cSrcweir {
426cdf0e10cSrcweir OSL_ASSERT(*pPos == '#');
427cdf0e10cSrcweir pComponents->aFragment.pBegin = pPos;
428cdf0e10cSrcweir pComponents->aFragment.pEnd = pEnd;
429cdf0e10cSrcweir }
430cdf0e10cSrcweir }
431cdf0e10cSrcweir
joinPaths(Component const & rBasePath,Component const & rRelPath)432cdf0e10cSrcweir rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath)
433cdf0e10cSrcweir {
434cdf0e10cSrcweir OSL_ASSERT(rBasePath.isPresent() && *rBasePath.pBegin == '/');
435cdf0e10cSrcweir OSL_ASSERT(rRelPath.isPresent());
436cdf0e10cSrcweir
437cdf0e10cSrcweir // The invariant of aBuffer is that it always starts and ends with a slash
438cdf0e10cSrcweir // (until probably right at the end of the algorithm, when the last segment
439cdf0e10cSrcweir // of rRelPath is added, which does not necessarily end in a slash):
440cdf0e10cSrcweir rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
441cdf0e10cSrcweir // XXX numeric overflow
442cdf0e10cSrcweir
443cdf0e10cSrcweir // Segments "." and ".." within rBasePath are not conisdered special (but
444cdf0e10cSrcweir // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
445cdf0e10cSrcweir // bit unclear about this point:
446cdf0e10cSrcweir sal_Int32 nFixed = 1;
447cdf0e10cSrcweir sal_Unicode const * p = rBasePath.pBegin + 1;
448cdf0e10cSrcweir for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
449cdf0e10cSrcweir if (*q == '/')
450cdf0e10cSrcweir {
451cdf0e10cSrcweir if (
452cdf0e10cSrcweir (q - p == 1 && p[0] == '.') ||
453cdf0e10cSrcweir (q - p == 2 && p[0] == '.' && p[1] == '.')
454cdf0e10cSrcweir )
455cdf0e10cSrcweir {
456cdf0e10cSrcweir nFixed = q + 1 - rBasePath.pBegin;
457cdf0e10cSrcweir }
458cdf0e10cSrcweir p = q + 1;
459cdf0e10cSrcweir }
460cdf0e10cSrcweir aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin);
461cdf0e10cSrcweir
462cdf0e10cSrcweir p = rRelPath.pBegin;
463cdf0e10cSrcweir if (p != rRelPath.pEnd)
464cdf0e10cSrcweir for (;;)
465cdf0e10cSrcweir {
466cdf0e10cSrcweir sal_Unicode const * q = p;
467cdf0e10cSrcweir sal_Unicode const * r;
468cdf0e10cSrcweir for (;;)
469cdf0e10cSrcweir {
470cdf0e10cSrcweir if (q == rRelPath.pEnd)
471cdf0e10cSrcweir {
472cdf0e10cSrcweir r = q;
473cdf0e10cSrcweir break;
474cdf0e10cSrcweir }
475cdf0e10cSrcweir if (*q == '/')
476cdf0e10cSrcweir {
477cdf0e10cSrcweir r = q + 1;
478cdf0e10cSrcweir break;
479cdf0e10cSrcweir }
480cdf0e10cSrcweir ++q;
481cdf0e10cSrcweir }
482cdf0e10cSrcweir if (q - p == 2 && p[0] == '.' && p[1] == '.')
483cdf0e10cSrcweir {
484cdf0e10cSrcweir // Erroneous excess segments ".." within rRelPath are left
485cdf0e10cSrcweir // intact, as the examples in RFC 2396, section C.2, suggest:
486cdf0e10cSrcweir sal_Int32 i = aBuffer.getLength() - 1;
487cdf0e10cSrcweir if (i < nFixed)
488cdf0e10cSrcweir {
489cdf0e10cSrcweir aBuffer.append(p, r - p);
490cdf0e10cSrcweir nFixed += 3;
491cdf0e10cSrcweir }
492cdf0e10cSrcweir else
493cdf0e10cSrcweir {
494cdf0e10cSrcweir while (aBuffer.charAt(i - 1) != '/')
495cdf0e10cSrcweir --i;
496cdf0e10cSrcweir aBuffer.setLength(i);
497cdf0e10cSrcweir }
498cdf0e10cSrcweir }
499cdf0e10cSrcweir else if (q - p != 1 || *p != '.')
500cdf0e10cSrcweir aBuffer.append(p, r - p);
501cdf0e10cSrcweir if (q == rRelPath.pEnd)
502cdf0e10cSrcweir break;
503cdf0e10cSrcweir p = q + 1;
504cdf0e10cSrcweir }
505cdf0e10cSrcweir
506cdf0e10cSrcweir return aBuffer.makeStringAndClear();
507cdf0e10cSrcweir }
508cdf0e10cSrcweir
509cdf0e10cSrcweir }
510cdf0e10cSrcweir
rtl_getUriCharClass(rtl_UriCharClass eCharClass)511cdf0e10cSrcweir sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
512cdf0e10cSrcweir SAL_THROW_EXTERN_C()
513cdf0e10cSrcweir {
514cdf0e10cSrcweir static sal_Bool const aCharClass[][nCharClassSize]
515cdf0e10cSrcweir = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
516cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
518cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
519cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
520cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
521cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
522cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
523cdf0e10cSrcweir },
524cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
525cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
527cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
528cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
529cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
530cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
531cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
532cdf0e10cSrcweir },
533cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
534cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
536cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
537cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
538cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
539cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
540cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
541cdf0e10cSrcweir },
542cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
543cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
544cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
545cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
546cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
547cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
548cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
549cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
550cdf0e10cSrcweir },
551cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
552cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
553cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
554cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
555cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
556cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
557cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
558cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
559cdf0e10cSrcweir },
560cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
561cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
563cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
564cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
565cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
566cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
567cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
568cdf0e10cSrcweir },
569cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
570cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
572cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
573cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
574cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
575cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
576cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
577cdf0e10cSrcweir },
578cdf0e10cSrcweir { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
579cdf0e10cSrcweir 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
580cdf0e10cSrcweir 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
581cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
582cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
583cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
584cdf0e10cSrcweir 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
585cdf0e10cSrcweir 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
586cdf0e10cSrcweir }};
587cdf0e10cSrcweir OSL_ENSURE(
588cdf0e10cSrcweir (eCharClass >= 0
589cdf0e10cSrcweir && (sal::static_int_cast< std::size_t >(eCharClass)
590cdf0e10cSrcweir < sizeof aCharClass / sizeof aCharClass[0])),
591cdf0e10cSrcweir "bad eCharClass");
592cdf0e10cSrcweir return aCharClass[eCharClass];
593cdf0e10cSrcweir }
594cdf0e10cSrcweir
rtl_uriEncode(rtl_uString * pText,sal_Bool const * pCharClass,rtl_UriEncodeMechanism eMechanism,rtl_TextEncoding eCharset,rtl_uString ** pResult)595cdf0e10cSrcweir void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
596cdf0e10cSrcweir rtl_UriEncodeMechanism eMechanism,
597cdf0e10cSrcweir rtl_TextEncoding eCharset, rtl_uString ** pResult)
598cdf0e10cSrcweir SAL_THROW_EXTERN_C()
599cdf0e10cSrcweir {
600cdf0e10cSrcweir OSL_ENSURE(!pCharClass[0x25], "bad pCharClass");
601cdf0e10cSrcweir // make sure the percent sign is encoded...
602cdf0e10cSrcweir
603cdf0e10cSrcweir sal_Unicode const * p = pText->buffer;
604cdf0e10cSrcweir sal_Unicode const * pEnd = p + pText->length;
605cdf0e10cSrcweir sal_Int32 nCapacity = 0;
606cdf0e10cSrcweir rtl_uString_new(pResult);
607cdf0e10cSrcweir while (p < pEnd)
608cdf0e10cSrcweir {
609cdf0e10cSrcweir EscapeType eType;
610cdf0e10cSrcweir sal_uInt32 nUtf32 = readUcs4(
611cdf0e10cSrcweir &p, pEnd,
612cdf0e10cSrcweir (eMechanism == rtl_UriEncodeKeepEscapes
613cdf0e10cSrcweir || eMechanism == rtl_UriEncodeCheckEscapes
614cdf0e10cSrcweir || eMechanism == rtl_UriEncodeStrictKeepEscapes),
615cdf0e10cSrcweir eCharset, &eType);
616cdf0e10cSrcweir switch (eType)
617cdf0e10cSrcweir {
618cdf0e10cSrcweir case EscapeNo:
619cdf0e10cSrcweir if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
620cdf0e10cSrcweir writeUnicode(pResult, &nCapacity,
621cdf0e10cSrcweir static_cast< sal_Unicode >(nUtf32));
622cdf0e10cSrcweir else if (!writeEscapeChar(
623cdf0e10cSrcweir pResult, &nCapacity, nUtf32, eCharset,
624cdf0e10cSrcweir (eMechanism == rtl_UriEncodeStrict
625cdf0e10cSrcweir || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
626cdf0e10cSrcweir {
627cdf0e10cSrcweir rtl_uString_new(pResult);
628cdf0e10cSrcweir return;
629cdf0e10cSrcweir }
630cdf0e10cSrcweir break;
631cdf0e10cSrcweir
632cdf0e10cSrcweir case EscapeChar:
633cdf0e10cSrcweir if (eMechanism == rtl_UriEncodeCheckEscapes
634cdf0e10cSrcweir && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
635cdf0e10cSrcweir writeUnicode(pResult, &nCapacity,
636cdf0e10cSrcweir static_cast< sal_Unicode >(nUtf32));
637cdf0e10cSrcweir else if (!writeEscapeChar(
638cdf0e10cSrcweir pResult, &nCapacity, nUtf32, eCharset,
639cdf0e10cSrcweir (eMechanism == rtl_UriEncodeStrict
640cdf0e10cSrcweir || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
641cdf0e10cSrcweir {
642cdf0e10cSrcweir rtl_uString_new(pResult);
643cdf0e10cSrcweir return;
644cdf0e10cSrcweir }
645cdf0e10cSrcweir break;
646cdf0e10cSrcweir
647cdf0e10cSrcweir case EscapeOctet:
648cdf0e10cSrcweir writeEscapeOctet(pResult, &nCapacity, nUtf32);
649cdf0e10cSrcweir break;
650cdf0e10cSrcweir }
651cdf0e10cSrcweir }
652cdf0e10cSrcweir }
653cdf0e10cSrcweir
rtl_uriDecode(rtl_uString * pText,rtl_UriDecodeMechanism eMechanism,rtl_TextEncoding eCharset,rtl_uString ** pResult)654cdf0e10cSrcweir void SAL_CALL rtl_uriDecode(rtl_uString * pText,
655cdf0e10cSrcweir rtl_UriDecodeMechanism eMechanism,
656cdf0e10cSrcweir rtl_TextEncoding eCharset, rtl_uString ** pResult)
657cdf0e10cSrcweir SAL_THROW_EXTERN_C()
658cdf0e10cSrcweir {
659cdf0e10cSrcweir switch (eMechanism)
660cdf0e10cSrcweir {
661cdf0e10cSrcweir case rtl_UriDecodeNone:
662cdf0e10cSrcweir rtl_uString_assign(pResult, pText);
663cdf0e10cSrcweir break;
664cdf0e10cSrcweir
665cdf0e10cSrcweir case rtl_UriDecodeToIuri:
666cdf0e10cSrcweir eCharset = RTL_TEXTENCODING_UTF8;
667cdf0e10cSrcweir default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
668cdf0e10cSrcweir {
669cdf0e10cSrcweir sal_Unicode const * p = pText->buffer;
670cdf0e10cSrcweir sal_Unicode const * pEnd = p + pText->length;
671cdf0e10cSrcweir sal_Int32 nCapacity = 0;
672cdf0e10cSrcweir rtl_uString_new(pResult);
673cdf0e10cSrcweir while (p < pEnd)
674cdf0e10cSrcweir {
675cdf0e10cSrcweir EscapeType eType;
676cdf0e10cSrcweir sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
677cdf0e10cSrcweir switch (eType)
678cdf0e10cSrcweir {
679cdf0e10cSrcweir case EscapeChar:
680cdf0e10cSrcweir if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
681cdf0e10cSrcweir {
682cdf0e10cSrcweir writeEscapeOctet(pResult, &nCapacity, nUtf32);
683cdf0e10cSrcweir break;
684cdf0e10cSrcweir }
685cdf0e10cSrcweir case EscapeNo:
686cdf0e10cSrcweir writeUcs4(pResult, &nCapacity, nUtf32);
687cdf0e10cSrcweir break;
688cdf0e10cSrcweir
689cdf0e10cSrcweir case EscapeOctet:
690cdf0e10cSrcweir if (eMechanism == rtl_UriDecodeStrict) {
691cdf0e10cSrcweir rtl_uString_new(pResult);
692cdf0e10cSrcweir return;
693cdf0e10cSrcweir }
694cdf0e10cSrcweir writeEscapeOctet(pResult, &nCapacity, nUtf32);
695cdf0e10cSrcweir break;
696cdf0e10cSrcweir }
697cdf0e10cSrcweir }
698cdf0e10cSrcweir }
699cdf0e10cSrcweir break;
700cdf0e10cSrcweir }
701cdf0e10cSrcweir }
702cdf0e10cSrcweir
rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,rtl_uString * pRelUriRef,rtl_uString ** pResult,rtl_uString ** pException)703cdf0e10cSrcweir sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
704cdf0e10cSrcweir rtl_uString * pRelUriRef,
705cdf0e10cSrcweir rtl_uString ** pResult,
706cdf0e10cSrcweir rtl_uString ** pException)
707cdf0e10cSrcweir SAL_THROW_EXTERN_C()
708cdf0e10cSrcweir {
709cdf0e10cSrcweir // If pRelUriRef starts with a scheme component it is an absolute URI
710cdf0e10cSrcweir // reference, and we are done (i.e., this algorithm does not support
711cdf0e10cSrcweir // backwards-compatible relative URIs starting with a scheme component, see
712cdf0e10cSrcweir // RFC 2396, section 5.2, step 3):
713cdf0e10cSrcweir Components aRelComponents;
714cdf0e10cSrcweir parseUriRef(pRelUriRef, &aRelComponents);
715cdf0e10cSrcweir if (aRelComponents.aScheme.isPresent())
716cdf0e10cSrcweir {
717cdf0e10cSrcweir rtl_uString_assign(pResult, pRelUriRef);
718cdf0e10cSrcweir return true;
719cdf0e10cSrcweir }
720cdf0e10cSrcweir
721cdf0e10cSrcweir // Parse pBaseUriRef; if the scheme component is not present or not valid,
722cdf0e10cSrcweir // or the path component is not empty and starts with anything but a slash,
723cdf0e10cSrcweir // an exception is raised:
724cdf0e10cSrcweir Components aBaseComponents;
725cdf0e10cSrcweir parseUriRef(pBaseUriRef, &aBaseComponents);
726cdf0e10cSrcweir if (!aBaseComponents.aScheme.isPresent())
727cdf0e10cSrcweir {
728cdf0e10cSrcweir rtl::OUString aMessage(pBaseUriRef);
729cdf0e10cSrcweir aMessage += rtl::OUString(
730cdf0e10cSrcweir RTL_CONSTASCII_USTRINGPARAM(
731cdf0e10cSrcweir " does not start with a scheme component"));
732cdf0e10cSrcweir rtl_uString_assign(pException,
733cdf0e10cSrcweir const_cast< rtl::OUString & >(aMessage).pData);
734cdf0e10cSrcweir return false;
735cdf0e10cSrcweir }
736cdf0e10cSrcweir if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
737cdf0e10cSrcweir && *aBaseComponents.aPath.pBegin != '/')
738cdf0e10cSrcweir {
739cdf0e10cSrcweir rtl::OUString aMessage(pBaseUriRef);
740cdf0e10cSrcweir aMessage += rtl::OUString(
741cdf0e10cSrcweir RTL_CONSTASCII_USTRINGPARAM(
742cdf0e10cSrcweir "path component does not start with slash"));
743cdf0e10cSrcweir rtl_uString_assign(pException, aMessage.pData);
744cdf0e10cSrcweir return false;
745cdf0e10cSrcweir }
746cdf0e10cSrcweir
747cdf0e10cSrcweir // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
748cdf0e10cSrcweir // into an absolute one (if the relative URI is a reference to the "current
749cdf0e10cSrcweir // document," the "current document" is here taken to be the base URI):
750cdf0e10cSrcweir rtl::OUStringBuffer aBuffer;
751cdf0e10cSrcweir aBuffer.append(aBaseComponents.aScheme.pBegin,
752cdf0e10cSrcweir aBaseComponents.aScheme.getLength());
753cdf0e10cSrcweir if (aRelComponents.aAuthority.isPresent())
754cdf0e10cSrcweir {
755cdf0e10cSrcweir aBuffer.append(aRelComponents.aAuthority.pBegin,
756cdf0e10cSrcweir aRelComponents.aAuthority.getLength());
757cdf0e10cSrcweir aBuffer.append(aRelComponents.aPath.pBegin,
758cdf0e10cSrcweir aRelComponents.aPath.getLength());
759cdf0e10cSrcweir if (aRelComponents.aQuery.isPresent())
760cdf0e10cSrcweir aBuffer.append(aRelComponents.aQuery.pBegin,
761cdf0e10cSrcweir aRelComponents.aQuery.getLength());
762cdf0e10cSrcweir }
763cdf0e10cSrcweir else
764cdf0e10cSrcweir {
765cdf0e10cSrcweir if (aBaseComponents.aAuthority.isPresent())
766cdf0e10cSrcweir aBuffer.append(aBaseComponents.aAuthority.pBegin,
767cdf0e10cSrcweir aBaseComponents.aAuthority.getLength());
768cdf0e10cSrcweir if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
769cdf0e10cSrcweir && !aRelComponents.aQuery.isPresent())
770cdf0e10cSrcweir {
771cdf0e10cSrcweir aBuffer.append(aBaseComponents.aPath.pBegin,
772cdf0e10cSrcweir aBaseComponents.aPath.getLength());
773cdf0e10cSrcweir if (aBaseComponents.aQuery.isPresent())
774cdf0e10cSrcweir aBuffer.append(aBaseComponents.aQuery.pBegin,
775cdf0e10cSrcweir aBaseComponents.aQuery.getLength());
776cdf0e10cSrcweir }
777cdf0e10cSrcweir else
778cdf0e10cSrcweir {
779cdf0e10cSrcweir if (*aRelComponents.aPath.pBegin == '/')
780cdf0e10cSrcweir aBuffer.append(aRelComponents.aPath.pBegin,
781cdf0e10cSrcweir aRelComponents.aPath.getLength());
782cdf0e10cSrcweir else
783cdf0e10cSrcweir aBuffer.append(joinPaths(aBaseComponents.aPath,
784cdf0e10cSrcweir aRelComponents.aPath));
785cdf0e10cSrcweir if (aRelComponents.aQuery.isPresent())
786cdf0e10cSrcweir aBuffer.append(aRelComponents.aQuery.pBegin,
787cdf0e10cSrcweir aRelComponents.aQuery.getLength());
788cdf0e10cSrcweir }
789cdf0e10cSrcweir }
790cdf0e10cSrcweir if (aRelComponents.aFragment.isPresent())
791cdf0e10cSrcweir aBuffer.append(aRelComponents.aFragment.pBegin,
792cdf0e10cSrcweir aRelComponents.aFragment.getLength());
793cdf0e10cSrcweir rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
794cdf0e10cSrcweir return true;
795cdf0e10cSrcweir }
796