1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_sal.hxx"
26
27 #include "rtl/uri.h"
28
29 #include "surrogates.h"
30
31 #include "osl/diagnose.h"
32 #include "rtl/strbuf.hxx"
33 #include "rtl/textenc.h"
34 #include "rtl/textcvt.h"
35 #include "rtl/uri.h"
36 #include "rtl/ustrbuf.h"
37 #include "rtl/ustrbuf.hxx"
38 #include "rtl/ustring.h"
39 #include "rtl/ustring.hxx"
40 #include "sal/types.h"
41
42 #include <cstddef>
43
44 namespace {
45
46 std::size_t const nCharClassSize = 128;
47
48 sal_Unicode const cEscapePrefix = 0x25; // '%'
49
isDigit(sal_uInt32 nUtf32)50 inline bool isDigit(sal_uInt32 nUtf32)
51 {
52 return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9'
53 }
54
isAlpha(sal_uInt32 nUtf32)55 inline bool isAlpha(sal_uInt32 nUtf32)
56 {
57 // 'A'--'Z', 'a'--'z'
58 return (
59 (nUtf32 >= 0x41 && nUtf32 <= 0x5A) ||
60 (nUtf32 >= 0x61 && nUtf32 <= 0x7A)
61 );
62 }
63
isHighSurrogate(sal_uInt32 nUtf16)64 inline bool isHighSurrogate(sal_uInt32 nUtf16)
65 {
66 return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
67 }
68
isLowSurrogate(sal_uInt32 nUtf16)69 inline bool isLowSurrogate(sal_uInt32 nUtf16)
70 {
71 return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
72 }
73
combineSurrogates(sal_uInt32 high,sal_uInt32 low)74 inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
75 {
76 return SAL_RTL_COMBINE_SURROGATES(high, low);
77 }
78
getHexWeight(sal_uInt32 nUtf32)79 inline int getHexWeight(sal_uInt32 nUtf32)
80 {
81 return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
82 static_cast< int >(nUtf32 - 0x30) :
83 nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
84 static_cast< int >(nUtf32 - 0x41 + 10) :
85 nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
86 static_cast< int >(nUtf32 - 0x61 + 10) :
87 -1; // not a hex digit
88 }
89
isValid(sal_Bool const * pCharClass,sal_uInt32 nUtf32)90 inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
91 {
92 return nUtf32 < nCharClassSize && pCharClass[nUtf32];
93 }
94
writeUnicode(rtl_uString ** pBuffer,sal_Int32 * pCapacity,sal_Unicode cChar)95 inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
96 sal_Unicode cChar)
97 {
98 rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
99 }
100
// Classifies what readUcs4 consumed from its input.
enum EscapeType
{
    EscapeNo = 0,    // no escape sequence: a literal character was read
    EscapeChar = 1,  // escape sequence(s) that decoded to a character
    EscapeOctet = 2  // a single escape sequence returned as a raw octet,
                     // because it could not be decoded to a character
};
107
108 /* Read any of the following:
109
110 - sequence of escape sequences representing character from eCharset,
111 translated to single UCS4 character; or
112
113 - pair of UTF-16 surrogates, translated to single UCS4 character; or
114
115 _ single UTF-16 character, extended to UCS4 character.
116 */
readUcs4(sal_Unicode const ** pBegin,sal_Unicode const * pEnd,bool bEncoded,rtl_TextEncoding eCharset,EscapeType * pType)117 sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
118 bool bEncoded, rtl_TextEncoding eCharset,
119 EscapeType * pType)
120 {
121 sal_uInt32 nChar = *(*pBegin)++;
122 int nWeight1;
123 int nWeight2;
124 if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
125 && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
126 && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
127 {
128 *pBegin += 2;
129 nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
130 if (nChar <= 0x7F)
131 *pType = EscapeChar;
132 else if (eCharset == RTL_TEXTENCODING_UTF8)
133 {
134 if (nChar >= 0xC0 && nChar <= 0xF4)
135 {
136 sal_uInt32 nEncoded;
137 int nShift;
138 sal_uInt32 nMin;
139 if (nChar <= 0xDF)
140 {
141 nEncoded = (nChar & 0x1F) << 6;
142 nShift = 0;
143 nMin = 0x80;
144 }
145 else if (nChar <= 0xEF)
146 {
147 nEncoded = (nChar & 0x0F) << 12;
148 nShift = 6;
149 nMin = 0x800;
150 }
151 else
152 {
153 nEncoded = (nChar & 0x07) << 18;
154 nShift = 12;
155 nMin = 0x10000;
156 }
157 sal_Unicode const * p = *pBegin;
158 bool bUTF8 = true;
159 for (; nShift >= 0; nShift -= 6)
160 {
161 if (pEnd - p < 3 || p[0] != cEscapePrefix
162 || (nWeight1 = getHexWeight(p[1])) < 8
163 || nWeight1 > 11
164 || (nWeight2 = getHexWeight(p[2])) < 0)
165 {
166 bUTF8 = sal_False;
167 break;
168 }
169 p += 3;
170 nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
171 }
172 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
173 && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
174 {
175 *pBegin = p;
176 *pType = EscapeChar;
177 return nEncoded;
178 }
179 }
180 *pType = EscapeOctet;
181 }
182 else
183 {
184 rtl::OStringBuffer aBuf;
185 aBuf.append(static_cast< char >(nChar));
186 rtl_TextToUnicodeConverter aConverter
187 = rtl_createTextToUnicodeConverter(eCharset);
188 sal_Unicode const * p = *pBegin;
189 for (;;)
190 {
191 sal_Unicode aDst[2];
192 sal_uInt32 nInfo;
193 sal_Size nConverted;
194 sal_Size nDstSize = rtl_convertTextToUnicode(
195 aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
196 sizeof aDst / sizeof aDst[0],
197 (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
198 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
199 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
200 &nInfo, &nConverted);
201 if (nInfo == 0)
202 {
203 OSL_ASSERT(
204 nConverted
205 == sal::static_int_cast< sal_uInt32 >(
206 aBuf.getLength()));
207 rtl_destroyTextToUnicodeConverter(aConverter);
208 *pBegin = p;
209 *pType = EscapeChar;
210 OSL_ASSERT(
211 nDstSize == 1
212 || (nDstSize == 2 && isHighSurrogate(aDst[0])
213 && isLowSurrogate(aDst[1])));
214 return nDstSize == 1
215 ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
216 }
217 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
218 && pEnd - p >= 3 && p[0] == cEscapePrefix
219 && (nWeight1 = getHexWeight(p[1])) >= 0
220 && (nWeight2 = getHexWeight(p[2])) >= 0)
221 {
222 p += 3;
223 aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
224 }
225 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
226 && p != pEnd && *p <= 0x7F)
227 {
228 aBuf.append(static_cast< char >(*p++));
229 }
230 else
231 {
232 OSL_ASSERT(
233 (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
234 == 0);
235 break;
236 }
237 }
238 rtl_destroyTextToUnicodeConverter(aConverter);
239 *pType = EscapeOctet;
240 }
241 return nChar;
242 }
243 else
244 {
245 *pType = EscapeNo;
246 return isHighSurrogate(nChar) && *pBegin < pEnd
247 && isLowSurrogate(**pBegin) ?
248 combineSurrogates(nChar, *(*pBegin)++) : nChar;
249 }
250 }
251
writeUcs4(rtl_uString ** pBuffer,sal_Int32 * pCapacity,sal_uInt32 nUtf32)252 void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
253 {
254 OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
255 if (nUtf32 <= 0xFFFF) {
256 writeUnicode(
257 pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
258 } else {
259 nUtf32 -= 0x10000;
260 writeUnicode(
261 pBuffer, pCapacity,
262 static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
263 writeUnicode(
264 pBuffer, pCapacity,
265 static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
266 }
267 }
268
writeEscapeOctet(rtl_uString ** pBuffer,sal_Int32 * pCapacity,sal_uInt32 nOctet)269 void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
270 sal_uInt32 nOctet)
271 {
272 OSL_ENSURE(nOctet <= 0xFF, "bad octet");
273
274 static sal_Unicode const aHex[16]
275 = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
276 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
277
278 writeUnicode(pBuffer, pCapacity, cEscapePrefix);
279 writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
280 writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
281 }
282
writeEscapeChar(rtl_uString ** pBuffer,sal_Int32 * pCapacity,sal_uInt32 nUtf32,rtl_TextEncoding eCharset,bool bStrict)283 bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
284 sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
285 {
286 OSL_ENSURE(nUtf32 <= 0x10FFFF, "bad UTF-32 char");
287 if (eCharset == RTL_TEXTENCODING_UTF8) {
288 if (nUtf32 < 0x80)
289 writeEscapeOctet(pBuffer, pCapacity, nUtf32);
290 else if (nUtf32 < 0x800)
291 {
292 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
293 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
294 }
295 else if (nUtf32 < 0x10000)
296 {
297 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
298 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
299 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
300 }
301 else
302 {
303 writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
304 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
305 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
306 writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
307 }
308 } else {
309 rtl_UnicodeToTextConverter aConverter
310 = rtl_createUnicodeToTextConverter(eCharset);
311 sal_Unicode aSrc[2];
312 sal_Size nSrcSize;
313 if (nUtf32 <= 0xFFFF)
314 {
315 aSrc[0] = static_cast< sal_Unicode >(nUtf32);
316 nSrcSize = 1;
317 }
318 else
319 {
320 aSrc[0] = static_cast< sal_Unicode >(
321 ((nUtf32 - 0x10000) >> 10) | 0xD800);
322 aSrc[1] = static_cast< sal_Unicode >(
323 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
324 nSrcSize = 2;
325 }
326 sal_Char aDst[32]; // FIXME random value
327 sal_uInt32 nInfo;
328 sal_Size nConverted;
329 sal_Size nDstSize = rtl_convertUnicodeToText(
330 aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
331 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
332 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
333 | RTL_UNICODETOTEXT_FLAGS_FLUSH,
334 &nInfo, &nConverted);
335 OSL_ASSERT((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
336 rtl_destroyUnicodeToTextConverter(aConverter);
337 if (nInfo == 0) {
338 OSL_ENSURE(nConverted == nSrcSize, "bad rtl_convertUnicodeToText");
339 for (sal_Size i = 0; i < nDstSize; ++i)
340 writeEscapeOctet(pBuffer, pCapacity,
341 static_cast< unsigned char >(aDst[i]));
342 // FIXME all octets are escaped, even if there is no need
343 } else {
344 if (bStrict) {
345 return false;
346 } else {
347 writeUcs4(pBuffer, pCapacity, nUtf32);
348 }
349 }
350 }
351 return true;
352 }
353
354 struct Component
355 {
356 sal_Unicode const * pBegin;
357 sal_Unicode const * pEnd;
358
Component__anone461c9380111::Component359 inline Component(): pBegin(0) {}
360
isPresent__anone461c9380111::Component361 inline bool isPresent() const { return pBegin != 0; }
362
363 inline sal_Int32 getLength() const;
364 };
365
getLength() const366 inline sal_Int32 Component::getLength() const
367 {
368 OSL_ENSURE(isPresent(), "taking length of non-present component");
369 return static_cast< sal_Int32 >(pEnd - pBegin);
370 }
371
// The five components of a URI reference as located by parseUriRef.  Each
// component that parseUriRef finds includes its delimiter(s): the scheme ends
// with its ':', the authority starts with "//", the query starts with '?',
// and the fragment starts with '#'.  parseUriRef always sets the path, which
// may be empty; every other component may be absent.
struct Components
{
    Component aScheme;    // e.g. "http:"
    Component aAuthority; // e.g. "//host"
    Component aPath;      // e.g. "/a/b" (possibly empty)
    Component aQuery;     // e.g. "?x=y"
    Component aFragment;  // e.g. "#top"
};
380
parseUriRef(rtl_uString const * pUriRef,Components * pComponents)381 void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
382 {
383 // This algorithm is liberal and accepts various forms of illegal input.
384
385 sal_Unicode const * pBegin = pUriRef->buffer;
386 sal_Unicode const * pEnd = pBegin + pUriRef->length;
387 sal_Unicode const * pPos = pBegin;
388
389 if (pPos != pEnd && isAlpha(*pPos))
390 for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
391 if (*p == ':')
392 {
393 pComponents->aScheme.pBegin = pBegin;
394 pComponents->aScheme.pEnd = ++p;
395 pPos = p;
396 break;
397 }
398 else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-'
399 && *p != '.')
400 break;
401
402 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
403 {
404 pComponents->aAuthority.pBegin = pPos;
405 pPos += 2;
406 while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
407 ++pPos;
408 pComponents->aAuthority.pEnd = pPos;
409 }
410
411 pComponents->aPath.pBegin = pPos;
412 while (pPos != pEnd && *pPos != '?' && * pPos != '#')
413 ++pPos;
414 pComponents->aPath.pEnd = pPos;
415
416 if (pPos != pEnd && *pPos == '?')
417 {
418 pComponents->aQuery.pBegin = pPos++;
419 while (pPos != pEnd && * pPos != '#')
420 ++pPos;
421 pComponents->aQuery.pEnd = pPos;
422 }
423
424 if (pPos != pEnd)
425 {
426 OSL_ASSERT(*pPos == '#');
427 pComponents->aFragment.pBegin = pPos;
428 pComponents->aFragment.pEnd = pEnd;
429 }
430 }
431
joinPaths(Component const & rBasePath,Component const & rRelPath)432 rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath)
433 {
434 OSL_ASSERT(rBasePath.isPresent() && *rBasePath.pBegin == '/');
435 OSL_ASSERT(rRelPath.isPresent());
436
437 // The invariant of aBuffer is that it always starts and ends with a slash
438 // (until probably right at the end of the algorithm, when the last segment
439 // of rRelPath is added, which does not necessarily end in a slash):
440 rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
441 // XXX numeric overflow
442
443 // Segments "." and ".." within rBasePath are not conisdered special (but
444 // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
445 // bit unclear about this point:
446 sal_Int32 nFixed = 1;
447 sal_Unicode const * p = rBasePath.pBegin + 1;
448 for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
449 if (*q == '/')
450 {
451 if (
452 (q - p == 1 && p[0] == '.') ||
453 (q - p == 2 && p[0] == '.' && p[1] == '.')
454 )
455 {
456 nFixed = q + 1 - rBasePath.pBegin;
457 }
458 p = q + 1;
459 }
460 aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin);
461
462 p = rRelPath.pBegin;
463 if (p != rRelPath.pEnd)
464 for (;;)
465 {
466 sal_Unicode const * q = p;
467 sal_Unicode const * r;
468 for (;;)
469 {
470 if (q == rRelPath.pEnd)
471 {
472 r = q;
473 break;
474 }
475 if (*q == '/')
476 {
477 r = q + 1;
478 break;
479 }
480 ++q;
481 }
482 if (q - p == 2 && p[0] == '.' && p[1] == '.')
483 {
484 // Erroneous excess segments ".." within rRelPath are left
485 // intact, as the examples in RFC 2396, section C.2, suggest:
486 sal_Int32 i = aBuffer.getLength() - 1;
487 if (i < nFixed)
488 {
489 aBuffer.append(p, r - p);
490 nFixed += 3;
491 }
492 else
493 {
494 while (aBuffer.charAt(i - 1) != '/')
495 --i;
496 aBuffer.setLength(i);
497 }
498 }
499 else if (q - p != 1 || *p != '.')
500 aBuffer.append(p, r - p);
501 if (q == rRelPath.pEnd)
502 break;
503 p = q + 1;
504 }
505
506 return aBuffer.makeStringAndClear();
507 }
508
509 }
510
rtl_getUriCharClass(rtl_UriCharClass eCharClass)511 sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
512 SAL_THROW_EXTERN_C()
513 {
514 static sal_Bool const aCharClass[][nCharClassSize]
515 = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
519 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
520 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
521 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
522 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
523 },
524 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
527 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
528 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
529 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
530 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
531 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
532 },
533 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
534 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
535 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
536 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
537 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
538 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
539 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
540 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
541 },
542 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
543 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
544 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
545 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
546 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
547 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
548 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
549 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
550 },
551 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
552 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
553 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
554 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
555 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
556 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
557 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
558 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
559 },
560 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
562 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
563 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
564 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
565 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
566 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
567 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
568 },
569 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
572 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
573 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
574 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
575 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
576 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
577 },
578 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
581 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
582 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
583 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
584 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
585 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
586 }};
587 OSL_ENSURE(
588 (eCharClass >= 0
589 && (sal::static_int_cast< std::size_t >(eCharClass)
590 < sizeof aCharClass / sizeof aCharClass[0])),
591 "bad eCharClass");
592 return aCharClass[eCharClass];
593 }
594
rtl_uriEncode(rtl_uString * pText,sal_Bool const * pCharClass,rtl_UriEncodeMechanism eMechanism,rtl_TextEncoding eCharset,rtl_uString ** pResult)595 void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
596 rtl_UriEncodeMechanism eMechanism,
597 rtl_TextEncoding eCharset, rtl_uString ** pResult)
598 SAL_THROW_EXTERN_C()
599 {
600 OSL_ENSURE(!pCharClass[0x25], "bad pCharClass");
601 // make sure the percent sign is encoded...
602
603 sal_Unicode const * p = pText->buffer;
604 sal_Unicode const * pEnd = p + pText->length;
605 sal_Int32 nCapacity = 0;
606 rtl_uString_new(pResult);
607 while (p < pEnd)
608 {
609 EscapeType eType;
610 sal_uInt32 nUtf32 = readUcs4(
611 &p, pEnd,
612 (eMechanism == rtl_UriEncodeKeepEscapes
613 || eMechanism == rtl_UriEncodeCheckEscapes
614 || eMechanism == rtl_UriEncodeStrictKeepEscapes),
615 eCharset, &eType);
616 switch (eType)
617 {
618 case EscapeNo:
619 if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
620 writeUnicode(pResult, &nCapacity,
621 static_cast< sal_Unicode >(nUtf32));
622 else if (!writeEscapeChar(
623 pResult, &nCapacity, nUtf32, eCharset,
624 (eMechanism == rtl_UriEncodeStrict
625 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
626 {
627 rtl_uString_new(pResult);
628 return;
629 }
630 break;
631
632 case EscapeChar:
633 if (eMechanism == rtl_UriEncodeCheckEscapes
634 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
635 writeUnicode(pResult, &nCapacity,
636 static_cast< sal_Unicode >(nUtf32));
637 else if (!writeEscapeChar(
638 pResult, &nCapacity, nUtf32, eCharset,
639 (eMechanism == rtl_UriEncodeStrict
640 || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
641 {
642 rtl_uString_new(pResult);
643 return;
644 }
645 break;
646
647 case EscapeOctet:
648 writeEscapeOctet(pResult, &nCapacity, nUtf32);
649 break;
650 }
651 }
652 }
653
rtl_uriDecode(rtl_uString * pText,rtl_UriDecodeMechanism eMechanism,rtl_TextEncoding eCharset,rtl_uString ** pResult)654 void SAL_CALL rtl_uriDecode(rtl_uString * pText,
655 rtl_UriDecodeMechanism eMechanism,
656 rtl_TextEncoding eCharset, rtl_uString ** pResult)
657 SAL_THROW_EXTERN_C()
658 {
659 switch (eMechanism)
660 {
661 case rtl_UriDecodeNone:
662 rtl_uString_assign(pResult, pText);
663 break;
664
665 case rtl_UriDecodeToIuri:
666 eCharset = RTL_TEXTENCODING_UTF8;
667 default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
668 {
669 sal_Unicode const * p = pText->buffer;
670 sal_Unicode const * pEnd = p + pText->length;
671 sal_Int32 nCapacity = 0;
672 rtl_uString_new(pResult);
673 while (p < pEnd)
674 {
675 EscapeType eType;
676 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
677 switch (eType)
678 {
679 case EscapeChar:
680 if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
681 {
682 writeEscapeOctet(pResult, &nCapacity, nUtf32);
683 break;
684 }
685 case EscapeNo:
686 writeUcs4(pResult, &nCapacity, nUtf32);
687 break;
688
689 case EscapeOctet:
690 if (eMechanism == rtl_UriDecodeStrict) {
691 rtl_uString_new(pResult);
692 return;
693 }
694 writeEscapeOctet(pResult, &nCapacity, nUtf32);
695 break;
696 }
697 }
698 }
699 break;
700 }
701 }
702
rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,rtl_uString * pRelUriRef,rtl_uString ** pResult,rtl_uString ** pException)703 sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
704 rtl_uString * pRelUriRef,
705 rtl_uString ** pResult,
706 rtl_uString ** pException)
707 SAL_THROW_EXTERN_C()
708 {
709 // If pRelUriRef starts with a scheme component it is an absolute URI
710 // reference, and we are done (i.e., this algorithm does not support
711 // backwards-compatible relative URIs starting with a scheme component, see
712 // RFC 2396, section 5.2, step 3):
713 Components aRelComponents;
714 parseUriRef(pRelUriRef, &aRelComponents);
715 if (aRelComponents.aScheme.isPresent())
716 {
717 rtl_uString_assign(pResult, pRelUriRef);
718 return true;
719 }
720
721 // Parse pBaseUriRef; if the scheme component is not present or not valid,
722 // or the path component is not empty and starts with anything but a slash,
723 // an exception is raised:
724 Components aBaseComponents;
725 parseUriRef(pBaseUriRef, &aBaseComponents);
726 if (!aBaseComponents.aScheme.isPresent())
727 {
728 rtl::OUString aMessage(pBaseUriRef);
729 aMessage += rtl::OUString(
730 RTL_CONSTASCII_USTRINGPARAM(
731 " does not start with a scheme component"));
732 rtl_uString_assign(pException,
733 const_cast< rtl::OUString & >(aMessage).pData);
734 return false;
735 }
736 if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
737 && *aBaseComponents.aPath.pBegin != '/')
738 {
739 rtl::OUString aMessage(pBaseUriRef);
740 aMessage += rtl::OUString(
741 RTL_CONSTASCII_USTRINGPARAM(
742 "path component does not start with slash"));
743 rtl_uString_assign(pException, aMessage.pData);
744 return false;
745 }
746
747 // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
748 // into an absolute one (if the relative URI is a reference to the "current
749 // document," the "current document" is here taken to be the base URI):
750 rtl::OUStringBuffer aBuffer;
751 aBuffer.append(aBaseComponents.aScheme.pBegin,
752 aBaseComponents.aScheme.getLength());
753 if (aRelComponents.aAuthority.isPresent())
754 {
755 aBuffer.append(aRelComponents.aAuthority.pBegin,
756 aRelComponents.aAuthority.getLength());
757 aBuffer.append(aRelComponents.aPath.pBegin,
758 aRelComponents.aPath.getLength());
759 if (aRelComponents.aQuery.isPresent())
760 aBuffer.append(aRelComponents.aQuery.pBegin,
761 aRelComponents.aQuery.getLength());
762 }
763 else
764 {
765 if (aBaseComponents.aAuthority.isPresent())
766 aBuffer.append(aBaseComponents.aAuthority.pBegin,
767 aBaseComponents.aAuthority.getLength());
768 if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
769 && !aRelComponents.aQuery.isPresent())
770 {
771 aBuffer.append(aBaseComponents.aPath.pBegin,
772 aBaseComponents.aPath.getLength());
773 if (aBaseComponents.aQuery.isPresent())
774 aBuffer.append(aBaseComponents.aQuery.pBegin,
775 aBaseComponents.aQuery.getLength());
776 }
777 else
778 {
779 if (*aRelComponents.aPath.pBegin == '/')
780 aBuffer.append(aRelComponents.aPath.pBegin,
781 aRelComponents.aPath.getLength());
782 else
783 aBuffer.append(joinPaths(aBaseComponents.aPath,
784 aRelComponents.aPath));
785 if (aRelComponents.aQuery.isPresent())
786 aBuffer.append(aRelComponents.aQuery.pBegin,
787 aRelComponents.aQuery.getLength());
788 }
789 }
790 if (aRelComponents.aFragment.isPresent())
791 aBuffer.append(aRelComponents.aFragment.pBegin,
792 aRelComponents.aFragment.getLength());
793 rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
794 return true;
795 }
796