xref: /aoo41x/main/tools/source/fsys/urlobj.cxx (revision 89b56da7)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_tools.hxx"
26 #include <tools/urlobj.hxx>
27 #include <tools/debug.hxx>
28 #include <tools/inetmime.hxx>
29 #include "com/sun/star/uno/Reference.hxx"
30 #include "com/sun/star/util/XStringWidth.hpp"
31 #include "osl/diagnose.h"
32 #include "osl/file.hxx"
33 #include "rtl/string.h"
34 #include "rtl/textenc.h"
35 #include "rtl/ustring.hxx"
36 #include "sal/types.h"
37 
38 #ifndef INCLUDED_ALGORITHM
39 #include <algorithm>
40 #define INCLUDED_ALGORITHM
41 #endif
42 #ifndef INCLUDED_LIMITS
43 #include <limits>
44 #define INCLUDED_LIMITS
45 #endif
46 
47 #include <string.h>
48 
49 namespace unnamed_tools_urlobj {} using namespace unnamed_tools_urlobj;
50 	// unnamed namespaces don't work well yet...
51 
52 using namespace com::sun;
53 
54 //============================================================================
55 //
56 //	INetURLObject
57 //
58 //============================================================================
59 
60 /* The URI grammar (using RFC 2234 conventions).
61 
62    Constructs of the form
63 	   {reference <rule1> using rule2}
64    stand for a rule matching the given rule1 specified in the given reference,
65    encoded to URI syntax using rule2 (as specified in this URI grammar).
66 
67 
68    ; RFC 1738, RFC 2396, RFC 2732, private
69    login = [user [":" password] "@"] hostport
70    user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
71    password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
72    hostport = host [":" port]
73    host = incomplete-hostname / hostname / IPv4address / IPv6reference
74    incomplete-hostname = *(domainlabel ".") domainlabel
75    hostname = *(domainlabel ".") toplabel ["."]
76    domainlabel = alphanum [*(alphanum / "-") alphanum]
77    toplabel = ALPHA [*(alphanum / "-") alphanum]
78    IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
79    IPv6reference = "[" hexpart [":" IPv4address] "]"
80    hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
81    hexseq = hex4 *(":" hex4)
82    hex4 = 1*4HEXDIG
83    port = *DIGIT
84    escaped = "%" HEXDIG HEXDIG
85    reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]"
86    mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~"
87    alphanum = ALPHA / DIGIT
88    unreserved = alphanum / mark
89    uric = escaped / reserved / unreserved
90    pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@"
91 
92 
93    ; RFC 1738, RFC 2396
94    ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]]
95    segment = *pchar
96 
97 
98    ; RFC 1738, RFC 2396
99    http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]]
100    segment = *(pchar / ";")
101 
102 
103    ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&>
104    file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)]
105    segment = *pchar
106    netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")}
107 
108 
109    ; RFC 2368, RFC 2396
110    mailto-url = "MAILTO:" [to] [headers]
111    to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
112    headers = "?" header *("&" header)
113    header = hname "=" hvalue
114    hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY"
115    hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
116 
117 
118    ; private (see RFC 1738, RFC 2396)
119    vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]]
120    segment = *(pchar / ";")
121 
122 
123    ; RFC 1738, RFC 2396, RFC 2732
124    news-url = "NEWS:" grouppart
125    grouppart = "*" / group / article
126    group = alpha *(alphanum / "+" / "-" / "." / "_")
127    article = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "?" / "_" / "~") "@" host
128 
129 
130    ; private
131    private-url = "PRIVATE:" path ["?" *uric]
132    path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
133 
134 
135    ; private
136    vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric]
137    name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
138    segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
139 
140 
141    ; private
142    https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]]
143    segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
144 
145 
146    ; private
147    slot-url = "SLOT:" path ["?" *uric]
148    path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
149 
150 
151    ; private
152    macro-url = "MACRO:" path ["?" *uric]
153    path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
154 
155 
156    ; private
157    javascript-url = "JAVASCRIPT:" *uric
158 
159 
160    ; private (see RFC 2192)
161    imap-url = "IMAP://" user [";AUTH=" auth] "@" hostport "/" segment *("/" segment) ["/;UID=" nz_number]
162    user = 1*{RFC 2060 <CHAR8> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "=" / "_" / "~")}
163    auth = {RFC 2060 <atom> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "+" / "," / "-" / "." / "=" / "_" / "~")}
164    segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / "=" / "@" / "_" / "~")
165    nz_number = {RFC 2060 <nz_number> using *DIGIT}
166 
167 
168    ; private
169    pop3-url = "POP3://" login ["/" ["<" *uric ">"]]
170 
171 
172    ; RFC 2397
173    data-url = "DATA:" [mediatype] [";BASE64"] "," *uric
174    mediatype = [type "/" subtype] *(";" attribute "=" value)
175    type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
176    subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
   attribute = {RFC 2045 <attribute> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
   value = {RFC 2045 <value> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
179 
180 
181    ; RFC 2392, RFC 2396
182    cid-url = "CID:" {RFC 822 <addr-spec> using *uric}
183 
184 
185    ; private
186    out-url = "OUT:///~" name ["/" *uric]
   name = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "?" / "@" / "_" / "~")
188 
189 
190    ; private
191    vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar)
192    reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
193 
194    ; private
195    vim-url = "VIM://" +vimc [":" *vimc] ["/" [("INBOX" message) / ("NEWSGROUPS" ["/" [+vimc message]])]]
196    message = ["/" [+vimc [":" +DIGIT "." +DIGIT "." +DIGIT]]]
197    vimc = ("=" HEXDIG HEXDIG) / alphanum
198 
199 
200    ; private
201    uno-url = ".UNO:" path ["?" *uric]
202    path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
203 
204 
205    ; private
206    component-url = ".COMPONENT:" path ["?" *uric]
207    path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
208 
209 
210    ; private
211    vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
212    reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
213 
214 
215    ; RFC 2255
   ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesc *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]]
217    dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
218    attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
219    filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
220    extension = ["!"] ["X-"] extoken ["=" exvalue]
221    extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
222    exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
223 
224 
225    ; private
226    db-url = "DB:" *uric
227 
228 
229    ; private
230    vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part
231    opaque_part = uric_no_slash *uric
232    uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
233 
234 
235    ; private
   vnd-sun-star-odma-url = "VND.SUN.STAR.ODMA:" ["/" *uric_no_slash]
237    uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
238 
239 
240    ; RFC 1738
241    telnet-url = "TELNET://" login ["/"]
242 
243 
244    ; private
245    vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part
246    opaque_part = uric_no_slash *uric
247    uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
248 
249 
250    ; private
251    vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment)
252    segment = *pchar
253 
254 
255    ; private
256    unknown-url = scheme ":" 1*uric
257    scheme = ALPHA *(alphanum / "+" / "-" / ".")
258 
259 
260    ; private (http://ubiqx.org/cifs/Appendix-D.html):
261    smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]]
262    segment = *(pchar / ";")
263  */
264 
265 //============================================================================
clear()266 inline sal_Int32 INetURLObject::SubString::clear()
267 {
268 	sal_Int32 nDelta = -m_nLength;
269 	m_nBegin = -1;
270 	m_nLength = 0;
271 	return nDelta;
272 }
273 
set(rtl::OUStringBuffer & rString,rtl::OUString const & rSubString)274 inline sal_Int32 INetURLObject::SubString::set(rtl::OUStringBuffer & rString,
275 									   rtl::OUString const & rSubString)
276 {
277 	rtl::OUString sTemp(rString.makeStringAndClear());
278 	sal_Int32 nDelta = set(sTemp, rSubString);
279 	rString.append(sTemp);
280 	return nDelta;
281 }
282 
set(rtl::OUString & rString,rtl::OUString const & rSubString)283 inline sal_Int32 INetURLObject::SubString::set(rtl::OUString & rString,
284 									   rtl::OUString const & rSubString)
285 {
286 	sal_Int32 nDelta = rSubString.getLength() - m_nLength;
287 
288 	rString = rString.replaceAt(m_nBegin, m_nLength, rSubString);
289 
290 	m_nLength = rSubString.getLength();
291 	return nDelta;
292 }
293 
set(rtl::OUStringBuffer & rString,rtl::OUString const & rSubString,sal_Int32 nTheBegin)294 inline sal_Int32 INetURLObject::SubString::set(rtl::OUStringBuffer & rString,
295 									   rtl::OUString const & rSubString,
296 											   sal_Int32 nTheBegin)
297 {
298 	m_nBegin = nTheBegin;
299 	return set(rString, rSubString);
300 }
301 
302 //============================================================================
operator +=(sal_Int32 nDelta)303 inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta)
304 {
305 	if (isPresent())
306 		m_nBegin = m_nBegin + nDelta;
307 }
308 
309 //============================================================================
compare(SubString const & rOther,rtl::OUStringBuffer const & rThisString,rtl::OUStringBuffer const & rOtherString) const310 int INetURLObject::SubString::compare(SubString const & rOther,
311                                       rtl::OUStringBuffer const & rThisString,
312                                       rtl::OUStringBuffer const & rOtherString) const
313 {
314     sal_Int32 len = std::min(m_nLength, rOther.m_nLength);
315     sal_Unicode const * p1 = rThisString.getStr() + m_nBegin;
316     sal_Unicode const * end = p1 + len;
317     sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin;
318     while (p1 != end) {
319         if (*p1 < *p2) {
320             return -1;
321         } else if (*p1 > *p2) {
322             return 1;
323         }
324         ++p1;
325         ++p2;
326     }
327     return m_nLength < rOther.m_nLength ? -1
328         : m_nLength > rOther.m_nLength ? 1
329         : 0;
330 }
331 
332 //============================================================================
// Static per-scheme description; one entry per INetProtocol value is stored
// in aSchemeInfoMap. The entries there are brace-initialised positionally,
// so the member order below must not be changed.
struct INetURLObject::SchemeInfo
{
	sal_Char const * m_pScheme;   // scheme name, e.g. "ftp"
	sal_Char const * m_pPrefix;   // scheme plus separator, e.g. "ftp://" or "mailto:"
	sal_uInt16 m_nDefaultPort;    // e.g. 21 for ftp; 0 where the table gives none
	// NOTE(review): the flags below are described from their names only;
	// confirm the exact meaning against the parsing code.
	bool m_bAuthority;            // presumably: URL has an authority part
	bool m_bUser;                 // presumably: authority may contain a user
	bool m_bAuth;                 // presumably: ";AUTH=" part allowed (only imap sets it)
	bool m_bPassword;             // presumably: authority may contain a password
	bool m_bHost;                 // presumably: authority contains a host
	bool m_bPort;                 // presumably: authority may contain a port
	bool m_bHierarchical;         // presumably: path consists of "/"-separated segments
	bool m_bQuery;                // presumably: a "?" query part is recognised
};
347 
348 //============================================================================
// Description of one recognised URL prefix: the prefix text, an optional
// translated form, the scheme it belongs to and the kind of prefix it is.
// NOTE(review): the table of PrefixInfo entries and the code relying on the
// order of Kind are not part of this excerpt.
struct INetURLObject::PrefixInfo
{
	enum Kind { OFFICIAL, INTERNAL, EXTERNAL, ALIAS }; // order is important!

	sal_Char const * m_pPrefix;            // prefix text to match
	sal_Char const * m_pTranslatedPrefix;  // presumably the replacement prefix, if any -- confirm
	INetProtocol m_eScheme;                // scheme this prefix maps to
	Kind m_eKind;                          // classification of the prefix
};
358 
359 //============================================================================
// Table of scheme descriptions, indexed by INetProtocol value (see
// getSchemeInfo()); it must therefore list exactly INET_PROT_END entries in
// enumeration order.
//
// Each entry is initialised positionally in SchemeInfo member order:
//   m_pScheme, m_pPrefix, m_nDefaultPort,
//   m_bAuthority, m_bUser, m_bAuth, m_bPassword,
//   m_bHost, m_bPort, m_bHierarchical, m_bQuery
static INetURLObject::SchemeInfo const aSchemeInfoMap[INET_PROT_END]
	= { { "", "", 0, false, false, false, false, false, false, false,
		  false },
		{ "ftp", "ftp://", 21, true, true, false, true, true, true, true,
		  false },
		{ "http", "http://", 80, true, false, false, false, true, true,
		  true, true },
		{ "file", "file://", 0, true, false, false, false, true, false,
		  true, false },
		{ "mailto", "mailto:", 0, false, false, false, false, false,
		  false, false, true },
		{ "vnd.sun.star.webdav", "vnd.sun.star.webdav://", 80, true, false,
		  false, false, true, true, true, true },
		{ "news", "news:", 0, false, false, false, false, false, false, false,
          false },
		{ "private", "private:", 0, false, false, false, false, false,
		  false, false, true },
		{ "vnd.sun.star.help", "vnd.sun.star.help://", 0, true, false, false,
		  false, false, false, true, true },
		{ "https", "https://", 443, true, false, false, false, true, true,
		  true, true },
		{ "slot", "slot:", 0, false, false, false, false, false, false,
		  false, true },
		{ "macro", "macro:", 0, false, false, false, false, false, false,
		  false, true },
		{ "javascript", "javascript:", 0, false, false, false, false,
		  false, false, false, false },
		{ "imap", "imap://", 143, true, true, true, false, true, true,
		  true, false },
		{ "pop3", "pop3://", 110, true, true, false, true, true, true,
		  false, false },
		{ "data", "data:", 0, false, false, false, false, false, false,
		  false, false },
		{ "cid", "cid:", 0, false, false, false, false, false, false,
		  false, false },
		{ "out", "out://", 0, true, false, false, false, false, false,
		  false, false },
		{ "vnd.sun.star.hier", "vnd.sun.star.hier:", 0, true, false, false,
		  false, false, false, true, false },
		{ "vim", "vim://", 0, true, true, false, true, false, false, true,
		  false },
		{ ".uno", ".uno:", 0, false, false, false, false, false, false,
		  false, true },
		{ ".component", ".component:", 0, false, false, false, false,
		  false, false, false, true },
		{ "vnd.sun.star.pkg", "vnd.sun.star.pkg://", 0, true, false, false,
		  false, false, false, true, true },
		{ "ldap", "ldap://", 389, true, false, false, false, true, true,
		  false, true },
		{ "db", "db:", 0, false, false, false, false, false, false, false,
		  false },
		{ "vnd.sun.star.cmd", "vnd.sun.star.cmd:", 0, false, false, false,
		  false, false, false, false, false },
		{ "vnd.sun.star.odma", "vnd.sun.star.odma:", 0, false, false, false,
		  false, false, false, true, false },
		{ "telnet", "telnet://", 23, true, true, false, true, true, true, true,
          false },
		{ "vnd.sun.star.expand", "vnd.sun.star.expand:", 0, false, false, false,
		  false, false, false, false, false },
        { "vnd.sun.star.tdoc", "vnd.sun.star.tdoc:", 0, false, false, false,
          false, false, false, true, false },
        // NOTE(review): entry with empty name and prefix -- presumably the
        // slot for generic (unknown-url) schemes; confirm against INetProtocol.
        { "", "", 0, false, false, false, false, true, true, true, false },
        { "smb", "smb://", 139, true, true, false, true, true, true, true,
          true },
		{ "hid", "hid:", 0, false, false, false, false, false, false,
        false, true } };
426 
427 // static
428 inline INetURLObject::SchemeInfo const &
getSchemeInfo(INetProtocol eTheScheme)429 INetURLObject::getSchemeInfo(INetProtocol eTheScheme)
430 {
431 	return aSchemeInfoMap[eTheScheme];
432 };
433 
434 //============================================================================
getSchemeInfo() const435 inline INetURLObject::SchemeInfo const & INetURLObject::getSchemeInfo() const
436 {
437 	return getSchemeInfo(m_eScheme);
438 }
439 
440 //============================================================================
441 // static
appendEscape(rtl::OUStringBuffer & rTheText,sal_Char cEscapePrefix,sal_uInt32 nOctet)442 inline void INetURLObject::appendEscape(rtl::OUStringBuffer & rTheText,
443 										sal_Char cEscapePrefix,
444 										sal_uInt32 nOctet)
445 {
446 	rTheText.append(sal_Unicode(cEscapePrefix));
447 	rTheText.append(sal_Unicode(INetMIME::getHexDigit(int(nOctet >> 4))));
448 	rTheText.append(sal_Unicode(INetMIME::getHexDigit(int(nOctet & 15))));
449 }
450 
451 //============================================================================
452 namespace unnamed_tools_urlobj {
453 
// Two-character aliases for the INetURLObject::Part constants. They exist
// to keep the rows of aMustEncodeMap short: the rows combine them with '+',
// and mustEncode() tests the result with '&' against a Part value.
enum
{
	PA = INetURLObject::PART_OBSOLETE_NORMAL,
	PB = INetURLObject::PART_OBSOLETE_FILE,
	PC = INetURLObject::PART_OBSOLETE_PARAM,
	PD = INetURLObject::PART_USER_PASSWORD,
	PE = INetURLObject::PART_IMAP_ACHAR,
	PF = INetURLObject::PART_VIM,
	PG = INetURLObject::PART_HOST_EXTRA,
	PH = INetURLObject::PART_FPATH,
	PI = INetURLObject::PART_AUTHORITY,
	PJ = INetURLObject::PART_PATH_SEGMENTS_EXTRA,
	PK = INetURLObject::PART_REL_SEGMENT_EXTRA,
	PL = INetURLObject::PART_URIC,
	PM = INetURLObject::PART_HTTP_PATH,
	PN = INetURLObject::PART_FILE_SEGMENT_EXTRA,
	PO = INetURLObject::PART_MESSAGE_ID,
	PP = INetURLObject::PART_MESSAGE_ID_PATH,
	PQ = INetURLObject::PART_MAILTO,
	PR = INetURLObject::PART_PATH_BEFORE_QUERY,
	PS = INetURLObject::PART_PCHAR,
	PT = INetURLObject::PART_FRAGMENT,
	PU = INetURLObject::PART_VISIBLE,
	PV = INetURLObject::PART_VISIBLE_NONSPECIAL,
	PW = INetURLObject::PART_CREATEFRAGMENT,
	PX = INetURLObject::PART_UNO_PARAM_VALUE,
	PY = INetURLObject::PART_UNAMBIGUOUS,
	PZ = INetURLObject::PART_URIC_NO_SLASH,
    P1 = INetURLObject::PART_HTTP_QUERY,
    P2 = INetURLObject::PART_NEWS_ARTICLE_LOCALPART
};
485 
486 static sal_uInt32 const aMustEncodeMap[128]
487 	= { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
489 /*   */	                                                                        PY,
490 /* ! */	      PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
491 /* " */	                                                            PU+PV      +PY,
492 /* # */	                                                            PU,
493 /* $ */	         PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
494 /* % */	                                                            PU,
495 /* & */ PA+PB+PC+PD+PE      +PH+PI+PJ+PK+PL+PM+PN+PO+PP   +PR+PS+PT+PU+PV+PW+PX   +PZ+P1+P2,
496 /* ' */	         PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
497 /* ( */	         PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
498 /* ) */	         PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
499 /* * */ PA+PB+PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
500 /* + */ PA+PB+PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX   +PZ+P1+P2,
501 /* , */ PA+PB+PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW      +PZ+P1+P2,
502 /* - */ PA+PB+PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
503 /* . */ PA+PB+PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
504 /* / */ PA+PB+PC            +PH   +PJ   +PL+PM      +PP+PQ+PR   +PT+PU+PV   +PX         +P2,
505 /* 0 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
506 /* 1 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
507 /* 2 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
508 /* 3 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
509 /* 4 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
510 /* 5 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
511 /* 6 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
512 /* 7 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
513 /* 8 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
514 /* 9 */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
515 /* : */	   PB+PC            +PH+PI+PJ   +PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX   +PZ+P1+P2,
516 /* ; */	      PC+PD            +PI+PJ+PK+PL+PM   +PO+PP+PQ+PR   +PT+PU   +PW      +PZ+P1+P2,
517 /* < */	      PC                                 +PO+PP            +PU+PV      +PY,
518 /* = */ PA+PB+PC+PD+PE      +PH+PI+PJ+PK+PL+PM+PN         +PR+PS+PT+PU+PV+PW      +PZ+P1+P2,
519 /* > */	      PC                                 +PO+PP            +PU+PV      +PY,
520 /* ? */	      PC                        +PL                     +PT+PU   +PW+PX   +PZ   +P2,
521 /* @ */	      PC            +PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1,
522 /* A */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
523 /* B */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
524 /* C */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
525 /* D */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
526 /* E */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
527 /* F */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
528 /* G */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
529 /* H */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
530 /* I */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
531 /* J */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
532 /* K */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
533 /* L */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
534 /* M */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
535 /* N */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
536 /* O */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
537 /* P */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
538 /* Q */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
539 /* R */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
540 /* S */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
541 /* T */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
542 /* U */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
543 /* V */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
544 /* W */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
545 /* X */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
546 /* Y */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
547 /* Z */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
548 /* [ */	                                 PL                        +PU+PV   +PX,
549 /* \ */	   PB                                                      +PU+PV      +PY,
550 /* ] */	                                 PL                        +PU+PV   +PX,
551 /* ^ */	                                                            PU+PV      +PY,
552 /* _ */ PA+PB+PC+PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
553 /* ` */	                                                            PU+PV      +PY,
554 /* a */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
555 /* b */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
556 /* c */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
557 /* d */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
558 /* e */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
559 /* f */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
560 /* g */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
561 /* h */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
562 /* i */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
563 /* j */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
564 /* k */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
565 /* l */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
566 /* m */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
567 /* n */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
568 /* o */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
569 /* p */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
570 /* q */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
571 /* r */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
572 /* s */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
573 /* t */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
574 /* u */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
575 /* v */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
576 /* w */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
577 /* x */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
578 /* y */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
579 /* z */ PA+PB+PC+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ+P1+P2,
580 /* { */	                                                            PU+PV      +PY,
581 /* | */	   PB+PC                              +PN               +PT+PU+PV      +PY,
582 /* } */	                                                            PU+PV      +PY,
583 /* ~ */ PA+PB+PC+PD+PE      +PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR+PS+PT+PU+PV+PW+PX+PY+PZ  +P2,
584 		0 };
585 
mustEncode(sal_uInt32 nUTF32,INetURLObject::Part ePart)586 inline bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart)
587 {
588 	return !INetMIME::isUSASCII(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart);
589 }
590 
591 }
592 
593 //============================================================================
setInvalid()594 void INetURLObject::setInvalid()
595 {
596 	m_aAbsURIRef.setLength(0);
597 	m_eScheme = INET_PROT_NOT_VALID;
598     m_aScheme.clear();
599 	m_aUser.clear();
600 	m_aAuth.clear();
601 	m_aHost.clear();
602 	m_aPort.clear();
603 	m_aPath.clear();
604 	m_aQuery.clear();
605 	m_aFragment.clear();
606 }
607 
608 //============================================================================
609 
610 namespace unnamed_tools_urlobj {
611 
612 INetURLObject::FSysStyle
guessFSysStyleByCounting(sal_Unicode const * pBegin,sal_Unicode const * pEnd,INetURLObject::FSysStyle eStyle)613 guessFSysStyleByCounting(sal_Unicode const * pBegin,
614 						 sal_Unicode const * pEnd,
615 						 INetURLObject::FSysStyle eStyle)
616 {
617 	DBG_ASSERT(eStyle
618 			       & (INetURLObject::FSYS_UNX
619 					      | INetURLObject::FSYS_DOS
620 					      | INetURLObject::FSYS_MAC),
621 			   "guessFSysStyleByCounting(): Bad style");
622 	DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd
623 			   && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(),
624 			   "guessFSysStyleByCounting(): Too big");
625 	sal_Int32 nSlashCount
626 		= eStyle & INetURLObject::FSYS_UNX ?
627 		      0 : std::numeric_limits< sal_Int32 >::min();
628 	sal_Int32 nBackslashCount
629 		= eStyle & INetURLObject::FSYS_DOS ?
630 		      0 : std::numeric_limits< sal_Int32 >::min();
631 	sal_Int32 nColonCount
632 		= eStyle & INetURLObject::FSYS_MAC ?
633 		      0 : std::numeric_limits< sal_Int32 >::min();
634 	while (pBegin != pEnd)
635 		switch (*pBegin++)
636 		{
637 			case '/':
638 				++nSlashCount;
639 				break;
640 
641 			case '\\':
642 				++nBackslashCount;
643 				break;
644 
645 			case ':':
646 				++nColonCount;
647 				break;
648 		}
649 	return nSlashCount >= nBackslashCount ?
650 		       nSlashCount >= nColonCount ?
651 		           INetURLObject::FSYS_UNX : INetURLObject::FSYS_MAC :
652 		       nBackslashCount >= nColonCount ?
653 		           INetURLObject::FSYS_DOS : INetURLObject::FSYS_MAC;
654 }
655 
parseScheme(sal_Unicode const ** begin,sal_Unicode const * end,sal_uInt32 fragmentDelimiter)656 rtl::OUString parseScheme(
657     sal_Unicode const ** begin, sal_Unicode const * end,
658     sal_uInt32 fragmentDelimiter)
659 {
660     sal_Unicode const * p = *begin;
661     if (p != end && INetMIME::isAlpha(*p)) {
662         do {
663             ++p;
664         } while (p != end
665                  && (INetMIME::isAlphanumeric(*p) || *p == '+' || *p == '-'
666                      || *p == '.'));
667         // #i34835# To avoid problems with Windows file paths like "C:\foo",
668         // do not accept generic schemes that are only one character long:
669         if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter
670             && p - *begin >= 2)
671         {
672 			rtl::OUString scheme(
673                 rtl::OUString(*begin, p - *begin).toAsciiLowerCase());
674             *begin = p + 1;
675             return scheme;
676         }
677     }
678     return rtl::OUString();
679 }
680 
681 }
682 
// Parses rTheAbsURIRef as an absolute URI reference and, on success, stores
// a normalized copy of it in this object.
//
// The input is consumed left to right: scheme, authority (user info, host and
// port) for schemes that have one, path, query and fragment.  While parsing,
// the normalized text is collected in a local buffer and the member
// sub-strings (m_aScheme, m_aUser, m_aAuth, m_aHost, m_aPort, m_aPath,
// m_aQuery, m_aFragment) are set to refer to the corresponding pieces of that
// buffer.  Only when the complete input has been consumed is the buffer
// assigned to m_aAbsURIRef.
//
// rTheAbsURIRef  the text to parse.
// bOctets, eMechanism, eCharset
//                passed on to getUTF32(), appendUCS4(),
//                parseHostOrNetBiosName() and parsePath(), which decode the
//                input and encode the output.  For the 'non URL' file
//                productions detected in smart mode, eMechanism is replaced
//                by ENCODE_ALL.
// bSmart         if true and the input has no known scheme prefix, the
//                scheme is guessed from the shape of the input (file system
//                paths, mail addresses, host names), falling back to
//                m_eSmartScheme; file URLs additionally accept several
//                file system notations.
// eStyle         the file system notations (FSYS_UNX, FSYS_DOS, FSYS_MAC)
//                that may be considered in smart mode.
//
// Returns true on success.  On any failure setInvalid() is called and false
// is returned.
setAbsURIRef(rtl::OUString const & rTheAbsURIRef,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset,bool bSmart,FSysStyle eStyle)683 bool INetURLObject::setAbsURIRef(rtl::OUString const & rTheAbsURIRef,
684 								 bool bOctets,
685 								 EncodeMechanism eMechanism,
686 								 rtl_TextEncoding eCharset,
687 								 bool bSmart,
688 								 FSysStyle eStyle)
689 {
690 	sal_Unicode const * pPos = rTheAbsURIRef.getStr();
691 	sal_Unicode const * pEnd = pPos + rTheAbsURIRef.getLength();
692 
	// Start from the invalid state; the individual parts are filled in again
	// as parsing proceeds.
693 	setInvalid();
694 
	// Character that introduces the fragment.  The 'non URL' file productions
	// below replace it by 0x80000000 so that "#" is taken as a literal
	// character.
695 	sal_uInt32 nFragmentDelimiter = '#';
696 
	// The normalized URI reference is assembled here and only copied to
	// m_aAbsURIRef at the very end.
697 	rtl::OUStringBuffer aSynAbsURIRef;
698 
699 	// Parse <scheme>:
700 	sal_Unicode const * p = pPos;
701 	PrefixInfo const * pPrefix = getPrefix(p, pEnd);
702 	if (pPrefix)
703 	{
704 		pPos = p;
705 		m_eScheme = pPrefix->m_eScheme;
706 
707 		rtl::OUString sTemp(rtl::OUString::createFromAscii(pPrefix->m_eKind
708 										         >= PrefixInfo::EXTERNAL ?
709 										     pPrefix->m_pTranslatedPrefix :
710 										     pPrefix->m_pPrefix));
711 		aSynAbsURIRef.append(sTemp);
712         m_aScheme = SubString( 0, sTemp.indexOf(static_cast< sal_Unicode >(':')) );
713 	}
714 	else
715 	{
716 		if (bSmart)
717 		{
718 			// For scheme detection, the first (if any) of the following
719 			// productions that matches the input string (and for which the
720 			// appropriate style bit is set in eStyle, if applicable)
721 			// determines the scheme. The productions use the auxiliary rules
722 			//
723 			//	  domain = label *("." label)
724 			//	  label = alphanum [*(alphanum / "-") alphanum]
725 			//	  alphanum = ALPHA / DIGIT
726 			//	  IPv6reference = "[" IPv6address "]"
727 			//	  IPv6address = hexpart [":" IPv4address]
728 			//	  IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
729 			//	  hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
730 			//	  hexseq = hex4 *(":" hex4)
731 			//	  hex4 = 1*4HEXDIG
732 			//	  UCS4 = <any UCS4 character>
733 			//
734 			// 1st Production (known scheme):
735 			//	  <one of the known schemes, ignoring case> ":" *UCS4
736 			//
737 			// 2nd Production (mailto):
738 			//	  domain "@" domain
739 			//
740 			// 3rd Production (ftp):
741 			//	  "FTP" 2*("." label) ["/" *UCS4]
742 			//
743 			// 4th Production (http):
744 			//	  label 2*("." label) ["/" *UCS4]
745 			//
746 			// 5th Production (file):
747 			//	  "//" (domain / IPv6reference) ["/" *UCS4]
748 			//
749 			// 6th Production (Unix file):
750 			//    "/" *UCS4
751 			//
752 			// 7th Production (UNC file; FSYS_DOS only):
753 			//	  "\\" domain ["\" *UCS4]
754 			//
755 			// 8th Production (Unix-like DOS file; FSYS_DOS only):
756 			//	  ALPHA ":" ["/" *UCS4]
757 			//
758 			// 9th Production (DOS file; FSYS_DOS only):
759 			//	  ALPHA ":" ["\" *UCS4]
760 			//
761 			// For the 'non URL' file productions 6--9, the interpretation of
762 			// the input as a (degenerate) URI is turned off, i.e., escape
763 			// sequences and fragments are never detected as such, but are
764 			// taken as literal characters.
765 
766 			sal_Unicode const * p1 = pPos;
767 			if (eStyle & FSYS_DOS
768 				&& pEnd - p1 >= 2
769 				&& INetMIME::isAlpha(p1[0])
770 				&& p1[1] == ':'
771                 && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\'))
772 			{
773 				m_eScheme = INET_PROT_FILE; // 8th, 9th
774 				eMechanism = ENCODE_ALL;
775 				nFragmentDelimiter = 0x80000000;
776 			}
777 			else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/')
778 			{
779 				p1 += 2;
780 				if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd))
781 					&& (p1 == pEnd || *p1 == '/'))
782 					m_eScheme = INET_PROT_FILE; // 5th
783 			}
784 			else if (p1 != pEnd && *p1 == '/')
785 			{
786 				m_eScheme = INET_PROT_FILE; // 6th
787 				eMechanism = ENCODE_ALL;
788 				nFragmentDelimiter = 0x80000000;
789 			}
790 			else if (eStyle & FSYS_DOS
791 					 && pEnd - p1 >= 2
792 					 && p1[0] == '\\'
793 					 && p1[1] == '\\')
794 			{
795 				p1 += 2;
796                 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
797                     p1, pEnd - p1, '\\');
798                 sal_Unicode const * pe = n == -1 ? pEnd : p1 + n;
799 				if (
800                     parseHostOrNetBiosName(
801                         p1, pe, bOctets, ENCODE_ALL, RTL_TEXTENCODING_DONTKNOW,
802                         true, NULL) ||
803                     (scanDomain(p1, pe) > 0 && p1 == pe)
804                    )
805 				{
806 					m_eScheme = INET_PROT_FILE; // 7th
807 					eMechanism = ENCODE_ALL;
808 					nFragmentDelimiter = 0x80000000;
809 				}
810 			}
811 			else
812 			{
813 				sal_Unicode const * pDomainEnd = p1;
814 				sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd);
815 				if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@')
816 				{
817 					++pDomainEnd;
818 					if (scanDomain(pDomainEnd, pEnd) > 0
819 						&& pDomainEnd == pEnd)
820 						m_eScheme = INET_PROT_MAILTO; // 2nd
821 				}
822 				else if (nLabels >= 3
823 						 && (pDomainEnd == pEnd || *pDomainEnd == '/'))
824 					m_eScheme
825 						= pDomainEnd - p1 >= 4
826                           && (p1[0] == 'f' || p1[0] == 'F')
827                           && (p1[1] == 't' || p1[1] == 'T')
828                           && (p1[2] == 'p' || p1[2] == 'P')
829 						  && p1[3] == '.' ?
830 						      INET_PROT_FTP : INET_PROT_HTTP; // 3rd, 4th
831 			}
832 		}
833 
		// Nothing recognized so far: try an arbitrary "<scheme>:" prefix,
		// which makes this a generic URL.
834 		rtl::OUString aSynScheme;
835         if (m_eScheme == INET_PROT_NOT_VALID) {
836             sal_Unicode const * p1 = pPos;
837             aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter);
838             if (aSynScheme.getLength() > 0)
839             {
840                 m_eScheme = INET_PROT_GENERIC;
841                 pPos = p1;
842             }
843         }
844 
		// Last resort in smart mode: non-empty input that does not start
		// with the fragment delimiter gets the scheme m_eSmartScheme.
845         if (bSmart && m_eScheme == INET_PROT_NOT_VALID && pPos != pEnd
846             && *pPos != nFragmentDelimiter)
847         {
848             m_eScheme = m_eSmartScheme;
849         }
850 
851 		if (m_eScheme == INET_PROT_NOT_VALID)
852 		{
853 			setInvalid();
854 			return false;
855 		}
856 
		// For every scheme other than the generic one, the scheme name is
		// taken from the scheme info rather than from the input.
857         if (m_eScheme != INET_PROT_GENERIC) {
858             aSynScheme = rtl::OUString::createFromAscii(getSchemeInfo().m_pScheme);
859         }
860         m_aScheme.set(aSynAbsURIRef, aSynScheme, aSynAbsURIRef.getLength());
861         aSynAbsURIRef.append(sal_Unicode(':'));
862 	}
863 
	// Settings for the path parser.  nAltSegmentDelimiter keeps the value
	// 0x80000000 unless a DOS drive path is recognized below, in which case
	// it becomes '\\'.
864 	sal_Char cEscapePrefix = getEscapePrefix();
865 	sal_uInt32 nSegmentDelimiter = '/';
866 	sal_uInt32 nAltSegmentDelimiter = 0x80000000;
867 	bool bSkippedInitialSlash = false;
868 
869 	// Parse //<user>;AUTH=<auth>@<host>:<port> or
870 	// //<user>:<password>@<host>:<port> or
871     // //<reg_name>
872 	if (getSchemeInfo().m_bAuthority)
873 	{
		// Ranges within the input that hold the user info and the host/port
		// text; they stay null if the respective part is absent.
874 		sal_Unicode const * pUserInfoBegin = 0;
875 		sal_Unicode const * pUserInfoEnd = 0;
876 		sal_Unicode const * pHostPortBegin = 0;
877 		sal_Unicode const * pHostPortEnd = 0;
878 
879 		switch (m_eScheme)
880 		{
881 			case INET_PROT_VND_SUN_STAR_HELP:
882 			{
				// "//" is mandatory; the authority itself may be empty.
883 				if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
884 				{
885 					setInvalid();
886 					return false;
887 				}
888                 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
889 				rtl::OUStringBuffer aSynAuthority;
890 				while (pPos < pEnd
891 					   && *pPos != '/' && *pPos != '?'
892 					   && *pPos != nFragmentDelimiter)
893 				{
894 					EscapeType eEscapeType;
895 					sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
896 												 cEscapePrefix, eMechanism,
897 												 eCharset, eEscapeType);
898 					appendUCS4(aSynAuthority, nUTF32, eEscapeType, bOctets,
899 							   PART_AUTHORITY, cEscapePrefix, eCharset,
900 							   false);
901 				}
902 				m_aHost.set(aSynAbsURIRef,
903 							aSynAuthority.makeStringAndClear(),
904 							aSynAbsURIRef.getLength());
905 					// misusing m_aHost to store the authority
906 				break;
907 			}
908 
909             case INET_PROT_VND_SUN_STAR_HIER:
910             {
				// "//" is optional here, but if it is present the authority
				// following it must not be empty.
911                 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
912                 {
913                     pPos += 2;
914                     aSynAbsURIRef.
915                         appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
916                     rtl::OUStringBuffer aSynAuthority;
917                     while (pPos < pEnd
918                            && *pPos != '/' && *pPos != '?'
919                            && *pPos != nFragmentDelimiter)
920                     {
921                         EscapeType eEscapeType;
922                         sal_uInt32 nUTF32 = getUTF32(pPos,
923                                                      pEnd,
924                                                      bOctets,
925                                                      cEscapePrefix,
926                                                      eMechanism,
927                                                      eCharset,
928                                                      eEscapeType);
929                         appendUCS4(aSynAuthority,
930                                    nUTF32,
931                                    eEscapeType,
932                                    bOctets,
933                                    PART_AUTHORITY,
934                                    cEscapePrefix,
935                                    eCharset,
936                                    false);
937                     }
938                     if (aSynAuthority.getLength() == 0)
939                     {
940                         setInvalid();
941                         return false;
942                     }
943                     m_aHost.set(aSynAbsURIRef,
944                                 aSynAuthority.makeStringAndClear(),
945                                 aSynAbsURIRef.getLength());
946                         // misusing m_aHost to store the authority
947                 }
948                 break;
949             }
950 
951 			case INET_PROT_VND_SUN_STAR_PKG:
952 			{
				// "//" followed by a non-empty authority is mandatory.
953 				if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
954 				{
955 					setInvalid();
956 					return false;
957 				}
958                 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
959 				rtl::OUStringBuffer aSynAuthority;
960 				while (pPos < pEnd
961 					   && *pPos != '/' && *pPos != '?'
962 					   && *pPos != nFragmentDelimiter)
963 				{
964 					EscapeType eEscapeType;
965 					sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
966 												 cEscapePrefix, eMechanism,
967 												 eCharset, eEscapeType);
968 					appendUCS4(aSynAuthority, nUTF32, eEscapeType, bOctets,
969 							   PART_AUTHORITY, cEscapePrefix, eCharset,
970 							   false);
971 				}
972 				if (aSynAuthority.getLength() == 0)
973 				{
974 					setInvalid();
975 					return false;
976 				}
977 				m_aHost.set(aSynAbsURIRef,
978 							aSynAuthority.makeStringAndClear(),
979 							aSynAbsURIRef.getLength());
980 					// misusing m_aHost to store the authority
981 				break;
982 			}
983 
984 			case INET_PROT_FILE:
985 				if (bSmart)
986 				{
987 					// The first of the following nine productions that
988 					// matches the rest of the input string (and for which the
989 					// appropriate style bit is set in eStyle, if applicable)
990 					// determines the used notation.  The productions use the
991 					// auxiliary rules
992 					//
993 					//	  domain = label *("." label)
994 					//	  label = alphanum [*(alphanum / "-") alphanum]
995 					//	  alphanum = ALPHA / DIGIT
996 					//	  IPv6reference = "[" IPv6address "]"
997 					//	  IPv6address = hexpart [":" IPv4address]
998 					//	  IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
999 					//	  hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
1000 					//	  hexseq = hex4 *(":" hex4)
1001 					//	  hex4 = 1*4HEXDIG
1002 					//	  path = <any UCS4 character except "#">
1003 					//	  UCS4 = <any UCS4 character>
1004 
1005 					// 1st Production (URL):
1006 					//	  "//" [domain / IPv6reference] ["/" *path]
1007 					//		  ["#" *UCS4]
1008 					//	becomes
1009 					//	  "file://" domain "/" *path ["#" *UCS4]
1010 					if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1011 					{
1012 						sal_Unicode const * p1 = pPos + 2;
1013                         while (p1 != pEnd && *p1 != '/' &&
1014                                *p1 != nFragmentDelimiter)
1015                         {
1016                             ++p1;
1017                         }
1018                         if (parseHostOrNetBiosName(
1019                                 pPos + 2, p1, bOctets, ENCODE_ALL,
1020                                 RTL_TEXTENCODING_DONTKNOW, true, NULL))
1021 						{
1022                             aSynAbsURIRef.
1023                                 appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1024 							pHostPortBegin = pPos + 2;
1025 							pHostPortEnd = p1;
1026 							pPos = p1;
1027 							break;
1028 						}
1029 					}
1030 
1031                     // 2nd Production (MS IE generated 1; FSYS_DOS only):
1032                     //    "//" ALPHA ":" ["/" *path] ["#" *UCS4]
1033 					//	becomes
1034 					//	  "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1035 					//	replacing "\" by "/" within <*path>
1036                     //
1037                     // 3rd Production (MS IE generated 2; FSYS_DOS only):
1038                     //    "//" ALPHA ":" ["\" *path] ["#" *UCS4]
1039 					//	becomes
1040 					//	  "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1041 					//	replacing "\" by "/" within <*path>
1042                     //
1043 					// 4th Production (miscounted slashes):
1044 					//	  "//" *path ["#" *UCS4]
1045 					//	becomes
1046 					//	  "file:///" *path ["#" *UCS4]
1047 					if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1048 					{
1049                         aSynAbsURIRef.
1050                             appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1051 						pPos += 2;
1052 						bSkippedInitialSlash = true;
1053                         if ((eStyle & FSYS_DOS) != 0
1054                             && pEnd - pPos >= 2
1055                             && INetMIME::isAlpha(pPos[0])
1056                             && pPos[1] == ':'
1057                             && (pEnd - pPos == 2
1058                                 || pPos[2] == '/' || pPos[2] == '\\'))
1059                             nAltSegmentDelimiter = '\\';
1060 						break;
1061 					}
1062 
1063 					// 5th Production (Unix):
1064 					//	  "/" *path ["#" *UCS4]
1065 					//	becomes
1066 					//	  "file:///" *path ["#" *UCS4]
1067 					if (pPos < pEnd && *pPos == '/')
1068                     {
1069                         aSynAbsURIRef.
1070                             appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1071 						break;
1072                     }
1073 
1074 					// 6th Production (UNC; FSYS_DOS only):
1075 					//	  "\\" domain ["\" *path] ["#" *UCS4]
1076 					//	becomes
1077 					//	  "file://" domain "/" *path ["#" *UCS4]
1078 					//	replacing "\" by "/" within <*path>
1079 					if (eStyle & FSYS_DOS
1080 						&& pEnd - pPos >= 2
1081 						&& pPos[0] == '\\'
1082 						&& pPos[1] == '\\')
1083 					{
1084 						sal_Unicode const * p1 = pPos + 2;
1085                         sal_Unicode const * pe = p1;
1086                         while (pe < pEnd && *pe != '\\' &&
1087                                *pe != nFragmentDelimiter)
1088                         {
1089                             ++pe;
1090                         }
1091                         if (
1092                              parseHostOrNetBiosName(
1093                                 p1, pe, bOctets, ENCODE_ALL,
1094                                 RTL_TEXTENCODING_DONTKNOW, true, NULL) ||
1095                              (scanDomain(p1, pe) > 0 && p1 == pe)
1096                            )
1097 						{
1098                             aSynAbsURIRef.
1099                                 appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1100 							pHostPortBegin = pPos + 2;
1101 							pHostPortEnd = pe;
1102 							pPos = pe;
1103 							nSegmentDelimiter = '\\';
1104 							break;
1105 						}
1106 					}
1107 
1108 					// 7th Production (Unix-like DOS; FSYS_DOS only):
1109 					//	  ALPHA ":" ["/" *path] ["#" *UCS4]
1110 					//	becomes
1111 					//	  "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1112 					//	replacing "\" by "/" within <*path>
1113 					//
1114 					// 8th Production (DOS; FSYS_DOS only):
1115 					//	  ALPHA ":" ["\" *path] ["#" *UCS4]
1116 					//	becomes
1117 					//	  "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1118 					//	replacing "\" by "/" within <*path>
1119 					if (eStyle & FSYS_DOS
1120 						&& pEnd - pPos >= 2
1121 						&& INetMIME::isAlpha(pPos[0])
1122 						&& pPos[1] == ':'
1123 						&& (pEnd - pPos == 2
1124                             || pPos[2] == '/'
1125                             || pPos[2] == '\\'))
1126 					{
1127                         aSynAbsURIRef.
1128                             appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1129 						nAltSegmentDelimiter = '\\';
1130 						bSkippedInitialSlash = true;
1131 						break;
1132 					}
1133 
1134 					// 9th Production (any):
1135 					//	  *path ["#" *UCS4]
1136 					//	becomes
1137 					//	  "file:///" *path ["#" *UCS4]
1138 					//	replacing the delimiter by "/" within <*path>.	The
1139 					//	delimiter is that character from the set { "/", "\",
1140 					//	":" } which appears most often in <*path> (if FSYS_UNX
1141 					//  is not among the style bits, "/" is removed from the
1142 					//  set; if FSYS_DOS is not among the style bits, "\" is
1143 					//  removed from the set; if FSYS_MAC is not among the
1144 					//  style bits, ":" is removed from the set).  If two or
1145 					//	more characters appear the same number of times, the
1146 					//	character mentioned first in that set is chosen.  If
1147 					//	the first character of <*path> is the delimiter, that
1148 					//	character is not copied.
1149 					if (eStyle & (FSYS_UNX | FSYS_DOS | FSYS_MAC))
1150 					{
1151                         aSynAbsURIRef.
1152                             appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1153 						switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
1154 						{
1155 							case FSYS_UNX:
1156 								nSegmentDelimiter = '/';
1157 								break;
1158 
1159 							case FSYS_DOS:
1160 								nSegmentDelimiter = '\\';
1161 								break;
1162 
1163 							case FSYS_MAC:
1164 								nSegmentDelimiter = ':';
1165 								break;
1166 
1167 							default:
1168 								DBG_ERROR(
1169 									"INetURLObject::setAbsURIRef():"
1170 									    " Bad guessFSysStyleByCounting");
1171 								break;
1172 						}
1173 						bSkippedInitialSlash
1174 							= pPos != pEnd && *pPos != nSegmentDelimiter;
1175 						break;
1176 					}
1177 				}
				// No break here: if bSmart is false, or none of the
				// productions above matched, INET_PROT_FILE continues with
				// the generic authority parsing below.
1178 			default:
1179 			{
1180                 // For INET_PROT_FILE, allow an empty authority ("//") to be
1181                 // missing if the following path starts with an explicit "/"
1182                 // (Java is notorious in generating such file URLs, so be
1183                 // liberal here):
1184 				if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1185 					pPos += 2;
1186                 else if (!bSmart
1187                          && !(m_eScheme == INET_PROT_FILE
1188                               && pPos != pEnd && *pPos == '/'))
1189 				{
1190 					setInvalid();
1191 					return false;
1192 				}
1193                 aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1194 
				// The authority extends up to the next "/", the query
				// delimiter (only for schemes that have a query part) or the
				// fragment delimiter.
1195 				sal_Unicode const * pAuthority = pPos;
1196 				sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1197 				while (pPos < pEnd && *pPos != '/' && *pPos != c
1198 					   && *pPos != nFragmentDelimiter)
1199 					++pPos;
				// Split the authority into user info and host/port,
				// depending on which of the two the scheme supports.
1200 				if (getSchemeInfo().m_bUser)
1201 					if (getSchemeInfo().m_bHost)
1202 					{
1203 						sal_Unicode const * p1 = pAuthority;
1204 						while (p1 < pPos && *p1 != '@')
1205 							++p1;
1206 						if (p1 == pPos)
1207 						{
1208 							pHostPortBegin = pAuthority;
1209 							pHostPortEnd = pPos;
1210 						}
1211 						else
1212 						{
1213 							pUserInfoBegin = pAuthority;
1214 							pUserInfoEnd = p1;
1215 							pHostPortBegin = p1 + 1;
1216 							pHostPortEnd = pPos;
1217 						}
1218 					}
1219 					else
1220 					{
1221 						pUserInfoBegin = pAuthority;
1222 						pUserInfoEnd = pPos;
1223 					}
1224 				else if (getSchemeInfo().m_bHost)
1225 				{
1226 					pHostPortBegin = pAuthority;
1227 					pHostPortEnd = pPos;
1228 				}
1229 				else if (pPos != pAuthority)
1230 				{
1231 					setInvalid();
1232 					return false;
1233 				}
1234 				break;
1235 			}
1236 		}
1237 
		// Parse the user info: <user>, optionally followed by
		// ":<password>" or ";AUTH=<auth>", depending on the scheme.
1238 		if (pUserInfoBegin)
1239 		{
1240 			Part ePart = m_eScheme == INET_PROT_IMAP ?
1241 							 PART_IMAP_ACHAR :
1242 						 m_eScheme == INET_PROT_VIM ?
1243 							 PART_VIM :
1244 							 PART_USER_PASSWORD;
1245 			bool bSupportsPassword = getSchemeInfo().m_bPassword;
1246 			bool bSupportsAuth
1247 				= !bSupportsPassword && getSchemeInfo().m_bAuth;
1248 			bool bHasAuth = false;
1249 			rtl::OUStringBuffer aSynUser;
1250 			sal_Unicode const * p1 = pUserInfoBegin;
1251 			while (p1 < pUserInfoEnd)
1252 			{
1253 				EscapeType eEscapeType;
1254 				sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1255 											 cEscapePrefix, eMechanism,
1256 											 eCharset, eEscapeType);
				// Only an unescaped ":" or ";auth=" ends the user name.
1257 				if (eEscapeType == ESCAPE_NO)
1258                 {
1259 					if (nUTF32 == ':' && bSupportsPassword)
1260 					{
1261 						bHasAuth = true;
1262 						break;
1263 					}
1264 					else if (nUTF32 == ';' && bSupportsAuth
1265 							 && pUserInfoEnd - p1
1266 									> RTL_CONSTASCII_LENGTH("auth=")
1267 							 && INetMIME::equalIgnoreCase(
1268 									p1,
1269 									p1 + RTL_CONSTASCII_LENGTH("auth="),
1270 									"auth="))
1271 					{
1272 						p1 += RTL_CONSTASCII_LENGTH("auth=");
1273 						bHasAuth = true;
1274 						break;
1275 					}
1276                 }
1277 				appendUCS4(aSynUser, nUTF32, eEscapeType, bOctets, ePart,
1278 						   cEscapePrefix, eCharset, false);
1279 			}
1280 			m_aUser.set(aSynAbsURIRef, aSynUser.makeStringAndClear(),
1281 				aSynAbsURIRef.getLength());
1282 			if (bHasAuth)
1283             {
1284 				if (bSupportsPassword)
1285 				{
1286 					aSynAbsURIRef.append(sal_Unicode(':'));
1287 					rtl::OUStringBuffer aSynAuth;
1288 					while (p1 < pUserInfoEnd)
1289 					{
1290 						EscapeType eEscapeType;
1291 						sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1292 													 cEscapePrefix,
1293 													 eMechanism, eCharset,
1294 													 eEscapeType);
1295 						appendUCS4(aSynAuth, nUTF32, eEscapeType, bOctets,
1296 								   ePart, cEscapePrefix, eCharset, false);
1297 					}
1298 					m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1299 						aSynAbsURIRef.getLength());
1300 				}
1301 				else
1302 				{
1303 					aSynAbsURIRef.
1304 						appendAscii(RTL_CONSTASCII_STRINGPARAM(";AUTH="));
1305 					rtl::OUStringBuffer aSynAuth;
1306 					while (p1 < pUserInfoEnd)
1307 					{
1308 						EscapeType eEscapeType;
1309 						sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd, bOctets,
1310 													 cEscapePrefix,
1311 													 eMechanism, eCharset,
1312 													 eEscapeType);
						// The auth value may only consist of characters
						// accepted by INetMIME::isIMAPAtomChar().
1313 						if (!INetMIME::isIMAPAtomChar(nUTF32))
1314 						{
1315 							setInvalid();
1316 							return false;
1317 						}
1318 						appendUCS4(aSynAuth, nUTF32, eEscapeType, bOctets,
1319 								   ePart, cEscapePrefix, eCharset, false);
1320 					}
1321 					m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1322 						aSynAbsURIRef.getLength());
1323 				}
1324             }
1325 			if (pHostPortBegin)
1326 				aSynAbsURIRef.append(sal_Unicode('@'));
1327 		}
1328 
		// Parse <host>[:<port>]
1329 		if (pHostPortBegin)
1330 		{
			// pPort will point at the ":" in front of the port, or stay at
			// pHostPortEnd if there is no port.
1331 			sal_Unicode const * pPort = pHostPortEnd;
1332 			if ( getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd )
1333 			{
				// Walk backwards over trailing digits; a ":" in front of
				// them starts the port.
1334 				sal_Unicode const * p1 = pHostPortEnd - 1;
1335 				while (p1 > pHostPortBegin && INetMIME::isDigit(*p1))
1336 					--p1;
1337 				if (*p1 == ':')
1338 					pPort = p1;
1339 			}
1340             bool bNetBiosName = false;
			// Whether an empty host is acceptable depends on the scheme.
1341 			switch (m_eScheme)
1342 			{
1343 				case INET_PROT_FILE:
1344 					// If the host equals "LOCALHOST" (unencoded and ignoring
1345 					// case), turn it into an empty host:
1346 					if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort,
1347 												  "localhost"))
1348 						pHostPortBegin = pPort;
1349                     bNetBiosName = true;
1350 					break;
1351 
1352 				case INET_PROT_LDAP:
1353                 case INET_PROT_SMB:
					// An empty host is allowed, but not together with a
					// port.
1354 					if (pHostPortBegin == pPort && pPort != pHostPortEnd)
1355 					{
1356 						setInvalid();
1357 						return false;
1358 					}
1359 					break;
1360 				default:
					// All other schemes require a non-empty host.
1361 					if (pHostPortBegin == pPort)
1362 					{
1363 						setInvalid();
1364 						return false;
1365 					}
1366 					break;
1367 			}
1368 			rtl::OUStringBuffer aSynHost;
1369 			if (!parseHostOrNetBiosName(
1370                     pHostPortBegin, pPort, bOctets, eMechanism, eCharset,
1371                     bNetBiosName, &aSynHost))
1372 			{
1373 				setInvalid();
1374 				return false;
1375 			}
1376 			m_aHost.set(aSynAbsURIRef, aSynHost.makeStringAndClear(),
1377 				aSynAbsURIRef.getLength());
1378 			if (pPort != pHostPortEnd)
1379 			{
1380 				aSynAbsURIRef.append(sal_Unicode(':'));
1381 				m_aPort.set(aSynAbsURIRef,
1382 					rtl::OUString(pPort + 1, pHostPortEnd - (pPort + 1)),
1383 					aSynAbsURIRef.getLength());
1384 			}
1385 		}
1386 	}
1387 
1388 	// Parse <path>
1389 	rtl::OUStringBuffer aSynPath;
1390 	if (!parsePath(m_eScheme, &pPos, pEnd, bOctets, eMechanism, eCharset,
1391 				   bSkippedInitialSlash, nSegmentDelimiter,
1392 				   nAltSegmentDelimiter,
1393 				   getSchemeInfo().m_bQuery ? '?' : 0x80000000,
1394 				   nFragmentDelimiter, aSynPath))
1395 	{
1396 		setInvalid();
1397 		return false;
1398 	}
1399 	m_aPath.set(aSynAbsURIRef, aSynPath.makeStringAndClear(),
1400 		aSynAbsURIRef.getLength());
1401 
1402 	// Parse ?<query>
1403 	if (getSchemeInfo().m_bQuery && pPos < pEnd && *pPos == '?')
1404 	{
1405 		aSynAbsURIRef.append(sal_Unicode('?'));
1406 		rtl::OUStringBuffer aSynQuery;
1407 		for (++pPos; pPos < pEnd && *pPos != nFragmentDelimiter;)
1408 		{
1409 			EscapeType eEscapeType;
1410 			sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, cEscapePrefix,
1411 										 eMechanism, eCharset, eEscapeType);
1412 			appendUCS4(aSynQuery, nUTF32, eEscapeType, bOctets,
1413 					   PART_URIC, cEscapePrefix, eCharset, true);
1414 		}
1415 		m_aQuery.set(aSynAbsURIRef, aSynQuery.makeStringAndClear(),
1416 			aSynAbsURIRef.getLength());
1417 	}
1418 
1419 	// Parse #<fragment>
1420 	if (pPos < pEnd && *pPos == nFragmentDelimiter)
1421 	{
1422 		aSynAbsURIRef.append(sal_Unicode(nFragmentDelimiter));
1423 		rtl::OUStringBuffer aSynFragment;
1424 		for (++pPos; pPos < pEnd;)
1425 		{
1426 			EscapeType eEscapeType;
1427 			sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, cEscapePrefix,
1428 										 eMechanism, eCharset, eEscapeType);
1429 			appendUCS4(aSynFragment, nUTF32, eEscapeType, bOctets, PART_URIC,
1430 					   cEscapePrefix, eCharset, true);
1431 		}
1432 		m_aFragment.set(aSynAbsURIRef, aSynFragment.makeStringAndClear(),
1433 			aSynAbsURIRef.getLength());
1434 	}
1435 
	// Input that is left over at this point could not be parsed.
1436 	if (pPos != pEnd)
1437 	{
1438 		setInvalid();
1439 		return false;
1440 	}
1441 
	// Everything was parsed successfully: commit the normalized text.
1442 	m_aAbsURIRef = aSynAbsURIRef;
1443 
1444 	return true;
1445 }
1446 
1447 //============================================================================
convertRelToAbs(rtl::OUString const & rTheRelURIRef,bool bOctets,INetURLObject & rTheAbsURIRef,bool & rWasAbsolute,EncodeMechanism eMechanism,rtl_TextEncoding eCharset,bool bIgnoreFragment,bool bSmart,bool bRelativeNonURIs,FSysStyle eStyle) const1448 bool INetURLObject::convertRelToAbs(rtl::OUString const & rTheRelURIRef,
1449 									bool bOctets,
1450 									INetURLObject & rTheAbsURIRef,
1451 									bool & rWasAbsolute,
1452 									EncodeMechanism eMechanism,
1453 									rtl_TextEncoding eCharset,
1454 									bool bIgnoreFragment, bool bSmart,
1455 									bool bRelativeNonURIs, FSysStyle eStyle)
1456 	const
1457 {
1458 	sal_Unicode const * p = rTheRelURIRef.getStr();
1459 	sal_Unicode const * pEnd = p + rTheRelURIRef.getLength();
1460 
1461 	sal_Unicode const * pPrefixBegin = p;
1462 	PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd);
1463     bool hasScheme = pPrefix != 0;
1464     if (!hasScheme) {
1465         pPrefixBegin = p;
1466         hasScheme = parseScheme(&pPrefixBegin, pEnd, '#').getLength() > 0;
1467     }
1468 
1469 	sal_uInt32 nSegmentDelimiter = '/';
1470 	sal_uInt32 nQueryDelimiter
1471 		= !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1472 	sal_uInt32 nFragmentDelimiter = '#';
1473 	Part ePart = PART_VISIBLE;
1474 
1475 	if (!hasScheme && bSmart)
1476 	{
1477 		// If the input matches any of the following productions (for which
1478 		// the appropriate style bit is set in eStyle), it is assumed to be an
1479 		// absolute file system path, rather than a relative URI reference.
1480 		// (This is only a subset of the productions used for scheme detection
1481 		// in INetURLObject::setAbsURIRef(), because most of those productions
1482 		// interfere with the syntax of relative URI references.)  The
1483 		// productions use the auxiliary rules
1484 		//
1485 		//	  domain = label *("." label)
1486 		//	  label = alphanum [*(alphanum / "-") alphanum]
1487 		//	  alphanum = ALPHA / DIGIT
1488 		//	  UCS4 = <any UCS4 character>
1489 		//
1490 		// 1st Production (UNC file; FSYS_DOS only):
1491 		//	  "\\" domain ["\" *UCS4]
1492 		//
1493 		// 2nd Production (Unix-like DOS file; FSYS_DOS only):
1494 		//	  ALPHA ":" ["/" *UCS4]
1495 		//
1496 		// 3rd Production (DOS file; FSYS_DOS only):
1497 		//	  ALPHA ":" ["\" *UCS4]
1498 		if (eStyle & FSYS_DOS)
1499 		{
1500 			bool bFSys = false;
1501 			sal_Unicode const * q = p;
1502 			if (pEnd - q >= 2
1503 				&& INetMIME::isAlpha(q[0])
1504 				&& q[1] == ':'
1505                 && (pEnd - q == 2 || q[2] == '/' || q[2] == '\\'))
1506 				bFSys = true; // 2nd, 3rd
1507 			else if (pEnd - q >= 2 && q[0] == '\\' && q[1] == '\\')
1508 			{
1509 				q += 2;
1510                 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
1511                     q, pEnd - q, '\\');
1512                 sal_Unicode const * qe = n == -1 ? pEnd : q + n;
1513                 if (parseHostOrNetBiosName(
1514                         q, qe, bOctets, ENCODE_ALL, RTL_TEXTENCODING_DONTKNOW,
1515                         true, NULL))
1516                 {
1517 					bFSys = true; // 1st
1518                 }
1519 			}
1520 			if (bFSys)
1521 			{
1522 				INetURLObject aNewURI;
1523 				aNewURI.setAbsURIRef(rTheRelURIRef, bOctets, eMechanism,
1524 									 eCharset, true, eStyle);
1525 				if (!aNewURI.HasError())
1526 				{
1527 					rTheAbsURIRef = aNewURI;
1528 					rWasAbsolute = true;
1529 					return true;
1530 				}
1531 			}
1532 		}
1533 
1534 		// When the base URL is a file URL, accept relative file system paths
1535 		// using "\" or ":" as delimiter (and ignoring URI conventions for "%"
1536 		// and "#"), as well as relative URIs using "/" as delimiter:
1537 		if (m_eScheme == INET_PROT_FILE)
1538 			switch (guessFSysStyleByCounting(p, pEnd, eStyle))
1539 			{
1540 				case FSYS_UNX:
1541 					nSegmentDelimiter = '/';
1542 					break;
1543 
1544 				case FSYS_DOS:
1545 					nSegmentDelimiter = '\\';
1546 					bRelativeNonURIs = true;
1547 					break;
1548 
1549 				case FSYS_MAC:
1550 					nSegmentDelimiter = ':';
1551 					bRelativeNonURIs = true;
1552 					break;
1553 
1554 				default:
1555 					DBG_ERROR("INetURLObject::convertRelToAbs():"
1556 							      " Bad guessFSysStyleByCounting");
1557 					break;
1558 			}
1559 
1560 		if (bRelativeNonURIs)
1561 		{
1562 			eMechanism = ENCODE_ALL;
1563 			nQueryDelimiter = 0x80000000;
1564 			nFragmentDelimiter = 0x80000000;
1565 			ePart = PART_VISIBLE_NONSPECIAL;
1566 		}
1567 	}
1568 
1569 	// If the relative URI has the same scheme as the base URI, and that
1570 	// scheme is hierarchical, then ignore its presence in the relative
1571 	// URI in order to be backward compatible (cf. RFC 2396 section 5.2
1572 	// step 3):
1573 	if (pPrefix && pPrefix->m_eScheme == m_eScheme
1574 		&& getSchemeInfo().m_bHierarchical)
1575 	{
1576 		hasScheme = false;
1577 		while (p != pEnd && *p++ != ':') ;
1578 	}
1579 	rWasAbsolute = hasScheme;
1580 
1581 	// Fast solution for non-relative URIs:
1582 	if (hasScheme)
1583 	{
1584 		INetURLObject aNewURI(rTheRelURIRef, eMechanism, eCharset);
1585 		if (aNewURI.HasError())
1586 		{
1587 			rWasAbsolute = false;
1588 			return false;
1589 		}
1590 
1591 		if (bIgnoreFragment)
1592 			aNewURI.clearFragment();
1593 		rTheAbsURIRef = aNewURI;
1594 		return true;
1595 	}
1596 
1597 	enum State { STATE_AUTH, STATE_ABS_PATH, STATE_REL_PATH, STATE_FRAGMENT,
1598 				 STATE_DONE };
1599 
1600 	rtl::OUStringBuffer aSynAbsURIRef;
1601 	// make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
1602 	// is empty ("") in that case, so take the scheme from m_aAbsURIRef
1603 	if (m_eScheme != INET_PROT_GENERIC)
1604 	{
1605 		aSynAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1606 	}
1607 	else
1608 	{
1609 		sal_Unicode const * pSchemeBegin
1610 			= m_aAbsURIRef.getStr();
1611 		sal_Unicode const * pSchemeEnd = pSchemeBegin;
1612 		while (pSchemeEnd[0] != ':')
1613 		{
1614 			++pSchemeEnd;
1615 		}
1616 		aSynAbsURIRef.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
1617 	}
1618 	aSynAbsURIRef.append(sal_Unicode(':'));
1619 
1620 	sal_Char cEscapePrefix = getEscapePrefix();
1621 
1622 	State eState = STATE_AUTH;
1623 	bool bSameDoc = true;
1624 
1625 	if (getSchemeInfo().m_bAuthority)
1626     {
1627 		if (pEnd - p >= 2 && p[0] == '/' && p[1] == '/')
1628 		{
1629 			aSynAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
1630 			p += 2;
1631 			eState = STATE_ABS_PATH;
1632 			bSameDoc = false;
1633 			while (p != pEnd)
1634 			{
1635 				EscapeType eEscapeType;
1636 				sal_uInt32 nUTF32
1637 					= getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1638 							   eCharset, eEscapeType);
1639 				if (eEscapeType == ESCAPE_NO)
1640                 {
1641 					if (nUTF32 == nSegmentDelimiter)
1642 						break;
1643 					else if (nUTF32 == nFragmentDelimiter)
1644 					{
1645 						eState = STATE_FRAGMENT;
1646 						break;
1647 					}
1648                 }
1649 				appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets,
1650 						   PART_VISIBLE, cEscapePrefix, eCharset, true);
1651 			}
1652 		}
1653 		else
1654 		{
1655 			SubString aAuthority(getAuthority());
1656 			aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1657 								     + aAuthority.getBegin(),
1658 								 aAuthority.getLength());
1659 		}
1660     }
1661 
1662 	if (eState == STATE_AUTH)
1663     {
1664 		if (p == pEnd)
1665 			eState = STATE_DONE;
1666 		else if (*p == nFragmentDelimiter)
1667 		{
1668 			++p;
1669 			eState = STATE_FRAGMENT;
1670 		}
1671 		else if (*p == nSegmentDelimiter)
1672 		{
1673 			++p;
1674 			eState = STATE_ABS_PATH;
1675 			bSameDoc = false;
1676 		}
1677 		else
1678 		{
1679 			eState = STATE_REL_PATH;
1680 			bSameDoc = false;
1681 		}
1682     }
1683 
1684 	if (eState == STATE_ABS_PATH)
1685 	{
1686 		aSynAbsURIRef.append(sal_Unicode('/'));
1687 		eState = STATE_DONE;
1688 		while (p != pEnd)
1689 		{
1690 			EscapeType eEscapeType;
1691 			sal_uInt32 nUTF32
1692 				= getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1693 						   eCharset, eEscapeType);
1694 			if (eEscapeType == ESCAPE_NO)
1695             {
1696 				if (nUTF32 == nFragmentDelimiter)
1697 				{
1698 					eState = STATE_FRAGMENT;
1699 					break;
1700 				}
1701 				else if (nUTF32 == nSegmentDelimiter)
1702 					nUTF32 = '/';
1703             }
1704 			appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1705 					   cEscapePrefix, eCharset, true);
1706 		}
1707 	}
1708 	else if (eState == STATE_REL_PATH)
1709 	{
1710 		if (!getSchemeInfo().m_bHierarchical)
1711 		{
1712             // Detect cases where a relative input could not be made absolute
1713             // because the given base URL is broken (most probably because it is
1714             // empty):
1715             OSL_ASSERT(!HasError());
1716 			rWasAbsolute = false;
1717 			return false;
1718 		}
1719 
1720 		sal_Unicode const * pBasePathBegin
1721 			= m_aAbsURIRef.getStr() + m_aPath.getBegin();
1722 		sal_Unicode const * pBasePathEnd
1723 			= pBasePathBegin + m_aPath.getLength();
1724 		while (pBasePathEnd != pBasePathBegin)
1725 			if (*(--pBasePathEnd) == '/')
1726 			{
1727 				++pBasePathEnd;
1728 				break;
1729 			}
1730 
1731 		sal_Int32 nPathBegin = aSynAbsURIRef.getLength();
1732 		aSynAbsURIRef.append(pBasePathBegin, pBasePathEnd - pBasePathBegin);
1733 		DBG_ASSERT(aSynAbsURIRef.getLength() > nPathBegin
1734 				 && aSynAbsURIRef.charAt(aSynAbsURIRef.getLength() - 1) == '/',
1735 				 "INetURLObject::convertRelToAbs(): Bad base path");
1736 
1737 		while (p != pEnd && *p != nQueryDelimiter && *p != nFragmentDelimiter)
1738 		{
1739 			if (*p == '.')
1740             {
1741 				if (pEnd - p == 1
1742 					|| p[1] == nSegmentDelimiter
1743 					|| p[1] == nQueryDelimiter
1744 					|| p[1] == nFragmentDelimiter)
1745 				{
1746 					++p;
1747 					if (p != pEnd && *p == nSegmentDelimiter)
1748 						++p;
1749 					continue;
1750 				}
1751 				else if (pEnd - p >= 2
1752 						 && p[1] == '.'
1753 						 && (pEnd - p == 2
1754 							 || p[2] == nSegmentDelimiter
1755 							 || p[2] == nQueryDelimiter
1756 							 || p[2] == nFragmentDelimiter)
1757 						 && aSynAbsURIRef.getLength() - nPathBegin > 1)
1758 				{
1759 					p += 2;
1760 					if (p != pEnd && *p == nSegmentDelimiter)
1761 						++p;
1762 
1763 					sal_Int32 i = aSynAbsURIRef.getLength() - 2;
1764 					while (i > nPathBegin && aSynAbsURIRef.charAt(i) != '/')
1765 						--i;
1766 					aSynAbsURIRef.setLength(i + 1);
1767 					DBG_ASSERT(
1768 						aSynAbsURIRef.getLength() > nPathBegin
1769 						&& aSynAbsURIRef.charAt(aSynAbsURIRef.getLength() - 1)
1770 						       == '/',
1771 						"INetURLObject::convertRelToAbs(): Bad base path");
1772 					continue;
1773 				}
1774             }
1775 
1776 			while (p != pEnd
1777 				   && *p != nSegmentDelimiter
1778 				   && *p != nQueryDelimiter
1779 				   && *p != nFragmentDelimiter)
1780 			{
1781 				EscapeType eEscapeType;
1782 				sal_uInt32 nUTF32
1783 					= getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1784 							   eCharset, eEscapeType);
1785 				appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1786 						   cEscapePrefix, eCharset, true);
1787 			}
1788 			if (p != pEnd && *p == nSegmentDelimiter)
1789 			{
1790 				aSynAbsURIRef.append(sal_Unicode('/'));
1791 				++p;
1792 			}
1793 		}
1794 
1795 		while (p != pEnd && *p != nFragmentDelimiter)
1796 		{
1797 			EscapeType eEscapeType;
1798 			sal_uInt32 nUTF32
1799 				= getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1800 						   eCharset, eEscapeType);
1801 			appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets, ePart,
1802 					   cEscapePrefix, eCharset, true);
1803 		}
1804 
1805 		if (p == pEnd)
1806 			eState = STATE_DONE;
1807 		else
1808 		{
1809 			++p;
1810 			eState = STATE_FRAGMENT;
1811 		}
1812 	}
1813 	else if (bSameDoc)
1814 	{
1815 		aSynAbsURIRef.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
1816 							 m_aPath.getLength());
1817 		if (m_aQuery.isPresent())
1818 			aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1819 								     + m_aQuery.getBegin() - 1,
1820 								 m_aQuery.getLength() + 1);
1821 	}
1822 
1823 	if (eState == STATE_FRAGMENT && !bIgnoreFragment)
1824 	{
1825 		aSynAbsURIRef.append(sal_Unicode('#'));
1826 		while (p != pEnd)
1827 		{
1828 			EscapeType eEscapeType;
1829 			sal_uInt32 nUTF32
1830 				= getUTF32(p, pEnd, bOctets, cEscapePrefix, eMechanism,
1831 						   eCharset, eEscapeType);
1832 			appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, bOctets,
1833 					   PART_VISIBLE, cEscapePrefix, eCharset, true);
1834 		}
1835 	}
1836 
1837 	INetURLObject aNewURI(aSynAbsURIRef.makeStringAndClear());
1838 	if (aNewURI.HasError())
1839 	{
1840         // Detect cases where a relative input could not be made absolute
1841         // because the given base URL is broken (most probably because it is
1842         // empty):
1843         OSL_ASSERT(!HasError());
1844 		rWasAbsolute = false;
1845 		return false;
1846 	}
1847 
1848 	rTheAbsURIRef = aNewURI;
1849 	return true;
1850 }
1851 
1852 //============================================================================
convertAbsToRel(rtl::OUString const & rTheAbsURIRef,bool bOctets,rtl::OUString & rTheRelURIRef,EncodeMechanism eEncodeMechanism,DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset,FSysStyle eStyle) const1853 bool INetURLObject::convertAbsToRel(rtl::OUString const & rTheAbsURIRef,
1854 									bool bOctets, rtl::OUString & rTheRelURIRef,
1855 									EncodeMechanism eEncodeMechanism,
1856 									DecodeMechanism eDecodeMechanism,
1857 									rtl_TextEncoding eCharset,
1858 									FSysStyle eStyle) const
1859 {
1860 	// Check for hierarchical base URL:
1861 	if (!getSchemeInfo().m_bHierarchical)
1862 	{
1863 		rTheRelURIRef
1864 			= decode(rTheAbsURIRef,
1865 					 getEscapePrefix(CompareProtocolScheme(rTheAbsURIRef)),
1866 					 eDecodeMechanism, eCharset);
1867 		return false;
1868 	}
1869 
1870 	// Convert the input (absolute or relative URI ref) to an absolute URI
1871     // ref:
1872 	INetURLObject aSubject;
1873 	bool bWasAbsolute;
1874 	if (!convertRelToAbs(rTheAbsURIRef, bOctets, aSubject, bWasAbsolute,
1875 						 eEncodeMechanism, eCharset, false, false, false,
1876 						 eStyle))
1877 	{
1878 		rTheRelURIRef
1879 			= decode(rTheAbsURIRef,
1880 					 getEscapePrefix(CompareProtocolScheme(rTheAbsURIRef)),
1881 					 eDecodeMechanism, eCharset);
1882 		return false;
1883 	}
1884 
1885 	// Check for differing scheme or authority parts:
1886     if ((m_aScheme.compare(
1887              aSubject.m_aScheme, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1888          != 0)
1889         || (m_aUser.compare(
1890                 aSubject.m_aUser, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1891             != 0)
1892         || (m_aAuth.compare(
1893                 aSubject.m_aAuth, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1894             != 0)
1895         || (m_aHost.compare(
1896                 aSubject.m_aHost, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1897             != 0)
1898         || (m_aPort.compare(
1899                 aSubject.m_aPort, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1900             != 0))
1901 	{
1902 		rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1903 		return false;
1904 	}
1905 
1906 	sal_Unicode const * pBasePathBegin
1907 		= m_aAbsURIRef.getStr() + m_aPath.getBegin();
1908 	sal_Unicode const * pBasePathEnd = pBasePathBegin + m_aPath.getLength();
1909 	sal_Unicode const * pSubjectPathBegin
1910 		= aSubject.m_aAbsURIRef.getStr() + aSubject.m_aPath.getBegin();
1911 	sal_Unicode const * pSubjectPathEnd
1912 		= pSubjectPathBegin + aSubject.m_aPath.getLength();
1913 
1914 	// Make nMatch point past the last matching slash, or past the end of the
1915 	// paths, in case they are equal:
1916 	sal_Unicode const * pSlash = 0;
1917 	sal_Unicode const * p1 = pBasePathBegin;
1918 	sal_Unicode const * p2 = pSubjectPathBegin;
1919 	for (;;)
1920 	{
1921 		if (p1 == pBasePathEnd || p2 == pSubjectPathEnd)
1922 		{
1923 			if (p1 == pBasePathEnd && p2 == pSubjectPathEnd)
1924 				pSlash = p1;
1925 			break;
1926 		}
1927 
1928 		sal_Unicode c = *p1++;
1929 		if (c != *p2++)
1930 			break;
1931 		if (c == '/')
1932 			pSlash = p1;
1933 	}
1934 	if (!pSlash)
1935 	{
1936 		// One of the paths does not start with '/':
1937 		rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1938 		return false;
1939 	}
1940 	sal_Int32 nMatch = pSlash - pBasePathBegin;
1941 
1942     // If the two URLs are DOS file URLs starting with different volumes
1943     // (e.g., file:///a:/... and file:///b:/...), the subject is not made
1944     // relative (it could be, but some people do not like that):
1945     if (m_eScheme == INET_PROT_FILE
1946         && nMatch <= 1
1947         && hasDosVolume(eStyle)
1948         && aSubject.hasDosVolume(eStyle)) //TODO! ok to use eStyle for these?
1949 	{
1950 		rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1951 		return false;
1952 	}
1953 
1954 	// For every slash in the base path after nMatch, a prefix of "../" is
1955 	// added to the new relative URL (if the common prefix of the two paths is
1956 	// only "/"---but see handling of file URLs above---, the complete subject
1957 	// path could go into the new relative URL instead, but some people don't
1958 	// like that):
1959 	rtl::OUStringBuffer aSynRelURIRef;
1960 //	if (nMatch <= 1) nMatch = 0; else // see comment above
1961 	for (sal_Unicode const * p = pBasePathBegin + nMatch; p != pBasePathEnd;
1962 		 ++p)
1963 	{
1964 		if (*p == '/')
1965 			aSynRelURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("../"));
1966 	}
1967 
1968 	// If the new relative URL would start with "//" (i.e., it would be
1969 	// mistaken for a relative URL starting with an authority part), or if the
1970 	// new relative URL would neither be empty nor start with <"/"> nor start
1971 	// with <1*rseg> (i.e., it could be mistaken for an absolute URL starting
1972 	// with a scheme part), then the new relative URL is prefixed with "./":
1973 	if (aSynRelURIRef.getLength() == 0)
1974     {
1975 		if (pSubjectPathEnd - pSubjectPathBegin >= nMatch + 2
1976 			&& pSubjectPathBegin[nMatch] == '/'
1977 			&& pSubjectPathBegin[nMatch + 1] == '/')
1978         {
1979 			aSynRelURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("./"));
1980         }
1981 		else
1982         {
1983 			for (sal_Unicode const * p = pSubjectPathBegin + nMatch;
1984 				 p != pSubjectPathEnd && *p != '/'; ++p)
1985             {
1986 				if (mustEncode(*p, PART_REL_SEGMENT_EXTRA))
1987 				{
1988 					aSynRelURIRef.
1989 						appendAscii(RTL_CONSTASCII_STRINGPARAM("./"));
1990 					break;
1991 				}
1992             }
1993         }
1994     }
1995 
1996 	// The remainder of the subject path, starting at nMatch, is appended to
1997 	// the new relative URL:
1998 	sal_Char cEscapePrefix = getEscapePrefix();
1999 	aSynRelURIRef.append(decode(pSubjectPathBegin + nMatch, pSubjectPathEnd,
2000 							cEscapePrefix, eDecodeMechanism, eCharset));
2001 
2002 	// If the subject has defined query or fragment parts, they are appended
2003 	// to the new relative URL:
2004 	if (aSubject.m_aQuery.isPresent())
2005 	{
2006 		aSynRelURIRef.append(sal_Unicode('?'));
2007 		aSynRelURIRef.append(aSubject.decode(aSubject.m_aQuery, cEscapePrefix,
2008 										 eDecodeMechanism, eCharset));
2009 	}
2010 	if (aSubject.m_aFragment.isPresent())
2011 	{
2012 		aSynRelURIRef.append(sal_Unicode('#'));
2013 		aSynRelURIRef.append(aSubject.decode(aSubject.m_aFragment,
2014 			cEscapePrefix, eDecodeMechanism, eCharset));
2015 	}
2016 
2017 	rTheRelURIRef = aSynRelURIRef.makeStringAndClear();
2018 	return true;
2019 }
2020 
2021 //============================================================================
2022 // static
convertIntToExt(rtl::OUString const & rTheIntURIRef,bool bOctets,rtl::OUString & rTheExtURIRef,DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset)2023 bool INetURLObject::convertIntToExt(rtl::OUString const & rTheIntURIRef,
2024 									bool bOctets, rtl::OUString & rTheExtURIRef,
2025 									DecodeMechanism eDecodeMechanism,
2026 									rtl_TextEncoding eCharset)
2027 {
2028 	sal_Char cEscapePrefix
2029 		= getEscapePrefix(CompareProtocolScheme(rTheIntURIRef));
2030 	rtl::OUString aSynExtURIRef(encodeText(rTheIntURIRef, bOctets, PART_VISIBLE,
2031 									   cEscapePrefix, NOT_CANONIC, eCharset,
2032 									   true));
2033 	sal_Unicode const * pBegin = aSynExtURIRef.getStr();
2034 	sal_Unicode const * pEnd = pBegin + aSynExtURIRef.getLength();
2035 	sal_Unicode const * p = pBegin;
2036 	PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2037 	bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::INTERNAL;
2038 	if (bConvert)
2039 	{
2040 		aSynExtURIRef =
2041 			aSynExtURIRef.replaceAt(0, p - pBegin,
2042 				rtl::OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2043 	}
2044 	rTheExtURIRef = decode(aSynExtURIRef, cEscapePrefix, eDecodeMechanism,
2045 						   eCharset);
2046 	return bConvert;
2047 }
2048 
2049 //============================================================================
2050 // static
convertExtToInt(rtl::OUString const & rTheExtURIRef,bool bOctets,rtl::OUString & rTheIntURIRef,DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset)2051 bool INetURLObject::convertExtToInt(rtl::OUString const & rTheExtURIRef,
2052 									bool bOctets, rtl::OUString & rTheIntURIRef,
2053 									DecodeMechanism eDecodeMechanism,
2054 									rtl_TextEncoding eCharset)
2055 {
2056 	sal_Char cEscapePrefix
2057 		= getEscapePrefix(CompareProtocolScheme(rTheExtURIRef));
2058 	rtl::OUString aSynIntURIRef(encodeText(rTheExtURIRef, bOctets, PART_VISIBLE,
2059 									   cEscapePrefix, NOT_CANONIC, eCharset,
2060 									   true));
2061 	sal_Unicode const * pBegin = aSynIntURIRef.getStr();
2062 	sal_Unicode const * pEnd = pBegin + aSynIntURIRef.getLength();
2063 	sal_Unicode const * p = pBegin;
2064 	PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2065 	bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::EXTERNAL;
2066 	if (bConvert)
2067 	{
2068 		aSynIntURIRef =
2069 			aSynIntURIRef.replaceAt(0, p - pBegin,
2070 				rtl::OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2071 	}
2072 	rTheIntURIRef = decode(aSynIntURIRef, cEscapePrefix, eDecodeMechanism,
2073 						   eCharset);
2074 	return bConvert;
2075 }
2076 
2077 //============================================================================
2078 // static
2079 INetURLObject::PrefixInfo const *
getPrefix(sal_Unicode const * & rBegin,sal_Unicode const * pEnd)2080 INetURLObject::getPrefix(sal_Unicode const *& rBegin,
2081 						 sal_Unicode const * pEnd)
2082 {
2083 	static PrefixInfo const aMap[]
2084 		= { // dummy entry at front needed, because pLast may point here:
2085 			{ 0, 0, INET_PROT_NOT_VALID, PrefixInfo::INTERNAL },
2086 			{ ".component:", "staroffice.component:", INET_PROT_COMPONENT,
2087 			  PrefixInfo::INTERNAL },
2088 			{ ".uno:", "staroffice.uno:", INET_PROT_UNO,
2089 			  PrefixInfo::INTERNAL },
2090 			{ "cid:", 0, INET_PROT_CID, PrefixInfo::OFFICIAL },
2091 			{ "data:", 0, INET_PROT_DATA, PrefixInfo::OFFICIAL },
2092 			{ "db:", "staroffice.db:", INET_PROT_DB, PrefixInfo::INTERNAL },
2093 			{ "file:", 0, INET_PROT_FILE, PrefixInfo::OFFICIAL },
2094 			{ "ftp:", 0, INET_PROT_FTP, PrefixInfo::OFFICIAL },
2095 			{ "hid:", "staroffice.hid:", INET_PROT_HID,
2096 			  PrefixInfo::INTERNAL },
2097 			{ "http:", 0, INET_PROT_HTTP, PrefixInfo::OFFICIAL },
2098 			{ "https:", 0, INET_PROT_HTTPS, PrefixInfo::OFFICIAL },
2099 			{ "imap:", 0, INET_PROT_IMAP, PrefixInfo::OFFICIAL },
2100 			{ "javascript:", 0, INET_PROT_JAVASCRIPT, PrefixInfo::OFFICIAL },
2101 			{ "ldap:", 0, INET_PROT_LDAP, PrefixInfo::OFFICIAL },
2102 			{ "macro:", "staroffice.macro:", INET_PROT_MACRO,
2103 			  PrefixInfo::INTERNAL },
2104 			{ "mailto:", 0, INET_PROT_MAILTO, PrefixInfo::OFFICIAL },
2105 			{ "news:", 0, INET_PROT_NEWS, PrefixInfo::OFFICIAL },
2106 			{ "out:", "staroffice.out:", INET_PROT_OUT,
2107 			  PrefixInfo::INTERNAL },
2108 			{ "pop3:", "staroffice.pop3:", INET_PROT_POP3,
2109 			  PrefixInfo::INTERNAL },
2110 			{ "private:", "staroffice.private:", INET_PROT_PRIV_SOFFICE,
2111 			  PrefixInfo::INTERNAL },
2112 			{ "private:factory/", "staroffice.factory:",
2113 			  INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL },
2114 			{ "private:helpid/", "staroffice.helpid:", INET_PROT_PRIV_SOFFICE,
2115 			  PrefixInfo::INTERNAL },
2116 			{ "private:java/", "staroffice.java:", INET_PROT_PRIV_SOFFICE,
2117 			  PrefixInfo::INTERNAL },
2118 			{ "private:searchfolder:", "staroffice.searchfolder:",
2119 			  INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL },
2120 			{ "private:trashcan:", "staroffice.trashcan:",
2121 			  INET_PROT_PRIV_SOFFICE, PrefixInfo::INTERNAL },
2122 			{ "slot:", "staroffice.slot:", INET_PROT_SLOT,
2123 			  PrefixInfo::INTERNAL },
2124             { "smb:", 0, INET_PROT_SMB, PrefixInfo::OFFICIAL },
2125 			{ "staroffice.component:", ".component:", INET_PROT_COMPONENT,
2126 			  PrefixInfo::EXTERNAL },
2127 			{ "staroffice.db:", "db:", INET_PROT_DB, PrefixInfo::EXTERNAL },
2128 			{ "staroffice.factory:", "private:factory/",
2129 			  INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL },
2130 			{ "staroffice.helpid:", "private:helpid/", INET_PROT_PRIV_SOFFICE,
2131 			  PrefixInfo::EXTERNAL },
2132 			{ "staroffice.hid:", "hid:", INET_PROT_HID,
2133 			  PrefixInfo::EXTERNAL },
2134 			{ "staroffice.java:", "private:java/", INET_PROT_PRIV_SOFFICE,
2135 			  PrefixInfo::EXTERNAL },
2136 			{ "staroffice.macro:", "macro:", INET_PROT_MACRO,
2137 			  PrefixInfo::EXTERNAL },
2138 			{ "staroffice.out:", "out:", INET_PROT_OUT,
2139 			  PrefixInfo::EXTERNAL },
2140 			{ "staroffice.pop3:", "pop3:", INET_PROT_POP3,
2141 			  PrefixInfo::EXTERNAL },
2142 			{ "staroffice.private:", "private:", INET_PROT_PRIV_SOFFICE,
2143 			  PrefixInfo::EXTERNAL },
2144 			{ "staroffice.searchfolder:", "private:searchfolder:",
2145 			  INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL },
2146 			{ "staroffice.slot:", "slot:", INET_PROT_SLOT,
2147 			  PrefixInfo::EXTERNAL },
2148 			{ "staroffice.trashcan:", "private:trashcan:",
2149 			  INET_PROT_PRIV_SOFFICE, PrefixInfo::EXTERNAL },
2150 			{ "staroffice.uno:", ".uno:", INET_PROT_UNO,
2151 			  PrefixInfo::EXTERNAL },
2152 			{ "staroffice.vim:", "vim:", INET_PROT_VIM,
2153 			  PrefixInfo::EXTERNAL },
2154 			{ "staroffice:", "private:", INET_PROT_PRIV_SOFFICE,
2155 			  PrefixInfo::EXTERNAL },
2156             { "telnet:", 0, INET_PROT_TELNET, PrefixInfo::OFFICIAL },
2157 			{ "vim:", "staroffice.vim:", INET_PROT_VIM,
2158 			  PrefixInfo::INTERNAL },
2159 			{ "vnd.sun.star.cmd:", 0, INET_PROT_VND_SUN_STAR_CMD,
2160 			  PrefixInfo::OFFICIAL },
2161 			{ "vnd.sun.star.expand:", 0, INET_PROT_VND_SUN_STAR_EXPAND,
2162 			  PrefixInfo::OFFICIAL },
2163 			{ "vnd.sun.star.help:", 0, INET_PROT_VND_SUN_STAR_HELP,
2164 			  PrefixInfo::OFFICIAL },
2165 			{ "vnd.sun.star.hier:", 0, INET_PROT_VND_SUN_STAR_HIER,
2166 			  PrefixInfo::OFFICIAL },
2167 			{ "vnd.sun.star.odma:", 0, INET_PROT_VND_SUN_STAR_ODMA,
2168 			  PrefixInfo::OFFICIAL },
2169 			{ "vnd.sun.star.pkg:", 0, INET_PROT_VND_SUN_STAR_PKG,
2170 			  PrefixInfo::OFFICIAL },
2171             { "vnd.sun.star.tdoc:", 0, INET_PROT_VND_SUN_STAR_TDOC,
2172               PrefixInfo::OFFICIAL },
2173 			{ "vnd.sun.star.webdav:", 0, INET_PROT_VND_SUN_STAR_WEBDAV,
2174 			  PrefixInfo::OFFICIAL } };
2175 	PrefixInfo const * pFirst = aMap + 1;
2176 	PrefixInfo const * pLast = aMap + sizeof aMap / sizeof (PrefixInfo) - 1;
2177 	PrefixInfo const * pMatch = 0;
2178 	sal_Unicode const * pMatched = rBegin;
2179 	sal_Unicode const * p = rBegin;
2180 	sal_Int32 i = 0;
2181 	for (; pFirst < pLast; ++i)
2182 	{
2183 		if (pFirst->m_pPrefix[i] == '\0')
2184 		{
2185 			pMatch = pFirst++;
2186 			pMatched = p;
2187 		}
2188 		if (p >= pEnd)
2189 			break;
2190 		sal_uInt32 nChar = INetMIME::toLowerCase(*p++);
2191 		while (pFirst <= pLast && sal_uChar(pFirst->m_pPrefix[i]) < nChar)
2192 			++pFirst;
2193 		while (pFirst <= pLast && sal_uChar(pLast->m_pPrefix[i]) > nChar)
2194 			--pLast;
2195 	}
2196 	if (pFirst == pLast)
2197 	{
2198 		sal_Char const * q = pFirst->m_pPrefix + i;
2199 		while (p < pEnd && *q != '\0'
2200 			   && INetMIME::toLowerCase(*p) == sal_uChar(*q))
2201 		{
2202 			++p;
2203 			++q;
2204 		}
2205 		if (*q == '\0')
2206 		{
2207 			rBegin = p;
2208 			return pFirst;
2209 		}
2210 	}
2211 	rBegin = pMatched;
2212 	return pMatch;
2213 }
2214 
2215 //============================================================================
getAuthorityBegin() const2216 sal_Int32 INetURLObject::getAuthorityBegin() const
2217 {
2218 	DBG_ASSERT(getSchemeInfo().m_bAuthority,
2219 			   "INetURLObject::getAuthority(): Bad scheme");
2220 	sal_Int32 nBegin;
2221 	if (m_aUser.isPresent())
2222 		nBegin = m_aUser.getBegin();
2223 	else if (m_aHost.isPresent())
2224 		nBegin = m_aHost.getBegin();
2225 	else
2226 		nBegin = m_aPath.getBegin();
2227 	nBegin -= RTL_CONSTASCII_LENGTH("//");
2228 	DBG_ASSERT(m_aAbsURIRef.charAt(nBegin) == '/'
2229 			   && m_aAbsURIRef.charAt(nBegin + 1) == '/',
2230 			   "INetURLObject::getAuthority(): Bad authority");
2231     return nBegin;
2232 }
2233 
2234 //============================================================================
getAuthority() const2235 INetURLObject::SubString INetURLObject::getAuthority() const
2236 {
2237     sal_Int32 nBegin = getAuthorityBegin();
2238 	sal_Int32 nEnd = m_aPort.isPresent() ? m_aPort.getEnd() :
2239 					  m_aHost.isPresent() ? m_aHost.getEnd() :
2240 					  m_aAuth.isPresent() ? m_aAuth.getEnd() :
2241 		              m_aUser.isPresent() ? m_aUser.getEnd() :
2242 		                  nBegin + RTL_CONSTASCII_LENGTH("//");
2243 	return SubString(nBegin, nEnd - nBegin);
2244 }
2245 
2246 //============================================================================
setUser(rtl::OUString const & rTheUser,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)2247 bool INetURLObject::setUser(rtl::OUString const & rTheUser,
2248 							bool bOctets, EncodeMechanism eMechanism,
2249 							rtl_TextEncoding eCharset)
2250 {
2251 	if (
2252          !getSchemeInfo().m_bUser ||
2253          (m_eScheme == INET_PROT_IMAP && rTheUser.getLength() == 0)
2254        )
2255     {
2256 		return false;
2257     }
2258 
2259 	rtl::OUString aNewUser(encodeText(rTheUser, bOctets,
2260 								  m_eScheme == INET_PROT_IMAP ?
2261 									  PART_IMAP_ACHAR :
2262 								  m_eScheme == INET_PROT_VIM ?
2263 									  PART_VIM :
2264 									  PART_USER_PASSWORD,
2265 								  getEscapePrefix(), eMechanism, eCharset,
2266 								  false));
2267 	sal_Int32 nDelta;
2268 	if (m_aUser.isPresent())
2269 		nDelta = m_aUser.set(m_aAbsURIRef, aNewUser);
2270 	else if (m_aHost.isPresent())
2271 	{
2272 		m_aAbsURIRef.insert(m_aHost.getBegin(), sal_Unicode('@'));
2273 		nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aHost.getBegin()) + 1;
2274 	}
2275 	else if (getSchemeInfo().m_bHost)
2276 		return false;
2277 	else
2278 		nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aPath.getBegin());
2279 	m_aAuth += nDelta;
2280 	m_aHost += nDelta;
2281 	m_aPort += nDelta;
2282 	m_aPath += nDelta;
2283 	m_aQuery += nDelta;
2284 	m_aFragment += nDelta;
2285 	return true;
2286 }
2287 
2288 namespace
2289 {
lcl_Erase(rtl::OUStringBuffer & rBuf,sal_Int32 index,sal_Int32 count)2290 	void lcl_Erase(rtl::OUStringBuffer &rBuf, sal_Int32 index, sal_Int32 count)
2291 	{
2292 		rtl::OUString sTemp(rBuf.makeStringAndClear());
2293 		rBuf.append(sTemp.replaceAt(index, count, rtl::OUString()));
2294 	}
2295 }
2296 
2297 //============================================================================
clearPassword()2298 bool INetURLObject::clearPassword()
2299 {
2300 	if (!getSchemeInfo().m_bPassword)
2301 		return false;
2302 	if (m_aAuth.isPresent())
2303 	{
2304 		lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin() - 1,
2305 			m_aAuth.getLength() + 1);
2306 		sal_Int32 nDelta = m_aAuth.clear() - 1;
2307 		m_aHost += nDelta;
2308 		m_aPort += nDelta;
2309 		m_aPath += nDelta;
2310 		m_aQuery += nDelta;
2311 		m_aFragment += nDelta;
2312 	}
2313 	return true;
2314 }
2315 
2316 //============================================================================
setPassword(rtl::OUString const & rThePassword,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)2317 bool INetURLObject::setPassword(rtl::OUString const & rThePassword,
2318 								bool bOctets, EncodeMechanism eMechanism,
2319 								rtl_TextEncoding eCharset)
2320 {
2321 	if (!getSchemeInfo().m_bPassword)
2322 		return false;
2323 	rtl::OUString aNewAuth(encodeText(rThePassword, bOctets,
2324 								  m_eScheme == INET_PROT_VIM ?
2325 									  PART_VIM : PART_USER_PASSWORD,
2326 								  getEscapePrefix(), eMechanism, eCharset,
2327 								  false));
2328 	sal_Int32 nDelta;
2329 	if (m_aAuth.isPresent())
2330 		nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth);
2331 	else if (m_aUser.isPresent())
2332 	{
2333 		m_aAbsURIRef.insert(m_aUser.getEnd(), sal_Unicode(':'));
2334 		nDelta
2335 			= m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aUser.getEnd() + 1) + 1;
2336 	}
2337 	else if (m_aHost.isPresent())
2338 	{
2339 		m_aAbsURIRef.insert(m_aHost.getBegin(),
2340 			rtl::OUString::createFromAscii(":@"));
2341 		m_aUser.set(m_aAbsURIRef, rtl::OUString(), m_aHost.getBegin());
2342 		nDelta
2343 			= m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aHost.getBegin() + 1) + 2;
2344 	}
2345 	else if (getSchemeInfo().m_bHost)
2346 		return false;
2347 	else
2348 	{
2349 		m_aAbsURIRef.insert(m_aPath.getBegin(), sal_Unicode(':'));
2350 		m_aUser.set(m_aAbsURIRef, rtl::OUString(), m_aPath.getBegin());
2351 		nDelta
2352 			= m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aPath.getBegin() + 1) + 1;
2353 	}
2354 	m_aHost += nDelta;
2355 	m_aPort += nDelta;
2356 	m_aPath += nDelta;
2357 	m_aQuery += nDelta;
2358 	m_aFragment += nDelta;
2359 	return true;
2360 }
2361 
2362 //============================================================================
2363 // static
// Scans a host at the start of [rBegin, pEnd) with a character-driven state
// machine and produces its canonic form.
//
// Three forms are accepted (see the switch after the `done` label):
//  - a dotted name made of alphanumerics and hyphens, accepted when the scan
//    stops in STATE_LABEL, STATE_TOPLABEL or STATE_TOPLABEL_DOT; the scanned
//    text is copied verbatim into rCanonic;
//  - a dotted-decimal IPv4 address with exactly four numeric fields; each
//    field is re-rendered from its parsed numeric value;
//  - an IPv6 literal enclosed in '[' and ']', optionally ending in an
//    embedded IPv4 address; numeric fields are likewise re-rendered.
//
// Scanning stops at the first character that does not fit the current state;
// that is not an error by itself, the state reached decides.
//
// rBegin    in: start of the text to scan; out (success only): the first
//           character that was not consumed.
// pEnd      end of the text to scan.
// rCanonic  out (success only): the canonic host.
//
// Returns true if a host was recognized; on false, neither rBegin nor
// rCanonic has been modified.
//
// NOTE(review): IPv4 fields are limited only by the three-digit cap below,
// not to the range 0..255, and the number of IPv6 groups is not counted.
parseHost(sal_Unicode const * & rBegin,sal_Unicode const * pEnd,rtl::OUString & rCanonic)2364 bool INetURLObject::parseHost(
2365     sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
2366     rtl::OUString & rCanonic)
2367 {
2368     // RFC 2373 is inconsistent about how to write an IPv6 address in which an
2369     // IPv4 address directly follows the abbreviating "::".  The ABNF in
2370     // Appendix B suggests ":::13.1.68.3", while an example in 2.2/3 explicitly
2371     // mentions "::13:1.68.3".  This algorithm accepts both variants:
2372 	enum State { STATE_INITIAL, STATE_LABEL, STATE_LABEL_HYPHEN,
2373 				 STATE_LABEL_DOT, STATE_TOPLABEL, STATE_TOPLABEL_HYPHEN,
2374 				 STATE_TOPLABEL_DOT, STATE_IP4, STATE_IP4_DOT, STATE_IP6,
2375 				 STATE_IP6_COLON, STATE_IP6_2COLON, STATE_IP6_3COLON,
2376 				 STATE_IP6_HEXSEQ1, STATE_IP6_HEXSEQ1_COLON,
2377 				 STATE_IP6_HEXSEQ1_MAYBE_IP4, STATE_IP6_HEXSEQ2,
2378 				 STATE_IP6_HEXSEQ2_COLON, STATE_IP6_HEXSEQ2_MAYBE_IP4,
2379 				 STATE_IP6_IP4, STATE_IP6_IP4_DOT, STATE_IP6_DONE };
	// Canonic text of an IP address, built up field by field (discarded and
	// replaced by the verbatim input if the host turns out to be a name):
2380 	rtl::OUStringBuffer aTheCanonic;
	// Value of the numeric field currently being read:
2381 	sal_uInt32 nNumber = 0;
	// Number of digits read so far for that field:
2382 	int nDigits = 0;
	// Number of IPv4 fields started so far:
2383 	int nOctets = 0;
2384 	State eState = STATE_INITIAL;
2385 	sal_Unicode const * p = rBegin;
2386 	for (; p != pEnd; ++p)
2387 		switch (eState)
2388 		{
2389 			case STATE_INITIAL:
				// The first character selects the form: '[' opens an IPv6
				// literal, a letter starts a name, a digit starts what may
				// become an IPv4 address.
2390 				if (*p == '[')
2391 				{
2392 					aTheCanonic.append(sal_Unicode('['));
2393 					eState = STATE_IP6;
2394 				}
2395 				else if (INetMIME::isAlpha(*p))
2396 					eState = STATE_TOPLABEL;
2397 				else if (INetMIME::isDigit(*p))
2398 				{
2399 					nNumber = INetMIME::getWeight(*p);
2400 					nDigits = 1;
2401 					nOctets = 1;
2402 					eState = STATE_IP4;
2403 				}
2404 				else
2405 					goto done;
2406 				break;
2407 
			// The LABEL states track a label that was entered with a digit,
			// the TOPLABEL states one that was entered with a letter.
2408 			case STATE_LABEL:
2409 				if (*p == '.')
2410 					eState = STATE_LABEL_DOT;
2411 				else if (*p == '-')
2412 					eState = STATE_LABEL_HYPHEN;
2413 				else if (!INetMIME::isAlphanumeric(*p))
2414 					goto done;
2415 				break;
2416 
2417 			case STATE_LABEL_HYPHEN:
2418 				if (INetMIME::isAlphanumeric(*p))
2419 					eState = STATE_LABEL;
2420 				else if (*p != '-')
2421 					goto done;
2422 				break;
2423 
2424 			case STATE_LABEL_DOT:
2425 				if (INetMIME::isAlpha(*p))
2426 					eState = STATE_TOPLABEL;
2427 				else if (INetMIME::isDigit(*p))
2428 					eState = STATE_LABEL;
2429 				else
2430 					goto done;
2431 				break;
2432 
2433 			case STATE_TOPLABEL:
2434 				if (*p == '.')
2435 					eState = STATE_TOPLABEL_DOT;
2436 				else if (*p == '-')
2437 					eState = STATE_TOPLABEL_HYPHEN;
2438 				else if (!INetMIME::isAlphanumeric(*p))
2439 					goto done;
2440 				break;
2441 
2442 			case STATE_TOPLABEL_HYPHEN:
2443 				if (INetMIME::isAlphanumeric(*p))
2444 					eState = STATE_TOPLABEL;
2445 				else if (*p != '-')
2446 					goto done;
2447 				break;
2448 
2449 			case STATE_TOPLABEL_DOT:
2450 				if (INetMIME::isAlpha(*p))
2451 					eState = STATE_TOPLABEL;
2452 				else if (INetMIME::isDigit(*p))
2453 					eState = STATE_LABEL;
2454 				else
2455 					goto done;
2456 				break;
2457 
2458 			case STATE_IP4:
				// Inside a numeric field that is still a candidate IPv4
				// field.  A dot after the fourth field, a hyphen, a letter
				// or a fourth digit turns the whole input into a dotted name
				// (LABEL states); what was appended to aTheCanonic so far is
				// then thrown away after `done`.
2459 				if (*p == '.')
2460 					if (nOctets < 4)
2461 					{
2462 						aTheCanonic.append(
2463 							rtl::OUString::valueOf(sal_Int32(nNumber)));
2464 						aTheCanonic.append(sal_Unicode('.'));
2465 						++nOctets;
2466 						eState = STATE_IP4_DOT;
2467 					}
2468 					else
2469 						eState = STATE_LABEL_DOT;
2470 				else if (*p == '-')
2471 					eState = STATE_LABEL_HYPHEN;
2472 				else if (INetMIME::isAlpha(*p))
2473 					eState = STATE_LABEL;
2474 				else if (INetMIME::isDigit(*p))
2475 					if (nDigits < 3)
2476 					{
2477 						nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2478 						++nDigits;
2479 					}
2480 					else
2481 						eState = STATE_LABEL;
2482 				else
2483 					goto done;
2484 				break;
2485 
2486 			case STATE_IP4_DOT:
2487 				if (INetMIME::isAlpha(*p))
2488 					eState = STATE_TOPLABEL;
2489 				else if (INetMIME::isDigit(*p))
2490 				{
2491 					nNumber = INetMIME::getWeight(*p);
2492 					nDigits = 1;
2493 					eState = STATE_IP4;
2494 				}
2495 				else
2496 					goto done;
2497 				break;
2498 
2499 			case STATE_IP6:
				// Directly after '[': either the leading ':' of "::" or the
				// first hex group.
2500 				if (*p == ':')
2501 					eState = STATE_IP6_COLON;
2502 				else if (INetMIME::isHexDigit(*p))
2503 				{
2504 					nNumber = INetMIME::getHexWeight(*p);
2505 					nDigits = 1;
2506 					eState = STATE_IP6_HEXSEQ1;
2507 				}
2508 				else
2509 					goto done;
2510 				break;
2511 
2512 			case STATE_IP6_COLON:
				// A single leading ':' is only valid as the start of "::".
2513 				if (*p == ':')
2514 				{
2515 					aTheCanonic.appendAscii(RTL_CONSTASCII_STRINGPARAM("::"));
2516 					eState = STATE_IP6_2COLON;
2517 				}
2518 				else
2519 					goto done;
2520 				break;
2521 
2522 			case STATE_IP6_2COLON:
				// Directly after "::".  A decimal digit may start either a
				// hex group or an embedded IPv4 address (MAYBE_IP4); a third
				// ':' selects the ":::" IPv4 variant mentioned above.
2523 				if (*p == ']')
2524 					eState = STATE_IP6_DONE;
2525 				else if (*p == ':')
2526 				{
2527 					aTheCanonic.append(sal_Unicode(':'));
2528 					eState = STATE_IP6_3COLON;
2529 				}
2530                 else if (INetMIME::isDigit(*p))
2531                 {
2532                     nNumber = INetMIME::getWeight(*p);
2533                     nDigits = 1;
2534                     eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2535                 }
2536 				else if (INetMIME::isHexDigit(*p))
2537 				{
2538 					nNumber = INetMIME::getHexWeight(*p);
2539 					nDigits = 1;
2540 					eState = STATE_IP6_HEXSEQ2;
2541 				}
2542 				else
2543 					goto done;
2544 				break;
2545 
2546 			case STATE_IP6_3COLON:
				// After ":::" only an IPv4 address may follow.
2547 				if (INetMIME::isDigit(*p))
2548 				{
2549 					nNumber = INetMIME::getWeight(*p);
2550 					nDigits = 1;
2551 					nOctets = 1;
2552 					eState = STATE_IP6_IP4;
2553 				}
2554 				else
2555 					goto done;
2556 				break;
2557 
			// The HEXSEQ1 states read hex groups before a "::", the HEXSEQ2
			// states (below) hex groups after it.
2558 			case STATE_IP6_HEXSEQ1:
2559 				if (*p == ']')
2560 				{
2561 					aTheCanonic.append(
2562 						rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2563 					eState = STATE_IP6_DONE;
2564 				}
2565 				else if (*p == ':')
2566 				{
2567 					aTheCanonic.append(
2568 						rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2569 					aTheCanonic.append(sal_Unicode(':'));
2570 					eState = STATE_IP6_HEXSEQ1_COLON;
2571 				}
2572 				else if (INetMIME::isHexDigit(*p) && nDigits < 4)
2573 				{
2574 					nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2575 					++nDigits;
2576 				}
2577 				else
2578 					goto done;
2579 				break;
2580 
2581 			case STATE_IP6_HEXSEQ1_COLON:
				// After "<hex>:"; a second ':' completes the "::" (the first
				// one has already been appended).
2582 				if (*p == ':')
2583 				{
2584 					aTheCanonic.append(sal_Unicode(':'));
2585 					eState = STATE_IP6_2COLON;
2586 				}
2587 				else if (INetMIME::isDigit(*p))
2588 				{
2589 					nNumber = INetMIME::getWeight(*p);
2590 					nDigits = 1;
2591 					eState = STATE_IP6_HEXSEQ1_MAYBE_IP4;
2592 				}
2593 				else if (INetMIME::isHexDigit(*p))
2594 				{
2595 					nNumber = INetMIME::getHexWeight(*p);
2596 					nDigits = 1;
2597 					eState = STATE_IP6_HEXSEQ1;
2598 				}
2599 				else
2600 					goto done;
2601 				break;
2602 
2603 			case STATE_IP6_HEXSEQ1_MAYBE_IP4:
				// A field of decimal digits only, so far: it is either a hex
				// group or the first field of an embedded IPv4 address.  The
				// digits are accumulated base 16 until that is decided.
2604 				if (*p == ']')
2605 				{
2606 					aTheCanonic.append(
2607 						rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2608 					eState = STATE_IP6_DONE;
2609 				}
2610 				else if (*p == ':')
2611 				{
2612 					aTheCanonic.append(
2613 						rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2614 					aTheCanonic.append(sal_Unicode(':'));
2615 					eState = STATE_IP6_HEXSEQ1_COLON;
2616 				}
2617 				else if (*p == '.')
2618 				{
					// It is an IPv4 field: re-read the (at most three)
					// nibbles accumulated so far as decimal digits.
2619 					nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2620 								  + (nNumber & 15);
2621 					aTheCanonic.append(
2622 						rtl::OUString::valueOf(sal_Int32(nNumber)));
2623 					aTheCanonic.append(sal_Unicode('.'));
2624 					nOctets = 2;
2625 					eState = STATE_IP6_IP4_DOT;
2626 				}
2627 				else if (INetMIME::isDigit(*p) && nDigits < 3)
2628 				{
2629 					nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2630 					++nDigits;
2631 				}
2632 				else if (INetMIME::isHexDigit(*p) && nDigits < 4)
2633 				{
					// A non-decimal hex digit, or a fourth digit: this can
					// only be a hex group.
2634 					nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2635 					++nDigits;
2636 					eState = STATE_IP6_HEXSEQ1;
2637 				}
2638 				else
2639 					goto done;
2640 				break;
2641 
2642 			case STATE_IP6_HEXSEQ2:
2643 				if (*p == ']')
2644 				{
2645 					aTheCanonic.append(
2646 						rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2647 					eState = STATE_IP6_DONE;
2648 				}
2649 				else if (*p == ':')
2650 				{
2651 					aTheCanonic.append(
2652 						rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2653 					aTheCanonic.append(sal_Unicode(':'));
2654 					eState = STATE_IP6_HEXSEQ2_COLON;
2655 				}
2656 				else if (INetMIME::isHexDigit(*p) && nDigits < 4)
2657 				{
2658 					nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2659 					++nDigits;
2660 				}
2661 				else
2662 					goto done;
2663 				break;
2664 
2665 			case STATE_IP6_HEXSEQ2_COLON:
				// Unlike in STATE_IP6_HEXSEQ1_COLON, a second ':' is not
				// accepted here.
2666 				if (INetMIME::isDigit(*p))
2667 				{
2668 					nNumber = INetMIME::getWeight(*p);
2669 					nDigits = 1;
2670 					eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2671 				}
2672 				else if (INetMIME::isHexDigit(*p))
2673 				{
2674 					nNumber = INetMIME::getHexWeight(*p);
2675 					nDigits = 1;
2676 					eState = STATE_IP6_HEXSEQ2;
2677 				}
2678 				else
2679 					goto done;
2680 				break;
2681 
2682 			case STATE_IP6_HEXSEQ2_MAYBE_IP4:
				// Same handling as STATE_IP6_HEXSEQ1_MAYBE_IP4, but
				// continuing with the HEXSEQ2 states.
2683 				if (*p == ']')
2684 				{
2685 					aTheCanonic.append(
2686 						rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2687 					eState = STATE_IP6_DONE;
2688 				}
2689 				else if (*p == ':')
2690 				{
2691 					aTheCanonic.append(
2692 						rtl::OUString::valueOf(sal_Int32(nNumber), 16));
2693 					aTheCanonic.append(sal_Unicode(':'));
2694 					eState = STATE_IP6_HEXSEQ2_COLON;
2695 				}
2696 				else if (*p == '.')
2697 				{
					// Re-read the hex-accumulated nibbles as decimal digits.
2698 					nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2699 								  + (nNumber & 15);
2700 					aTheCanonic.append(
2701 						rtl::OUString::valueOf(sal_Int32(nNumber)));
2702 					aTheCanonic.append(sal_Unicode('.'));
2703 					nOctets = 2;
2704 					eState = STATE_IP6_IP4_DOT;
2705 				}
2706 				else if (INetMIME::isDigit(*p) && nDigits < 3)
2707 				{
2708 					nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2709 					++nDigits;
2710 				}
2711 				else if (INetMIME::isHexDigit(*p) && nDigits < 4)
2712 				{
2713 					nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2714 					++nDigits;
2715 					eState = STATE_IP6_HEXSEQ2;
2716 				}
2717 				else
2718 					goto done;
2719 				break;
2720 
2721 			case STATE_IP6_IP4:
				// Inside a field of the IPv4 address embedded in an IPv6
				// literal; the closing ']' is only accepted in the fourth
				// field.
2722 				if (*p == ']')
2723 					if (nOctets == 4)
2724 					{
2725 						aTheCanonic.append(
2726 							rtl::OUString::valueOf(sal_Int32(nNumber)));
2727 						eState = STATE_IP6_DONE;
2728 					}
2729 					else
2730 						goto done;
2731 				else if (*p == '.')
2732 					if (nOctets < 4)
2733 					{
2734 						aTheCanonic.append(
2735 							rtl::OUString::valueOf(sal_Int32(nNumber)));
2736 						aTheCanonic.append(sal_Unicode('.'));
2737 						++nOctets;
2738 						eState = STATE_IP6_IP4_DOT;
2739 					}
2740 					else
2741 						goto done;
2742 				else if (INetMIME::isDigit(*p) && nDigits < 3)
2743 				{
2744 					nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2745 					++nDigits;
2746 				}
2747 				else
2748 					goto done;
2749 				break;
2750 
2751 			case STATE_IP6_IP4_DOT:
2752 				if (INetMIME::isDigit(*p))
2753 				{
2754 					nNumber = INetMIME::getWeight(*p);
2755 					nDigits = 1;
2756 					eState = STATE_IP6_IP4;
2757 				}
2758 				else
2759 					goto done;
2760 				break;
2761 
2762             case STATE_IP6_DONE:
                // The closing ']' has been consumed; whatever follows does
                // not belong to the host.
2763                 goto done;
2764 		}
	// Here p is the first character that was not consumed (pEnd if the whole
	// input was scanned); the state reached decides about success.
2765  done:
2766 	switch (eState)
2767 	{
2768 		case STATE_LABEL:
2769 		case STATE_TOPLABEL:
2770 		case STATE_TOPLABEL_DOT:
			// A name: the canonic form is the scanned text itself.
2771 			aTheCanonic.setLength(0);
2772 			aTheCanonic.append(rBegin, p - rBegin);
2773 			rBegin = p;
2774 			rCanonic = aTheCanonic.makeStringAndClear();
2775 			return true;
2776 
2777 		case STATE_IP4:
			// An IPv4 address needs exactly four fields; the last one has
			// not been appended yet.
2778 			if (nOctets == 4)
2779 			{
2780 				aTheCanonic.append(
2781 					rtl::OUString::valueOf(sal_Int32(nNumber)));
2782 				rBegin = p;
2783 				rCanonic = aTheCanonic.makeStringAndClear();
2784 				return true;
2785 			}
2786 			return false;
2787 
2788 		case STATE_IP6_DONE:
2789 			aTheCanonic.append(sal_Unicode(']'));
2790 			rBegin = p;
2791 			rCanonic = aTheCanonic.makeStringAndClear();
2792 			return true;
2793 
2794         default:
            // The scan stopped in the middle of a construct (or on empty
            // input).
2795             return false;
2796 	}
2797 }
2798 
2799 //============================================================================
2800 // static
parseHostOrNetBiosName(sal_Unicode const * pBegin,sal_Unicode const * pEnd,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset,bool bNetBiosName,rtl::OUStringBuffer * pCanonic)2801 bool INetURLObject::parseHostOrNetBiosName(
2802     sal_Unicode const * pBegin, sal_Unicode const * pEnd, bool bOctets,
2803     EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName,
2804     rtl::OUStringBuffer* pCanonic)
2805 {
2806 	rtl::OUString aTheCanonic;
2807     if (pBegin < pEnd)
2808     {
2809         sal_Unicode const * p = pBegin;
2810         if (!parseHost(p, pEnd, aTheCanonic) || p != pEnd)
2811         {
2812             if (bNetBiosName)
2813             {
2814                 rtl::OUStringBuffer buf;
2815                 while (pBegin < pEnd)
2816                 {
2817                     EscapeType eEscapeType;
2818                     sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets, '%',
2819                                                  eMechanism, eCharset,
2820                                                  eEscapeType);
2821                     if (!INetMIME::isVisible(nUTF32))
2822                         return false;
2823                     if (!INetMIME::isAlphanumeric(nUTF32))
2824                         switch (nUTF32)
2825                         {
2826                         case '"':
2827                         case '*':
2828                         case '+':
2829                         case ',':
2830                         case '/':
2831                         case ':':
2832                         case ';':
2833                         case '<':
2834                         case '=':
2835                         case '>':
2836                         case '?':
2837                         case '[':
2838                         case '\\':
2839                         case ']':
2840                         case '`':
2841                         case '|':
2842                             return false;;
2843                         }
2844                     if (pCanonic != NULL) {
2845                         appendUCS4(
2846                             buf, nUTF32, eEscapeType, bOctets, PART_URIC, '%',
2847                             eCharset, true);
2848                     }
2849                 }
2850                 aTheCanonic = buf.makeStringAndClear();
2851             }
2852             else
2853                 return false;
2854         }
2855     }
2856     if (pCanonic != NULL) {
2857         *pCanonic = aTheCanonic;
2858     }
2859     return true;
2860 }
2861 
2862 //============================================================================
2863 // static
encodeHostPort(rtl::OUString const & rTheHostPort,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)2864 rtl::OUString INetURLObject::encodeHostPort(rtl::OUString const & rTheHostPort,
2865 										bool bOctets,
2866 										EncodeMechanism eMechanism,
2867 										rtl_TextEncoding eCharset)
2868 {
2869 	sal_Int32 nPort = rTheHostPort.getLength();
2870 	if (nPort != 0)
2871 	{
2872 		sal_Int32 i = nPort - 1;
2873 		while (i != 0 && INetMIME::isDigit(rTheHostPort.getStr()[i]))
2874 			--i;
2875 		if (rTheHostPort.getStr()[i] == ':')
2876 			nPort = i;
2877 	}
2878 	rtl::OUString aResult(encodeText(rTheHostPort.copy(0, nPort), bOctets,
2879 								 PART_HOST_EXTRA, '%', eMechanism, eCharset,
2880 								 true));
2881 	aResult += rTheHostPort.copy(nPort);
2882 	return aResult;
2883 }
2884 
2885 //============================================================================
setHost(rtl::OUString const & rTheHost,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)2886 bool INetURLObject::setHost(rtl::OUString const & rTheHost, bool bOctets,
2887 							EncodeMechanism eMechanism,
2888 							rtl_TextEncoding eCharset)
2889 {
2890 	if (!getSchemeInfo().m_bHost)
2891 		return false;
2892 	rtl::OUStringBuffer aSynHost(rTheHost);
2893     bool bNetBiosName = false;
2894 	switch (m_eScheme)
2895 	{
2896 		case INET_PROT_FILE:
2897 			{
2898 				rtl::OUString sTemp(aSynHost);
2899 				if (sTemp.equalsIgnoreAsciiCaseAsciiL(
2900 					RTL_CONSTASCII_STRINGPARAM("localhost")))
2901 				{
2902 					aSynHost.setLength(0);
2903 				}
2904             	bNetBiosName = true;
2905 			}
2906 			break;
2907 		case INET_PROT_LDAP:
2908 			if (aSynHost.getLength() == 0 && m_aPort.isPresent())
2909 				return false;
2910 			break;
2911 
2912 		default:
2913 			if (aSynHost.getLength() == 0)
2914 				return false;
2915 			break;
2916 	}
2917     if (!parseHostOrNetBiosName(
2918             aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
2919             bOctets, eMechanism, eCharset, bNetBiosName, &aSynHost))
2920         return false;
2921 	sal_Int32 nDelta = m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear());
2922 	m_aPort += nDelta;
2923 	m_aPath += nDelta;
2924 	m_aQuery += nDelta;
2925 	m_aFragment += nDelta;
2926 	return true;
2927 }
2928 
2929 //============================================================================
2930 // static
parsePath(INetProtocol eScheme,sal_Unicode const ** pBegin,sal_Unicode const * pEnd,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset,bool bSkippedInitialSlash,sal_uInt32 nSegmentDelimiter,sal_uInt32 nAltSegmentDelimiter,sal_uInt32 nQueryDelimiter,sal_uInt32 nFragmentDelimiter,rtl::OUStringBuffer & rSynPath)2931 bool INetURLObject::parsePath(INetProtocol eScheme,
2932                               sal_Unicode const ** pBegin,
2933 							  sal_Unicode const * pEnd,
2934 							  bool bOctets,
2935 							  EncodeMechanism eMechanism,
2936 							  rtl_TextEncoding eCharset,
2937 							  bool bSkippedInitialSlash,
2938 							  sal_uInt32 nSegmentDelimiter,
2939 							  sal_uInt32 nAltSegmentDelimiter,
2940 							  sal_uInt32 nQueryDelimiter,
2941 							  sal_uInt32 nFragmentDelimiter,
2942 							  rtl::OUStringBuffer &rSynPath)
2943 {
2944 	DBG_ASSERT(pBegin, "INetURLObject::parsePath(): Null output param");
2945 
2946 	sal_Unicode const * pPos = *pBegin;
2947 	rtl::OUStringBuffer aTheSynPath;
2948 
2949 	switch (eScheme)
2950 	{
2951 		case INET_PROT_NOT_VALID:
2952 			return false;
2953 
2954 		case INET_PROT_FTP:
2955 		case INET_PROT_IMAP:
2956 			if (pPos < pEnd && *pPos != '/')
2957 				return false;
2958 			while (pPos < pEnd && *pPos != nFragmentDelimiter)
2959 			{
2960 				EscapeType eEscapeType;
2961 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
2962 											 '%', eMechanism,
2963 											 eCharset, eEscapeType);
2964 				appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
2965 						   PART_HTTP_PATH, '%', eCharset, true);
2966 			}
2967 			if (aTheSynPath.getLength() == 0)
2968 				aTheSynPath.append(sal_Unicode('/'));
2969 			break;
2970 
2971 		case INET_PROT_HTTP:
2972 		case INET_PROT_VND_SUN_STAR_WEBDAV:
2973 		case INET_PROT_HTTPS:
2974         case INET_PROT_SMB:
2975 			if (pPos < pEnd && *pPos != '/')
2976 				return false;
2977 			while (pPos < pEnd && *pPos != nQueryDelimiter
2978 				   && *pPos != nFragmentDelimiter)
2979 			{
2980 				EscapeType eEscapeType;
2981 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
2982 											 '%', eMechanism,
2983 											 eCharset, eEscapeType);
2984 				appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
2985 						   PART_HTTP_PATH, '%', eCharset, true);
2986 			}
2987 			if (aTheSynPath.getLength() == 0)
2988 				aTheSynPath.append(sal_Unicode('/'));
2989 			break;
2990 
2991 		case INET_PROT_FILE:
2992 		{
2993 			if (bSkippedInitialSlash)
2994 				aTheSynPath.append(sal_Unicode('/'));
2995 			else if (pPos < pEnd
2996 					 && *pPos != nSegmentDelimiter
2997 					 && *pPos != nAltSegmentDelimiter)
2998 				return false;
2999 			while (pPos < pEnd && *pPos != nFragmentDelimiter)
3000 			{
3001 				EscapeType eEscapeType;
3002 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3003 											 '%', eMechanism,
3004 											 eCharset, eEscapeType);
3005 				if (eEscapeType == ESCAPE_NO)
3006                 {
3007 					if (nUTF32 == nSegmentDelimiter
3008 						|| nUTF32 == nAltSegmentDelimiter)
3009 					{
3010 						aTheSynPath.append(sal_Unicode('/'));
3011 						continue;
3012 					}
3013 					else if (nUTF32 == '|'
3014 							 && (pPos == pEnd
3015 								 || *pPos == nFragmentDelimiter
3016 								 || *pPos == nSegmentDelimiter
3017 								 || *pPos == nAltSegmentDelimiter)
3018 							 && aTheSynPath.getLength() == 2
3019 							 && INetMIME::isAlpha(aTheSynPath.charAt(1)))
3020 					{
3021 						// A first segment of <ALPHA "|"> is translated to
3022 						// <ALPHA ":">:
3023 						aTheSynPath.append(sal_Unicode(':'));
3024 						continue;
3025 					}
3026                 }
3027 				appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3028 						   PART_PCHAR, '%', eCharset, true);
3029 			}
3030 			if (aTheSynPath.getLength() == 0)
3031 				aTheSynPath.append(sal_Unicode('/'));
3032 			break;
3033 		}
3034 
3035 		case INET_PROT_MAILTO:
3036 			while (pPos < pEnd && *pPos != nQueryDelimiter
3037 				   && *pPos != nFragmentDelimiter)
3038 			{
3039 				EscapeType eEscapeType;
3040 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3041 											 '%', eMechanism,
3042 											 eCharset, eEscapeType);
3043 				appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3044 						   PART_MAILTO, '%', eCharset, true);
3045 			}
3046 			break;
3047 
3048 		case INET_PROT_NEWS:
3049             if (pPos == pEnd || *pPos == nQueryDelimiter
3050                 || *pPos == nFragmentDelimiter)
3051                 return false;
3052 
3053             // Match <"*">:
3054             if (*pPos == '*'
3055                 && (pEnd - pPos == 1 || pPos[1] == nQueryDelimiter
3056                     || pPos[1] == nFragmentDelimiter))
3057             {
3058                 ++pPos;
3059                 aTheSynPath.append(sal_Unicode('*'));
3060                 break;
3061             }
3062 
3063             // Match <group>:
3064             if (INetMIME::isAlpha(*pPos))
3065                 for (sal_Unicode const * p = pPos + 1;; ++p)
3066                     if (p == pEnd || *p == nQueryDelimiter
3067                         || *p == nFragmentDelimiter)
3068                     {
3069                         aTheSynPath.setLength(0);
3070                         aTheSynPath.append(pPos, p - pPos);
3071                         pPos = p;
3072                         goto done;
3073                     }
3074                     else if (!INetMIME::isAlphanumeric(*p) && *p != '+'
3075                              && *p != '-' && *p != '.' && *p != '_')
3076                         break;
3077 
3078             // Match <article>:
3079             for (;;)
3080             {
3081                 if (pPos == pEnd || *pPos == nQueryDelimiter
3082                     || *pPos == nFragmentDelimiter)
3083                     return false;
3084                 if (*pPos == '@')
3085                     break;
3086                 EscapeType eEscapeType;
3087                 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets, '%',
3088                                              eMechanism, eCharset, eEscapeType);
3089                 appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3090                            PART_NEWS_ARTICLE_LOCALPART, '%', eCharset, true);
3091             }
3092             if (aTheSynPath.getLength() == 0)
3093                 return false;
3094             ++pPos;
3095             aTheSynPath.append(sal_Unicode('@'));
3096             {
3097                 sal_Unicode const * p = pPos;
3098                 while (p < pEnd && *pPos != nQueryDelimiter
3099                        && *pPos != nFragmentDelimiter)
3100                     ++p;
3101                 rtl::OUString aCanonic;
3102                 if (!parseHost(pPos, p, aCanonic))
3103                     return false;
3104                 aTheSynPath.append(aCanonic);
3105             }
3106 
3107         done:
3108             break;
3109 
3110 		case INET_PROT_POP3:
3111 			while (pPos < pEnd && *pPos != nFragmentDelimiter)
3112 			{
3113 				EscapeType eEscapeType;
3114 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3115 											 '%', eMechanism,
3116 											 eCharset, eEscapeType);
3117 				appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3118 						   PART_MESSAGE_ID_PATH, '%', eCharset,
3119 						   true);
3120 			}
3121 			break;
3122 
3123 		case INET_PROT_PRIV_SOFFICE:
3124 		case INET_PROT_SLOT:
3125         case INET_PROT_HID:
3126 		case INET_PROT_MACRO:
3127 		case INET_PROT_UNO:
3128 		case INET_PROT_COMPONENT:
3129 		case INET_PROT_LDAP:
3130 			while (pPos < pEnd && *pPos != nQueryDelimiter
3131 				   && *pPos != nFragmentDelimiter)
3132 			{
3133 				EscapeType eEscapeType;
3134 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3135 											 '%', eMechanism,
3136 											 eCharset, eEscapeType);
3137 				appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3138 						   PART_PATH_BEFORE_QUERY, '%', eCharset,
3139 						   true);
3140 			}
3141 			break;
3142 
3143 		case INET_PROT_VND_SUN_STAR_HELP:
3144 			if (pPos == pEnd
3145                 || *pPos == nQueryDelimiter
3146                 || *pPos == nFragmentDelimiter)
3147 				aTheSynPath.append(sal_Unicode('/'));
3148 			else
3149 			{
3150 				if (*pPos != '/')
3151 					return false;
3152                 while (pPos < pEnd && *pPos != nQueryDelimiter
3153                        && *pPos != nFragmentDelimiter)
3154                 {
3155                     EscapeType eEscapeType;
3156                     sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3157                                                  '%', eMechanism,
3158                                                  eCharset, eEscapeType);
3159                     appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3160                                PART_HTTP_PATH, '%', eCharset, true);
3161                 }
3162             }
3163 			break;
3164 
3165 		case INET_PROT_JAVASCRIPT:
3166 		case INET_PROT_DATA:
3167 		case INET_PROT_CID:
3168 		case INET_PROT_DB:
3169 			while (pPos < pEnd && *pPos != nFragmentDelimiter)
3170 			{
3171 				EscapeType eEscapeType;
3172 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3173 											 '%', eMechanism,
3174 											 eCharset, eEscapeType);
3175 				appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3176 						   PART_URIC, '%', eCharset, true);
3177 			}
3178 			break;
3179 
3180 		case INET_PROT_OUT:
3181 			if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '~')
3182 				return false;
3183 			aTheSynPath.appendAscii(RTL_CONSTASCII_STRINGPARAM("/~"));
3184 			while (pPos < pEnd && *pPos != nFragmentDelimiter)
3185 			{
3186 				EscapeType eEscapeType;
3187 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3188 											 '%', eMechanism,
3189 											 eCharset, eEscapeType);
3190 				appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3191 						   PART_URIC, '%', eCharset, true);
3192 			}
3193 			break;
3194 
3195 		case INET_PROT_VND_SUN_STAR_HIER:
3196 		case INET_PROT_VND_SUN_STAR_PKG:
3197 			if (pPos < pEnd && *pPos != '/'
3198                 && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter)
3199 				return false;
3200 			while (pPos < pEnd && *pPos != nQueryDelimiter
3201 				   && *pPos != nFragmentDelimiter)
3202 			{
3203 				EscapeType eEscapeType;
3204 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3205 											 '%', eMechanism,
3206 											 eCharset, eEscapeType);
3207 				if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
3208 					aTheSynPath.append(sal_Unicode('/'));
3209 				else
3210 					appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3211 							   PART_PCHAR, '%', eCharset, false);
3212 			}
3213 			if (aTheSynPath.getLength() == 0)
3214 				aTheSynPath.append(sal_Unicode('/'));
3215 			break;
3216 
3217 		case INET_PROT_VIM:
3218 		{
3219 /* test had to be taken out to make parsePath static; ok since INET_PROT_VIM is
3220    obsolete, anyway
3221 			if (m_aUser.isEmpty())
3222 				return false;
3223 */
3224 			sal_Unicode const * pPathEnd = pPos;
3225 			while (pPathEnd < pEnd && *pPathEnd != nFragmentDelimiter)
3226 				++pPathEnd;
3227 			aTheSynPath.append(sal_Unicode('/'));
3228 			if (pPos == pPathEnd)
3229 				break;
3230 			else if (*pPos++ != '/')
3231 				return false;
3232 			if (pPos == pPathEnd)
3233 				break;
3234 			while (pPos < pPathEnd && *pPos != '/')
3235 			{
3236 				EscapeType eEscapeType;
3237 				sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3238 											 '=', eMechanism,
3239 											 eCharset, eEscapeType);
3240 				appendUCS4(aTheSynPath,
3241 						   eEscapeType == ESCAPE_NO ?
3242 							   INetMIME::toLowerCase(nUTF32) : nUTF32,
3243 						   eEscapeType, bOctets, PART_VIM, '=',
3244 						   eCharset, false);
3245 			}
3246 			bool bInbox;
3247 			rtl::OUString sCompare(aTheSynPath);
3248 			if (sCompare.equalsAscii("/inbox"))
3249 				bInbox = true;
3250 			else if (sCompare.equalsAscii("/newsgroups"))
3251 				bInbox = false;
3252 			else
3253 				return false;
3254 			aTheSynPath.append(sal_Unicode('/'));
3255 			if (pPos == pPathEnd)
3256 				break;
3257 			else if (*pPos++ != '/')
3258 				return false;
3259 			if (!bInbox)
3260 			{
3261 				bool bEmpty = true;
3262 				while (pPos < pPathEnd && *pPos != '/')
3263 				{
3264 					EscapeType eEscapeType;
3265 					sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3266 												 '=', eMechanism,
3267 												 eCharset, eEscapeType);
3268 					appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3269 							   PART_VIM, '=', eCharset, false);
3270 					bEmpty = false;
3271 				}
3272 				if (bEmpty)
3273 					return false;
3274 				aTheSynPath.append(sal_Unicode('/'));
3275 				if (pPos == pPathEnd)
3276 					break;
3277 				else if (*pPos++ != '/')
3278 					return false;
3279 			}
3280 			bool bEmpty = true;
3281 			while (pPos < pPathEnd && *pPos != ':')
3282 			{
3283 				EscapeType eEscapeType;
3284 				sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3285 											 '=', eMechanism,
3286 											 eCharset, eEscapeType);
3287 				appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3288 						   PART_VIM, '=', eCharset, false);
3289 				bEmpty = false;
3290 			}
3291 			if (bEmpty)
3292 				return false;
3293 			if (pPos == pPathEnd)
3294 				break;
3295 			else if (*pPos++ != ':')
3296 				return false;
3297 			aTheSynPath.append(sal_Unicode(':'));
3298 			for (int i = 0; i < 3; ++i)
3299 			{
3300 				if (i != 0)
3301 				{
3302 					if (pPos == pPathEnd || *pPos++ != '.')
3303 						return false;
3304 					aTheSynPath.append(sal_Unicode('.'));
3305 				}
3306 				bEmpty = true;
3307 				while (pPos < pPathEnd && *pPos != '.')
3308 				{
3309 					EscapeType eEscapeType;
3310 					sal_uInt32 nUTF32 = getUTF32(pPos, pPathEnd, bOctets,
3311 												 '=', eMechanism,
3312 												 eCharset, eEscapeType);
3313 					if (!INetMIME::isDigit(nUTF32))
3314 						return false;
3315 					aTheSynPath.append(sal_Unicode(nUTF32));
3316 					bEmpty = false;
3317 				}
3318 				if (bEmpty)
3319 					return false;
3320 			}
3321 			if (pPos != pPathEnd)
3322 				return false;
3323 			break;
3324 		}
3325 
3326 		case INET_PROT_VND_SUN_STAR_CMD:
3327 		case INET_PROT_VND_SUN_STAR_EXPAND:
3328 		{
3329 			if (pPos == pEnd || *pPos == nFragmentDelimiter)
3330 				return false;
3331 			Part ePart = PART_URIC_NO_SLASH;
3332 			while (pPos != pEnd && *pPos != nFragmentDelimiter)
3333 			{
3334 				EscapeType eEscapeType;
3335 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3336 											 '%', eMechanism,
3337 											 eCharset, eEscapeType);
3338 				appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets, ePart,
3339 						   '%', eCharset, true);
3340 				ePart = PART_URIC;
3341 			}
3342 			break;
3343 		}
3344 
3345         case INET_PROT_VND_SUN_STAR_ODMA:
3346 			if (pPos < pEnd)
3347             {
3348                 if (*pPos == '/')
3349                     ++pPos;
3350                 else
3351                     return false;
3352             }
3353             aTheSynPath.append(sal_Unicode('/'));
3354 			while (pPos < pEnd && *pPos != nFragmentDelimiter)
3355 			{
3356 				EscapeType eEscapeType;
3357 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3358 											 '%', eMechanism,
3359 											 eCharset, eEscapeType);
3360 				appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3361 						   PART_URIC_NO_SLASH, '%', eCharset, true);
3362 			}
3363 			break;
3364 
3365         case INET_PROT_TELNET:
3366             if (pPos < pEnd)
3367             {
3368                 if (*pPos != '/' || pEnd - pPos > 1)
3369                     return false;
3370                 ++pPos;
3371             }
3372             aTheSynPath.append(sal_Unicode('/'));
3373             break;
3374 
3375 		case INET_PROT_VND_SUN_STAR_TDOC:
3376 			if (pPos == pEnd || *pPos != '/')
3377 				return false;
3378 			while (pPos < pEnd && *pPos != nFragmentDelimiter)
3379 			{
3380 				EscapeType eEscapeType;
3381 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3382 											 '%', eMechanism,
3383 											 eCharset, eEscapeType);
3384 				if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
3385 					aTheSynPath.append(sal_Unicode('/'));
3386 				else
3387 					appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3388 							   PART_PCHAR, '%', eCharset, false);
3389 			}
3390 			break;
3391 
3392         case INET_PROT_GENERIC:
3393 			while (pPos < pEnd && *pPos != nFragmentDelimiter)
3394 			{
3395 				EscapeType eEscapeType;
3396 				sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, bOctets,
3397 											 '%', eMechanism,
3398 											 eCharset, eEscapeType);
3399 				appendUCS4(aTheSynPath, nUTF32, eEscapeType, bOctets,
3400 						   PART_URIC, '%', eCharset, true);
3401 			}
3402 			if (aTheSynPath.getLength() == 0)
3403                 return false;
3404 			break;
3405         default:
3406             OSL_ASSERT(false);
3407             break;
3408 	}
3409 
3410 	*pBegin = pPos;
3411 	rSynPath = aTheSynPath;
3412 	return true;
3413 }
3414 
3415 //============================================================================
setPath(rtl::OUString const & rThePath,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)3416 bool INetURLObject::setPath(rtl::OUString const & rThePath, bool bOctets,
3417 							EncodeMechanism eMechanism,
3418 							rtl_TextEncoding eCharset)
3419 {
3420 	rtl::OUStringBuffer aSynPath;
3421 	sal_Unicode const * p = rThePath.getStr();
3422 	sal_Unicode const * pEnd = p + rThePath.getLength();
3423 	if (!parsePath(m_eScheme, &p, pEnd, bOctets, eMechanism, eCharset, false,
3424                    '/', 0x80000000, 0x80000000, 0x80000000, aSynPath)
3425 		|| p != pEnd)
3426 		return false;
3427 	sal_Int32 nDelta = m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear());
3428 	m_aQuery += nDelta;
3429 	m_aFragment += nDelta;
3430 	return true;
3431 }
3432 
3433 //============================================================================
checkHierarchical() const3434 bool INetURLObject::checkHierarchical() const {
3435     if (m_eScheme == INET_PROT_VND_SUN_STAR_EXPAND) {
3436         OSL_ENSURE(
3437             false, "INetURLObject::checkHierarchical vnd.sun.star.expand");
3438         return true;
3439     } else {
3440         return getSchemeInfo().m_bHierarchical;
3441     }
3442 }
3443 
3444 //============================================================================
appendSegment(rtl::OUString const & rTheSegment,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)3445 bool INetURLObject::appendSegment(rtl::OUString const & rTheSegment,
3446 								  bool bOctets, EncodeMechanism eMechanism,
3447 								  rtl_TextEncoding eCharset)
3448 {
3449 	return insertName(rTheSegment, bOctets, false, LAST_SEGMENT, true,
3450 					  eMechanism, eCharset);
3451 }
3452 
3453 //============================================================================
getSegment(sal_Int32 nIndex,bool bIgnoreFinalSlash) const3454 INetURLObject::SubString INetURLObject::getSegment(sal_Int32 nIndex,
3455 												   bool bIgnoreFinalSlash)
3456 	const
3457 {
3458 	DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3459 			   "INetURLObject::getSegment(): Bad index");
3460 
3461 	if (!checkHierarchical())
3462 		return SubString();
3463 
3464 	sal_Unicode const * pPathBegin
3465 		= m_aAbsURIRef.getStr() + m_aPath.getBegin();
3466 	sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3467 	sal_Unicode const * pSegBegin;
3468 	sal_Unicode const * pSegEnd;
3469 	if (nIndex == LAST_SEGMENT)
3470 	{
3471 		pSegEnd = pPathEnd;
3472 		if (bIgnoreFinalSlash && pSegEnd > pPathBegin && pSegEnd[-1] == '/')
3473 			--pSegEnd;
3474         if (pSegEnd <= pPathBegin)
3475             return SubString();
3476 		pSegBegin = pSegEnd - 1;
3477 		while (pSegBegin > pPathBegin && *pSegBegin != '/')
3478 			--pSegBegin;
3479 	}
3480 	else
3481 	{
3482 		pSegBegin = pPathBegin;
3483 		while (nIndex-- > 0)
3484 			do
3485 			{
3486 				++pSegBegin;
3487 				if (pSegBegin >= pPathEnd)
3488 					return SubString();
3489 			}
3490 			while (*pSegBegin != '/');
3491 		pSegEnd = pSegBegin + 1;
3492 		while (pSegEnd < pPathEnd && *pSegEnd != '/')
3493 			++pSegEnd;
3494 	}
3495 
3496 	return SubString(pSegBegin - m_aAbsURIRef.getStr(),
3497 					 pSegEnd - pSegBegin);
3498 }
3499 
3500 //============================================================================
insertName(rtl::OUString const & rTheName,bool bOctets,bool bAppendFinalSlash,sal_Int32 nIndex,bool bIgnoreFinalSlash,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)3501 bool INetURLObject::insertName(rtl::OUString const & rTheName, bool bOctets,
3502 							   bool bAppendFinalSlash, sal_Int32 nIndex,
3503 							   bool bIgnoreFinalSlash,
3504 							   EncodeMechanism eMechanism,
3505 							   rtl_TextEncoding eCharset)
3506 {
3507 	DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3508 			   "INetURLObject::insertName(): Bad index");
3509 
3510 	if (!checkHierarchical())
3511 		return false;
3512 
3513 	sal_Unicode const * pPathBegin
3514 		= m_aAbsURIRef.getStr() + m_aPath.getBegin();
3515 	sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3516 	sal_Unicode const * pPrefixEnd;
3517     bool bInsertSlash;
3518 	sal_Unicode const * pSuffixBegin;
3519 	if (nIndex == LAST_SEGMENT)
3520 	{
3521 		pPrefixEnd = pPathEnd;
3522 		if (bIgnoreFinalSlash && pPrefixEnd > pPathBegin &&
3523             pPrefixEnd[-1] == '/')
3524         {
3525 			--pPrefixEnd;
3526         }
3527         bInsertSlash = bAppendFinalSlash;
3528         pSuffixBegin = pPathEnd;
3529 	}
3530     else if (nIndex == 0)
3531     {
3532         pPrefixEnd = pPathBegin;
3533         bInsertSlash =
3534             (pPathBegin < pPathEnd && *pPathBegin != '/') ||
3535             (pPathBegin == pPathEnd && bAppendFinalSlash);
3536         pSuffixBegin =
3537             (pPathEnd - pPathBegin == 1 && *pPathBegin == '/' &&
3538              !bAppendFinalSlash && bIgnoreFinalSlash)
3539             ? pPathEnd : pPathBegin;
3540     }
3541 	else
3542 	{
3543 		pPrefixEnd = pPathBegin;
3544 		sal_Unicode const * pEnd = pPathEnd;
3545 		if (bIgnoreFinalSlash && pEnd > pPathBegin && pEnd[-1] == '/')
3546 			--pEnd;
3547         bool bSkip = pPrefixEnd < pEnd && *pPrefixEnd == '/';
3548         bInsertSlash = false;
3549         pSuffixBegin = pPathEnd;
3550  		while (nIndex-- > 0)
3551 			for (;;)
3552 			{
3553                 if (bSkip)
3554                     ++pPrefixEnd;
3555                 bSkip = true;
3556 				if (pPrefixEnd >= pEnd)
3557                 {
3558 					if (nIndex == 0)
3559 					{
3560                         bInsertSlash = bAppendFinalSlash;
3561 						break;
3562 					}
3563 					else
3564 						return false;
3565                 }
3566 				if (*pPrefixEnd == '/')
3567 				{
3568 					pSuffixBegin = pPrefixEnd;
3569 					break;
3570 				}
3571 			}
3572 	}
3573 
3574 	rtl::OUStringBuffer aNewPath;
3575 	aNewPath.append(pPathBegin, pPrefixEnd - pPathBegin);
3576     aNewPath.append(sal_Unicode('/'));
3577 	aNewPath.append(encodeText(rTheName, bOctets, PART_PCHAR, getEscapePrefix(),
3578 						   eMechanism, eCharset, true));
3579     if (bInsertSlash) {
3580         aNewPath.append(sal_Unicode('/'));
3581     }
3582     aNewPath.append(pSuffixBegin, pPathEnd - pSuffixBegin);
3583 
3584 	return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
3585 		RTL_TEXTENCODING_UTF8);
3586 }
3587 
3588 //============================================================================
clearQuery()3589 bool INetURLObject::clearQuery()
3590 {
3591 	if (HasError())
3592 		return false;
3593 	if (m_aQuery.isPresent())
3594 	{
3595 		lcl_Erase(m_aAbsURIRef, m_aQuery.getBegin() - 1,
3596 			m_aQuery.getLength() + 1);
3597 		m_aFragment += m_aQuery.clear() - 1;
3598 	}
3599 	return false;
3600 }
3601 
3602 //============================================================================
setQuery(rtl::OUString const & rTheQuery,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)3603 bool INetURLObject::setQuery(rtl::OUString const & rTheQuery, bool bOctets,
3604 							 EncodeMechanism eMechanism,
3605 							 rtl_TextEncoding eCharset)
3606 {
3607 	if (!getSchemeInfo().m_bQuery)
3608 		return false;
3609 	rtl::OUString aNewQuery(encodeText(rTheQuery, bOctets, PART_URIC,
3610 								   getEscapePrefix(), eMechanism, eCharset,
3611 								   true));
3612 	sal_Int32 nDelta;
3613 	if (m_aQuery.isPresent())
3614 		nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery);
3615 	else
3616 	{
3617 		m_aAbsURIRef.insert(m_aPath.getEnd(), sal_Unicode('?'));
3618 		nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery, m_aPath.getEnd() + 1)
3619 					 + 1;
3620 	}
3621 	m_aFragment += nDelta;
3622 	return true;
3623 }
3624 
3625 //============================================================================
clearFragment()3626 bool INetURLObject::clearFragment()
3627 {
3628 	if (HasError())
3629 		return false;
3630 	if (m_aFragment.isPresent())
3631 	{
3632 		m_aAbsURIRef.setLength(m_aFragment.getBegin() - 1);
3633 		m_aFragment.clear();
3634 	}
3635 	return true;
3636 }
3637 
3638 //============================================================================
setFragment(rtl::OUString const & rTheFragment,bool bOctets,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)3639 bool INetURLObject::setFragment(rtl::OUString const & rTheFragment,
3640 								bool bOctets, EncodeMechanism eMechanism,
3641 								rtl_TextEncoding eCharset)
3642 {
3643 	if (HasError())
3644 		return false;
3645 	rtl::OUString aNewFragment(encodeText(rTheFragment, bOctets, PART_URIC,
3646 									  getEscapePrefix(), eMechanism,
3647 									  eCharset, true));
3648 	if (m_aFragment.isPresent())
3649 		m_aFragment.set(m_aAbsURIRef, aNewFragment);
3650 	else
3651 	{
3652 		m_aAbsURIRef.append(sal_Unicode('#'));
3653 		m_aFragment.set(m_aAbsURIRef, aNewFragment, m_aAbsURIRef.getLength());
3654 	}
3655 	return true;
3656 }
3657 
3658 //============================================================================
getFTPType() const3659 INetURLObject::FTPType INetURLObject::getFTPType() const
3660 {
3661 	if (m_eScheme == INET_PROT_FTP
3662 		&& m_aPath.getLength() >= RTL_CONSTASCII_LENGTH(";type=") + 1
3663 		&& rtl::OUString(m_aAbsURIRef).copy(
3664 			m_aPath.getEnd() - (RTL_CONSTASCII_LENGTH(";type=") + 1),
3665 			RTL_CONSTASCII_LENGTH(";type=")).equalsIgnoreAsciiCaseAscii(";type="))
3666 		switch (m_aAbsURIRef.charAt(m_aPath.getEnd()))
3667 		{
3668 			case 'A':
3669 			case 'a':
3670 				return FTP_TYPE_A;
3671 
3672 			case 'D':
3673 			case 'd':
3674 				return FTP_TYPE_D;
3675 
3676 			case 'I':
3677 			case 'i':
3678 				return FTP_TYPE_I;
3679 		}
3680 	return FTP_TYPE_NONE;
3681 }
3682 
3683 //============================================================================
hasDosVolume(FSysStyle eStyle) const3684 bool INetURLObject::hasDosVolume(FSysStyle eStyle) const
3685 {
3686     sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3687     return (eStyle & FSYS_DOS) != 0
3688            && m_aPath.getLength() >= 3
3689            && p[0] == '/'
3690            && INetMIME::isAlpha(p[1])
3691            && p[2] == ':'
3692            && (m_aPath.getLength() == 3 || p[3] == '/');
3693 }
3694 
3695 //============================================================================
getIMAPUID() const3696 sal_uInt32 INetURLObject::getIMAPUID() const
3697 {
3698 	if (m_eScheme == INET_PROT_IMAP
3699 		&& m_aPath.getLength() >= RTL_CONSTASCII_LENGTH("/;uid=") + 1)
3700 	{
3701 		sal_Unicode const * pBegin = m_aAbsURIRef.getStr()
3702 										 + m_aPath.getBegin()
3703 										 + RTL_CONSTASCII_LENGTH("/;uid=");
3704 		sal_Unicode const * pEnd = pBegin + m_aPath.getLength();
3705 		sal_Unicode const * p = pEnd;
3706 		while (p > pBegin && INetMIME::isDigit(p[-1]))
3707 			--p;
3708 		if (p < pEnd && *--p != '0'
3709 			&& rtl::OUString(m_aAbsURIRef).copy(
3710 				p - RTL_CONSTASCII_LENGTH("/;uid=") - m_aAbsURIRef.getStr(),
3711 				RTL_CONSTASCII_LENGTH("/;uid=")).equalsIgnoreAsciiCaseAscii("/;uid=")
3712 		   )
3713 		{
3714 			sal_uInt32 nUID;
3715 			if (INetMIME::scanUnsigned(p, pEnd, false, nUID))
3716 				return nUID;
3717 		}
3718 	}
3719 	return 0;
3720 }
3721 
3722 //============================================================================
3723 // static
encodeText(sal_Unicode const * pBegin,sal_Unicode const * pEnd,bool bOctets,Part ePart,sal_Char cEscapePrefix,EncodeMechanism eMechanism,rtl_TextEncoding eCharset,bool bKeepVisibleEscapes)3724 rtl::OUString INetURLObject::encodeText(sal_Unicode const * pBegin,
3725 									sal_Unicode const * pEnd, bool bOctets,
3726 									Part ePart, sal_Char cEscapePrefix,
3727 									EncodeMechanism eMechanism,
3728 									rtl_TextEncoding eCharset,
3729 									bool bKeepVisibleEscapes)
3730 {
3731 	rtl::OUStringBuffer aResult;
3732 	while (pBegin < pEnd)
3733 	{
3734 		EscapeType eEscapeType;
3735 		sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, bOctets, cEscapePrefix,
3736 									 eMechanism, eCharset, eEscapeType);
3737 		appendUCS4(aResult, nUTF32, eEscapeType, bOctets, ePart,
3738 				   cEscapePrefix, eCharset, bKeepVisibleEscapes);
3739 	}
3740 	return aResult.makeStringAndClear();
3741 }
3742 
3743 //============================================================================
3744 // static
decode(sal_Unicode const * pBegin,sal_Unicode const * pEnd,sal_Char cEscapePrefix,DecodeMechanism eMechanism,rtl_TextEncoding eCharset)3745 rtl::OUString INetURLObject::decode(sal_Unicode const * pBegin,
3746 								sal_Unicode const * pEnd,
3747 								sal_Char cEscapePrefix,
3748 								DecodeMechanism eMechanism,
3749 								rtl_TextEncoding eCharset)
3750 {
3751 	switch (eMechanism)
3752 	{
3753 		case NO_DECODE:
3754 			return rtl::OUString(pBegin, pEnd - pBegin);
3755 
3756 		case DECODE_TO_IURI:
3757 			eCharset = RTL_TEXTENCODING_UTF8;
3758 			break;
3759 
3760         default:
3761             break;
3762 	}
3763 	rtl::OUStringBuffer aResult;
3764 	while (pBegin < pEnd)
3765 	{
3766 		EscapeType eEscapeType;
3767 		sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, false, cEscapePrefix,
3768 									 WAS_ENCODED, eCharset, eEscapeType);
3769 		switch (eEscapeType)
3770 		{
3771 			case ESCAPE_NO:
3772 				aResult.append(sal_Unicode(nUTF32));
3773 				break;
3774 
3775 			case ESCAPE_OCTET:
3776 				appendEscape(aResult, cEscapePrefix, nUTF32);
3777 				break;
3778 
3779 			case ESCAPE_UTF32:
3780 				if (
3781                      INetMIME::isUSASCII(nUTF32) &&
3782                      (
3783                        eMechanism == DECODE_TO_IURI ||
3784                        (
3785                          eMechanism == DECODE_UNAMBIGUOUS &&
3786                          mustEncode(nUTF32, PART_UNAMBIGUOUS)
3787                        )
3788                      )
3789                    )
3790                 {
3791 					appendEscape(aResult, cEscapePrefix, nUTF32);
3792                 }
3793 				else
3794 					aResult.append(sal_Unicode(nUTF32));
3795 				break;
3796 		}
3797 	}
3798 	return aResult.makeStringAndClear();
3799 }
3800 
3801 //============================================================================
GetURLNoPass(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const3802 rtl::OUString INetURLObject::GetURLNoPass(DecodeMechanism eMechanism,
3803 									  rtl_TextEncoding eCharset) const
3804 {
3805 	INetURLObject aTemp(*this);
3806 	aTemp.clearPassword();
3807 	return aTemp.GetMainURL(eMechanism, eCharset);
3808 }
3809 
3810 //============================================================================
GetURLNoMark(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const3811 rtl::OUString INetURLObject::GetURLNoMark(DecodeMechanism eMechanism,
3812 									  rtl_TextEncoding eCharset) const
3813 {
3814 	INetURLObject aTemp(*this);
3815 	aTemp.clearFragment();
3816 	return aTemp.GetMainURL(eMechanism, eCharset);
3817 }
3818 
3819 //============================================================================
3820 rtl::OUString
getAbbreviated(star::uno::Reference<star::util::XStringWidth> const & rStringWidth,sal_Int32 nWidth,DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const3821 INetURLObject::getAbbreviated(
3822     star::uno::Reference< star::util::XStringWidth > const & rStringWidth,
3823     sal_Int32 nWidth,
3824     DecodeMechanism eMechanism,
3825     rtl_TextEncoding eCharset)
3826     const
3827 {
3828     OSL_ENSURE(rStringWidth.is(), "specification violation");
3829     sal_Char cEscapePrefix = getEscapePrefix();
3830     rtl::OUStringBuffer aBuffer;
3831 	// make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
3832 	// is empty ("") in that case, so take the scheme from m_aAbsURIRef
3833 	if (m_eScheme != INET_PROT_GENERIC)
3834 	{
3835 		aBuffer.appendAscii(getSchemeInfo().m_pScheme);
3836 	}
3837 	else
3838 	{
3839 		if (m_aAbsURIRef)
3840 		{
3841 			sal_Unicode const * pSchemeBegin
3842 				= m_aAbsURIRef.getStr();
3843 			sal_Unicode const * pSchemeEnd = pSchemeBegin;
3844 
3845 			while (pSchemeEnd[0] != ':')
3846 			{
3847 				++pSchemeEnd;
3848 			}
3849 			aBuffer.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
3850 		}
3851 	}
3852     aBuffer.append(static_cast< sal_Unicode >(':'));
3853     bool bAuthority = getSchemeInfo().m_bAuthority;
3854     sal_Unicode const * pCoreBegin
3855         = m_aAbsURIRef.getStr() + (bAuthority ? getAuthorityBegin() :
3856                                                    m_aPath.getBegin());
3857     sal_Unicode const * pCoreEnd
3858         = m_aAbsURIRef.getStr() + m_aPath.getBegin() + m_aPath.getLength();
3859     bool bSegment = false;
3860     if (getSchemeInfo().m_bHierarchical)
3861     {
3862         rtl::OUString aRest;
3863         if (m_aQuery.isPresent())
3864             aRest = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("?..."));
3865         else if (m_aFragment.isPresent())
3866             aRest = rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("#..."));
3867         rtl::OUStringBuffer aTrailer;
3868         sal_Unicode const * pBegin = pCoreBegin;
3869         sal_Unicode const * pEnd = pCoreEnd;
3870         sal_Unicode const * pPrefixBegin = pBegin;
3871         sal_Unicode const * pSuffixEnd = pEnd;
3872         bool bPrefix = true;
3873         bool bSuffix = true;
3874         do
3875         {
3876             if (bSuffix)
3877             {
3878                 sal_Unicode const * p = pSuffixEnd - 1;
3879                 if (pSuffixEnd == pCoreEnd && *p == '/')
3880                     --p;
3881                 while (*p != '/')
3882                     --p;
3883                 if (bAuthority && p == pCoreBegin + 1)
3884                     --p;
3885                 rtl::OUString
3886                     aSegment(decode(p + (p == pBegin && pBegin != pCoreBegin ?
3887                                              1 : 0),
3888                                     pSuffixEnd,
3889                                     cEscapePrefix,
3890                                     eMechanism,
3891                                     eCharset));
3892                 pSuffixEnd = p;
3893                 rtl::OUStringBuffer aResult(aBuffer);
3894                 if (pSuffixEnd != pBegin)
3895                     aResult.appendAscii(RTL_CONSTASCII_STRINGPARAM("..."));
3896                 aResult.append(aSegment);
3897                 aResult.append(aTrailer);
3898                 aResult.append(aRest);
3899                 if (rStringWidth->
3900                             queryStringWidth(aResult.makeStringAndClear())
3901                         <= nWidth)
3902                 {
3903                     aTrailer.insert(0, aSegment);
3904                     bSegment = true;
3905                     pEnd = pSuffixEnd;
3906                 }
3907                 else
3908                     bSuffix = false;
3909                 if (pPrefixBegin > pSuffixEnd)
3910                     pPrefixBegin = pSuffixEnd;
3911                 if (pBegin == pEnd)
3912                     break;
3913             }
3914             if (bPrefix)
3915             {
3916                 sal_Unicode const * p
3917                     = pPrefixBegin
3918                           + (bAuthority && pPrefixBegin == pCoreBegin ? 2 :
3919                                                                         1);
3920                 OSL_ASSERT(p <= pEnd);
3921                 while (p < pEnd && *p != '/')
3922                     ++p;
3923                 if (p == pCoreEnd - 1 && *p == '/')
3924                     ++p;
3925                 rtl::OUString
3926                     aSegment(decode(pPrefixBegin
3927                                         + (pPrefixBegin == pCoreBegin ? 0 :
3928                                                                         1),
3929                                     p == pEnd ? p : p + 1,
3930                                     cEscapePrefix,
3931                                     eMechanism,
3932                                     eCharset));
3933                 pPrefixBegin = p;
3934                 rtl::OUStringBuffer aResult(aBuffer);
3935                 aResult.append(aSegment);
3936                 if (pPrefixBegin != pEnd)
3937                     aResult.appendAscii(RTL_CONSTASCII_STRINGPARAM("..."));
3938                 aResult.append(aTrailer);
3939                 aResult.append(aRest);
3940                 if (rStringWidth->
3941                             queryStringWidth(aResult.makeStringAndClear())
3942                         <= nWidth)
3943                 {
3944                     aBuffer.append(aSegment);
3945                     bSegment = true;
3946                     pBegin = pPrefixBegin;
3947                 }
3948                 else
3949                     bPrefix = false;
3950                 if (pPrefixBegin > pSuffixEnd)
3951                     pSuffixEnd = pPrefixBegin;
3952                 if (pBegin == pEnd)
3953                     break;
3954             }
3955         }
3956         while (bPrefix || bSuffix);
3957         if (bSegment)
3958         {
3959             if (pPrefixBegin != pBegin || pSuffixEnd != pEnd)
3960                 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("..."));
3961             aBuffer.append(aTrailer);
3962         }
3963     }
3964     if (!bSegment)
3965         aBuffer.append(decode(pCoreBegin,
3966                               pCoreEnd,
3967                               cEscapePrefix,
3968                               eMechanism,
3969                               eCharset));
3970     if (m_aQuery.isPresent())
3971     {
3972         aBuffer.append(static_cast< sal_Unicode >('?'));
3973         aBuffer.append(decode(m_aQuery, cEscapePrefix, eMechanism, eCharset));
3974     }
3975     if (m_aFragment.isPresent())
3976     {
3977         aBuffer.append(static_cast< sal_Unicode >('#'));
3978         aBuffer.
3979             append(decode(m_aFragment, cEscapePrefix, eMechanism, eCharset));
3980     }
3981     if (aBuffer.getLength() != 0)
3982     {
3983         rtl::OUStringBuffer aResult(aBuffer);
3984         if (rStringWidth->queryStringWidth(aResult.makeStringAndClear())
3985                 > nWidth)
3986             for (sal_Int32 i = aBuffer.getLength();;)
3987             {
3988                 if (i == 0)
3989                 {
3990                     aBuffer.setLength(aBuffer.getLength() - 1);
3991                     if (aBuffer.getLength() == 0)
3992                         break;
3993                 }
3994                 else
3995                 {
3996                     aBuffer.setLength(--i);
3997                     aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("..."));
3998                 }
3999                 aResult = aBuffer;
4000                 if (rStringWidth->
4001                             queryStringWidth(aResult.makeStringAndClear())
4002                         <= nWidth)
4003                     break;
4004             }
4005     }
4006     return aBuffer.makeStringAndClear();
4007 }
4008 
4009 //============================================================================
operator ==(INetURLObject const & rObject) const4010 bool INetURLObject::operator ==(INetURLObject const & rObject) const
4011 {
4012 	if (m_eScheme != rObject.m_eScheme)
4013 		return false;
4014 	if (m_eScheme == INET_PROT_NOT_VALID)
4015 		return (m_aAbsURIRef == rObject.m_aAbsURIRef) != false;
4016     if ((m_aScheme.compare(
4017              rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef)
4018          != 0)
4019         || GetUser(NO_DECODE) != rObject.GetUser(NO_DECODE)
4020 		|| GetPass(NO_DECODE) != rObject.GetPass(NO_DECODE)
4021 		|| !GetHost(NO_DECODE).equalsIgnoreAsciiCase(
4022 			rObject.GetHost(NO_DECODE))
4023 		|| GetPort() != rObject.GetPort()
4024 		|| HasParam() != rObject.HasParam()
4025 		|| GetParam(NO_DECODE) != rObject.GetParam(NO_DECODE)
4026 		|| GetMsgId(NO_DECODE) != rObject.GetMsgId(NO_DECODE))
4027 		return false;
4028 	rtl::OUString aPath1(GetURLPath(NO_DECODE));
4029 	rtl::OUString aPath2(rObject.GetURLPath(NO_DECODE));
4030 	switch (m_eScheme)
4031 	{
4032 		case INET_PROT_FILE:
4033 		{
4034 			// If the URL paths of two file URLs only differ in that one has a
4035 			// final '/' and the other has not, take the two paths as
4036 			// equivalent (this could be usefull for other schemes, too):
4037 			sal_Int32 nLength = aPath1.getLength();
4038 			switch (nLength - aPath2.getLength())
4039 			{
4040 				case -1:
4041 					if (aPath2.getStr()[nLength] != '/')
4042 						return false;
4043 					break;
4044 
4045 				case 0:
4046 					break;
4047 
4048 				case 1:
4049 					if (aPath1.getStr()[--nLength] != '/')
4050 						return false;
4051 					break;
4052 
4053 				default:
4054 					return false;
4055 			}
4056 			return aPath1.compareTo(aPath2, nLength) == 0;
4057 		}
4058 
4059 		default:
4060 			return (aPath1 == aPath2) != false;
4061 	}
4062 }
4063 
4064 //============================================================================
operator <(INetURLObject const & rObject) const4065 bool INetURLObject::operator <(INetURLObject const & rObject) const
4066 {
4067     sal_Int32 nCompare = m_aScheme.compare(
4068         rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef);
4069     if (nCompare < 0) {
4070         return true;
4071     } else if (nCompare > 0) {
4072         return false;
4073     }
4074 	sal_uInt32 nPort1 = GetPort();
4075 	sal_uInt32 nPort2 = rObject.GetPort();
4076 	if (nPort1 < nPort2)
4077 		return true;
4078 	else if (nPort1 > nPort2)
4079 		return false;
4080 	nCompare = GetUser(NO_DECODE).compareTo(rObject.GetUser(NO_DECODE));
4081 	if (nCompare < 0)
4082 		return true;
4083 	else if (nCompare > 0)
4084 		return false;
4085 	nCompare = GetPass(NO_DECODE).compareTo(rObject.GetPass(NO_DECODE));
4086 	if (nCompare < 0)
4087 		return true;
4088 	else if (nCompare > 0)
4089 		return false;
4090 	nCompare = GetHost(NO_DECODE).compareTo(rObject.GetHost(NO_DECODE));
4091 	if (nCompare < 0)
4092 		return true;
4093 	else if (nCompare > 0)
4094 		return false;
4095 	const rtl::OUString &rPath1(GetURLPath(NO_DECODE));
4096 	const rtl::OUString &rPath2(rObject.GetURLPath(NO_DECODE));
4097 	nCompare = rPath1.compareTo(rPath2);
4098 	if (nCompare < 0)
4099 		return true;
4100 	else if (nCompare > 0)
4101 		return false;
4102 	nCompare = GetParam(NO_DECODE).compareTo(rObject.GetParam(NO_DECODE));
4103 	if (nCompare < 0)
4104 		return true;
4105 	else if (nCompare > 0)
4106 		return false;
4107 	return GetMsgId(NO_DECODE).compareTo(rObject.GetMsgId(NO_DECODE)) < 0;
4108 }
4109 
4110 //============================================================================
ConcatData(INetProtocol eTheScheme,rtl::OUString const & rTheUser,rtl::OUString const & rThePassword,rtl::OUString const & rTheHost,sal_uInt32 nThePort,rtl::OUString const & rThePath,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)4111 bool INetURLObject::ConcatData(INetProtocol eTheScheme,
4112 							   rtl::OUString const & rTheUser,
4113 							   rtl::OUString const & rThePassword,
4114 							   rtl::OUString const & rTheHost,
4115 							   sal_uInt32 nThePort,
4116 							   rtl::OUString const & rThePath,
4117 							   EncodeMechanism eMechanism,
4118 							   rtl_TextEncoding eCharset)
4119 {
4120 	setInvalid();
4121 	m_eScheme = eTheScheme;
4122 	if (HasError() || m_eScheme == INET_PROT_GENERIC)
4123 		return false;
4124 	m_aAbsURIRef.setLength(0);
4125 	m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
4126 	m_aAbsURIRef.append(sal_Unicode(':'));
4127 	if (getSchemeInfo().m_bAuthority)
4128 	{
4129 		m_aAbsURIRef.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
4130 		bool bUserInfo = false;
4131 		if (getSchemeInfo().m_bUser)
4132 		{
4133 			if (m_eScheme == INET_PROT_IMAP && rTheUser.getLength() == 0)
4134 			{
4135 				setInvalid();
4136 				return false;
4137 			}
4138 			if (rTheUser.getLength() != 0)
4139 			{
4140 				m_aUser.set(m_aAbsURIRef,
4141 							encodeText(rTheUser, false,
4142 									   m_eScheme == INET_PROT_IMAP ?
4143 										   PART_IMAP_ACHAR :
4144 									   m_eScheme == INET_PROT_VIM ?
4145 										   PART_VIM :
4146 										   PART_USER_PASSWORD,
4147 									   getEscapePrefix(), eMechanism,
4148 									   eCharset, false),
4149 							m_aAbsURIRef.getLength());
4150 				bUserInfo = true;
4151 			}
4152 		}
4153 		else if (rTheUser.getLength() != 0)
4154 		{
4155 			setInvalid();
4156 			return false;
4157 		}
4158 		if (rThePassword.getLength() != 0)
4159         {
4160 			if (getSchemeInfo().m_bPassword)
4161 			{
4162 				m_aAbsURIRef.append(sal_Unicode(':'));
4163 				m_aAuth.set(m_aAbsURIRef,
4164 							encodeText(rThePassword, false,
4165 									   m_eScheme == INET_PROT_VIM ?
4166 										   PART_VIM : PART_USER_PASSWORD,
4167 									   getEscapePrefix(), eMechanism,
4168 									   eCharset, false),
4169 							m_aAbsURIRef.getLength());
4170 				bUserInfo = true;
4171 			}
4172 			else
4173 			{
4174 				setInvalid();
4175 				return false;
4176 			}
4177         }
4178 		if (bUserInfo && getSchemeInfo().m_bHost)
4179 			m_aAbsURIRef.append(sal_Unicode('@'));
4180 		if (getSchemeInfo().m_bHost)
4181 		{
4182 			rtl::OUStringBuffer aSynHost(rTheHost);
4183             bool bNetBiosName = false;
4184 			switch (m_eScheme)
4185 			{
4186 				case INET_PROT_FILE:
4187 					{
4188 				        rtl::OUString sTemp(aSynHost);
4189 				        if (sTemp.equalsIgnoreAsciiCaseAsciiL(
4190 					        RTL_CONSTASCII_STRINGPARAM("localhost")))
4191 						{
4192 						    aSynHost.setLength(0);
4193 						}
4194                         bNetBiosName = true;
4195 					}
4196 					break;
4197 
4198 				case INET_PROT_LDAP:
4199 					if (aSynHost.getLength() == 0 && nThePort != 0)
4200 					{
4201 						setInvalid();
4202 						return false;
4203 					}
4204 					break;
4205 
4206 				default:
4207 					if (aSynHost.getLength() == 0)
4208 					{
4209 						setInvalid();
4210 						return false;
4211 					}
4212 					break;
4213 			}
4214             if (!parseHostOrNetBiosName(
4215                     aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
4216                     false, eMechanism, eCharset, bNetBiosName, &aSynHost))
4217             {
4218                 setInvalid();
4219                 return false;
4220             }
4221 			m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear(),
4222 				m_aAbsURIRef.getLength());
4223 			if (nThePort != 0)
4224 			{
4225 				if (getSchemeInfo().m_bPort)
4226 				{
4227 					m_aAbsURIRef.append(sal_Unicode(':'));
4228 					m_aPort.set(m_aAbsURIRef,
4229 								rtl::OUString::valueOf(sal_Int64(nThePort)),
4230 								m_aAbsURIRef.getLength());
4231 				}
4232 				else
4233 				{
4234 					setInvalid();
4235 					return false;
4236 				}
4237 			}
4238 		}
4239 		else if (rTheHost.getLength() != 0 || nThePort != 0)
4240 		{
4241 			setInvalid();
4242 			return false;
4243 		}
4244 	}
4245 	rtl::OUStringBuffer aSynPath;
4246 	sal_Unicode const * p = rThePath.getStr();
4247 	sal_Unicode const * pEnd = p + rThePath.getLength();
4248 	if (!parsePath(m_eScheme, &p, pEnd, false, eMechanism, eCharset, false, '/',
4249 				   0x80000000, 0x80000000, 0x80000000, aSynPath)
4250 		|| p != pEnd)
4251     {
4252         setInvalid();
4253 		return false;
4254     }
4255 	m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear(),
4256 		m_aAbsURIRef.getLength());
4257 	return true;
4258 }
4259 
4260 //============================================================================
4261 // static
GetAbsURL(rtl::OUString const & rTheBaseURIRef,rtl::OUString const & rTheRelURIRef,bool bIgnoreFragment,EncodeMechanism eEncodeMechanism,DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset,FSysStyle eStyle)4262 rtl::OUString INetURLObject::GetAbsURL(rtl::OUString const & rTheBaseURIRef,
4263                                        rtl::OUString const & rTheRelURIRef,
4264                                        bool bIgnoreFragment,
4265                                        EncodeMechanism eEncodeMechanism,
4266                                        DecodeMechanism eDecodeMechanism,
4267                                        rtl_TextEncoding eCharset,
4268                                        FSysStyle eStyle)
4269 {
4270 	// Backwards compatibility:
4271 	if (rTheRelURIRef.getLength() == 0 || rTheRelURIRef[0] == '#')
4272 		return rTheRelURIRef;
4273 
4274 	INetURLObject aTheAbsURIRef;
4275 	bool bWasAbsolute;
4276 	return INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
4277             convertRelToAbs(rTheRelURIRef, false, aTheAbsURIRef,
4278                             bWasAbsolute, eEncodeMechanism,
4279                             eCharset, bIgnoreFragment, false,
4280                             false, eStyle)
4281 		   || eEncodeMechanism != WAS_ENCODED
4282 		   || eDecodeMechanism != DECODE_TO_IURI
4283 		   || eCharset != RTL_TEXTENCODING_UTF8 ?
4284 		       aTheAbsURIRef.GetMainURL(eDecodeMechanism, eCharset) :
4285 		       rTheRelURIRef;
4286 }
4287 
4288 //============================================================================
getExternalURL(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const4289 rtl::OUString INetURLObject::getExternalURL(DecodeMechanism eMechanism,
4290 										rtl_TextEncoding eCharset) const
4291 {
4292 	rtl::OUString aTheExtURIRef;
4293 	translateToExternal(
4294         rtl::OUString(m_aAbsURIRef), aTheExtURIRef, eMechanism, eCharset);
4295 	return aTheExtURIRef;
4296 }
4297 
4298 //============================================================================
4299 // static
GetScheme(INetProtocol eTheScheme)4300 rtl::OUString INetURLObject::GetScheme(INetProtocol eTheScheme)
4301 {
4302 	return rtl::OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pPrefix);
4303 }
4304 
4305 //============================================================================
4306 // static
CompareProtocolScheme(rtl::OUString const & rTheAbsURIRef)4307 INetProtocol INetURLObject::CompareProtocolScheme(rtl::OUString const &
4308 													  rTheAbsURIRef)
4309 {
4310 	sal_Unicode const * p = rTheAbsURIRef.getStr();
4311 	PrefixInfo const * pPrefix = getPrefix(p, p + rTheAbsURIRef.getLength());
4312 	return pPrefix ? pPrefix->m_eScheme : INET_PROT_NOT_VALID;
4313 }
4314 
4315 //============================================================================
hasPassword() const4316 bool INetURLObject::hasPassword() const
4317 {
4318 	return m_aAuth.isPresent() && getSchemeInfo().m_bPassword;
4319 }
4320 
4321 //============================================================================
makeAuthCanonic()4322 void INetURLObject::makeAuthCanonic()
4323 {
4324 	if (m_eScheme == INET_PROT_IMAP && m_aAuth.getLength() == 1
4325 		&& m_aAbsURIRef.charAt(m_aAuth.getBegin()) == '*')
4326 	{
4327 		lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin()
4328 							   - RTL_CONSTASCII_LENGTH(";AUTH="),
4329 						   RTL_CONSTASCII_LENGTH(";AUTH=*"));
4330 		sal_Int32 nDelta = m_aAuth.clear() - RTL_CONSTASCII_LENGTH(";AUTH=");
4331 		m_aPath += nDelta;
4332 		m_aQuery += nDelta;
4333 		m_aFragment += nDelta;
4334 	}
4335 }
4336 
4337 //============================================================================
GetHostPort(DecodeMechanism eMechanism,rtl_TextEncoding eCharset)4338 rtl::OUString INetURLObject::GetHostPort(DecodeMechanism eMechanism,
4339 									 rtl_TextEncoding eCharset)
4340 {
4341 	// Check because PROT_VND_SUN_STAR_HELP, PROT_VND_SUN_STAR_HIER, and
4342     // PROT_VND_SUN_STAR_PKG misuse m_aHost:
4343 	if (!getSchemeInfo().m_bHost)
4344 		return rtl::OUString();
4345 	rtl::OUStringBuffer aHostPort(decode(m_aHost, getEscapePrefix(),
4346 		eMechanism, eCharset));
4347 	if (m_aPort.isPresent())
4348 	{
4349 		aHostPort.append(sal_Unicode(':'));
4350 		aHostPort.append(decode(m_aPort, getEscapePrefix(),
4351 			eMechanism, eCharset));
4352 	}
4353 	return aHostPort.makeStringAndClear();
4354 }
4355 
4356 //============================================================================
GetPort() const4357 sal_uInt32 INetURLObject::GetPort() const
4358 {
4359 	if (m_aPort.isPresent())
4360 	{
4361 		sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin();
4362 		sal_Unicode const * pEnd = p + m_aPort.getLength();
4363 		sal_uInt32 nThePort;
4364 		if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd)
4365 			return nThePort;
4366 	}
4367 	return 0;
4368 }
4369 
4370 //============================================================================
SetPort(sal_uInt32 nThePort)4371 bool INetURLObject::SetPort(sal_uInt32 nThePort)
4372 {
4373 	if (getSchemeInfo().m_bPort && m_aHost.isPresent())
4374 	{
4375 		rtl::OUString aNewPort(rtl::OUString::valueOf(sal_Int64(nThePort)));
4376 		sal_Int32 nDelta;
4377 		if (m_aPort.isPresent())
4378 			nDelta = m_aPort.set(m_aAbsURIRef, aNewPort);
4379 		else
4380 		{
4381 			m_aAbsURIRef.insert(m_aHost.getEnd(), sal_Unicode(':'));
4382 			nDelta = m_aPort.set(m_aAbsURIRef, aNewPort, m_aHost.getEnd() + 1)
4383 						 + 1;
4384 		}
4385 		m_aPath += nDelta;
4386 		m_aQuery += nDelta;
4387 		m_aFragment += nDelta;
4388 		return true;
4389 	}
4390 	return false;
4391 }
4392 
4393 //============================================================================
makePortCanonic()4394 void INetURLObject::makePortCanonic()
4395 {
4396 	if (m_aPort.isPresent())
4397 	{
4398 		sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin();
4399 		sal_Unicode const * pEnd = p + m_aPort.getLength();
4400 		sal_uInt32 nThePort;
4401 		if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd)
4402 		{
4403 			sal_Int32 nDelta;
4404 			if (nThePort != 0 && nThePort == getSchemeInfo().m_nDefaultPort)
4405 			{
4406 				lcl_Erase(m_aAbsURIRef, m_aPort.getBegin() - 1,
4407 								   m_aPort.getLength() + 1);
4408 				nDelta = m_aPort.clear() - 1;
4409 			}
4410 			else
4411 				nDelta = m_aPort.set(m_aAbsURIRef,
4412 								 rtl::OUString::valueOf(sal_Int64(nThePort)));
4413 			m_aPath += nDelta;
4414 			m_aQuery += nDelta;
4415 			m_aFragment += nDelta;
4416 		}
4417 	}
4418 }
4419 
4420 //============================================================================
getSegmentCount(bool bIgnoreFinalSlash) const4421 sal_Int32 INetURLObject::getSegmentCount(bool bIgnoreFinalSlash) const
4422 {
4423     if (!checkHierarchical())
4424 		return 0;
4425 
4426 	sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4427 	sal_Unicode const * pEnd = p + m_aPath.getLength();
4428 	if (bIgnoreFinalSlash && pEnd > p && pEnd[-1] == '/')
4429 		--pEnd;
4430 	sal_Int32 n = p == pEnd || *p == '/' ? 0 : 1;
4431 	while (p != pEnd)
4432 		if (*p++ == '/')
4433 			++n;
4434 	return n;
4435 }
4436 
4437 //============================================================================
removeSegment(sal_Int32 nIndex,bool bIgnoreFinalSlash)4438 bool INetURLObject::removeSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4439 {
4440 	SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4441 	if (!aSegment.isPresent())
4442 		return false;
4443 
4444 	rtl::OUStringBuffer aNewPath;
4445 	aNewPath.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
4446 					   aSegment.getBegin() - m_aPath.getBegin());
4447 	if (bIgnoreFinalSlash && aSegment.getEnd() == m_aPath.getEnd())
4448 		aNewPath.append(sal_Unicode('/'));
4449 	else
4450 		aNewPath.append(m_aAbsURIRef.getStr() + aSegment.getEnd(),
4451 						m_aPath.getEnd() - aSegment.getEnd());
4452 	if (aNewPath.getLength() == 0 && !aSegment.isEmpty() &&
4453         m_aAbsURIRef[aSegment.getBegin()] == '/')
4454     {
4455 		aNewPath.append(sal_Unicode('/'));
4456     }
4457 
4458 	return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4459 		RTL_TEXTENCODING_UTF8);
4460 }
4461 
4462 //============================================================================
getName(sal_Int32 nIndex,bool bIgnoreFinalSlash,DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const4463 rtl::OUString INetURLObject::getName(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4464 								 DecodeMechanism eMechanism,
4465 								 rtl_TextEncoding eCharset) const
4466 {
4467 	SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4468 	if (!aSegment.isPresent())
4469 		return rtl::OUString();
4470 
4471 	sal_Unicode const * pSegBegin
4472 		= m_aAbsURIRef.getStr() + aSegment.getBegin();
4473 	sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4474 
4475     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4476         ++pSegBegin;
4477 	sal_Unicode const * p = pSegBegin;
4478 	while (p != pSegEnd && *p != ';')
4479 		++p;
4480 
4481 	return decode(pSegBegin, p, getEscapePrefix(), eMechanism, eCharset);
4482 }
4483 
4484 //============================================================================
setName(rtl::OUString const & rTheName,sal_Int32 nIndex,bool bIgnoreFinalSlash,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)4485 bool INetURLObject::setName(rtl::OUString const & rTheName, sal_Int32 nIndex,
4486 							bool bIgnoreFinalSlash,
4487 							EncodeMechanism eMechanism,
4488 							rtl_TextEncoding eCharset)
4489 {
4490 	SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4491 	if (!aSegment.isPresent())
4492 		return false;
4493 
4494 	sal_Unicode const * pPathBegin
4495 		= m_aAbsURIRef.getStr() + m_aPath.getBegin();
4496 	sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4497 	sal_Unicode const * pSegBegin
4498 		= m_aAbsURIRef.getStr() + aSegment.getBegin();
4499 	sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4500 
4501     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4502         ++pSegBegin;
4503 	sal_Unicode const * p = pSegBegin;
4504 	while (p != pSegEnd && *p != ';')
4505 		++p;
4506 
4507 	rtl::OUStringBuffer aNewPath;
4508 	aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4509 	aNewPath.append(encodeText(rTheName, false, PART_PCHAR, getEscapePrefix(),
4510 		eMechanism, eCharset, true));
4511 	aNewPath.append(p, pPathEnd - p);
4512 
4513 	return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4514 		RTL_TEXTENCODING_UTF8);
4515 }
4516 
4517 //============================================================================
hasExtension(sal_Int32 nIndex,bool bIgnoreFinalSlash) const4518 bool INetURLObject::hasExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4519 	const
4520 {
4521 	SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4522 	if (!aSegment.isPresent())
4523 		return false;
4524 
4525 	sal_Unicode const * pSegBegin
4526 		= m_aAbsURIRef.getStr() + aSegment.getBegin();
4527 	sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4528 
4529     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4530         ++pSegBegin;
4531 	for (sal_Unicode const * p = pSegBegin; p != pSegEnd && *p != ';'; ++p)
4532 		if (*p == '.' && p != pSegBegin)
4533 			return true;
4534 	return false;
4535 }
4536 
4537 //============================================================================
getBase(sal_Int32 nIndex,bool bIgnoreFinalSlash,DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const4538 rtl::OUString INetURLObject::getBase(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4539 								 DecodeMechanism eMechanism,
4540 								 rtl_TextEncoding eCharset) const
4541 {
4542 	SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4543 	if (!aSegment.isPresent())
4544 		return rtl::OUString();
4545 
4546 	sal_Unicode const * pSegBegin
4547 		= m_aAbsURIRef.getStr() + aSegment.getBegin();
4548 	sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4549 
4550     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4551         ++pSegBegin;
4552 	sal_Unicode const * pExtension = 0;
4553 	sal_Unicode const * p = pSegBegin;
4554 	for (; p != pSegEnd && *p != ';'; ++p)
4555 		if (*p == '.' && p != pSegBegin)
4556 			pExtension = p;
4557 	if (!pExtension)
4558 		pExtension = p;
4559 
4560 	return decode(pSegBegin, pExtension, getEscapePrefix(), eMechanism,
4561 				  eCharset);
4562 }
4563 
4564 //============================================================================
setBase(rtl::OUString const & rTheBase,sal_Int32 nIndex,bool bIgnoreFinalSlash,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)4565 bool INetURLObject::setBase(rtl::OUString const & rTheBase, sal_Int32 nIndex,
4566 							bool bIgnoreFinalSlash,
4567 							EncodeMechanism eMechanism,
4568 							rtl_TextEncoding eCharset)
4569 {
4570 	SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4571 	if (!aSegment.isPresent())
4572 		return false;
4573 
4574 	sal_Unicode const * pPathBegin
4575 		= m_aAbsURIRef.getStr() + m_aPath.getBegin();
4576 	sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4577 	sal_Unicode const * pSegBegin
4578 		= m_aAbsURIRef.getStr() + aSegment.getBegin();
4579 	sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4580 
4581     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4582         ++pSegBegin;
4583 	sal_Unicode const * pExtension = 0;
4584 	sal_Unicode const * p = pSegBegin;
4585 	for (; p != pSegEnd && *p != ';'; ++p)
4586 		if (*p == '.' && p != pSegBegin)
4587 			pExtension = p;
4588 	if (!pExtension)
4589 		pExtension = p;
4590 
4591 	rtl::OUStringBuffer aNewPath;
4592 	aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4593 	aNewPath.append(encodeText(rTheBase, false, PART_PCHAR, getEscapePrefix(),
4594 		eMechanism, eCharset, true));
4595 	aNewPath.append(pExtension, pPathEnd - pExtension);
4596 
4597 	return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4598 		RTL_TEXTENCODING_UTF8);
4599 }
4600 
4601 //============================================================================
getExtension(sal_Int32 nIndex,bool bIgnoreFinalSlash,DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const4602 rtl::OUString INetURLObject::getExtension(sal_Int32 nIndex,
4603 									  bool bIgnoreFinalSlash,
4604 									  DecodeMechanism eMechanism,
4605 									  rtl_TextEncoding eCharset) const
4606 {
4607 	SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4608 	if (!aSegment.isPresent())
4609 		return rtl::OUString();
4610 
4611 	sal_Unicode const * pSegBegin
4612 		= m_aAbsURIRef.getStr() + aSegment.getBegin();
4613 	sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4614 
4615     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4616         ++pSegBegin;
4617 	sal_Unicode const * pExtension = 0;
4618 	sal_Unicode const * p = pSegBegin;
4619 	for (; p != pSegEnd && *p != ';'; ++p)
4620 		if (*p == '.' && p != pSegBegin)
4621 			pExtension = p;
4622 
4623 	if (!pExtension)
4624 		return rtl::OUString();
4625 
4626 	return decode(pExtension + 1, p, getEscapePrefix(), eMechanism, eCharset);
4627 }
4628 
4629 //============================================================================
setExtension(rtl::OUString const & rTheExtension,sal_Int32 nIndex,bool bIgnoreFinalSlash,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)4630 bool INetURLObject::setExtension(rtl::OUString const & rTheExtension,
4631 								 sal_Int32 nIndex, bool bIgnoreFinalSlash,
4632 								 EncodeMechanism eMechanism,
4633 								 rtl_TextEncoding eCharset)
4634 {
4635 	SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4636 	if (!aSegment.isPresent())
4637 		return false;
4638 
4639 	sal_Unicode const * pPathBegin
4640 		= m_aAbsURIRef.getStr() + m_aPath.getBegin();
4641 	sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4642 	sal_Unicode const * pSegBegin
4643 		= m_aAbsURIRef.getStr() + aSegment.getBegin();
4644 	sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4645 
4646     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4647         ++pSegBegin;
4648 	sal_Unicode const * pExtension = 0;
4649 	sal_Unicode const * p = pSegBegin;
4650 	for (; p != pSegEnd && *p != ';'; ++p)
4651 		if (*p == '.' && p != pSegBegin)
4652 			pExtension = p;
4653 	if (!pExtension)
4654 		pExtension = p;
4655 
4656 	rtl::OUStringBuffer aNewPath;
4657 	aNewPath.append(pPathBegin, pExtension - pPathBegin);
4658 	aNewPath.append(sal_Unicode('.'));
4659 	aNewPath.append(encodeText(rTheExtension, false, PART_PCHAR,
4660 		getEscapePrefix(), eMechanism, eCharset, true));
4661 	aNewPath.append(p, pPathEnd - p);
4662 
4663 	return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4664 		RTL_TEXTENCODING_UTF8);
4665 }
4666 
4667 //============================================================================
removeExtension(sal_Int32 nIndex,bool bIgnoreFinalSlash)4668 bool INetURLObject::removeExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4669 {
4670 	SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4671 	if (!aSegment.isPresent())
4672 		return false;
4673 
4674 	sal_Unicode const * pPathBegin
4675 		= m_aAbsURIRef.getStr() + m_aPath.getBegin();
4676 	sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4677 	sal_Unicode const * pSegBegin
4678 		= m_aAbsURIRef.getStr() + aSegment.getBegin();
4679 	sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4680 
4681     if (pSegBegin < pSegEnd && *pSegBegin == '/')
4682         ++pSegBegin;
4683 	sal_Unicode const * pExtension = 0;
4684 	sal_Unicode const * p = pSegBegin;
4685 	for (; p != pSegEnd && *p != ';'; ++p)
4686 		if (*p == '.' && p != pSegBegin)
4687 			pExtension = p;
4688 	if (!pExtension)
4689 		return true;
4690 
4691 	rtl::OUStringBuffer aNewPath;
4692 	aNewPath.append(pPathBegin, pExtension - pPathBegin);
4693 	aNewPath.append(p, pPathEnd - p);
4694 
4695 	return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4696 		RTL_TEXTENCODING_UTF8);
4697 }
4698 
4699 //============================================================================
hasFinalSlash() const4700 bool INetURLObject::hasFinalSlash() const
4701 {
4702 	if (!checkHierarchical())
4703 		return false;
4704 
4705 	sal_Unicode const * pPathBegin
4706 		= m_aAbsURIRef.getStr() + m_aPath.getBegin();
4707 	sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4708     return pPathEnd > pPathBegin && pPathEnd[-1] == '/';
4709 }
4710 
4711 //============================================================================
setFinalSlash()4712 bool INetURLObject::setFinalSlash()
4713 {
4714 	if (!checkHierarchical())
4715 		return false;
4716 
4717 	sal_Unicode const * pPathBegin
4718 		= m_aAbsURIRef.getStr() + m_aPath.getBegin();
4719 	sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4720 	if (pPathEnd > pPathBegin && pPathEnd[-1] == '/')
4721 		return true;
4722 
4723 	rtl::OUStringBuffer aNewPath;
4724 	aNewPath.append(pPathBegin, pPathEnd - pPathBegin);
4725 	aNewPath.append(sal_Unicode('/'));
4726 
4727 	return setPath(aNewPath.makeStringAndClear(), false, NOT_CANONIC,
4728 		RTL_TEXTENCODING_UTF8);
4729 }
4730 
4731 //============================================================================
removeFinalSlash()4732 bool INetURLObject::removeFinalSlash()
4733 {
4734 	if (!checkHierarchical())
4735 		return false;
4736 
4737 	sal_Unicode const * pPathBegin
4738 		= m_aAbsURIRef.getStr() + m_aPath.getBegin();
4739 	sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4740 	if (pPathEnd <= pPathBegin || pPathEnd[-1] != '/')
4741 		return true;
4742 
4743 	--pPathEnd;
4744 	if (pPathEnd == pPathBegin && *pPathBegin == '/')
4745 		return false;
4746 	rtl::OUString aNewPath(pPathBegin, pPathEnd - pPathBegin);
4747 
4748 	return setPath(aNewPath, false, NOT_CANONIC, RTL_TEXTENCODING_UTF8);
4749 }
4750 
4751 //============================================================================
4752 // static
createFragment(rtl::OUString const & rText)4753 rtl::OUString INetURLObject::createFragment(rtl::OUString const & rText)
4754 {
4755 	rtl::OUString aFragment(rText);
4756 	for (sal_Int32 i = 0; i < aFragment.getLength();)
4757 	{
4758 		sal_Unicode c = aFragment.getStr()[i];
4759 		if (mustEncode(c, PART_CREATEFRAGMENT))
4760 			aFragment = aFragment.replaceAt(i, 1, rtl::OUString());
4761 		else
4762 			++i;
4763 	}
4764 	return aFragment;
4765 }
4766 
4767 //============================================================================
setFSysPath(rtl::OUString const & rFSysPath,FSysStyle eStyle)4768 bool INetURLObject::setFSysPath(rtl::OUString const & rFSysPath,
4769 	FSysStyle eStyle)
4770 {
4771 	sal_Unicode const * pFSysBegin = rFSysPath.getStr();
4772 	sal_Unicode const * pFSysEnd = pFSysBegin + rFSysPath.getLength();
4773 
4774 	switch ((eStyle & FSYS_VOS ? 1 : 0)
4775 			    + (eStyle & FSYS_UNX ? 1 : 0)
4776 			    + (eStyle & FSYS_DOS ? 1 : 0)
4777 			    + (eStyle & FSYS_MAC ? 1 : 0))
4778 	{
4779 		case 0:
4780 			return false;
4781 
4782 		case 1:
4783 			break;
4784 
4785 		default:
4786 			if (eStyle & FSYS_VOS
4787 				&& pFSysEnd - pFSysBegin >= 2
4788 				&& pFSysBegin[0] == '/'
4789 				&& pFSysBegin[1] == '/')
4790 			{
4791 				if (pFSysEnd - pFSysBegin >= 3
4792 					&& pFSysBegin[2] == '.'
4793 					&& (pFSysEnd - pFSysBegin == 3 || pFSysBegin[3] == '/'))
4794 				{
4795 					eStyle = FSYS_VOS; // Production T1
4796 					break;
4797 				}
4798 
4799 				sal_Unicode const * p = pFSysBegin + 2;
4800 				rtl::OUString aHost;
4801 				if (parseHost(p, pFSysEnd, aHost)
4802 					&& (p == pFSysEnd || *p == '/'))
4803 				{
4804 					eStyle = FSYS_VOS; // Production T2
4805 					break;
4806 				}
4807 			}
4808 
4809 			if (eStyle & FSYS_DOS
4810 				&& pFSysEnd - pFSysBegin >= 2
4811 				&& pFSysBegin[0] == '\\'
4812 				&& pFSysBegin[1] == '\\')
4813 			{
4814 				sal_Unicode const * p = pFSysBegin + 2;
4815 				rtl::OUString aHost;
4816 				if (parseHost(p, pFSysEnd, aHost)
4817 					&& (p == pFSysEnd || *p == '\\'))
4818 				{
4819 					eStyle = FSYS_DOS; // Production T3
4820 					break;
4821 				}
4822 			}
4823 
4824 			if (eStyle & FSYS_DOS
4825 				&& pFSysEnd - pFSysBegin >= 2
4826 				&& INetMIME::isAlpha(pFSysBegin[0])
4827 				&& pFSysBegin[1] == ':'
4828 				&& (pFSysEnd - pFSysBegin == 2
4829                     || pFSysBegin[2] == '/'
4830                     || pFSysBegin[2] == '\\'))
4831 			{
4832 				eStyle = FSYS_DOS; // Productions T4, T5
4833 				break;
4834 			}
4835 
4836 			if (!(eStyle & (FSYS_UNX | FSYS_DOS | FSYS_MAC)))
4837 				return false;
4838 
4839 			eStyle = guessFSysStyleByCounting(pFSysBegin, pFSysEnd, eStyle);
4840 			    // Production T6
4841 			break;
4842 	}
4843 
4844 	rtl::OUStringBuffer aSynAbsURIRef(rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("file://")));
4845 
4846 	switch (eStyle)
4847 	{
4848 		case FSYS_VOS:
4849 		{
4850 			sal_Unicode const * p = pFSysBegin;
4851 			if (pFSysEnd - p < 2 || *p++ != '/' || *p++ != '/')
4852 				return false;
4853 			if (p != pFSysEnd && *p == '.'
4854 				&& (pFSysEnd - p == 1 || p[1] == '/'))
4855 				++p;
4856 			for (; p != pFSysEnd; ++p)
4857 				switch (*p)
4858 				{
4859 					case '#':
4860 					case '%':
4861 						appendEscape(aSynAbsURIRef, '%', *p);
4862 						break;
4863 
4864 					default:
4865 						aSynAbsURIRef.append(*p);
4866 						break;
4867 				}
4868 			break;
4869 		}
4870 
4871 		case FSYS_UNX:
4872 		{
4873 			sal_Unicode const * p = pFSysBegin;
4874 			if (p != pFSysEnd && *p != '/')
4875 				return false;
4876 			for (; p != pFSysEnd; ++p)
4877 				switch (*p)
4878 				{
4879 					case '|':
4880 					case '#':
4881 					case '%':
4882 						appendEscape(aSynAbsURIRef, '%', *p);
4883 						break;
4884 
4885 					default:
4886 						aSynAbsURIRef.append(*p);
4887 						break;
4888 				}
4889 			break;
4890 		}
4891 
4892 		case FSYS_DOS:
4893 		{
4894 			sal_uInt32 nAltDelimiter = 0x80000000;
4895 			sal_Unicode const * p = pFSysBegin;
4896 			if (pFSysEnd - p >= 3 && p[0] == '\\' && p[1] == '\\')
4897 				p += 2;
4898 			else
4899 			{
4900 				aSynAbsURIRef.append(sal_Unicode('/'));
4901 				if (pFSysEnd - p >= 2
4902                     && INetMIME::isAlpha(p[0])
4903 					&& p[1] == ':'
4904                     && (pFSysEnd - p == 2 || p[2] == '\\' || p[2] == '/'))
4905 					nAltDelimiter = '/';
4906 			}
4907 			for (; p != pFSysEnd; ++p)
4908 				if (*p == '\\' || *p == nAltDelimiter)
4909 					aSynAbsURIRef.append(sal_Unicode('/'));
4910 				else
4911 					switch (*p)
4912 					{
4913 						case '/':
4914 						case '#':
4915 						case '%':
4916 							appendEscape(aSynAbsURIRef, '%', *p);
4917 							break;
4918 
4919 						default:
4920 							aSynAbsURIRef.append(*p);
4921 							break;
4922 					}
4923 			break;
4924 		}
4925 
4926 		case FSYS_MAC:
4927 			aSynAbsURIRef.append(sal_Unicode('/'));
4928 			{for (sal_Unicode const * p = pFSysBegin; p != pFSysEnd; ++p)
4929 				switch (*p)
4930 				{
4931 					case ':':
4932 						aSynAbsURIRef.append(sal_Unicode('/'));
4933 						break;
4934 
4935 					case '/':
4936 					case '|':
4937 					case '#':
4938 					case '%':
4939 						appendEscape(aSynAbsURIRef, '%', *p);
4940 						break;
4941 
4942 					default:
4943 						aSynAbsURIRef.append(*p);
4944 						break;
4945 				}
4946 			}
4947 			break;
4948 
4949         default:
4950             OSL_ASSERT(false);
4951             break;
4952 	}
4953 
4954 	INetURLObject aTemp(aSynAbsURIRef.makeStringAndClear(), WAS_ENCODED,
4955 		RTL_TEXTENCODING_UTF8);
4956 	if (aTemp.HasError())
4957 		return false;
4958 
4959 	*this = aTemp;
4960 	return true;
4961 }
4962 
4963 //============================================================================
getFSysPath(FSysStyle eStyle,sal_Unicode * pDelimiter) const4964 rtl::OUString INetURLObject::getFSysPath(FSysStyle eStyle,
4965 									 sal_Unicode * pDelimiter) const
4966 {
4967 	if (m_eScheme != INET_PROT_FILE)
4968 		return rtl::OUString();
4969 
4970 	if ((eStyle & FSYS_VOS ? 1 : 0)
4971 		        + (eStyle & FSYS_UNX ? 1 : 0)
4972 			    + (eStyle & FSYS_DOS ? 1 : 0)
4973 		        + (eStyle & FSYS_MAC ? 1 : 0)
4974 		    > 1)
4975 	{
4976 		eStyle = eStyle & FSYS_VOS
4977 			     && m_aHost.isPresent()
4978 			     && m_aHost.getLength() > 0 ?
4979 			         FSYS_VOS :
4980 				 hasDosVolume(eStyle)
4981                  || ((eStyle & FSYS_DOS) != 0
4982                     && m_aHost.isPresent()
4983                     && m_aHost.getLength() > 0) ?
4984 				     FSYS_DOS :
4985 			     eStyle & FSYS_UNX
4986 			     && (!m_aHost.isPresent() || m_aHost.getLength() == 0) ?
4987 			         FSYS_UNX :
4988 			         FSysStyle(0);
4989 	}
4990 
4991 	switch (eStyle)
4992 	{
4993 		case FSYS_VOS:
4994 		{
4995 			if (pDelimiter)
4996 				*pDelimiter = '/';
4997 
4998 			rtl::OUStringBuffer aSynFSysPath;
4999 			aSynFSysPath.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
5000 			if (m_aHost.isPresent() && m_aHost.getLength() > 0)
5001 				aSynFSysPath.append(decode(m_aHost, '%', DECODE_WITH_CHARSET,
5002 									   RTL_TEXTENCODING_UTF8));
5003 			else
5004 				aSynFSysPath.append(sal_Unicode('.'));
5005 			aSynFSysPath.append(decode(m_aPath, '%', DECODE_WITH_CHARSET,
5006 								   RTL_TEXTENCODING_UTF8));
5007 			return aSynFSysPath.makeStringAndClear();
5008 		}
5009 
5010 		case FSYS_UNX:
5011 		{
5012 			if (m_aHost.isPresent() && m_aHost.getLength() > 0)
5013 				return rtl::OUString();
5014 
5015 			if (pDelimiter)
5016 				*pDelimiter = '/';
5017 
5018 			return decode(m_aPath, '%', DECODE_WITH_CHARSET,
5019 						  RTL_TEXTENCODING_UTF8);
5020 		}
5021 
5022 		case FSYS_DOS:
5023 		{
5024 			if (pDelimiter)
5025 				*pDelimiter = '\\';
5026 
5027 			rtl::OUStringBuffer aSynFSysPath;
5028 			if (m_aHost.isPresent() && m_aHost.getLength() > 0)
5029 			{
5030 				aSynFSysPath.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\\\"));
5031 				aSynFSysPath.append(decode(m_aHost, '%', DECODE_WITH_CHARSET,
5032 									   RTL_TEXTENCODING_UTF8));
5033 				aSynFSysPath.append(sal_Unicode('\\'));
5034 			}
5035 			sal_Unicode const * p
5036 				= m_aAbsURIRef.getStr() + m_aPath.getBegin();
5037 			sal_Unicode const * pEnd = p + m_aPath.getLength();
5038 			DBG_ASSERT(p < pEnd && *p == '/',
5039 					   "INetURLObject::getFSysPath(): Bad path");
5040 			++p;
5041 			while (p < pEnd)
5042 			{
5043 				EscapeType eEscapeType;
5044 				sal_uInt32 nUTF32 = getUTF32(p, pEnd, false, '%', WAS_ENCODED,
5045 											 RTL_TEXTENCODING_UTF8,
5046 											 eEscapeType);
5047 				if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
5048 					aSynFSysPath.append(sal_Unicode('\\'));
5049 				else
5050 					aSynFSysPath.appendUtf32(nUTF32);
5051 			}
5052 			return aSynFSysPath.makeStringAndClear();
5053 		}
5054 
5055 		case FSYS_MAC:
5056 		{
5057 			if (m_aHost.isPresent() && m_aHost.getLength() > 0)
5058 				return rtl::OUString();
5059 
5060 			if (pDelimiter)
5061 				*pDelimiter = ':';
5062 
5063 			rtl::OUStringBuffer aSynFSysPath;
5064 			sal_Unicode const * p
5065 				= m_aAbsURIRef.getStr() + m_aPath.getBegin();
5066 			sal_Unicode const * pEnd = p + m_aPath.getLength();
5067 			DBG_ASSERT(p < pEnd && *p == '/',
5068 					   "INetURLObject::getFSysPath(): Bad path");
5069 			++p;
5070 			while (p < pEnd)
5071 			{
5072 				EscapeType eEscapeType;
5073 				sal_uInt32 nUTF32 = getUTF32(p, pEnd, false, '%', WAS_ENCODED,
5074 											 RTL_TEXTENCODING_UTF8,
5075 											 eEscapeType);
5076 				if (eEscapeType == ESCAPE_NO && nUTF32 == '/')
5077 					aSynFSysPath.append(sal_Unicode(':'));
5078 				else
5079 					aSynFSysPath.appendUtf32(nUTF32);
5080 			}
5081 			return aSynFSysPath.makeStringAndClear();
5082 		}
5083 
5084         default:
5085             return rtl::OUString();
5086 	}
5087 }
5088 
5089 //============================================================================
HasMsgId() const5090 bool INetURLObject::HasMsgId() const
5091 {
5092 	if (m_eScheme != INET_PROT_POP3)
5093 		return false;
5094 	sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
5095 	sal_Unicode const * pEnd = p + m_aPath.getLength();
5096 	for (; p < pEnd; ++p)
5097 		if (*p == '<')
5098 			return true;
5099 	return false;
5100 }
5101 
5102 //============================================================================
GetMsgId(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const5103 rtl::OUString INetURLObject::GetMsgId(DecodeMechanism eMechanism,
5104 								  rtl_TextEncoding eCharset) const
5105 {
5106 	if (m_eScheme != INET_PROT_POP3)
5107 		return rtl::OUString();
5108 	sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
5109 	sal_Unicode const * pEnd = p + m_aPath.getLength();
5110 	for (; p < pEnd; ++p)
5111 		if (*p == '<')
5112 			return decode(p, pEnd, getEscapePrefix(), eMechanism, eCharset);
5113 	return rtl::OUString();
5114 }
5115 
5116 //============================================================================
5117 // static
appendUCS4Escape(rtl::OUStringBuffer & rTheText,sal_Char cEscapePrefix,sal_uInt32 nUCS4)5118 void INetURLObject::appendUCS4Escape(rtl::OUStringBuffer & rTheText,
5119 									 sal_Char cEscapePrefix, sal_uInt32 nUCS4)
5120 {
5121 	DBG_ASSERT(nUCS4 < 0x80000000,
5122 			   "INetURLObject::appendUCS4Escape(): Bad char");
5123 	if (nUCS4 < 0x80)
5124 		appendEscape(rTheText, cEscapePrefix, nUCS4);
5125 	else if (nUCS4 < 0x800)
5126 	{
5127 		appendEscape(rTheText, cEscapePrefix, nUCS4 >> 6 | 0xC0);
5128 		appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5129 	}
5130 	else if (nUCS4 < 0x10000)
5131 	{
5132 		appendEscape(rTheText, cEscapePrefix, nUCS4 >> 12 | 0xE0);
5133 		appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
5134 		appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5135 	}
5136 	else if (nUCS4 < 0x200000)
5137 	{
5138 		appendEscape(rTheText, cEscapePrefix, nUCS4 >> 18 | 0xF0);
5139 		appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80);
5140 		appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
5141 		appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5142 	}
5143 	else if (nUCS4 < 0x4000000)
5144 	{
5145 		appendEscape(rTheText, cEscapePrefix, nUCS4 >> 24 | 0xF8);
5146 		appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 18 & 0x3F) | 0x80);
5147 		appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80);
5148 		appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
5149 		appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5150 	}
5151 	else
5152 	{
5153 		appendEscape(rTheText, cEscapePrefix, nUCS4 >> 30 | 0xFC);
5154 		appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 24 & 0x3F) | 0x80);
5155 		appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 18 & 0x3F) | 0x80);
5156 		appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 12 & 0x3F) | 0x80);
5157 		appendEscape(rTheText, cEscapePrefix, (nUCS4 >> 6 & 0x3F) | 0x80);
5158 		appendEscape(rTheText, cEscapePrefix, (nUCS4 & 0x3F) | 0x80);
5159 	}
5160 }
5161 
5162 //============================================================================
5163 // static
appendUCS4(rtl::OUStringBuffer & rTheText,sal_uInt32 nUCS4,EscapeType eEscapeType,bool bOctets,Part ePart,sal_Char cEscapePrefix,rtl_TextEncoding eCharset,bool bKeepVisibleEscapes)5164 void INetURLObject::appendUCS4(rtl::OUStringBuffer& rTheText, sal_uInt32 nUCS4,
5165 							   EscapeType eEscapeType, bool bOctets,
5166 							   Part ePart, sal_Char cEscapePrefix,
5167 							   rtl_TextEncoding eCharset,
5168 							   bool bKeepVisibleEscapes)
5169 {
5170 	bool bEscape;
5171 	rtl_TextEncoding eTargetCharset = RTL_TEXTENCODING_DONTKNOW;
5172 	switch (eEscapeType)
5173 	{
5174 		case ESCAPE_NO:
5175 			if (mustEncode(nUCS4, ePart))
5176 			{
5177 				bEscape = true;
5178 				eTargetCharset = bOctets ? RTL_TEXTENCODING_ISO_8859_1 :
5179 										   RTL_TEXTENCODING_UTF8;
5180 			}
5181 			else
5182 				bEscape = false;
5183 			break;
5184 
5185 		case ESCAPE_OCTET:
5186 			bEscape = true;
5187 			eTargetCharset = RTL_TEXTENCODING_ISO_8859_1;
5188 			break;
5189 
5190 		case ESCAPE_UTF32:
5191 			if (mustEncode(nUCS4, ePart))
5192 			{
5193 				bEscape = true;
5194 				eTargetCharset = eCharset;
5195 			}
5196 			else if (bKeepVisibleEscapes && INetMIME::isVisible(nUCS4))
5197 			{
5198 				bEscape = true;
5199 				eTargetCharset = RTL_TEXTENCODING_ASCII_US;
5200 			}
5201 			else
5202 				bEscape = false;
5203 			break;
5204 		default:
5205 			bEscape = false;
5206 	}
5207 
5208 	if (bEscape)
5209 	{
5210 		switch (eTargetCharset)
5211 		{
5212 			default:
5213 				DBG_ERROR("INetURLObject::appendUCS4(): Unsupported charset");
5214 			case RTL_TEXTENCODING_ASCII_US:
5215 			case RTL_TEXTENCODING_ISO_8859_1:
5216 				appendEscape(rTheText, cEscapePrefix, nUCS4);
5217 				break;
5218 
5219 			case RTL_TEXTENCODING_UTF8:
5220 				appendUCS4Escape(rTheText, cEscapePrefix, nUCS4);
5221 				break;
5222 		}
5223 	}
5224 	else
5225 		rTheText.append(sal_Unicode(nUCS4));
5226 }
5227 
5228 //============================================================================
5229 // static
getUTF32(sal_Unicode const * & rBegin,sal_Unicode const * pEnd,bool bOctets,sal_Char cEscapePrefix,EncodeMechanism eMechanism,rtl_TextEncoding eCharset,EscapeType & rEscapeType)5230 sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
5231 								   sal_Unicode const * pEnd, bool bOctets,
5232 								   sal_Char cEscapePrefix,
5233 								   EncodeMechanism eMechanism,
5234 								   rtl_TextEncoding eCharset,
5235 								   EscapeType & rEscapeType)
5236 {
5237 	DBG_ASSERT(rBegin < pEnd, "INetURLObject::getUTF32(): Bad sequence");
5238 	sal_uInt32 nUTF32 = bOctets ? *rBegin++ :
5239 								  INetMIME::getUTF32Character(rBegin, pEnd);
5240 	switch (eMechanism)
5241 	{
5242 		case ENCODE_ALL:
5243 			rEscapeType = ESCAPE_NO;
5244 			break;
5245 
5246 		case WAS_ENCODED:
5247 		{
5248 			int nWeight1;
5249 			int nWeight2;
5250 			if (nUTF32 == sal_uChar(cEscapePrefix) && rBegin + 1 < pEnd
5251 				&& (nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0
5252 				&& (nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0)
5253 			{
5254 				rBegin += 2;
5255 				nUTF32 = nWeight1 << 4 | nWeight2;
5256 				switch (eCharset)
5257 				{
5258 					default:
5259 						DBG_ERROR(
5260 							"INetURLObject::getUTF32(): Unsupported charset");
5261 					case RTL_TEXTENCODING_ASCII_US:
5262 						rEscapeType = INetMIME::isUSASCII(nUTF32) ?
5263 										  ESCAPE_UTF32 : ESCAPE_OCTET;
5264 						break;
5265 
5266 					case RTL_TEXTENCODING_ISO_8859_1:
5267 						rEscapeType = ESCAPE_UTF32;
5268 						break;
5269 
5270 					case RTL_TEXTENCODING_UTF8:
5271 						if (INetMIME::isUSASCII(nUTF32))
5272 							rEscapeType = ESCAPE_UTF32;
5273 						else
5274 						{
5275 							if (nUTF32 >= 0xC0 && nUTF32 <= 0xF4)
5276 							{
5277 								sal_uInt32 nEncoded;
5278 								int nShift;
5279 								sal_uInt32 nMin;
5280 								if (nUTF32 <= 0xDF)
5281 								{
5282 									nEncoded = (nUTF32 & 0x1F) << 6;
5283 									nShift = 0;
5284 									nMin = 0x80;
5285 								}
5286 								else if (nUTF32 <= 0xEF)
5287 								{
5288 									nEncoded = (nUTF32 & 0x0F) << 12;
5289 									nShift = 6;
5290 									nMin = 0x800;
5291 								}
5292 								else
5293 								{
5294 									nEncoded = (nUTF32 & 0x07) << 18;
5295 									nShift = 12;
5296 									nMin = 0x10000;
5297 								}
5298 								sal_Unicode const * p = rBegin;
5299 								bool bUTF8 = true;
5300 								for (;;)
5301 								{
5302 									if (pEnd - p < 3
5303 										|| p[0] != cEscapePrefix
5304 										|| (nWeight1
5305 											   = INetMIME::getHexWeight(p[1]))
5306 											   < 8
5307 										|| nWeight1 > 11
5308 										|| (nWeight2
5309 											   = INetMIME::getHexWeight(p[2]))
5310 										       < 0)
5311 									{
5312 										bUTF8 = false;
5313 										break;
5314 									}
5315 									p += 3;
5316 									nEncoded
5317 										|= ((nWeight1 & 3) << 4 | nWeight2)
5318 											   << nShift;
5319 									if (nShift == 0)
5320 										break;
5321 									nShift -= 6;
5322 								}
5323 								if (bUTF8 && nEncoded >= nMin
5324 									&& !INetMIME::isHighSurrogate(nEncoded)
5325 									&& !INetMIME::isLowSurrogate(nEncoded)
5326 									&& nEncoded <= 0x10FFFF)
5327 								{
5328 									rBegin = p;
5329 									nUTF32 = nEncoded;
5330 									rEscapeType = ESCAPE_UTF32;
5331 									break;
5332 								}
5333 							}
5334 							rEscapeType = ESCAPE_OCTET;
5335 						}
5336 						break;
5337 				}
5338 			}
5339 			else
5340 				rEscapeType = ESCAPE_NO;
5341 			break;
5342 		}
5343 
5344 		case NOT_CANONIC:
5345 		{
5346 			int nWeight1;
5347 			int nWeight2;
5348 			if (nUTF32 == sal_uChar(cEscapePrefix) && rBegin + 1 < pEnd
5349 				&& ((nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0)
5350 				&& ((nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0))
5351 			{
5352 				rBegin += 2;
5353 				nUTF32 = nWeight1 << 4 | nWeight2;
5354 				rEscapeType = ESCAPE_OCTET;
5355 			}
5356 			else
5357 				rEscapeType = ESCAPE_NO;
5358 			break;
5359 		}
5360 	}
5361 	return nUTF32;
5362 }
5363 
5364 //============================================================================
5365 // static
scanDomain(sal_Unicode const * & rBegin,sal_Unicode const * pEnd,bool bEager)5366 sal_uInt32 INetURLObject::scanDomain(sal_Unicode const *& rBegin,
5367 									 sal_Unicode const * pEnd,
5368 									 bool bEager)
5369 {
5370 	enum State { STATE_DOT, STATE_LABEL, STATE_HYPHEN };
5371 	State eState = STATE_DOT;
5372 	sal_Int32 nLabels = 0;
5373 	sal_Unicode const * pLastAlphanumeric = 0;
5374 	for (sal_Unicode const * p = rBegin;; ++p)
5375 		switch (eState)
5376 		{
5377 			case STATE_DOT:
5378 				if (p != pEnd && INetMIME::isAlphanumeric(*p))
5379 				{
5380 					++nLabels;
5381 					eState = STATE_LABEL;
5382 					break;
5383 				}
5384 				if (bEager || nLabels == 0)
5385 					return 0;
5386 				rBegin = p - 1;
5387 				return nLabels;
5388 
5389 			case STATE_LABEL:
5390 				if (p != pEnd)
5391                 {
5392 					if (INetMIME::isAlphanumeric(*p))
5393 						break;
5394 					else if (*p == '.')
5395 					{
5396 						eState = STATE_DOT;
5397 						break;
5398 					}
5399 					else if (*p == '-')
5400 					{
5401 						pLastAlphanumeric = p;
5402 						eState = STATE_HYPHEN;
5403 						break;
5404 					}
5405                 }
5406 				rBegin = p;
5407 				return nLabels;
5408 
5409 			case STATE_HYPHEN:
5410 				if (p != pEnd)
5411                 {
5412 					if (INetMIME::isAlphanumeric(*p))
5413 					{
5414 						eState = STATE_LABEL;
5415 						break;
5416 					}
5417 					else if (*p == '-')
5418 						break;
5419                 }
5420 				if (bEager)
5421 					return 0;
5422 				rBegin = pLastAlphanumeric;
5423 				return nLabels;
5424 		}
5425 }
5426 
5427 //============================================================================
5428 // static
scanIPv6reference(sal_Unicode const * & rBegin,sal_Unicode const * pEnd)5429 bool INetURLObject::scanIPv6reference(sal_Unicode const *& rBegin,
5430 									  sal_Unicode const * pEnd)
5431 {
5432     if (rBegin != pEnd && *rBegin == '[') {
5433         sal_Unicode const * p = rBegin + 1;
5434         //TODO: check for valid IPv6address (RFC 2373):
5435         while (p != pEnd && (INetMIME::isHexDigit(*p) || *p == ':' || *p == '.'))
5436         {
5437             ++p;
5438         }
5439         if (p != pEnd && *p == ']') {
5440             rBegin = p + 1;
5441             return true;
5442         }
5443     }
5444     return false;
5445 }
5446 
5447 //============================================================================
GetPartBeforeLastName(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const5448 rtl::OUString INetURLObject::GetPartBeforeLastName(DecodeMechanism eMechanism,
5449 											   rtl_TextEncoding eCharset)
5450 	const
5451 {
5452 	if (!checkHierarchical())
5453 		return rtl::OUString();
5454 	INetURLObject aTemp(*this);
5455 	aTemp.clearFragment();
5456 	aTemp.clearQuery();
5457 	aTemp.removeSegment(LAST_SEGMENT, false);
5458 	aTemp.setFinalSlash();
5459 	return aTemp.GetMainURL(eMechanism, eCharset);
5460 }
5461 
5462 //============================================================================
GetLastName(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const5463 rtl::OUString INetURLObject::GetLastName(DecodeMechanism eMechanism,
5464 									 rtl_TextEncoding eCharset) const
5465 {
5466 	return getName(LAST_SEGMENT, true, eMechanism, eCharset);
5467 }
5468 
5469 //============================================================================
GetFileExtension(DecodeMechanism eMechanism,rtl_TextEncoding eCharset) const5470 rtl::OUString INetURLObject::GetFileExtension(DecodeMechanism eMechanism,
5471 										  rtl_TextEncoding eCharset) const
5472 {
5473 	return getExtension(LAST_SEGMENT, false, eMechanism, eCharset);
5474 }
5475 
5476 //============================================================================
CutLastName()5477 bool INetURLObject::CutLastName()
5478 {
5479 	INetURLObject aTemp(*this);
5480 	aTemp.clearFragment();
5481 	aTemp.clearQuery();
5482 	if (!aTemp.removeSegment(LAST_SEGMENT, false))
5483 		return false;
5484 	*this = aTemp;
5485 	return true;
5486 }
5487 
5488 //============================================================================
PathToFileName() const5489 rtl::OUString INetURLObject::PathToFileName() const
5490 {
5491 	if (m_eScheme != INET_PROT_FILE)
5492 		return rtl::OUString();
5493 	rtl::OUString aSystemPath;
5494 	if (osl::FileBase::getSystemPathFromFileURL(
5495 				decode(m_aAbsURIRef.getStr(),
5496 					   m_aAbsURIRef.getStr() + m_aPath.getEnd(),
5497 					   getEscapePrefix(), NO_DECODE, RTL_TEXTENCODING_UTF8),
5498 				aSystemPath)
5499 			!= osl::FileBase::E_None)
5500 		return rtl::OUString();
5501     return aSystemPath;
5502 }
5503 
5504 //============================================================================
GetFull() const5505 rtl::OUString INetURLObject::GetFull() const
5506 {
5507 	INetURLObject aTemp(*this);
5508 	aTemp.removeFinalSlash();
5509 	return aTemp.PathToFileName();
5510 }
5511 
5512 //============================================================================
GetPath() const5513 rtl::OUString INetURLObject::GetPath() const
5514 {
5515 	INetURLObject aTemp(*this);
5516 	aTemp.removeSegment(LAST_SEGMENT, true);
5517 	aTemp.removeFinalSlash();
5518 	return aTemp.PathToFileName();
5519 }
5520 
5521 //============================================================================
SetBase(rtl::OUString const & rTheBase)5522 void INetURLObject::SetBase(rtl::OUString const & rTheBase)
5523 {
5524 	setBase(rTheBase, LAST_SEGMENT, true, ENCODE_ALL);
5525 }
5526 
5527 //============================================================================
GetBase() const5528 rtl::OUString INetURLObject::GetBase() const
5529 {
5530 	return getBase(LAST_SEGMENT, true, DECODE_WITH_CHARSET);
5531 }
5532 
5533 //============================================================================
SetName(rtl::OUString const & rTheName,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)5534 void INetURLObject::SetName(rtl::OUString const & rTheName,
5535 							EncodeMechanism eMechanism,
5536 							rtl_TextEncoding eCharset)
5537 {
5538 	INetURLObject aTemp(*this);
5539 	if (aTemp.removeSegment(LAST_SEGMENT, true)
5540 		&& aTemp.insertName(rTheName, false, LAST_SEGMENT, true, eMechanism,
5541 							eCharset))
5542 		*this = aTemp;
5543 }
5544 
5545 //============================================================================
CutName(DecodeMechanism eMechanism,rtl_TextEncoding eCharset)5546 rtl::OUString INetURLObject::CutName(DecodeMechanism eMechanism,
5547 								 rtl_TextEncoding eCharset)
5548 {
5549 	rtl::OUString aTheName(getName(LAST_SEGMENT, true, eMechanism, eCharset));
5550 	return removeSegment(LAST_SEGMENT, true) ? aTheName : rtl::OUString();
5551 }
5552 
5553 //============================================================================
SetExtension(rtl::OUString const & rTheExtension,EncodeMechanism eMechanism,rtl_TextEncoding eCharset)5554 void INetURLObject::SetExtension(rtl::OUString const & rTheExtension,
5555 								 EncodeMechanism eMechanism,
5556 								 rtl_TextEncoding eCharset)
5557 {
5558 	setExtension(rTheExtension, LAST_SEGMENT, false, eMechanism, eCharset);
5559 }
5560 
5561 //============================================================================
CutExtension(DecodeMechanism eMechanism,rtl_TextEncoding eCharset)5562 rtl::OUString INetURLObject::CutExtension(DecodeMechanism eMechanism,
5563 									  rtl_TextEncoding eCharset)
5564 {
5565 	rtl::OUString aTheExtension(getExtension(LAST_SEGMENT, false, eMechanism,
5566 										 eCharset));
5567 	return removeExtension(LAST_SEGMENT, false)
5568 		? aTheExtension : rtl::OUString();
5569 }
5570 
5571 //============================================================================
// Always reports true, independent of the state of this object.
bool INetURLObject::IsCaseSensitive() const
{
	return true;
}
5576