xref: /aoo41x/main/svl/source/misc/urihelper.cxx (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_svl.hxx"
30 #include <svl/urihelper.hxx>
31 #include <com/sun/star/beans/XPropertySet.hpp>
32 #include "com/sun/star/lang/WrappedTargetRuntimeException.hpp"
33 #include "com/sun/star/lang/XMultiComponentFactory.hpp"
34 #include "com/sun/star/ucb/Command.hpp"
35 #include <com/sun/star/ucb/FileSystemNotation.hpp>
36 #include "com/sun/star/ucb/IllegalIdentifierException.hpp"
37 #include "com/sun/star/ucb/UnsupportedCommandException.hpp"
38 #include "com/sun/star/ucb/XCommandEnvironment.hpp"
39 #include "com/sun/star/ucb/XCommandProcessor.hpp"
40 #include "com/sun/star/ucb/XContent.hpp"
41 #include "com/sun/star/ucb/XContentIdentifierFactory.hpp"
42 #include "com/sun/star/ucb/XContentProvider.hpp"
43 #include <com/sun/star/ucb/XContentProviderManager.hpp>
44 #include "com/sun/star/uno/Any.hxx"
45 #include "com/sun/star/uno/Exception.hpp"
46 #include "com/sun/star/uno/Reference.hxx"
47 #include "com/sun/star/uno/RuntimeException.hpp"
48 #include "com/sun/star/uno/Sequence.hxx"
49 #include "com/sun/star/uno/XComponentContext.hpp"
50 #include "com/sun/star/uno/XInterface.hpp"
51 #include "com/sun/star/uri/UriReferenceFactory.hpp"
52 #include "com/sun/star/uri/XUriReference.hpp"
53 #include "com/sun/star/uri/XUriReferenceFactory.hpp"
54 #include "cppuhelper/exc_hlp.hxx"
55 #include "comphelper/processfactory.hxx"
56 #include "osl/diagnose.h"
57 #include "rtl/ustrbuf.hxx"
58 #include "rtl/ustring.h"
59 #include "rtl/ustring.hxx"
60 #include "sal/types.h"
61 #include <tools/debug.hxx>
62 #include <tools/inetmime.hxx>
63 #include <ucbhelper/contentbroker.hxx>
64 #include <unotools/charclass.hxx>
65 #include "rtl/instance.hxx"
66 
67 namespace unnamed_svl_urihelper {}
68 using namespace unnamed_svl_urihelper;
69 	// unnamed namespaces don't work well yet...
70 
71 namespace css = com::sun::star;
72 using namespace com::sun::star;
73 
74 //============================================================================
75 //
76 //  SmartRel2Abs
77 //
78 //============================================================================
79 
80 namespace unnamed_svl_urihelper {
81 
82 inline UniString toUniString(ByteString const & rString)
83 {
84 	return UniString(rString, RTL_TEXTENCODING_ISO_8859_1);
85 }
86 
87 inline UniString toUniString(UniString const & rString)
88 {
89 	return rString;
90 }
91 
92 template< typename Str >
93 inline UniString SmartRel2Abs_Impl(INetURLObject const & rTheBaseURIRef,
94 								   Str const & rTheRelURIRef,
95 								   Link const & rMaybeFileHdl,
96 								   bool bCheckFileExists,
97 								   bool bIgnoreFragment,
98 								   INetURLObject::EncodeMechanism
99 								       eEncodeMechanism,
100 								   INetURLObject::DecodeMechanism
101 								       eDecodeMechanism,
102 								   rtl_TextEncoding eCharset,
103 								   bool bRelativeNonURIs,
104 								   INetURLObject::FSysStyle eStyle)
105 {
106 	// Backwards compatibility:
107 	if (rTheRelURIRef.Len() != 0 && rTheRelURIRef.GetChar(0) == '#')
108 		return toUniString(rTheRelURIRef);
109 
110 	INetURLObject aAbsURIRef;
111     if (rTheBaseURIRef.HasError())
112         aAbsURIRef.
113             SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle);
114     else
115     {
116         bool bWasAbsolute;
117 		aAbsURIRef = rTheBaseURIRef.smartRel2Abs(rTheRelURIRef,
118                                                  bWasAbsolute,
119                                                  bIgnoreFragment,
120                                                  eEncodeMechanism,
121                                                  eCharset,
122                                                  bRelativeNonURIs,
123                                                  eStyle);
124         if (bCheckFileExists
125             && !bWasAbsolute
126             && (aAbsURIRef.GetProtocol() == INET_PROT_FILE))
127         {
128             INetURLObject aNonFileURIRef;
129             aNonFileURIRef.SetSmartURL(rTheRelURIRef,
130                                        eEncodeMechanism,
131                                        eCharset,
132                                        eStyle);
133             if (!aNonFileURIRef.HasError()
134                 && aNonFileURIRef.GetProtocol() != INET_PROT_FILE)
135             {
136                 bool bMaybeFile = false;
137                 if (rMaybeFileHdl.IsSet())
138                 {
139                     UniString aFilePath(toUniString(rTheRelURIRef));
140                     bMaybeFile = rMaybeFileHdl.Call(&aFilePath) != 0;
141                 }
142                 if (!bMaybeFile)
143                     aAbsURIRef = aNonFileURIRef;
144             }
145         }
146     }
147 	return aAbsURIRef.GetMainURL(eDecodeMechanism, eCharset);
148 }
149 
150 }
151 
152 UniString
153 URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
154 						ByteString const & rTheRelURIRef,
155 						Link const & rMaybeFileHdl,
156 						bool bCheckFileExists,
157 						bool bIgnoreFragment,
158 						INetURLObject::EncodeMechanism eEncodeMechanism,
159 						INetURLObject::DecodeMechanism eDecodeMechanism,
160 						rtl_TextEncoding eCharset,
161 						bool bRelativeNonURIs,
162 						INetURLObject::FSysStyle eStyle)
163 {
164 	return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl,
165 							 bCheckFileExists, bIgnoreFragment,
166 							 eEncodeMechanism, eDecodeMechanism, eCharset,
167 							 bRelativeNonURIs, eStyle);
168 }
169 
170 UniString
171 URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
172 						UniString const & rTheRelURIRef,
173 						Link const & rMaybeFileHdl,
174 						bool bCheckFileExists,
175 						bool bIgnoreFragment,
176 						INetURLObject::EncodeMechanism eEncodeMechanism,
177 						INetURLObject::DecodeMechanism eDecodeMechanism,
178 						rtl_TextEncoding eCharset,
179 						bool bRelativeNonURIs,
180 						INetURLObject::FSysStyle eStyle)
181 {
182 	return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl,
183 							 bCheckFileExists, bIgnoreFragment,
184 							 eEncodeMechanism, eDecodeMechanism, eCharset,
185 							 bRelativeNonURIs, eStyle);
186 }
187 
188 //============================================================================
189 //
190 //  SetMaybeFileHdl
191 //
192 //============================================================================
193 
194 namespace { struct MaybeFileHdl : public rtl::Static< Link, MaybeFileHdl > {}; }
195 
196 void URIHelper::SetMaybeFileHdl(Link const & rTheMaybeFileHdl)
197 {
198 	MaybeFileHdl::get() = rTheMaybeFileHdl;
199 }
200 
201 //============================================================================
202 //
203 //  GetMaybeFileHdl
204 //
205 //============================================================================
206 
207 Link URIHelper::GetMaybeFileHdl()
208 {
209 	return MaybeFileHdl::get();
210 }
211 
212 namespace {
213 
214 bool isAbsoluteHierarchicalUriReference(
215     css::uno::Reference< css::uri::XUriReference > const & uriReference)
216 {
217     return uriReference.is() && uriReference->isAbsolute()
218         && uriReference->isHierarchical() && !uriReference->hasRelativePath();
219 }
220 
// Outcome of normalizePrefix.
//
// As a performance shortcut, it is assumed that if, for any prefix URL of a
// given hierarchical URL, either no UCB content can be created or the UCB
// content does not support the getCasePreservingURL command, then the same
// holds for every other prefix URL of that URL as well.  Such cases are
// reported as GeneralFailure, so that callers need not try further prefixes.
enum Result
{
    Success,        // the getCasePreservingURL command was executed
    GeneralFailure, // no content, or the command is not supported
    SpecificFailure // the command failed for this particular URL
};
226 
227 Result normalizePrefix(
228     css::uno::Reference< css::ucb::XContentProvider > const & broker,
229     rtl::OUString const & uri, rtl::OUString * normalized)
230 {
231     OSL_ASSERT(broker.is() && normalized != 0);
232     css::uno::Reference< css::ucb::XContent > content;
233     try {
234         content = broker->queryContent(
235             css::uno::Reference< css::ucb::XContentIdentifierFactory >(
236                 broker, css::uno::UNO_QUERY_THROW)->createContentIdentifier(
237                     uri));
238     } catch (css::ucb::IllegalIdentifierException &) {}
239     if (!content.is()) {
240         return GeneralFailure;
241     }
242     try {
243         #if OSL_DEBUG_LEVEL > 0
244         bool ok =
245         #endif
246             (css::uno::Reference< css::ucb::XCommandProcessor >(
247                    content, css::uno::UNO_QUERY_THROW)->execute(
248                        css::ucb::Command(
249                            rtl::OUString(
250                                RTL_CONSTASCII_USTRINGPARAM(
251                                    "getCasePreservingURL")),
252                            -1, css::uno::Any()),
253                        0,
254                        css::uno::Reference< css::ucb::XCommandEnvironment >())
255                >>= *normalized);
256         OSL_ASSERT(ok);
257     } catch (css::uno::RuntimeException &) {
258         throw;
259     } catch (css::ucb::UnsupportedCommandException &) {
260         return GeneralFailure;
261     } catch (css::uno::Exception &) {
262         return SpecificFailure;
263     }
264     return Success;
265 }
266 
267 rtl::OUString normalize(
268     css::uno::Reference< css::ucb::XContentProvider > const & broker,
269     css::uno::Reference< css::uri::XUriReferenceFactory > const & uriFactory,
270     rtl::OUString const & uriReference)
271 {
272     // normalizePrefix can potentially fail (a typically example being a file
273     // URL that denotes a non-existing resource); in such a case, try to
274     // normalize as long a prefix of the given URL as possible (i.e., normalize
275     // all the existing directories within the path):
276     rtl::OUString normalized;
277     sal_Int32 n = uriReference.indexOf('#');
278     normalized = n == -1 ? uriReference : uriReference.copy(0, n);
279     switch (normalizePrefix(broker, normalized, &normalized)) {
280     case Success:
281         return n == -1 ? normalized : normalized + uriReference.copy(n);
282     case GeneralFailure:
283         return uriReference;
284     case SpecificFailure:
285     default:
286         break;
287     }
288     css::uno::Reference< css::uri::XUriReference > ref(
289         uriFactory->parse(uriReference));
290     if (!isAbsoluteHierarchicalUriReference(ref)) {
291         return uriReference;
292     }
293     sal_Int32 count = ref->getPathSegmentCount();
294     if (count < 2) {
295         return uriReference;
296     }
297     rtl::OUStringBuffer head(ref->getScheme());
298     head.append(static_cast< sal_Unicode >(':'));
299     if (ref->hasAuthority()) {
300         head.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
301         head.append(ref->getAuthority());
302     }
303     for (sal_Int32 i = count - 1; i > 0; --i) {
304         rtl::OUStringBuffer buf(head);
305         for (sal_Int32 j = 0; j < i; ++j) {
306             buf.append(static_cast< sal_Unicode >('/'));
307             buf.append(ref->getPathSegment(j));
308         }
309         normalized = buf.makeStringAndClear();
310         if (normalizePrefix(broker, normalized, &normalized) != SpecificFailure)
311         {
312             buf.append(normalized);
313             css::uno::Reference< css::uri::XUriReference > preRef(
314                 uriFactory->parse(normalized));
315             if (!isAbsoluteHierarchicalUriReference(preRef)) {
316                 // This could only happen if something is inconsistent:
317                 break;
318             }
319             sal_Int32 preCount = preRef->getPathSegmentCount();
320             // normalizePrefix may have added or removed a final slash:
321             if (preCount != i) {
322                 if (preCount == i - 1) {
323                     buf.append(static_cast< sal_Unicode >('/'));
324                 } else if (preCount - 1 == i && buf.getLength() > 0
325                            && buf.charAt(buf.getLength() - 1) == '/')
326                 {
327                     buf.setLength(buf.getLength() - 1);
328                 } else {
329                     // This could only happen if something is inconsistent:
330                     break;
331                 }
332             }
333             for (sal_Int32 j = i; j < count; ++j) {
334                 buf.append(static_cast< sal_Unicode >('/'));
335                 buf.append(ref->getPathSegment(j));
336             }
337             if (ref->hasQuery()) {
338                 buf.append(static_cast< sal_Unicode >('?'));
339                 buf.append(ref->getQuery());
340             }
341             if (ref->hasFragment()) {
342                 buf.append(static_cast< sal_Unicode >('#'));
343                 buf.append(ref->getFragment());
344             }
345             return buf.makeStringAndClear();
346         }
347     }
348     return uriReference;
349 }
350 
351 }
352 
353 css::uno::Reference< css::uri::XUriReference >
354 URIHelper::normalizedMakeRelative(
355     css::uno::Reference< css::uno::XComponentContext > const & context,
356     rtl::OUString const & baseUriReference, rtl::OUString const & uriReference)
357 {
358     OSL_ASSERT(context.is());
359     css::uno::Reference< css::lang::XMultiComponentFactory > componentFactory(
360         context->getServiceManager());
361     if (!componentFactory.is()) {
362         throw css::uno::RuntimeException(
363             rtl::OUString(
364                 RTL_CONSTASCII_USTRINGPARAM(
365                     "component context has no service manager")),
366             css::uno::Reference< css::uno::XInterface >());
367     }
368     css::uno::Sequence< css::uno::Any > args(2);
369     args[0] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Local"));
370     args[1] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Office"));
371     css::uno::Reference< css::ucb::XContentProvider > broker;
372     try {
373         broker = css::uno::Reference< css::ucb::XContentProvider >(
374             componentFactory->createInstanceWithArgumentsAndContext(
375                 rtl::OUString(
376                     RTL_CONSTASCII_USTRINGPARAM(
377                         "com.sun.star.ucb.UniversalContentBroker")),
378                 args, context),
379             css::uno::UNO_QUERY_THROW);
380     } catch (css::uno::RuntimeException &) {
381         throw;
382     } catch (css::uno::Exception &) {
383         css::uno::Any exception(cppu::getCaughtException());
384         throw css::lang::WrappedTargetRuntimeException(
385             rtl::OUString(
386                 RTL_CONSTASCII_USTRINGPARAM(
387                     "creating com.sun.star.ucb.UniversalContentBroker failed")),
388             css::uno::Reference< css::uno::XInterface >(),
389             exception);
390     }
391     css::uno::Reference< css::uri::XUriReferenceFactory > uriFactory(
392         css::uri::UriReferenceFactory::create(context));
393     return uriFactory->makeRelative(
394         uriFactory->parse(normalize(broker, uriFactory, baseUriReference)),
395         uriFactory->parse(normalize(broker, uriFactory, uriReference)), true,
396         true, false);
397 }
398 
399 rtl::OUString URIHelper::simpleNormalizedMakeRelative(
400     rtl::OUString const & baseUriReference, rtl::OUString const & uriReference)
401 {
402     com::sun::star::uno::Reference< com::sun::star::uri::XUriReference > rel(
403         URIHelper::normalizedMakeRelative(
404             com::sun::star::uno::Reference<
405             com::sun::star::uno::XComponentContext >(
406                 (com::sun::star::uno::Reference<
407                  com::sun::star::beans::XPropertySet >(
408                     comphelper::getProcessServiceFactory(),
409                     com::sun::star::uno::UNO_QUERY_THROW)->
410                  getPropertyValue(
411                      rtl::OUString(
412                          RTL_CONSTASCII_USTRINGPARAM("DefaultContext")))),
413                 com::sun::star::uno::UNO_QUERY_THROW),
414             baseUriReference, uriReference));
415     return rel.is() ? rel->getUriReference() : uriReference;
416 }
417 
418 //============================================================================
419 //
420 //  FindFirstURLInText
421 //
422 //============================================================================
423 
424 namespace unnamed_svl_urihelper {
425 
426 inline xub_StrLen nextChar(UniString const & rStr, xub_StrLen nPos)
427 {
428 	return INetMIME::isHighSurrogate(rStr.GetChar(nPos))
429 		   && rStr.Len() - nPos >= 2
430 		   && INetMIME::isLowSurrogate(rStr.GetChar(nPos + 1)) ?
431 		       nPos + 2 : nPos + 1;
432 }
433 
434 bool isBoundary1(CharClass const & rCharClass, UniString const & rStr,
435                  xub_StrLen nPos, xub_StrLen nEnd)
436 {
437     if (nPos == nEnd)
438         return true;
439     if (rCharClass.isLetterNumeric(rStr, nPos))
440         return false;
441     switch (rStr.GetChar(nPos))
442     {
443     case '$':
444     case '%':
445     case '&':
446     case '-':
447     case '/':
448     case '@':
449     case '\\':
450         return false;
451     default:
452         return true;
453     }
454 }
455 
456 bool isBoundary2(CharClass const & rCharClass, UniString const & rStr,
457                  xub_StrLen nPos, xub_StrLen nEnd)
458 {
459     if (nPos == nEnd)
460         return true;
461     if (rCharClass.isLetterNumeric(rStr, nPos))
462         return false;
463     switch (rStr.GetChar(nPos))
464     {
465     case '!':
466     case '#':
467     case '$':
468     case '%':
469     case '&':
470     case '\'':
471     case '*':
472     case '+':
473     case '-':
474     case '/':
475     case '=':
476     case '?':
477     case '@':
478     case '^':
479     case '_':
480     case '`':
481     case '{':
482     case '|':
483     case '}':
484     case '~':
485         return false;
486     default:
487         return true;
488     }
489 }
490 
491 bool checkWChar(CharClass const & rCharClass, UniString const & rStr,
492                 xub_StrLen * pPos, xub_StrLen * pEnd, bool bBackslash = false,
493                 bool bPipe = false)
494 {
495 	sal_Unicode c = rStr.GetChar(*pPos);
496 	if (INetMIME::isUSASCII(c))
497 	{
498 		static sal_uInt8 const aMap[128]
499 			= { 0, 0, 0, 0, 0, 0, 0, 0,
500 				0, 0, 0, 0, 0, 0, 0, 0,
501 				0, 0, 0, 0, 0, 0, 0, 0,
502 				0, 0, 0, 0, 0, 0, 0, 0,
503 				0, 1, 0, 0, 4, 4, 4, 1,   //  !"#$%&'
504 				1, 1, 1, 1, 1, 4, 1, 4,   // ()*+,-./
505 				4, 4, 4, 4, 4, 4, 4, 4,   // 01234567
506 				4, 4, 1, 1, 0, 1, 0, 1,   // 89:;<=>?
507 				4, 4, 4, 4, 4, 4, 4, 4,   // @ABCDEFG
508 				4, 4, 4, 4, 4, 4, 4, 4,   // HIJKLMNO
509 				4, 4, 4, 4, 4, 4, 4, 4,   // PQRSTUVW
510 				4, 4, 4, 1, 2, 1, 0, 1,   // XYZ[\]^_
511 				0, 4, 4, 4, 4, 4, 4, 4,   // `abcdefg
512 				4, 4, 4, 4, 4, 4, 4, 4,   // hijklmno
513 				4, 4, 4, 4, 4, 4, 4, 4,   // pqrstuvw
514 				4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~
515 		switch (aMap[c])
516 		{
517 			default: // not uric
518 				return false;
519 
520 			case 1: // uric
521 				++(*pPos);
522 				return true;
523 
524 			case 2: // "\"
525 				if (bBackslash)
526 				{
527 					*pEnd = ++(*pPos);
528 					return true;
529 				}
530 				else
531 					return false;
532 
533 			case 3: // "|"
534 				if (bPipe)
535 				{
536 					*pEnd = ++(*pPos);
537 					return true;
538 				}
539 				else
540 					return false;
541 
542 			case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see
543                     // isBoundary1)
544 				*pEnd = ++(*pPos);
545 				return true;
546 		}
547 	}
548 	else if (rCharClass.isLetterNumeric(rStr, *pPos))
549 	{
550 		*pEnd = *pPos = nextChar(rStr, *pPos);
551 		return true;
552 	}
553 	else
554 		return false;
555 }
556 
557 sal_uInt32 scanDomain(UniString const & rStr, xub_StrLen * pPos,
558                       xub_StrLen nEnd)
559 {
560 	sal_Unicode const * pBuffer = rStr.GetBuffer();
561 	sal_Unicode const * p = pBuffer + *pPos;
562 	sal_uInt32 nLabels = INetURLObject::scanDomain(p, pBuffer + nEnd, false);
563 	*pPos = sal::static_int_cast< xub_StrLen >(p - pBuffer);
564 	return nLabels;
565 }
566 
567 }
568 
569 UniString
570 URIHelper::FindFirstURLInText(UniString const & rText,
571                               xub_StrLen & rBegin,
572                               xub_StrLen & rEnd,
573                               CharClass const & rCharClass,
574                               INetURLObject::EncodeMechanism eMechanism,
575                               rtl_TextEncoding eCharset,
576                               INetURLObject::FSysStyle eStyle)
577 {
578     if (!(rBegin <= rEnd && rEnd <= rText.Len()))
579         return UniString();
580 
581     // Search for the first substring of [rBegin..rEnd[ that matches any of the
582     // following productions (for which the appropriate style bit is set in
583     // eStyle, if applicable).
584     //
585     // 1st Production (known scheme):
586     //    \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar]
587     //        \B1
588     //
589     // 2nd Production (file):
590     //    \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1
591     //
592     // 3rd Production (ftp):
593     //    \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
594     //
595     // 4th Production (http):
596     //    \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
597     //
598     // 5th Production (mailto):
599     //    \B2 local-part "@" domain \B1
600     //
601     // 6th Production (UNC file):
602     //    \B1 "\\" domain "\" *(wchar / "\") \B1
603     //
604     // 7th Production (DOS file):
605     //    \B1 ALPHA ":\" *(wchar / "\") \B1
606     //
607     // 8th Production (Unix-like DOS file):
608     //    \B1 ALPHA ":/" *(wchar / "\") \B1
609     //
610     // The productions use the following auxiliary rules.
611     //
612     //    local-part = atom *("." atom)
613     //    atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+"
614     //              / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}"
615     //              / "~")
616     //    domain = label *("." label)
617     //    label = alphanum [*(alphanum / "-") alphanum]
618     //    alphanum = ALPHA / DIGIT
619     //    wchar = <any uric character (ignoring the escaped rule), or "%", or
620     //             a letter or digit (according to rCharClass)>
621     //
622     // "\B1" (boundary 1) stands for the beginning or end of the block of text,
623     // or a character that is neither (a) a letter or digit (according to
624     // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\".
625     // (FIXME:  What was the rationale for this set of punctuation characters?)
626     //
627     // "\B2" (boundary 2) stands for the beginning or end of the block of text,
628     // or a character that is neither (a) a letter or digit (according to
629     // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-",
630     // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC
631     // 822 <atom> character, or "@" from \B1's set above).
632     //
633     // Productions 1--4, and 6--8 try to find a maximum-length match, but they
634     // stop at the first <wchar> character that is a "\B1" character which is
635     // only followed by "\B1" characters (taking "\" and "|" characters into
636     // account appropriately).  Production 5 simply tries to find a maximum-
637     // length match.
638     //
    // Productions 1--4 use the given eMechanism and eCharset.  Productions 5--8
    // use ENCODE_ALL.
    //
    // Productions 6--8 are only applicable if the FSYS_DOS bit is set in
    // eStyle.
644 
645     bool bBoundary1 = true;
646     bool bBoundary2 = true;
647     for (xub_StrLen nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos))
648     {
649         sal_Unicode c = rText.GetChar(nPos);
650         if (bBoundary1)
651         {
652             if (INetMIME::isAlpha(c))
653             {
654                 xub_StrLen i = nPos;
655                 INetProtocol eScheme
656                     = INetURLObject::CompareProtocolScheme(UniString(rText, i,
657                                                                      rEnd));
658                 if (eScheme == INET_PROT_FILE) // 2nd
659                 {
660                     while (rText.GetChar(i++) != ':') ;
661                     xub_StrLen nPrefixEnd = i;
662                     xub_StrLen nUriEnd = i;
663                     while (i != rEnd
664                            && checkWChar(rCharClass, rText, &i, &nUriEnd, true,
665                                          true)) ;
666                     if (i != nPrefixEnd && rText.GetChar(i) == '#')
667                     {
668                         ++i;
669                         while (i != rEnd
670                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
671                     }
672                     if (nUriEnd != nPrefixEnd
673                         && isBoundary1(rCharClass, rText, nUriEnd, rEnd))
674                     {
675                         INetURLObject aUri(UniString(rText, nPos,
676                                                      nUriEnd - nPos),
677                                            INET_PROT_FILE, eMechanism, eCharset,
678                                            eStyle);
679                         if (!aUri.HasError())
680                         {
681                             rBegin = nPos;
682                             rEnd = nUriEnd;
683                             return
684                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
685                         }
686                     }
687                 }
688                 else if (eScheme != INET_PROT_NOT_VALID) // 1st
689                 {
690                     while (rText.GetChar(i++) != ':') ;
691                     xub_StrLen nPrefixEnd = i;
692                     xub_StrLen nUriEnd = i;
693                     while (i != rEnd
694                            && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
695                     if (i != nPrefixEnd && rText.GetChar(i) == '#')
696                     {
697                         ++i;
698                         while (i != rEnd
699                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
700                     }
701                     if (nUriEnd != nPrefixEnd
702                         && (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
703                             || rText.GetChar(nUriEnd) == '\\'))
704                     {
705                         INetURLObject aUri(UniString(rText, nPos,
706                                                      nUriEnd - nPos),
707                                            INET_PROT_HTTP, eMechanism,
708                                            eCharset);
709                         if (!aUri.HasError())
710                         {
711                             rBegin = nPos;
712                             rEnd = nUriEnd;
713                             return
714                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
715                         }
716                     }
717                 }
718 
719                 // 3rd, 4th:
720                 i = nPos;
721                 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
722                 if (nLabels >= 3
723                     && rText.GetChar(nPos + 3) == '.'
724                     && (((rText.GetChar(nPos) == 'w'
725                           || rText.GetChar(nPos) == 'W')
726                          && (rText.GetChar(nPos + 1) == 'w'
727                              || rText.GetChar(nPos + 1) == 'W')
728                          && (rText.GetChar(nPos + 2) == 'w'
729                              || rText.GetChar(nPos + 2) == 'W'))
730                         || ((rText.GetChar(nPos) == 'f'
731                              || rText.GetChar(nPos) == 'F')
732                             && (rText.GetChar(nPos + 1) == 't'
733                                 || rText.GetChar(nPos + 1) == 'T')
734                             && (rText.GetChar(nPos + 2) == 'p'
735                                 || rText.GetChar(nPos + 2) == 'P'))))
736                     // (note that rText.GetChar(nPos + 3) is guaranteed to be
737                     // valid)
738                 {
739                     xub_StrLen nUriEnd = i;
740                     if (i != rEnd && rText.GetChar(i) == '/')
741                     {
742                         nUriEnd = ++i;
743                         while (i != rEnd
744                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
745                     }
746                     if (i != rEnd && rText.GetChar(i) == '#')
747                     {
748                         ++i;
749                         while (i != rEnd
750                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
751                     }
752                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
753                         || rText.GetChar(nUriEnd) == '\\')
754                     {
755                         INetURLObject aUri(UniString(rText, nPos,
756                                                      nUriEnd - nPos),
757                                            INET_PROT_HTTP, eMechanism,
758                                            eCharset);
759                         if (!aUri.HasError())
760                         {
761                             rBegin = nPos;
762                             rEnd = nUriEnd;
763                             return
764                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
765                         }
766                     }
767                 }
768 
769                 if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 3
770                     && rText.GetChar(nPos + 1) == ':'
771                     && (rText.GetChar(nPos + 2) == '/'
772                         || rText.GetChar(nPos + 2) == '\\')) // 7th, 8th
773                 {
774                     i = nPos + 3;
775                     xub_StrLen nUriEnd = i;
776                     while (i != rEnd
777                            && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
778                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
779                     {
780                         INetURLObject aUri(UniString(rText, nPos,
781                                                      nUriEnd - nPos),
782                                            INET_PROT_FILE,
783                                            INetURLObject::ENCODE_ALL,
784                                            RTL_TEXTENCODING_UTF8,
785                                            INetURLObject::FSYS_DOS);
786                         if (!aUri.HasError())
787                         {
788                             rBegin = nPos;
789                             rEnd = nUriEnd;
790                             return
791                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
792                         }
793                     }
794                 }
795             }
796             else if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 2
797                      && rText.GetChar(nPos) == '\\'
798                      && rText.GetChar(nPos + 1) == '\\') // 6th
799             {
800                 xub_StrLen i = nPos + 2;
801                 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
802                 if (nLabels >= 1 && i != rEnd && rText.GetChar(i) == '\\')
803                 {
804                     xub_StrLen nUriEnd = ++i;
805                     while (i != rEnd
806                            && checkWChar(rCharClass, rText, &i, &nUriEnd,
807                                          true)) ;
808                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
809                     {
810                         INetURLObject aUri(UniString(rText, nPos,
811                                                      nUriEnd - nPos),
812                                            INET_PROT_FILE,
813                                            INetURLObject::ENCODE_ALL,
814                                            RTL_TEXTENCODING_UTF8,
815                                            INetURLObject::FSYS_DOS);
816                         if (!aUri.HasError())
817                         {
818                             rBegin = nPos;
819                             rEnd = nUriEnd;
820                             return
821                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
822                         }
823                     }
824                 }
825             }
826         }
827         if (bBoundary2 && INetMIME::isAtomChar(c)) // 5th
828         {
829             bool bDot = false;
830             for (xub_StrLen i = nPos + 1; i != rEnd; ++i)
831             {
832                 sal_Unicode c2 = rText.GetChar(i);
833                 if (INetMIME::isAtomChar(c2))
834                     bDot = false;
835                 else if (bDot)
836                     break;
837                 else if (c2 == '.')
838                     bDot = true;
839                 else
840                 {
841                     if (c2 == '@')
842                     {
843                         ++i;
844                         sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
845                         if (nLabels >= 1
846                             && isBoundary1(rCharClass, rText, i, rEnd))
847                         {
848                             INetURLObject aUri(UniString(rText, nPos, i - nPos),
849                                                INET_PROT_MAILTO,
850                                                INetURLObject::ENCODE_ALL);
851                             if (!aUri.HasError())
852                             {
853                                 rBegin = nPos;
854                                 rEnd = i;
855                                 return aUri.GetMainURL(
856                                            INetURLObject::DECODE_TO_IURI);
857                             }
858                         }
859                     }
860                     break;
861                 }
862             }
863         }
864         bBoundary1 = isBoundary1(rCharClass, rText, nPos, rEnd);
865         bBoundary2 = isBoundary2(rCharClass, rText, nPos, rEnd);
866     }
867     rBegin = rEnd;
868     return UniString();
869 }
870 
871 //============================================================================
872 //
873 //  removePassword
874 //
875 //============================================================================
876 
877 UniString
878 URIHelper::removePassword(UniString const & rURI,
879 						  INetURLObject::EncodeMechanism eEncodeMechanism,
880 						  INetURLObject::DecodeMechanism eDecodeMechanism,
881 						  rtl_TextEncoding eCharset)
882 {
883 	INetURLObject aObj(rURI, eEncodeMechanism, eCharset);
884 	return aObj.HasError() ?
885 		       rURI :
886 		       String(aObj.GetURLNoPass(eDecodeMechanism, eCharset));
887 }
888 
889 //============================================================================
890 //
891 //  queryFSysStyle
892 //
893 //============================================================================
894 
895 INetURLObject::FSysStyle URIHelper::queryFSysStyle(UniString const & rFileUrl,
896 												   bool bAddConvenienceStyles)
897 	throw (uno::RuntimeException)
898 {
899 	::ucbhelper::ContentBroker const * pBroker = ::ucbhelper::ContentBroker::get();
900 	uno::Reference< ucb::XContentProviderManager > xManager;
901 	if (pBroker)
902 		xManager = pBroker->getContentProviderManagerInterface();
903 	uno::Reference< beans::XPropertySet > xProperties;
904 	if (xManager.is())
905 		xProperties
906 			= uno::Reference< beans::XPropertySet >(
907 				  xManager->queryContentProvider(rFileUrl), uno::UNO_QUERY);
908 	sal_Int32 nNotation = ucb::FileSystemNotation::UNKNOWN_NOTATION;
909 	if (xProperties.is())
910 		try
911 		{
912 			xProperties->getPropertyValue(rtl::OUString(
913 				                              RTL_CONSTASCII_USTRINGPARAM(
914 												  "FileSystemNotation")))
915 				>>= nNotation;
916 		}
917 		catch (beans::UnknownPropertyException const &) {}
918 		catch (lang::WrappedTargetException const &) {}
919 
920 	// The following code depends on the fact that the
921 	// com::sun::star::ucb::FileSystemNotation constants range from UNKNOWN to
922 	// MAC, without any holes.  The table below has two entries per notation,
923 	// the first is used if bAddConvenienceStyles == false, while the second
924 	// is used if bAddConvenienceStyles == true:
925 	static INetURLObject::FSysStyle const aMap[][2]
926 		= { { INetURLObject::FSysStyle(0),
927 			  INetURLObject::FSYS_DETECT },
928 			    // UNKNOWN
929 			{ INetURLObject::FSYS_UNX,
930 			  INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
931 									       | INetURLObject::FSYS_UNX) },
932 			    // UNIX
933 			{ INetURLObject::FSYS_DOS,
934 			  INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
935 									       | INetURLObject::FSYS_UNX
936 									       | INetURLObject::FSYS_DOS) },
937 			    // DOS
938 			{ INetURLObject::FSYS_MAC,
939 			  INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
940 									       | INetURLObject::FSYS_UNX
941 									       | INetURLObject::FSYS_MAC) } };
942 	return aMap[nNotation < ucb::FileSystemNotation::UNKNOWN_NOTATION
943 			    || nNotation > ucb::FileSystemNotation::MAC_NOTATION ?
944 			            0 :
945 			            nNotation
946 			                - ucb::FileSystemNotation::UNKNOWN_NOTATION]
947 		           [bAddConvenienceStyles];
948 }
949