xref: /aoo42x/main/svl/source/misc/urihelper.cxx (revision cdf0e10c)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_svl.hxx"
30*cdf0e10cSrcweir #include <svl/urihelper.hxx>
31*cdf0e10cSrcweir #include <com/sun/star/beans/XPropertySet.hpp>
32*cdf0e10cSrcweir #include "com/sun/star/lang/WrappedTargetRuntimeException.hpp"
33*cdf0e10cSrcweir #include "com/sun/star/lang/XMultiComponentFactory.hpp"
34*cdf0e10cSrcweir #include "com/sun/star/ucb/Command.hpp"
35*cdf0e10cSrcweir #include <com/sun/star/ucb/FileSystemNotation.hpp>
36*cdf0e10cSrcweir #include "com/sun/star/ucb/IllegalIdentifierException.hpp"
37*cdf0e10cSrcweir #include "com/sun/star/ucb/UnsupportedCommandException.hpp"
38*cdf0e10cSrcweir #include "com/sun/star/ucb/XCommandEnvironment.hpp"
39*cdf0e10cSrcweir #include "com/sun/star/ucb/XCommandProcessor.hpp"
40*cdf0e10cSrcweir #include "com/sun/star/ucb/XContent.hpp"
41*cdf0e10cSrcweir #include "com/sun/star/ucb/XContentIdentifierFactory.hpp"
42*cdf0e10cSrcweir #include "com/sun/star/ucb/XContentProvider.hpp"
43*cdf0e10cSrcweir #include <com/sun/star/ucb/XContentProviderManager.hpp>
44*cdf0e10cSrcweir #include "com/sun/star/uno/Any.hxx"
45*cdf0e10cSrcweir #include "com/sun/star/uno/Exception.hpp"
46*cdf0e10cSrcweir #include "com/sun/star/uno/Reference.hxx"
47*cdf0e10cSrcweir #include "com/sun/star/uno/RuntimeException.hpp"
48*cdf0e10cSrcweir #include "com/sun/star/uno/Sequence.hxx"
49*cdf0e10cSrcweir #include "com/sun/star/uno/XComponentContext.hpp"
50*cdf0e10cSrcweir #include "com/sun/star/uno/XInterface.hpp"
51*cdf0e10cSrcweir #include "com/sun/star/uri/UriReferenceFactory.hpp"
52*cdf0e10cSrcweir #include "com/sun/star/uri/XUriReference.hpp"
53*cdf0e10cSrcweir #include "com/sun/star/uri/XUriReferenceFactory.hpp"
54*cdf0e10cSrcweir #include "cppuhelper/exc_hlp.hxx"
55*cdf0e10cSrcweir #include "comphelper/processfactory.hxx"
56*cdf0e10cSrcweir #include "osl/diagnose.h"
57*cdf0e10cSrcweir #include "rtl/ustrbuf.hxx"
58*cdf0e10cSrcweir #include "rtl/ustring.h"
59*cdf0e10cSrcweir #include "rtl/ustring.hxx"
60*cdf0e10cSrcweir #include "sal/types.h"
61*cdf0e10cSrcweir #include <tools/debug.hxx>
62*cdf0e10cSrcweir #include <tools/inetmime.hxx>
63*cdf0e10cSrcweir #include <ucbhelper/contentbroker.hxx>
64*cdf0e10cSrcweir #include <unotools/charclass.hxx>
65*cdf0e10cSrcweir #include "rtl/instance.hxx"
66*cdf0e10cSrcweir 
67*cdf0e10cSrcweir namespace unnamed_svl_urihelper {}
68*cdf0e10cSrcweir using namespace unnamed_svl_urihelper;
69*cdf0e10cSrcweir 	// unnamed namespaces don't work well yet...
70*cdf0e10cSrcweir 
71*cdf0e10cSrcweir namespace css = com::sun::star;
72*cdf0e10cSrcweir using namespace com::sun::star;
73*cdf0e10cSrcweir 
74*cdf0e10cSrcweir //============================================================================
75*cdf0e10cSrcweir //
76*cdf0e10cSrcweir //  SmartRel2Abs
77*cdf0e10cSrcweir //
78*cdf0e10cSrcweir //============================================================================
79*cdf0e10cSrcweir 
80*cdf0e10cSrcweir namespace unnamed_svl_urihelper {
81*cdf0e10cSrcweir 
82*cdf0e10cSrcweir inline UniString toUniString(ByteString const & rString)
83*cdf0e10cSrcweir {
84*cdf0e10cSrcweir 	return UniString(rString, RTL_TEXTENCODING_ISO_8859_1);
85*cdf0e10cSrcweir }
86*cdf0e10cSrcweir 
87*cdf0e10cSrcweir inline UniString toUniString(UniString const & rString)
88*cdf0e10cSrcweir {
89*cdf0e10cSrcweir 	return rString;
90*cdf0e10cSrcweir }
91*cdf0e10cSrcweir 
92*cdf0e10cSrcweir template< typename Str >
93*cdf0e10cSrcweir inline UniString SmartRel2Abs_Impl(INetURLObject const & rTheBaseURIRef,
94*cdf0e10cSrcweir 								   Str const & rTheRelURIRef,
95*cdf0e10cSrcweir 								   Link const & rMaybeFileHdl,
96*cdf0e10cSrcweir 								   bool bCheckFileExists,
97*cdf0e10cSrcweir 								   bool bIgnoreFragment,
98*cdf0e10cSrcweir 								   INetURLObject::EncodeMechanism
99*cdf0e10cSrcweir 								       eEncodeMechanism,
100*cdf0e10cSrcweir 								   INetURLObject::DecodeMechanism
101*cdf0e10cSrcweir 								       eDecodeMechanism,
102*cdf0e10cSrcweir 								   rtl_TextEncoding eCharset,
103*cdf0e10cSrcweir 								   bool bRelativeNonURIs,
104*cdf0e10cSrcweir 								   INetURLObject::FSysStyle eStyle)
105*cdf0e10cSrcweir {
106*cdf0e10cSrcweir 	// Backwards compatibility:
107*cdf0e10cSrcweir 	if (rTheRelURIRef.Len() != 0 && rTheRelURIRef.GetChar(0) == '#')
108*cdf0e10cSrcweir 		return toUniString(rTheRelURIRef);
109*cdf0e10cSrcweir 
110*cdf0e10cSrcweir 	INetURLObject aAbsURIRef;
111*cdf0e10cSrcweir     if (rTheBaseURIRef.HasError())
112*cdf0e10cSrcweir         aAbsURIRef.
113*cdf0e10cSrcweir             SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle);
114*cdf0e10cSrcweir     else
115*cdf0e10cSrcweir     {
116*cdf0e10cSrcweir         bool bWasAbsolute;
117*cdf0e10cSrcweir 		aAbsURIRef = rTheBaseURIRef.smartRel2Abs(rTheRelURIRef,
118*cdf0e10cSrcweir                                                  bWasAbsolute,
119*cdf0e10cSrcweir                                                  bIgnoreFragment,
120*cdf0e10cSrcweir                                                  eEncodeMechanism,
121*cdf0e10cSrcweir                                                  eCharset,
122*cdf0e10cSrcweir                                                  bRelativeNonURIs,
123*cdf0e10cSrcweir                                                  eStyle);
124*cdf0e10cSrcweir         if (bCheckFileExists
125*cdf0e10cSrcweir             && !bWasAbsolute
126*cdf0e10cSrcweir             && (aAbsURIRef.GetProtocol() == INET_PROT_FILE))
127*cdf0e10cSrcweir         {
128*cdf0e10cSrcweir             INetURLObject aNonFileURIRef;
129*cdf0e10cSrcweir             aNonFileURIRef.SetSmartURL(rTheRelURIRef,
130*cdf0e10cSrcweir                                        eEncodeMechanism,
131*cdf0e10cSrcweir                                        eCharset,
132*cdf0e10cSrcweir                                        eStyle);
133*cdf0e10cSrcweir             if (!aNonFileURIRef.HasError()
134*cdf0e10cSrcweir                 && aNonFileURIRef.GetProtocol() != INET_PROT_FILE)
135*cdf0e10cSrcweir             {
136*cdf0e10cSrcweir                 bool bMaybeFile = false;
137*cdf0e10cSrcweir                 if (rMaybeFileHdl.IsSet())
138*cdf0e10cSrcweir                 {
139*cdf0e10cSrcweir                     UniString aFilePath(toUniString(rTheRelURIRef));
140*cdf0e10cSrcweir                     bMaybeFile = rMaybeFileHdl.Call(&aFilePath) != 0;
141*cdf0e10cSrcweir                 }
142*cdf0e10cSrcweir                 if (!bMaybeFile)
143*cdf0e10cSrcweir                     aAbsURIRef = aNonFileURIRef;
144*cdf0e10cSrcweir             }
145*cdf0e10cSrcweir         }
146*cdf0e10cSrcweir     }
147*cdf0e10cSrcweir 	return aAbsURIRef.GetMainURL(eDecodeMechanism, eCharset);
148*cdf0e10cSrcweir }
149*cdf0e10cSrcweir 
150*cdf0e10cSrcweir }
151*cdf0e10cSrcweir 
152*cdf0e10cSrcweir UniString
153*cdf0e10cSrcweir URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
154*cdf0e10cSrcweir 						ByteString const & rTheRelURIRef,
155*cdf0e10cSrcweir 						Link const & rMaybeFileHdl,
156*cdf0e10cSrcweir 						bool bCheckFileExists,
157*cdf0e10cSrcweir 						bool bIgnoreFragment,
158*cdf0e10cSrcweir 						INetURLObject::EncodeMechanism eEncodeMechanism,
159*cdf0e10cSrcweir 						INetURLObject::DecodeMechanism eDecodeMechanism,
160*cdf0e10cSrcweir 						rtl_TextEncoding eCharset,
161*cdf0e10cSrcweir 						bool bRelativeNonURIs,
162*cdf0e10cSrcweir 						INetURLObject::FSysStyle eStyle)
163*cdf0e10cSrcweir {
164*cdf0e10cSrcweir 	return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl,
165*cdf0e10cSrcweir 							 bCheckFileExists, bIgnoreFragment,
166*cdf0e10cSrcweir 							 eEncodeMechanism, eDecodeMechanism, eCharset,
167*cdf0e10cSrcweir 							 bRelativeNonURIs, eStyle);
168*cdf0e10cSrcweir }
169*cdf0e10cSrcweir 
170*cdf0e10cSrcweir UniString
171*cdf0e10cSrcweir URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
172*cdf0e10cSrcweir 						UniString const & rTheRelURIRef,
173*cdf0e10cSrcweir 						Link const & rMaybeFileHdl,
174*cdf0e10cSrcweir 						bool bCheckFileExists,
175*cdf0e10cSrcweir 						bool bIgnoreFragment,
176*cdf0e10cSrcweir 						INetURLObject::EncodeMechanism eEncodeMechanism,
177*cdf0e10cSrcweir 						INetURLObject::DecodeMechanism eDecodeMechanism,
178*cdf0e10cSrcweir 						rtl_TextEncoding eCharset,
179*cdf0e10cSrcweir 						bool bRelativeNonURIs,
180*cdf0e10cSrcweir 						INetURLObject::FSysStyle eStyle)
181*cdf0e10cSrcweir {
182*cdf0e10cSrcweir 	return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl,
183*cdf0e10cSrcweir 							 bCheckFileExists, bIgnoreFragment,
184*cdf0e10cSrcweir 							 eEncodeMechanism, eDecodeMechanism, eCharset,
185*cdf0e10cSrcweir 							 bRelativeNonURIs, eStyle);
186*cdf0e10cSrcweir }
187*cdf0e10cSrcweir 
188*cdf0e10cSrcweir //============================================================================
189*cdf0e10cSrcweir //
190*cdf0e10cSrcweir //  SetMaybeFileHdl
191*cdf0e10cSrcweir //
192*cdf0e10cSrcweir //============================================================================
193*cdf0e10cSrcweir 
194*cdf0e10cSrcweir namespace { struct MaybeFileHdl : public rtl::Static< Link, MaybeFileHdl > {}; }
195*cdf0e10cSrcweir 
196*cdf0e10cSrcweir void URIHelper::SetMaybeFileHdl(Link const & rTheMaybeFileHdl)
197*cdf0e10cSrcweir {
198*cdf0e10cSrcweir 	MaybeFileHdl::get() = rTheMaybeFileHdl;
199*cdf0e10cSrcweir }
200*cdf0e10cSrcweir 
201*cdf0e10cSrcweir //============================================================================
202*cdf0e10cSrcweir //
203*cdf0e10cSrcweir //  GetMaybeFileHdl
204*cdf0e10cSrcweir //
205*cdf0e10cSrcweir //============================================================================
206*cdf0e10cSrcweir 
207*cdf0e10cSrcweir Link URIHelper::GetMaybeFileHdl()
208*cdf0e10cSrcweir {
209*cdf0e10cSrcweir 	return MaybeFileHdl::get();
210*cdf0e10cSrcweir }
211*cdf0e10cSrcweir 
212*cdf0e10cSrcweir namespace {
213*cdf0e10cSrcweir 
214*cdf0e10cSrcweir bool isAbsoluteHierarchicalUriReference(
215*cdf0e10cSrcweir     css::uno::Reference< css::uri::XUriReference > const & uriReference)
216*cdf0e10cSrcweir {
217*cdf0e10cSrcweir     return uriReference.is() && uriReference->isAbsolute()
218*cdf0e10cSrcweir         && uriReference->isHierarchical() && !uriReference->hasRelativePath();
219*cdf0e10cSrcweir }
220*cdf0e10cSrcweir 
// Outcome of normalizePrefix.
//
// As a performance shortcut, the code assumes that when, for some prefix URL
// of a hierarchical URL, no UCB content can be created or the content does
// not support the getCasePreservingURL command, the same is true for every
// other prefix URL of that URL.  Such cases are reported as GeneralFailure so
// that callers can stop trying further prefixes.
enum Result
{
    // The getCasePreservingURL command was executed.
    Success,
    // No content could be obtained, or the command is unsupported.
    GeneralFailure,
    // The command failed with some other (non-runtime) UNO exception.
    SpecificFailure
};
226*cdf0e10cSrcweir 
227*cdf0e10cSrcweir Result normalizePrefix(
228*cdf0e10cSrcweir     css::uno::Reference< css::ucb::XContentProvider > const & broker,
229*cdf0e10cSrcweir     rtl::OUString const & uri, rtl::OUString * normalized)
230*cdf0e10cSrcweir {
231*cdf0e10cSrcweir     OSL_ASSERT(broker.is() && normalized != 0);
232*cdf0e10cSrcweir     css::uno::Reference< css::ucb::XContent > content;
233*cdf0e10cSrcweir     try {
234*cdf0e10cSrcweir         content = broker->queryContent(
235*cdf0e10cSrcweir             css::uno::Reference< css::ucb::XContentIdentifierFactory >(
236*cdf0e10cSrcweir                 broker, css::uno::UNO_QUERY_THROW)->createContentIdentifier(
237*cdf0e10cSrcweir                     uri));
238*cdf0e10cSrcweir     } catch (css::ucb::IllegalIdentifierException &) {}
239*cdf0e10cSrcweir     if (!content.is()) {
240*cdf0e10cSrcweir         return GeneralFailure;
241*cdf0e10cSrcweir     }
242*cdf0e10cSrcweir     try {
243*cdf0e10cSrcweir         #if OSL_DEBUG_LEVEL > 0
244*cdf0e10cSrcweir         bool ok =
245*cdf0e10cSrcweir         #endif
246*cdf0e10cSrcweir             (css::uno::Reference< css::ucb::XCommandProcessor >(
247*cdf0e10cSrcweir                    content, css::uno::UNO_QUERY_THROW)->execute(
248*cdf0e10cSrcweir                        css::ucb::Command(
249*cdf0e10cSrcweir                            rtl::OUString(
250*cdf0e10cSrcweir                                RTL_CONSTASCII_USTRINGPARAM(
251*cdf0e10cSrcweir                                    "getCasePreservingURL")),
252*cdf0e10cSrcweir                            -1, css::uno::Any()),
253*cdf0e10cSrcweir                        0,
254*cdf0e10cSrcweir                        css::uno::Reference< css::ucb::XCommandEnvironment >())
255*cdf0e10cSrcweir                >>= *normalized);
256*cdf0e10cSrcweir         OSL_ASSERT(ok);
257*cdf0e10cSrcweir     } catch (css::uno::RuntimeException &) {
258*cdf0e10cSrcweir         throw;
259*cdf0e10cSrcweir     } catch (css::ucb::UnsupportedCommandException &) {
260*cdf0e10cSrcweir         return GeneralFailure;
261*cdf0e10cSrcweir     } catch (css::uno::Exception &) {
262*cdf0e10cSrcweir         return SpecificFailure;
263*cdf0e10cSrcweir     }
264*cdf0e10cSrcweir     return Success;
265*cdf0e10cSrcweir }
266*cdf0e10cSrcweir 
267*cdf0e10cSrcweir rtl::OUString normalize(
268*cdf0e10cSrcweir     css::uno::Reference< css::ucb::XContentProvider > const & broker,
269*cdf0e10cSrcweir     css::uno::Reference< css::uri::XUriReferenceFactory > const & uriFactory,
270*cdf0e10cSrcweir     rtl::OUString const & uriReference)
271*cdf0e10cSrcweir {
272*cdf0e10cSrcweir     // normalizePrefix can potentially fail (a typically example being a file
273*cdf0e10cSrcweir     // URL that denotes a non-existing resource); in such a case, try to
274*cdf0e10cSrcweir     // normalize as long a prefix of the given URL as possible (i.e., normalize
275*cdf0e10cSrcweir     // all the existing directories within the path):
276*cdf0e10cSrcweir     rtl::OUString normalized;
277*cdf0e10cSrcweir     sal_Int32 n = uriReference.indexOf('#');
278*cdf0e10cSrcweir     normalized = n == -1 ? uriReference : uriReference.copy(0, n);
279*cdf0e10cSrcweir     switch (normalizePrefix(broker, normalized, &normalized)) {
280*cdf0e10cSrcweir     case Success:
281*cdf0e10cSrcweir         return n == -1 ? normalized : normalized + uriReference.copy(n);
282*cdf0e10cSrcweir     case GeneralFailure:
283*cdf0e10cSrcweir         return uriReference;
284*cdf0e10cSrcweir     case SpecificFailure:
285*cdf0e10cSrcweir     default:
286*cdf0e10cSrcweir         break;
287*cdf0e10cSrcweir     }
288*cdf0e10cSrcweir     css::uno::Reference< css::uri::XUriReference > ref(
289*cdf0e10cSrcweir         uriFactory->parse(uriReference));
290*cdf0e10cSrcweir     if (!isAbsoluteHierarchicalUriReference(ref)) {
291*cdf0e10cSrcweir         return uriReference;
292*cdf0e10cSrcweir     }
293*cdf0e10cSrcweir     sal_Int32 count = ref->getPathSegmentCount();
294*cdf0e10cSrcweir     if (count < 2) {
295*cdf0e10cSrcweir         return uriReference;
296*cdf0e10cSrcweir     }
297*cdf0e10cSrcweir     rtl::OUStringBuffer head(ref->getScheme());
298*cdf0e10cSrcweir     head.append(static_cast< sal_Unicode >(':'));
299*cdf0e10cSrcweir     if (ref->hasAuthority()) {
300*cdf0e10cSrcweir         head.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
301*cdf0e10cSrcweir         head.append(ref->getAuthority());
302*cdf0e10cSrcweir     }
303*cdf0e10cSrcweir     for (sal_Int32 i = count - 1; i > 0; --i) {
304*cdf0e10cSrcweir         rtl::OUStringBuffer buf(head);
305*cdf0e10cSrcweir         for (sal_Int32 j = 0; j < i; ++j) {
306*cdf0e10cSrcweir             buf.append(static_cast< sal_Unicode >('/'));
307*cdf0e10cSrcweir             buf.append(ref->getPathSegment(j));
308*cdf0e10cSrcweir         }
309*cdf0e10cSrcweir         normalized = buf.makeStringAndClear();
310*cdf0e10cSrcweir         if (normalizePrefix(broker, normalized, &normalized) != SpecificFailure)
311*cdf0e10cSrcweir         {
312*cdf0e10cSrcweir             buf.append(normalized);
313*cdf0e10cSrcweir             css::uno::Reference< css::uri::XUriReference > preRef(
314*cdf0e10cSrcweir                 uriFactory->parse(normalized));
315*cdf0e10cSrcweir             if (!isAbsoluteHierarchicalUriReference(preRef)) {
316*cdf0e10cSrcweir                 // This could only happen if something is inconsistent:
317*cdf0e10cSrcweir                 break;
318*cdf0e10cSrcweir             }
319*cdf0e10cSrcweir             sal_Int32 preCount = preRef->getPathSegmentCount();
320*cdf0e10cSrcweir             // normalizePrefix may have added or removed a final slash:
321*cdf0e10cSrcweir             if (preCount != i) {
322*cdf0e10cSrcweir                 if (preCount == i - 1) {
323*cdf0e10cSrcweir                     buf.append(static_cast< sal_Unicode >('/'));
324*cdf0e10cSrcweir                 } else if (preCount - 1 == i && buf.getLength() > 0
325*cdf0e10cSrcweir                            && buf.charAt(buf.getLength() - 1) == '/')
326*cdf0e10cSrcweir                 {
327*cdf0e10cSrcweir                     buf.setLength(buf.getLength() - 1);
328*cdf0e10cSrcweir                 } else {
329*cdf0e10cSrcweir                     // This could only happen if something is inconsistent:
330*cdf0e10cSrcweir                     break;
331*cdf0e10cSrcweir                 }
332*cdf0e10cSrcweir             }
333*cdf0e10cSrcweir             for (sal_Int32 j = i; j < count; ++j) {
334*cdf0e10cSrcweir                 buf.append(static_cast< sal_Unicode >('/'));
335*cdf0e10cSrcweir                 buf.append(ref->getPathSegment(j));
336*cdf0e10cSrcweir             }
337*cdf0e10cSrcweir             if (ref->hasQuery()) {
338*cdf0e10cSrcweir                 buf.append(static_cast< sal_Unicode >('?'));
339*cdf0e10cSrcweir                 buf.append(ref->getQuery());
340*cdf0e10cSrcweir             }
341*cdf0e10cSrcweir             if (ref->hasFragment()) {
342*cdf0e10cSrcweir                 buf.append(static_cast< sal_Unicode >('#'));
343*cdf0e10cSrcweir                 buf.append(ref->getFragment());
344*cdf0e10cSrcweir             }
345*cdf0e10cSrcweir             return buf.makeStringAndClear();
346*cdf0e10cSrcweir         }
347*cdf0e10cSrcweir     }
348*cdf0e10cSrcweir     return uriReference;
349*cdf0e10cSrcweir }
350*cdf0e10cSrcweir 
351*cdf0e10cSrcweir }
352*cdf0e10cSrcweir 
353*cdf0e10cSrcweir css::uno::Reference< css::uri::XUriReference >
354*cdf0e10cSrcweir URIHelper::normalizedMakeRelative(
355*cdf0e10cSrcweir     css::uno::Reference< css::uno::XComponentContext > const & context,
356*cdf0e10cSrcweir     rtl::OUString const & baseUriReference, rtl::OUString const & uriReference)
357*cdf0e10cSrcweir {
358*cdf0e10cSrcweir     OSL_ASSERT(context.is());
359*cdf0e10cSrcweir     css::uno::Reference< css::lang::XMultiComponentFactory > componentFactory(
360*cdf0e10cSrcweir         context->getServiceManager());
361*cdf0e10cSrcweir     if (!componentFactory.is()) {
362*cdf0e10cSrcweir         throw css::uno::RuntimeException(
363*cdf0e10cSrcweir             rtl::OUString(
364*cdf0e10cSrcweir                 RTL_CONSTASCII_USTRINGPARAM(
365*cdf0e10cSrcweir                     "component context has no service manager")),
366*cdf0e10cSrcweir             css::uno::Reference< css::uno::XInterface >());
367*cdf0e10cSrcweir     }
368*cdf0e10cSrcweir     css::uno::Sequence< css::uno::Any > args(2);
369*cdf0e10cSrcweir     args[0] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Local"));
370*cdf0e10cSrcweir     args[1] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Office"));
371*cdf0e10cSrcweir     css::uno::Reference< css::ucb::XContentProvider > broker;
372*cdf0e10cSrcweir     try {
373*cdf0e10cSrcweir         broker = css::uno::Reference< css::ucb::XContentProvider >(
374*cdf0e10cSrcweir             componentFactory->createInstanceWithArgumentsAndContext(
375*cdf0e10cSrcweir                 rtl::OUString(
376*cdf0e10cSrcweir                     RTL_CONSTASCII_USTRINGPARAM(
377*cdf0e10cSrcweir                         "com.sun.star.ucb.UniversalContentBroker")),
378*cdf0e10cSrcweir                 args, context),
379*cdf0e10cSrcweir             css::uno::UNO_QUERY_THROW);
380*cdf0e10cSrcweir     } catch (css::uno::RuntimeException &) {
381*cdf0e10cSrcweir         throw;
382*cdf0e10cSrcweir     } catch (css::uno::Exception &) {
383*cdf0e10cSrcweir         css::uno::Any exception(cppu::getCaughtException());
384*cdf0e10cSrcweir         throw css::lang::WrappedTargetRuntimeException(
385*cdf0e10cSrcweir             rtl::OUString(
386*cdf0e10cSrcweir                 RTL_CONSTASCII_USTRINGPARAM(
387*cdf0e10cSrcweir                     "creating com.sun.star.ucb.UniversalContentBroker failed")),
388*cdf0e10cSrcweir             css::uno::Reference< css::uno::XInterface >(),
389*cdf0e10cSrcweir             exception);
390*cdf0e10cSrcweir     }
391*cdf0e10cSrcweir     css::uno::Reference< css::uri::XUriReferenceFactory > uriFactory(
392*cdf0e10cSrcweir         css::uri::UriReferenceFactory::create(context));
393*cdf0e10cSrcweir     return uriFactory->makeRelative(
394*cdf0e10cSrcweir         uriFactory->parse(normalize(broker, uriFactory, baseUriReference)),
395*cdf0e10cSrcweir         uriFactory->parse(normalize(broker, uriFactory, uriReference)), true,
396*cdf0e10cSrcweir         true, false);
397*cdf0e10cSrcweir }
398*cdf0e10cSrcweir 
399*cdf0e10cSrcweir rtl::OUString URIHelper::simpleNormalizedMakeRelative(
400*cdf0e10cSrcweir     rtl::OUString const & baseUriReference, rtl::OUString const & uriReference)
401*cdf0e10cSrcweir {
402*cdf0e10cSrcweir     com::sun::star::uno::Reference< com::sun::star::uri::XUriReference > rel(
403*cdf0e10cSrcweir         URIHelper::normalizedMakeRelative(
404*cdf0e10cSrcweir             com::sun::star::uno::Reference<
405*cdf0e10cSrcweir             com::sun::star::uno::XComponentContext >(
406*cdf0e10cSrcweir                 (com::sun::star::uno::Reference<
407*cdf0e10cSrcweir                  com::sun::star::beans::XPropertySet >(
408*cdf0e10cSrcweir                     comphelper::getProcessServiceFactory(),
409*cdf0e10cSrcweir                     com::sun::star::uno::UNO_QUERY_THROW)->
410*cdf0e10cSrcweir                  getPropertyValue(
411*cdf0e10cSrcweir                      rtl::OUString(
412*cdf0e10cSrcweir                          RTL_CONSTASCII_USTRINGPARAM("DefaultContext")))),
413*cdf0e10cSrcweir                 com::sun::star::uno::UNO_QUERY_THROW),
414*cdf0e10cSrcweir             baseUriReference, uriReference));
415*cdf0e10cSrcweir     return rel.is() ? rel->getUriReference() : uriReference;
416*cdf0e10cSrcweir }
417*cdf0e10cSrcweir 
418*cdf0e10cSrcweir //============================================================================
419*cdf0e10cSrcweir //
420*cdf0e10cSrcweir //  FindFirstURLInText
421*cdf0e10cSrcweir //
422*cdf0e10cSrcweir //============================================================================
423*cdf0e10cSrcweir 
424*cdf0e10cSrcweir namespace unnamed_svl_urihelper {
425*cdf0e10cSrcweir 
426*cdf0e10cSrcweir inline xub_StrLen nextChar(UniString const & rStr, xub_StrLen nPos)
427*cdf0e10cSrcweir {
428*cdf0e10cSrcweir 	return INetMIME::isHighSurrogate(rStr.GetChar(nPos))
429*cdf0e10cSrcweir 		   && rStr.Len() - nPos >= 2
430*cdf0e10cSrcweir 		   && INetMIME::isLowSurrogate(rStr.GetChar(nPos + 1)) ?
431*cdf0e10cSrcweir 		       nPos + 2 : nPos + 1;
432*cdf0e10cSrcweir }
433*cdf0e10cSrcweir 
434*cdf0e10cSrcweir bool isBoundary1(CharClass const & rCharClass, UniString const & rStr,
435*cdf0e10cSrcweir                  xub_StrLen nPos, xub_StrLen nEnd)
436*cdf0e10cSrcweir {
437*cdf0e10cSrcweir     if (nPos == nEnd)
438*cdf0e10cSrcweir         return true;
439*cdf0e10cSrcweir     if (rCharClass.isLetterNumeric(rStr, nPos))
440*cdf0e10cSrcweir         return false;
441*cdf0e10cSrcweir     switch (rStr.GetChar(nPos))
442*cdf0e10cSrcweir     {
443*cdf0e10cSrcweir     case '$':
444*cdf0e10cSrcweir     case '%':
445*cdf0e10cSrcweir     case '&':
446*cdf0e10cSrcweir     case '-':
447*cdf0e10cSrcweir     case '/':
448*cdf0e10cSrcweir     case '@':
449*cdf0e10cSrcweir     case '\\':
450*cdf0e10cSrcweir         return false;
451*cdf0e10cSrcweir     default:
452*cdf0e10cSrcweir         return true;
453*cdf0e10cSrcweir     }
454*cdf0e10cSrcweir }
455*cdf0e10cSrcweir 
456*cdf0e10cSrcweir bool isBoundary2(CharClass const & rCharClass, UniString const & rStr,
457*cdf0e10cSrcweir                  xub_StrLen nPos, xub_StrLen nEnd)
458*cdf0e10cSrcweir {
459*cdf0e10cSrcweir     if (nPos == nEnd)
460*cdf0e10cSrcweir         return true;
461*cdf0e10cSrcweir     if (rCharClass.isLetterNumeric(rStr, nPos))
462*cdf0e10cSrcweir         return false;
463*cdf0e10cSrcweir     switch (rStr.GetChar(nPos))
464*cdf0e10cSrcweir     {
465*cdf0e10cSrcweir     case '!':
466*cdf0e10cSrcweir     case '#':
467*cdf0e10cSrcweir     case '$':
468*cdf0e10cSrcweir     case '%':
469*cdf0e10cSrcweir     case '&':
470*cdf0e10cSrcweir     case '\'':
471*cdf0e10cSrcweir     case '*':
472*cdf0e10cSrcweir     case '+':
473*cdf0e10cSrcweir     case '-':
474*cdf0e10cSrcweir     case '/':
475*cdf0e10cSrcweir     case '=':
476*cdf0e10cSrcweir     case '?':
477*cdf0e10cSrcweir     case '@':
478*cdf0e10cSrcweir     case '^':
479*cdf0e10cSrcweir     case '_':
480*cdf0e10cSrcweir     case '`':
481*cdf0e10cSrcweir     case '{':
482*cdf0e10cSrcweir     case '|':
483*cdf0e10cSrcweir     case '}':
484*cdf0e10cSrcweir     case '~':
485*cdf0e10cSrcweir         return false;
486*cdf0e10cSrcweir     default:
487*cdf0e10cSrcweir         return true;
488*cdf0e10cSrcweir     }
489*cdf0e10cSrcweir }
490*cdf0e10cSrcweir 
491*cdf0e10cSrcweir bool checkWChar(CharClass const & rCharClass, UniString const & rStr,
492*cdf0e10cSrcweir                 xub_StrLen * pPos, xub_StrLen * pEnd, bool bBackslash = false,
493*cdf0e10cSrcweir                 bool bPipe = false)
494*cdf0e10cSrcweir {
495*cdf0e10cSrcweir 	sal_Unicode c = rStr.GetChar(*pPos);
496*cdf0e10cSrcweir 	if (INetMIME::isUSASCII(c))
497*cdf0e10cSrcweir 	{
498*cdf0e10cSrcweir 		static sal_uInt8 const aMap[128]
499*cdf0e10cSrcweir 			= { 0, 0, 0, 0, 0, 0, 0, 0,
500*cdf0e10cSrcweir 				0, 0, 0, 0, 0, 0, 0, 0,
501*cdf0e10cSrcweir 				0, 0, 0, 0, 0, 0, 0, 0,
502*cdf0e10cSrcweir 				0, 0, 0, 0, 0, 0, 0, 0,
503*cdf0e10cSrcweir 				0, 1, 0, 0, 4, 4, 4, 1,   //  !"#$%&'
504*cdf0e10cSrcweir 				1, 1, 1, 1, 1, 4, 1, 4,   // ()*+,-./
505*cdf0e10cSrcweir 				4, 4, 4, 4, 4, 4, 4, 4,   // 01234567
506*cdf0e10cSrcweir 				4, 4, 1, 1, 0, 1, 0, 1,   // 89:;<=>?
507*cdf0e10cSrcweir 				4, 4, 4, 4, 4, 4, 4, 4,   // @ABCDEFG
508*cdf0e10cSrcweir 				4, 4, 4, 4, 4, 4, 4, 4,   // HIJKLMNO
509*cdf0e10cSrcweir 				4, 4, 4, 4, 4, 4, 4, 4,   // PQRSTUVW
510*cdf0e10cSrcweir 				4, 4, 4, 1, 2, 1, 0, 1,   // XYZ[\]^_
511*cdf0e10cSrcweir 				0, 4, 4, 4, 4, 4, 4, 4,   // `abcdefg
512*cdf0e10cSrcweir 				4, 4, 4, 4, 4, 4, 4, 4,   // hijklmno
513*cdf0e10cSrcweir 				4, 4, 4, 4, 4, 4, 4, 4,   // pqrstuvw
514*cdf0e10cSrcweir 				4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~
515*cdf0e10cSrcweir 		switch (aMap[c])
516*cdf0e10cSrcweir 		{
517*cdf0e10cSrcweir 			default: // not uric
518*cdf0e10cSrcweir 				return false;
519*cdf0e10cSrcweir 
520*cdf0e10cSrcweir 			case 1: // uric
521*cdf0e10cSrcweir 				++(*pPos);
522*cdf0e10cSrcweir 				return true;
523*cdf0e10cSrcweir 
524*cdf0e10cSrcweir 			case 2: // "\"
525*cdf0e10cSrcweir 				if (bBackslash)
526*cdf0e10cSrcweir 				{
527*cdf0e10cSrcweir 					*pEnd = ++(*pPos);
528*cdf0e10cSrcweir 					return true;
529*cdf0e10cSrcweir 				}
530*cdf0e10cSrcweir 				else
531*cdf0e10cSrcweir 					return false;
532*cdf0e10cSrcweir 
533*cdf0e10cSrcweir 			case 3: // "|"
534*cdf0e10cSrcweir 				if (bPipe)
535*cdf0e10cSrcweir 				{
536*cdf0e10cSrcweir 					*pEnd = ++(*pPos);
537*cdf0e10cSrcweir 					return true;
538*cdf0e10cSrcweir 				}
539*cdf0e10cSrcweir 				else
540*cdf0e10cSrcweir 					return false;
541*cdf0e10cSrcweir 
542*cdf0e10cSrcweir 			case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see
543*cdf0e10cSrcweir                     // isBoundary1)
544*cdf0e10cSrcweir 				*pEnd = ++(*pPos);
545*cdf0e10cSrcweir 				return true;
546*cdf0e10cSrcweir 		}
547*cdf0e10cSrcweir 	}
548*cdf0e10cSrcweir 	else if (rCharClass.isLetterNumeric(rStr, *pPos))
549*cdf0e10cSrcweir 	{
550*cdf0e10cSrcweir 		*pEnd = *pPos = nextChar(rStr, *pPos);
551*cdf0e10cSrcweir 		return true;
552*cdf0e10cSrcweir 	}
553*cdf0e10cSrcweir 	else
554*cdf0e10cSrcweir 		return false;
555*cdf0e10cSrcweir }
556*cdf0e10cSrcweir 
557*cdf0e10cSrcweir sal_uInt32 scanDomain(UniString const & rStr, xub_StrLen * pPos,
558*cdf0e10cSrcweir                       xub_StrLen nEnd)
559*cdf0e10cSrcweir {
560*cdf0e10cSrcweir 	sal_Unicode const * pBuffer = rStr.GetBuffer();
561*cdf0e10cSrcweir 	sal_Unicode const * p = pBuffer + *pPos;
562*cdf0e10cSrcweir 	sal_uInt32 nLabels = INetURLObject::scanDomain(p, pBuffer + nEnd, false);
563*cdf0e10cSrcweir 	*pPos = sal::static_int_cast< xub_StrLen >(p - pBuffer);
564*cdf0e10cSrcweir 	return nLabels;
565*cdf0e10cSrcweir }
566*cdf0e10cSrcweir 
567*cdf0e10cSrcweir }
568*cdf0e10cSrcweir 
569*cdf0e10cSrcweir UniString
570*cdf0e10cSrcweir URIHelper::FindFirstURLInText(UniString const & rText,
571*cdf0e10cSrcweir                               xub_StrLen & rBegin,
572*cdf0e10cSrcweir                               xub_StrLen & rEnd,
573*cdf0e10cSrcweir                               CharClass const & rCharClass,
574*cdf0e10cSrcweir                               INetURLObject::EncodeMechanism eMechanism,
575*cdf0e10cSrcweir                               rtl_TextEncoding eCharset,
576*cdf0e10cSrcweir                               INetURLObject::FSysStyle eStyle)
577*cdf0e10cSrcweir {
578*cdf0e10cSrcweir     if (!(rBegin <= rEnd && rEnd <= rText.Len()))
579*cdf0e10cSrcweir         return UniString();
580*cdf0e10cSrcweir 
581*cdf0e10cSrcweir     // Search for the first substring of [rBegin..rEnd[ that matches any of the
582*cdf0e10cSrcweir     // following productions (for which the appropriate style bit is set in
583*cdf0e10cSrcweir     // eStyle, if applicable).
584*cdf0e10cSrcweir     //
585*cdf0e10cSrcweir     // 1st Production (known scheme):
586*cdf0e10cSrcweir     //    \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar]
587*cdf0e10cSrcweir     //        \B1
588*cdf0e10cSrcweir     //
589*cdf0e10cSrcweir     // 2nd Production (file):
590*cdf0e10cSrcweir     //    \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1
591*cdf0e10cSrcweir     //
592*cdf0e10cSrcweir     // 3rd Production (ftp):
593*cdf0e10cSrcweir     //    \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
594*cdf0e10cSrcweir     //
595*cdf0e10cSrcweir     // 4th Production (http):
596*cdf0e10cSrcweir     //    \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
597*cdf0e10cSrcweir     //
598*cdf0e10cSrcweir     // 5th Production (mailto):
599*cdf0e10cSrcweir     //    \B2 local-part "@" domain \B1
600*cdf0e10cSrcweir     //
601*cdf0e10cSrcweir     // 6th Production (UNC file):
602*cdf0e10cSrcweir     //    \B1 "\\" domain "\" *(wchar / "\") \B1
603*cdf0e10cSrcweir     //
604*cdf0e10cSrcweir     // 7th Production (DOS file):
605*cdf0e10cSrcweir     //    \B1 ALPHA ":\" *(wchar / "\") \B1
606*cdf0e10cSrcweir     //
607*cdf0e10cSrcweir     // 8th Production (Unix-like DOS file):
608*cdf0e10cSrcweir     //    \B1 ALPHA ":/" *(wchar / "\") \B1
609*cdf0e10cSrcweir     //
610*cdf0e10cSrcweir     // The productions use the following auxiliary rules.
611*cdf0e10cSrcweir     //
612*cdf0e10cSrcweir     //    local-part = atom *("." atom)
613*cdf0e10cSrcweir     //    atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+"
614*cdf0e10cSrcweir     //              / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}"
615*cdf0e10cSrcweir     //              / "~")
616*cdf0e10cSrcweir     //    domain = label *("." label)
617*cdf0e10cSrcweir     //    label = alphanum [*(alphanum / "-") alphanum]
618*cdf0e10cSrcweir     //    alphanum = ALPHA / DIGIT
619*cdf0e10cSrcweir     //    wchar = <any uric character (ignoring the escaped rule), or "%", or
620*cdf0e10cSrcweir     //             a letter or digit (according to rCharClass)>
621*cdf0e10cSrcweir     //
622*cdf0e10cSrcweir     // "\B1" (boundary 1) stands for the beginning or end of the block of text,
623*cdf0e10cSrcweir     // or a character that is neither (a) a letter or digit (according to
624*cdf0e10cSrcweir     // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\".
625*cdf0e10cSrcweir     // (FIXME:  What was the rationale for this set of punctuation characters?)
626*cdf0e10cSrcweir     //
627*cdf0e10cSrcweir     // "\B2" (boundary 2) stands for the beginning or end of the block of text,
628*cdf0e10cSrcweir     // or a character that is neither (a) a letter or digit (according to
629*cdf0e10cSrcweir     // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-",
630*cdf0e10cSrcweir     // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC
631*cdf0e10cSrcweir     // 822 <atom> character, or "@" from \B1's set above).
632*cdf0e10cSrcweir     //
633*cdf0e10cSrcweir     // Productions 1--4, and 6--8 try to find a maximum-length match, but they
634*cdf0e10cSrcweir     // stop at the first <wchar> character that is a "\B1" character which is
635*cdf0e10cSrcweir     // only followed by "\B1" characters (taking "\" and "|" characters into
636*cdf0e10cSrcweir     // account appropriately).  Production 5 simply tries to find a maximum-
637*cdf0e10cSrcweir     // length match.
638*cdf0e10cSrcweir     //
639*cdf0e10cSrcweir     // Productions 1--4 use the given eMechanism and eCharset.  Productions 5--9
640*cdf0e10cSrcweir     // use ENCODE_ALL.
641*cdf0e10cSrcweir     //
642*cdf0e10cSrcweir     // Productions 6--9 are only applicable if the FSYS_DOS bit is set in
643*cdf0e10cSrcweir     // eStyle.
644*cdf0e10cSrcweir 
645*cdf0e10cSrcweir     bool bBoundary1 = true;
646*cdf0e10cSrcweir     bool bBoundary2 = true;
647*cdf0e10cSrcweir     for (xub_StrLen nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos))
648*cdf0e10cSrcweir     {
649*cdf0e10cSrcweir         sal_Unicode c = rText.GetChar(nPos);
650*cdf0e10cSrcweir         if (bBoundary1)
651*cdf0e10cSrcweir         {
652*cdf0e10cSrcweir             if (INetMIME::isAlpha(c))
653*cdf0e10cSrcweir             {
654*cdf0e10cSrcweir                 xub_StrLen i = nPos;
655*cdf0e10cSrcweir                 INetProtocol eScheme
656*cdf0e10cSrcweir                     = INetURLObject::CompareProtocolScheme(UniString(rText, i,
657*cdf0e10cSrcweir                                                                      rEnd));
658*cdf0e10cSrcweir                 if (eScheme == INET_PROT_FILE) // 2nd
659*cdf0e10cSrcweir                 {
660*cdf0e10cSrcweir                     while (rText.GetChar(i++) != ':') ;
661*cdf0e10cSrcweir                     xub_StrLen nPrefixEnd = i;
662*cdf0e10cSrcweir                     xub_StrLen nUriEnd = i;
663*cdf0e10cSrcweir                     while (i != rEnd
664*cdf0e10cSrcweir                            && checkWChar(rCharClass, rText, &i, &nUriEnd, true,
665*cdf0e10cSrcweir                                          true)) ;
666*cdf0e10cSrcweir                     if (i != nPrefixEnd && rText.GetChar(i) == '#')
667*cdf0e10cSrcweir                     {
668*cdf0e10cSrcweir                         ++i;
669*cdf0e10cSrcweir                         while (i != rEnd
670*cdf0e10cSrcweir                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
671*cdf0e10cSrcweir                     }
672*cdf0e10cSrcweir                     if (nUriEnd != nPrefixEnd
673*cdf0e10cSrcweir                         && isBoundary1(rCharClass, rText, nUriEnd, rEnd))
674*cdf0e10cSrcweir                     {
675*cdf0e10cSrcweir                         INetURLObject aUri(UniString(rText, nPos,
676*cdf0e10cSrcweir                                                      nUriEnd - nPos),
677*cdf0e10cSrcweir                                            INET_PROT_FILE, eMechanism, eCharset,
678*cdf0e10cSrcweir                                            eStyle);
679*cdf0e10cSrcweir                         if (!aUri.HasError())
680*cdf0e10cSrcweir                         {
681*cdf0e10cSrcweir                             rBegin = nPos;
682*cdf0e10cSrcweir                             rEnd = nUriEnd;
683*cdf0e10cSrcweir                             return
684*cdf0e10cSrcweir                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
685*cdf0e10cSrcweir                         }
686*cdf0e10cSrcweir                     }
687*cdf0e10cSrcweir                 }
688*cdf0e10cSrcweir                 else if (eScheme != INET_PROT_NOT_VALID) // 1st
689*cdf0e10cSrcweir                 {
690*cdf0e10cSrcweir                     while (rText.GetChar(i++) != ':') ;
691*cdf0e10cSrcweir                     xub_StrLen nPrefixEnd = i;
692*cdf0e10cSrcweir                     xub_StrLen nUriEnd = i;
693*cdf0e10cSrcweir                     while (i != rEnd
694*cdf0e10cSrcweir                            && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
695*cdf0e10cSrcweir                     if (i != nPrefixEnd && rText.GetChar(i) == '#')
696*cdf0e10cSrcweir                     {
697*cdf0e10cSrcweir                         ++i;
698*cdf0e10cSrcweir                         while (i != rEnd
699*cdf0e10cSrcweir                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
700*cdf0e10cSrcweir                     }
701*cdf0e10cSrcweir                     if (nUriEnd != nPrefixEnd
702*cdf0e10cSrcweir                         && (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
703*cdf0e10cSrcweir                             || rText.GetChar(nUriEnd) == '\\'))
704*cdf0e10cSrcweir                     {
705*cdf0e10cSrcweir                         INetURLObject aUri(UniString(rText, nPos,
706*cdf0e10cSrcweir                                                      nUriEnd - nPos),
707*cdf0e10cSrcweir                                            INET_PROT_HTTP, eMechanism,
708*cdf0e10cSrcweir                                            eCharset);
709*cdf0e10cSrcweir                         if (!aUri.HasError())
710*cdf0e10cSrcweir                         {
711*cdf0e10cSrcweir                             rBegin = nPos;
712*cdf0e10cSrcweir                             rEnd = nUriEnd;
713*cdf0e10cSrcweir                             return
714*cdf0e10cSrcweir                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
715*cdf0e10cSrcweir                         }
716*cdf0e10cSrcweir                     }
717*cdf0e10cSrcweir                 }
718*cdf0e10cSrcweir 
719*cdf0e10cSrcweir                 // 3rd, 4th:
720*cdf0e10cSrcweir                 i = nPos;
721*cdf0e10cSrcweir                 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
722*cdf0e10cSrcweir                 if (nLabels >= 3
723*cdf0e10cSrcweir                     && rText.GetChar(nPos + 3) == '.'
724*cdf0e10cSrcweir                     && (((rText.GetChar(nPos) == 'w'
725*cdf0e10cSrcweir                           || rText.GetChar(nPos) == 'W')
726*cdf0e10cSrcweir                          && (rText.GetChar(nPos + 1) == 'w'
727*cdf0e10cSrcweir                              || rText.GetChar(nPos + 1) == 'W')
728*cdf0e10cSrcweir                          && (rText.GetChar(nPos + 2) == 'w'
729*cdf0e10cSrcweir                              || rText.GetChar(nPos + 2) == 'W'))
730*cdf0e10cSrcweir                         || ((rText.GetChar(nPos) == 'f'
731*cdf0e10cSrcweir                              || rText.GetChar(nPos) == 'F')
732*cdf0e10cSrcweir                             && (rText.GetChar(nPos + 1) == 't'
733*cdf0e10cSrcweir                                 || rText.GetChar(nPos + 1) == 'T')
734*cdf0e10cSrcweir                             && (rText.GetChar(nPos + 2) == 'p'
735*cdf0e10cSrcweir                                 || rText.GetChar(nPos + 2) == 'P'))))
736*cdf0e10cSrcweir                     // (note that rText.GetChar(nPos + 3) is guaranteed to be
737*cdf0e10cSrcweir                     // valid)
738*cdf0e10cSrcweir                 {
739*cdf0e10cSrcweir                     xub_StrLen nUriEnd = i;
740*cdf0e10cSrcweir                     if (i != rEnd && rText.GetChar(i) == '/')
741*cdf0e10cSrcweir                     {
742*cdf0e10cSrcweir                         nUriEnd = ++i;
743*cdf0e10cSrcweir                         while (i != rEnd
744*cdf0e10cSrcweir                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
745*cdf0e10cSrcweir                     }
746*cdf0e10cSrcweir                     if (i != rEnd && rText.GetChar(i) == '#')
747*cdf0e10cSrcweir                     {
748*cdf0e10cSrcweir                         ++i;
749*cdf0e10cSrcweir                         while (i != rEnd
750*cdf0e10cSrcweir                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
751*cdf0e10cSrcweir                     }
752*cdf0e10cSrcweir                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
753*cdf0e10cSrcweir                         || rText.GetChar(nUriEnd) == '\\')
754*cdf0e10cSrcweir                     {
755*cdf0e10cSrcweir                         INetURLObject aUri(UniString(rText, nPos,
756*cdf0e10cSrcweir                                                      nUriEnd - nPos),
757*cdf0e10cSrcweir                                            INET_PROT_HTTP, eMechanism,
758*cdf0e10cSrcweir                                            eCharset);
759*cdf0e10cSrcweir                         if (!aUri.HasError())
760*cdf0e10cSrcweir                         {
761*cdf0e10cSrcweir                             rBegin = nPos;
762*cdf0e10cSrcweir                             rEnd = nUriEnd;
763*cdf0e10cSrcweir                             return
764*cdf0e10cSrcweir                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
765*cdf0e10cSrcweir                         }
766*cdf0e10cSrcweir                     }
767*cdf0e10cSrcweir                 }
768*cdf0e10cSrcweir 
769*cdf0e10cSrcweir                 if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 3
770*cdf0e10cSrcweir                     && rText.GetChar(nPos + 1) == ':'
771*cdf0e10cSrcweir                     && (rText.GetChar(nPos + 2) == '/'
772*cdf0e10cSrcweir                         || rText.GetChar(nPos + 2) == '\\')) // 7th, 8th
773*cdf0e10cSrcweir                 {
774*cdf0e10cSrcweir                     i = nPos + 3;
775*cdf0e10cSrcweir                     xub_StrLen nUriEnd = i;
776*cdf0e10cSrcweir                     while (i != rEnd
777*cdf0e10cSrcweir                            && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
778*cdf0e10cSrcweir                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
779*cdf0e10cSrcweir                     {
780*cdf0e10cSrcweir                         INetURLObject aUri(UniString(rText, nPos,
781*cdf0e10cSrcweir                                                      nUriEnd - nPos),
782*cdf0e10cSrcweir                                            INET_PROT_FILE,
783*cdf0e10cSrcweir                                            INetURLObject::ENCODE_ALL,
784*cdf0e10cSrcweir                                            RTL_TEXTENCODING_UTF8,
785*cdf0e10cSrcweir                                            INetURLObject::FSYS_DOS);
786*cdf0e10cSrcweir                         if (!aUri.HasError())
787*cdf0e10cSrcweir                         {
788*cdf0e10cSrcweir                             rBegin = nPos;
789*cdf0e10cSrcweir                             rEnd = nUriEnd;
790*cdf0e10cSrcweir                             return
791*cdf0e10cSrcweir                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
792*cdf0e10cSrcweir                         }
793*cdf0e10cSrcweir                     }
794*cdf0e10cSrcweir                 }
795*cdf0e10cSrcweir             }
796*cdf0e10cSrcweir             else if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 2
797*cdf0e10cSrcweir                      && rText.GetChar(nPos) == '\\'
798*cdf0e10cSrcweir                      && rText.GetChar(nPos + 1) == '\\') // 6th
799*cdf0e10cSrcweir             {
800*cdf0e10cSrcweir                 xub_StrLen i = nPos + 2;
801*cdf0e10cSrcweir                 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
802*cdf0e10cSrcweir                 if (nLabels >= 1 && i != rEnd && rText.GetChar(i) == '\\')
803*cdf0e10cSrcweir                 {
804*cdf0e10cSrcweir                     xub_StrLen nUriEnd = ++i;
805*cdf0e10cSrcweir                     while (i != rEnd
806*cdf0e10cSrcweir                            && checkWChar(rCharClass, rText, &i, &nUriEnd,
807*cdf0e10cSrcweir                                          true)) ;
808*cdf0e10cSrcweir                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
809*cdf0e10cSrcweir                     {
810*cdf0e10cSrcweir                         INetURLObject aUri(UniString(rText, nPos,
811*cdf0e10cSrcweir                                                      nUriEnd - nPos),
812*cdf0e10cSrcweir                                            INET_PROT_FILE,
813*cdf0e10cSrcweir                                            INetURLObject::ENCODE_ALL,
814*cdf0e10cSrcweir                                            RTL_TEXTENCODING_UTF8,
815*cdf0e10cSrcweir                                            INetURLObject::FSYS_DOS);
816*cdf0e10cSrcweir                         if (!aUri.HasError())
817*cdf0e10cSrcweir                         {
818*cdf0e10cSrcweir                             rBegin = nPos;
819*cdf0e10cSrcweir                             rEnd = nUriEnd;
820*cdf0e10cSrcweir                             return
821*cdf0e10cSrcweir                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
822*cdf0e10cSrcweir                         }
823*cdf0e10cSrcweir                     }
824*cdf0e10cSrcweir                 }
825*cdf0e10cSrcweir             }
826*cdf0e10cSrcweir         }
827*cdf0e10cSrcweir         if (bBoundary2 && INetMIME::isAtomChar(c)) // 5th
828*cdf0e10cSrcweir         {
829*cdf0e10cSrcweir             bool bDot = false;
830*cdf0e10cSrcweir             for (xub_StrLen i = nPos + 1; i != rEnd; ++i)
831*cdf0e10cSrcweir             {
832*cdf0e10cSrcweir                 sal_Unicode c2 = rText.GetChar(i);
833*cdf0e10cSrcweir                 if (INetMIME::isAtomChar(c2))
834*cdf0e10cSrcweir                     bDot = false;
835*cdf0e10cSrcweir                 else if (bDot)
836*cdf0e10cSrcweir                     break;
837*cdf0e10cSrcweir                 else if (c2 == '.')
838*cdf0e10cSrcweir                     bDot = true;
839*cdf0e10cSrcweir                 else
840*cdf0e10cSrcweir                 {
841*cdf0e10cSrcweir                     if (c2 == '@')
842*cdf0e10cSrcweir                     {
843*cdf0e10cSrcweir                         ++i;
844*cdf0e10cSrcweir                         sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
845*cdf0e10cSrcweir                         if (nLabels >= 1
846*cdf0e10cSrcweir                             && isBoundary1(rCharClass, rText, i, rEnd))
847*cdf0e10cSrcweir                         {
848*cdf0e10cSrcweir                             INetURLObject aUri(UniString(rText, nPos, i - nPos),
849*cdf0e10cSrcweir                                                INET_PROT_MAILTO,
850*cdf0e10cSrcweir                                                INetURLObject::ENCODE_ALL);
851*cdf0e10cSrcweir                             if (!aUri.HasError())
852*cdf0e10cSrcweir                             {
853*cdf0e10cSrcweir                                 rBegin = nPos;
854*cdf0e10cSrcweir                                 rEnd = i;
855*cdf0e10cSrcweir                                 return aUri.GetMainURL(
856*cdf0e10cSrcweir                                            INetURLObject::DECODE_TO_IURI);
857*cdf0e10cSrcweir                             }
858*cdf0e10cSrcweir                         }
859*cdf0e10cSrcweir                     }
860*cdf0e10cSrcweir                     break;
861*cdf0e10cSrcweir                 }
862*cdf0e10cSrcweir             }
863*cdf0e10cSrcweir         }
864*cdf0e10cSrcweir         bBoundary1 = isBoundary1(rCharClass, rText, nPos, rEnd);
865*cdf0e10cSrcweir         bBoundary2 = isBoundary2(rCharClass, rText, nPos, rEnd);
866*cdf0e10cSrcweir     }
867*cdf0e10cSrcweir     rBegin = rEnd;
868*cdf0e10cSrcweir     return UniString();
869*cdf0e10cSrcweir }
870*cdf0e10cSrcweir 
871*cdf0e10cSrcweir //============================================================================
872*cdf0e10cSrcweir //
873*cdf0e10cSrcweir //  removePassword
874*cdf0e10cSrcweir //
875*cdf0e10cSrcweir //============================================================================
876*cdf0e10cSrcweir 
877*cdf0e10cSrcweir UniString
878*cdf0e10cSrcweir URIHelper::removePassword(UniString const & rURI,
879*cdf0e10cSrcweir 						  INetURLObject::EncodeMechanism eEncodeMechanism,
880*cdf0e10cSrcweir 						  INetURLObject::DecodeMechanism eDecodeMechanism,
881*cdf0e10cSrcweir 						  rtl_TextEncoding eCharset)
882*cdf0e10cSrcweir {
883*cdf0e10cSrcweir 	INetURLObject aObj(rURI, eEncodeMechanism, eCharset);
884*cdf0e10cSrcweir 	return aObj.HasError() ?
885*cdf0e10cSrcweir 		       rURI :
886*cdf0e10cSrcweir 		       String(aObj.GetURLNoPass(eDecodeMechanism, eCharset));
887*cdf0e10cSrcweir }
888*cdf0e10cSrcweir 
889*cdf0e10cSrcweir //============================================================================
890*cdf0e10cSrcweir //
891*cdf0e10cSrcweir //  queryFSysStyle
892*cdf0e10cSrcweir //
893*cdf0e10cSrcweir //============================================================================
894*cdf0e10cSrcweir 
895*cdf0e10cSrcweir INetURLObject::FSysStyle URIHelper::queryFSysStyle(UniString const & rFileUrl,
896*cdf0e10cSrcweir 												   bool bAddConvenienceStyles)
897*cdf0e10cSrcweir 	throw (uno::RuntimeException)
898*cdf0e10cSrcweir {
899*cdf0e10cSrcweir 	::ucbhelper::ContentBroker const * pBroker = ::ucbhelper::ContentBroker::get();
900*cdf0e10cSrcweir 	uno::Reference< ucb::XContentProviderManager > xManager;
901*cdf0e10cSrcweir 	if (pBroker)
902*cdf0e10cSrcweir 		xManager = pBroker->getContentProviderManagerInterface();
903*cdf0e10cSrcweir 	uno::Reference< beans::XPropertySet > xProperties;
904*cdf0e10cSrcweir 	if (xManager.is())
905*cdf0e10cSrcweir 		xProperties
906*cdf0e10cSrcweir 			= uno::Reference< beans::XPropertySet >(
907*cdf0e10cSrcweir 				  xManager->queryContentProvider(rFileUrl), uno::UNO_QUERY);
908*cdf0e10cSrcweir 	sal_Int32 nNotation = ucb::FileSystemNotation::UNKNOWN_NOTATION;
909*cdf0e10cSrcweir 	if (xProperties.is())
910*cdf0e10cSrcweir 		try
911*cdf0e10cSrcweir 		{
912*cdf0e10cSrcweir 			xProperties->getPropertyValue(rtl::OUString(
913*cdf0e10cSrcweir 				                              RTL_CONSTASCII_USTRINGPARAM(
914*cdf0e10cSrcweir 												  "FileSystemNotation")))
915*cdf0e10cSrcweir 				>>= nNotation;
916*cdf0e10cSrcweir 		}
917*cdf0e10cSrcweir 		catch (beans::UnknownPropertyException const &) {}
918*cdf0e10cSrcweir 		catch (lang::WrappedTargetException const &) {}
919*cdf0e10cSrcweir 
920*cdf0e10cSrcweir 	// The following code depends on the fact that the
921*cdf0e10cSrcweir 	// com::sun::star::ucb::FileSystemNotation constants range from UNKNOWN to
922*cdf0e10cSrcweir 	// MAC, without any holes.  The table below has two entries per notation,
923*cdf0e10cSrcweir 	// the first is used if bAddConvenienceStyles == false, while the second
924*cdf0e10cSrcweir 	// is used if bAddConvenienceStyles == true:
925*cdf0e10cSrcweir 	static INetURLObject::FSysStyle const aMap[][2]
926*cdf0e10cSrcweir 		= { { INetURLObject::FSysStyle(0),
927*cdf0e10cSrcweir 			  INetURLObject::FSYS_DETECT },
928*cdf0e10cSrcweir 			    // UNKNOWN
929*cdf0e10cSrcweir 			{ INetURLObject::FSYS_UNX,
930*cdf0e10cSrcweir 			  INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
931*cdf0e10cSrcweir 									       | INetURLObject::FSYS_UNX) },
932*cdf0e10cSrcweir 			    // UNIX
933*cdf0e10cSrcweir 			{ INetURLObject::FSYS_DOS,
934*cdf0e10cSrcweir 			  INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
935*cdf0e10cSrcweir 									       | INetURLObject::FSYS_UNX
936*cdf0e10cSrcweir 									       | INetURLObject::FSYS_DOS) },
937*cdf0e10cSrcweir 			    // DOS
938*cdf0e10cSrcweir 			{ INetURLObject::FSYS_MAC,
939*cdf0e10cSrcweir 			  INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
940*cdf0e10cSrcweir 									       | INetURLObject::FSYS_UNX
941*cdf0e10cSrcweir 									       | INetURLObject::FSYS_MAC) } };
942*cdf0e10cSrcweir 	return aMap[nNotation < ucb::FileSystemNotation::UNKNOWN_NOTATION
943*cdf0e10cSrcweir 			    || nNotation > ucb::FileSystemNotation::MAC_NOTATION ?
944*cdf0e10cSrcweir 			            0 :
945*cdf0e10cSrcweir 			            nNotation
946*cdf0e10cSrcweir 			                - ucb::FileSystemNotation::UNKNOWN_NOTATION]
947*cdf0e10cSrcweir 		           [bAddConvenienceStyles];
948*cdf0e10cSrcweir }
949