1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_svl.hxx"
26 #include <svl/urihelper.hxx>
27 #include <com/sun/star/beans/XPropertySet.hpp>
28 #include "com/sun/star/lang/WrappedTargetRuntimeException.hpp"
29 #include "com/sun/star/lang/XMultiComponentFactory.hpp"
30 #include "com/sun/star/ucb/Command.hpp"
31 #include <com/sun/star/ucb/FileSystemNotation.hpp>
32 #include "com/sun/star/ucb/IllegalIdentifierException.hpp"
33 #include "com/sun/star/ucb/UnsupportedCommandException.hpp"
34 #include "com/sun/star/ucb/XCommandEnvironment.hpp"
35 #include "com/sun/star/ucb/XCommandProcessor.hpp"
36 #include "com/sun/star/ucb/XContent.hpp"
37 #include "com/sun/star/ucb/XContentIdentifierFactory.hpp"
38 #include "com/sun/star/ucb/XContentProvider.hpp"
39 #include <com/sun/star/ucb/XContentProviderManager.hpp>
40 #include "com/sun/star/uno/Any.hxx"
41 #include "com/sun/star/uno/Exception.hpp"
42 #include "com/sun/star/uno/Reference.hxx"
43 #include "com/sun/star/uno/RuntimeException.hpp"
44 #include "com/sun/star/uno/Sequence.hxx"
45 #include "com/sun/star/uno/XComponentContext.hpp"
46 #include "com/sun/star/uno/XInterface.hpp"
47 #include "com/sun/star/uri/UriReferenceFactory.hpp"
48 #include "com/sun/star/uri/XUriReference.hpp"
49 #include "com/sun/star/uri/XUriReferenceFactory.hpp"
50 #include "cppuhelper/exc_hlp.hxx"
51 #include "comphelper/processfactory.hxx"
52 #include "osl/diagnose.h"
53 #include "rtl/ustrbuf.hxx"
54 #include "rtl/ustring.h"
55 #include "rtl/ustring.hxx"
56 #include "sal/types.h"
57 #include <tools/debug.hxx>
58 #include <tools/inetmime.hxx>
59 #include <ucbhelper/contentbroker.hxx>
60 #include <unotools/charclass.hxx>
61 #include "rtl/instance.hxx"
62
63 namespace unnamed_svl_urihelper {}
64 using namespace unnamed_svl_urihelper;
65 // unnamed namespaces don't work well yet...
66
67 namespace css = com::sun::star;
68 using namespace com::sun::star;
69
70 //============================================================================
71 //
72 // SmartRel2Abs
73 //
74 //============================================================================
75
76 namespace unnamed_svl_urihelper {
77
toUniString(ByteString const & rString)78 inline UniString toUniString(ByteString const & rString)
79 {
80 return UniString(rString, RTL_TEXTENCODING_ISO_8859_1);
81 }
82
toUniString(UniString const & rString)83 inline UniString toUniString(UniString const & rString)
84 {
85 return rString;
86 }
87
88 template< typename Str >
SmartRel2Abs_Impl(INetURLObject const & rTheBaseURIRef,Str const & rTheRelURIRef,Link const & rMaybeFileHdl,bool bCheckFileExists,bool bIgnoreFragment,INetURLObject::EncodeMechanism eEncodeMechanism,INetURLObject::DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset,bool bRelativeNonURIs,INetURLObject::FSysStyle eStyle)89 inline UniString SmartRel2Abs_Impl(INetURLObject const & rTheBaseURIRef,
90 Str const & rTheRelURIRef,
91 Link const & rMaybeFileHdl,
92 bool bCheckFileExists,
93 bool bIgnoreFragment,
94 INetURLObject::EncodeMechanism
95 eEncodeMechanism,
96 INetURLObject::DecodeMechanism
97 eDecodeMechanism,
98 rtl_TextEncoding eCharset,
99 bool bRelativeNonURIs,
100 INetURLObject::FSysStyle eStyle)
101 {
102 // Backwards compatibility:
103 if (rTheRelURIRef.Len() != 0 && rTheRelURIRef.GetChar(0) == '#')
104 return toUniString(rTheRelURIRef);
105
106 INetURLObject aAbsURIRef;
107 if (rTheBaseURIRef.HasError())
108 aAbsURIRef.
109 SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle);
110 else
111 {
112 bool bWasAbsolute;
113 aAbsURIRef = rTheBaseURIRef.smartRel2Abs(rTheRelURIRef,
114 bWasAbsolute,
115 bIgnoreFragment,
116 eEncodeMechanism,
117 eCharset,
118 bRelativeNonURIs,
119 eStyle);
120 if (bCheckFileExists
121 && !bWasAbsolute
122 && (aAbsURIRef.GetProtocol() == INET_PROT_FILE))
123 {
124 INetURLObject aNonFileURIRef;
125 aNonFileURIRef.SetSmartURL(rTheRelURIRef,
126 eEncodeMechanism,
127 eCharset,
128 eStyle);
129 if (!aNonFileURIRef.HasError()
130 && aNonFileURIRef.GetProtocol() != INET_PROT_FILE)
131 {
132 bool bMaybeFile = false;
133 if (rMaybeFileHdl.IsSet())
134 {
135 UniString aFilePath(toUniString(rTheRelURIRef));
136 bMaybeFile = rMaybeFileHdl.Call(&aFilePath) != 0;
137 }
138 if (!bMaybeFile)
139 aAbsURIRef = aNonFileURIRef;
140 }
141 }
142 }
143 return aAbsURIRef.GetMainURL(eDecodeMechanism, eCharset);
144 }
145
146 }
147
148 UniString
SmartRel2Abs(INetURLObject const & rTheBaseURIRef,ByteString const & rTheRelURIRef,Link const & rMaybeFileHdl,bool bCheckFileExists,bool bIgnoreFragment,INetURLObject::EncodeMechanism eEncodeMechanism,INetURLObject::DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset,bool bRelativeNonURIs,INetURLObject::FSysStyle eStyle)149 URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
150 ByteString const & rTheRelURIRef,
151 Link const & rMaybeFileHdl,
152 bool bCheckFileExists,
153 bool bIgnoreFragment,
154 INetURLObject::EncodeMechanism eEncodeMechanism,
155 INetURLObject::DecodeMechanism eDecodeMechanism,
156 rtl_TextEncoding eCharset,
157 bool bRelativeNonURIs,
158 INetURLObject::FSysStyle eStyle)
159 {
160 return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl,
161 bCheckFileExists, bIgnoreFragment,
162 eEncodeMechanism, eDecodeMechanism, eCharset,
163 bRelativeNonURIs, eStyle);
164 }
165
166 UniString
SmartRel2Abs(INetURLObject const & rTheBaseURIRef,UniString const & rTheRelURIRef,Link const & rMaybeFileHdl,bool bCheckFileExists,bool bIgnoreFragment,INetURLObject::EncodeMechanism eEncodeMechanism,INetURLObject::DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset,bool bRelativeNonURIs,INetURLObject::FSysStyle eStyle)167 URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
168 UniString const & rTheRelURIRef,
169 Link const & rMaybeFileHdl,
170 bool bCheckFileExists,
171 bool bIgnoreFragment,
172 INetURLObject::EncodeMechanism eEncodeMechanism,
173 INetURLObject::DecodeMechanism eDecodeMechanism,
174 rtl_TextEncoding eCharset,
175 bool bRelativeNonURIs,
176 INetURLObject::FSysStyle eStyle)
177 {
178 return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl,
179 bCheckFileExists, bIgnoreFragment,
180 eEncodeMechanism, eDecodeMechanism, eCharset,
181 bRelativeNonURIs, eStyle);
182 }
183
184 //============================================================================
185 //
186 // SetMaybeFileHdl
187 //
188 //============================================================================
189
190 namespace { struct MaybeFileHdl : public rtl::Static< Link, MaybeFileHdl > {}; }
191
SetMaybeFileHdl(Link const & rTheMaybeFileHdl)192 void URIHelper::SetMaybeFileHdl(Link const & rTheMaybeFileHdl)
193 {
194 MaybeFileHdl::get() = rTheMaybeFileHdl;
195 }
196
197 //============================================================================
198 //
199 // GetMaybeFileHdl
200 //
201 //============================================================================
202
GetMaybeFileHdl()203 Link URIHelper::GetMaybeFileHdl()
204 {
205 return MaybeFileHdl::get();
206 }
207
208 namespace {
209
isAbsoluteHierarchicalUriReference(css::uno::Reference<css::uri::XUriReference> const & uriReference)210 bool isAbsoluteHierarchicalUriReference(
211 css::uno::Reference< css::uri::XUriReference > const & uriReference)
212 {
213 return uriReference.is() && uriReference->isAbsolute()
214 && uriReference->isHierarchical() && !uriReference->hasRelativePath();
215 }
216
// Outcome of normalizePrefix.
//
// To improve performance, assume that if for any prefix URL of a given
// hierarchical URL either a UCB content cannot be created, or the UCB content
// does not support the getCasePreservingURL command, then this will hold for
// any other prefix URL of the given URL, too.  Those two cases are reported
// as GeneralFailure; any other failure is a SpecificFailure.
enum Result
{
    Success,         // the normalized URL was obtained
    GeneralFailure,  // no content, or getCasePreservingURL is unsupported
    SpecificFailure  // executing the command failed for this particular URL
};
222
normalizePrefix(css::uno::Reference<css::ucb::XContentProvider> const & broker,rtl::OUString const & uri,rtl::OUString * normalized)223 Result normalizePrefix(
224 css::uno::Reference< css::ucb::XContentProvider > const & broker,
225 rtl::OUString const & uri, rtl::OUString * normalized)
226 {
227 OSL_ASSERT(broker.is() && normalized != 0);
228 css::uno::Reference< css::ucb::XContent > content;
229 try {
230 content = broker->queryContent(
231 css::uno::Reference< css::ucb::XContentIdentifierFactory >(
232 broker, css::uno::UNO_QUERY_THROW)->createContentIdentifier(
233 uri));
234 } catch (css::ucb::IllegalIdentifierException &) {}
235 if (!content.is()) {
236 return GeneralFailure;
237 }
238 try {
239 #if OSL_DEBUG_LEVEL > 0
240 bool ok =
241 #endif
242 (css::uno::Reference< css::ucb::XCommandProcessor >(
243 content, css::uno::UNO_QUERY_THROW)->execute(
244 css::ucb::Command(
245 rtl::OUString(
246 RTL_CONSTASCII_USTRINGPARAM(
247 "getCasePreservingURL")),
248 -1, css::uno::Any()),
249 0,
250 css::uno::Reference< css::ucb::XCommandEnvironment >())
251 >>= *normalized);
252 OSL_ASSERT(ok);
253 } catch (css::uno::RuntimeException &) {
254 throw;
255 } catch (css::ucb::UnsupportedCommandException &) {
256 return GeneralFailure;
257 } catch (css::uno::Exception &) {
258 return SpecificFailure;
259 }
260 return Success;
261 }
262
normalize(css::uno::Reference<css::ucb::XContentProvider> const & broker,css::uno::Reference<css::uri::XUriReferenceFactory> const & uriFactory,rtl::OUString const & uriReference)263 rtl::OUString normalize(
264 css::uno::Reference< css::ucb::XContentProvider > const & broker,
265 css::uno::Reference< css::uri::XUriReferenceFactory > const & uriFactory,
266 rtl::OUString const & uriReference)
267 {
268 // normalizePrefix can potentially fail (a typically example being a file
269 // URL that denotes a non-existing resource); in such a case, try to
270 // normalize as long a prefix of the given URL as possible (i.e., normalize
271 // all the existing directories within the path):
272 rtl::OUString normalized;
273 sal_Int32 n = uriReference.indexOf('#');
274 normalized = n == -1 ? uriReference : uriReference.copy(0, n);
275 switch (normalizePrefix(broker, normalized, &normalized)) {
276 case Success:
277 return n == -1 ? normalized : normalized + uriReference.copy(n);
278 case GeneralFailure:
279 return uriReference;
280 case SpecificFailure:
281 default:
282 break;
283 }
284 css::uno::Reference< css::uri::XUriReference > ref(
285 uriFactory->parse(uriReference));
286 if (!isAbsoluteHierarchicalUriReference(ref)) {
287 return uriReference;
288 }
289 sal_Int32 count = ref->getPathSegmentCount();
290 if (count < 2) {
291 return uriReference;
292 }
293 rtl::OUStringBuffer head(ref->getScheme());
294 head.append(static_cast< sal_Unicode >(':'));
295 if (ref->hasAuthority()) {
296 head.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
297 head.append(ref->getAuthority());
298 }
299 for (sal_Int32 i = count - 1; i > 0; --i) {
300 rtl::OUStringBuffer buf(head);
301 for (sal_Int32 j = 0; j < i; ++j) {
302 buf.append(static_cast< sal_Unicode >('/'));
303 buf.append(ref->getPathSegment(j));
304 }
305 normalized = buf.makeStringAndClear();
306 if (normalizePrefix(broker, normalized, &normalized) != SpecificFailure)
307 {
308 buf.append(normalized);
309 css::uno::Reference< css::uri::XUriReference > preRef(
310 uriFactory->parse(normalized));
311 if (!isAbsoluteHierarchicalUriReference(preRef)) {
312 // This could only happen if something is inconsistent:
313 break;
314 }
315 sal_Int32 preCount = preRef->getPathSegmentCount();
316 // normalizePrefix may have added or removed a final slash:
317 if (preCount != i) {
318 if (preCount == i - 1) {
319 buf.append(static_cast< sal_Unicode >('/'));
320 } else if (preCount - 1 == i && buf.getLength() > 0
321 && buf.charAt(buf.getLength() - 1) == '/')
322 {
323 buf.setLength(buf.getLength() - 1);
324 } else {
325 // This could only happen if something is inconsistent:
326 break;
327 }
328 }
329 for (sal_Int32 j = i; j < count; ++j) {
330 buf.append(static_cast< sal_Unicode >('/'));
331 buf.append(ref->getPathSegment(j));
332 }
333 if (ref->hasQuery()) {
334 buf.append(static_cast< sal_Unicode >('?'));
335 buf.append(ref->getQuery());
336 }
337 if (ref->hasFragment()) {
338 buf.append(static_cast< sal_Unicode >('#'));
339 buf.append(ref->getFragment());
340 }
341 return buf.makeStringAndClear();
342 }
343 }
344 return uriReference;
345 }
346
347 }
348
349 css::uno::Reference< css::uri::XUriReference >
normalizedMakeRelative(css::uno::Reference<css::uno::XComponentContext> const & context,rtl::OUString const & baseUriReference,rtl::OUString const & uriReference)350 URIHelper::normalizedMakeRelative(
351 css::uno::Reference< css::uno::XComponentContext > const & context,
352 rtl::OUString const & baseUriReference, rtl::OUString const & uriReference)
353 {
354 OSL_ASSERT(context.is());
355 css::uno::Reference< css::lang::XMultiComponentFactory > componentFactory(
356 context->getServiceManager());
357 if (!componentFactory.is()) {
358 throw css::uno::RuntimeException(
359 rtl::OUString(
360 RTL_CONSTASCII_USTRINGPARAM(
361 "component context has no service manager")),
362 css::uno::Reference< css::uno::XInterface >());
363 }
364 css::uno::Sequence< css::uno::Any > args(2);
365 args[0] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Local"));
366 args[1] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Office"));
367 css::uno::Reference< css::ucb::XContentProvider > broker;
368 try {
369 broker = css::uno::Reference< css::ucb::XContentProvider >(
370 componentFactory->createInstanceWithArgumentsAndContext(
371 rtl::OUString(
372 RTL_CONSTASCII_USTRINGPARAM(
373 "com.sun.star.ucb.UniversalContentBroker")),
374 args, context),
375 css::uno::UNO_QUERY_THROW);
376 } catch (css::uno::RuntimeException &) {
377 throw;
378 } catch (css::uno::Exception &) {
379 css::uno::Any exception(cppu::getCaughtException());
380 throw css::lang::WrappedTargetRuntimeException(
381 rtl::OUString(
382 RTL_CONSTASCII_USTRINGPARAM(
383 "creating com.sun.star.ucb.UniversalContentBroker failed")),
384 css::uno::Reference< css::uno::XInterface >(),
385 exception);
386 }
387 css::uno::Reference< css::uri::XUriReferenceFactory > uriFactory(
388 css::uri::UriReferenceFactory::create(context));
389 return uriFactory->makeRelative(
390 uriFactory->parse(normalize(broker, uriFactory, baseUriReference)),
391 uriFactory->parse(normalize(broker, uriFactory, uriReference)), true,
392 true, false);
393 }
394
simpleNormalizedMakeRelative(rtl::OUString const & baseUriReference,rtl::OUString const & uriReference)395 rtl::OUString URIHelper::simpleNormalizedMakeRelative(
396 rtl::OUString const & baseUriReference, rtl::OUString const & uriReference)
397 {
398 com::sun::star::uno::Reference< com::sun::star::uri::XUriReference > rel(
399 URIHelper::normalizedMakeRelative(
400 com::sun::star::uno::Reference<
401 com::sun::star::uno::XComponentContext >(
402 (com::sun::star::uno::Reference<
403 com::sun::star::beans::XPropertySet >(
404 comphelper::getProcessServiceFactory(),
405 com::sun::star::uno::UNO_QUERY_THROW)->
406 getPropertyValue(
407 rtl::OUString(
408 RTL_CONSTASCII_USTRINGPARAM("DefaultContext")))),
409 com::sun::star::uno::UNO_QUERY_THROW),
410 baseUriReference, uriReference));
411 return rel.is() ? rel->getUriReference() : uriReference;
412 }
413
414 //============================================================================
415 //
416 // FindFirstURLInText
417 //
418 //============================================================================
419
420 namespace unnamed_svl_urihelper {
421
nextChar(UniString const & rStr,xub_StrLen nPos)422 inline xub_StrLen nextChar(UniString const & rStr, xub_StrLen nPos)
423 {
424 return INetMIME::isHighSurrogate(rStr.GetChar(nPos))
425 && rStr.Len() - nPos >= 2
426 && INetMIME::isLowSurrogate(rStr.GetChar(nPos + 1)) ?
427 nPos + 2 : nPos + 1;
428 }
429
isBoundary1(CharClass const & rCharClass,UniString const & rStr,xub_StrLen nPos,xub_StrLen nEnd)430 bool isBoundary1(CharClass const & rCharClass, UniString const & rStr,
431 xub_StrLen nPos, xub_StrLen nEnd)
432 {
433 if (nPos == nEnd)
434 return true;
435 if (rCharClass.isLetterNumeric(rStr, nPos))
436 return false;
437 switch (rStr.GetChar(nPos))
438 {
439 case '$':
440 case '%':
441 case '&':
442 case '-':
443 case '/':
444 case '@':
445 case '\\':
446 return false;
447 default:
448 return true;
449 }
450 }
451
isBoundary2(CharClass const & rCharClass,UniString const & rStr,xub_StrLen nPos,xub_StrLen nEnd)452 bool isBoundary2(CharClass const & rCharClass, UniString const & rStr,
453 xub_StrLen nPos, xub_StrLen nEnd)
454 {
455 if (nPos == nEnd)
456 return true;
457 if (rCharClass.isLetterNumeric(rStr, nPos))
458 return false;
459 switch (rStr.GetChar(nPos))
460 {
461 case '!':
462 case '#':
463 case '$':
464 case '%':
465 case '&':
466 case '\'':
467 case '*':
468 case '+':
469 case '-':
470 case '/':
471 case '=':
472 case '?':
473 case '@':
474 case '^':
475 case '_':
476 case '`':
477 case '{':
478 case '|':
479 case '}':
480 case '~':
481 return false;
482 default:
483 return true;
484 }
485 }
486
checkWChar(CharClass const & rCharClass,UniString const & rStr,xub_StrLen * pPos,xub_StrLen * pEnd,bool bBackslash=false,bool bPipe=false)487 bool checkWChar(CharClass const & rCharClass, UniString const & rStr,
488 xub_StrLen * pPos, xub_StrLen * pEnd, bool bBackslash = false,
489 bool bPipe = false)
490 {
491 sal_Unicode c = rStr.GetChar(*pPos);
492 if (INetMIME::isUSASCII(c))
493 {
494 static sal_uInt8 const aMap[128]
495 = { 0, 0, 0, 0, 0, 0, 0, 0,
496 0, 0, 0, 0, 0, 0, 0, 0,
497 0, 0, 0, 0, 0, 0, 0, 0,
498 0, 0, 0, 0, 0, 0, 0, 0,
499 0, 1, 0, 0, 4, 4, 4, 1, // !"#$%&'
500 1, 1, 1, 1, 1, 4, 1, 4, // ()*+,-./
501 4, 4, 4, 4, 4, 4, 4, 4, // 01234567
502 4, 4, 1, 1, 0, 1, 0, 1, // 89:;<=>?
503 4, 4, 4, 4, 4, 4, 4, 4, // @ABCDEFG
504 4, 4, 4, 4, 4, 4, 4, 4, // HIJKLMNO
505 4, 4, 4, 4, 4, 4, 4, 4, // PQRSTUVW
506 4, 4, 4, 1, 2, 1, 0, 1, // XYZ[\]^_
507 0, 4, 4, 4, 4, 4, 4, 4, // `abcdefg
508 4, 4, 4, 4, 4, 4, 4, 4, // hijklmno
509 4, 4, 4, 4, 4, 4, 4, 4, // pqrstuvw
510 4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~
511 switch (aMap[c])
512 {
513 default: // not uric
514 return false;
515
516 case 1: // uric
517 ++(*pPos);
518 return true;
519
520 case 2: // "\"
521 if (bBackslash)
522 {
523 *pEnd = ++(*pPos);
524 return true;
525 }
526 else
527 return false;
528
529 case 3: // "|"
530 if (bPipe)
531 {
532 *pEnd = ++(*pPos);
533 return true;
534 }
535 else
536 return false;
537
538 case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see
539 // isBoundary1)
540 *pEnd = ++(*pPos);
541 return true;
542 }
543 }
544 else if (rCharClass.isLetterNumeric(rStr, *pPos))
545 {
546 *pEnd = *pPos = nextChar(rStr, *pPos);
547 return true;
548 }
549 else
550 return false;
551 }
552
scanDomain(UniString const & rStr,xub_StrLen * pPos,xub_StrLen nEnd)553 sal_uInt32 scanDomain(UniString const & rStr, xub_StrLen * pPos,
554 xub_StrLen nEnd)
555 {
556 sal_Unicode const * pBuffer = rStr.GetBuffer();
557 sal_Unicode const * p = pBuffer + *pPos;
558 sal_uInt32 nLabels = INetURLObject::scanDomain(p, pBuffer + nEnd, false);
559 *pPos = sal::static_int_cast< xub_StrLen >(p - pBuffer);
560 return nLabels;
561 }
562
563 }
564
565 UniString
FindFirstURLInText(UniString const & rText,xub_StrLen & rBegin,xub_StrLen & rEnd,CharClass const & rCharClass,INetURLObject::EncodeMechanism eMechanism,rtl_TextEncoding eCharset,INetURLObject::FSysStyle eStyle)566 URIHelper::FindFirstURLInText(UniString const & rText,
567 xub_StrLen & rBegin,
568 xub_StrLen & rEnd,
569 CharClass const & rCharClass,
570 INetURLObject::EncodeMechanism eMechanism,
571 rtl_TextEncoding eCharset,
572 INetURLObject::FSysStyle eStyle)
573 {
574 if (!(rBegin <= rEnd && rEnd <= rText.Len()))
575 return UniString();
576
577 // Search for the first substring of [rBegin..rEnd[ that matches any of the
578 // following productions (for which the appropriate style bit is set in
579 // eStyle, if applicable).
580 //
581 // 1st Production (known scheme):
582 // \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar]
583 // \B1
584 //
585 // 2nd Production (file):
586 // \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1
587 //
588 // 3rd Production (ftp):
589 // \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
590 //
591 // 4th Production (http):
592 // \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
593 //
594 // 5th Production (mailto):
595 // \B2 local-part "@" domain \B1
596 //
597 // 6th Production (UNC file):
598 // \B1 "\\" domain "\" *(wchar / "\") \B1
599 //
600 // 7th Production (DOS file):
601 // \B1 ALPHA ":\" *(wchar / "\") \B1
602 //
603 // 8th Production (Unix-like DOS file):
604 // \B1 ALPHA ":/" *(wchar / "\") \B1
605 //
606 // The productions use the following auxiliary rules.
607 //
608 // local-part = atom *("." atom)
609 // atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+"
610 // / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}"
611 // / "~")
612 // domain = label *("." label)
613 // label = alphanum [*(alphanum / "-") alphanum]
614 // alphanum = ALPHA / DIGIT
615 // wchar = <any uric character (ignoring the escaped rule), or "%", or
616 // a letter or digit (according to rCharClass)>
617 //
618 // "\B1" (boundary 1) stands for the beginning or end of the block of text,
619 // or a character that is neither (a) a letter or digit (according to
620 // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\".
621 // (FIXME: What was the rationale for this set of punctuation characters?)
622 //
623 // "\B2" (boundary 2) stands for the beginning or end of the block of text,
624 // or a character that is neither (a) a letter or digit (according to
625 // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-",
626 // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC
627 // 822 <atom> character, or "@" from \B1's set above).
628 //
629 // Productions 1--4, and 6--8 try to find a maximum-length match, but they
630 // stop at the first <wchar> character that is a "\B1" character which is
631 // only followed by "\B1" characters (taking "\" and "|" characters into
632 // account appropriately). Production 5 simply tries to find a maximum-
633 // length match.
634 //
635 // Productions 1--4 use the given eMechanism and eCharset. Productions 5--9
636 // use ENCODE_ALL.
637 //
638 // Productions 6--9 are only applicable if the FSYS_DOS bit is set in
639 // eStyle.
640
641 bool bBoundary1 = true;
642 bool bBoundary2 = true;
643 for (xub_StrLen nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos))
644 {
645 sal_Unicode c = rText.GetChar(nPos);
646 if (bBoundary1)
647 {
648 if (INetMIME::isAlpha(c))
649 {
650 xub_StrLen i = nPos;
651 INetProtocol eScheme
652 = INetURLObject::CompareProtocolScheme(UniString(rText, i,
653 rEnd));
654 if (eScheme == INET_PROT_FILE) // 2nd
655 {
656 while (rText.GetChar(i++) != ':') ;
657 xub_StrLen nPrefixEnd = i;
658 xub_StrLen nUriEnd = i;
659 while (i != rEnd
660 && checkWChar(rCharClass, rText, &i, &nUriEnd, true,
661 true)) ;
662 if (i != nPrefixEnd && rText.GetChar(i) == '#')
663 {
664 ++i;
665 while (i != rEnd
666 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
667 }
668 if (nUriEnd != nPrefixEnd
669 && isBoundary1(rCharClass, rText, nUriEnd, rEnd))
670 {
671 INetURLObject aUri(UniString(rText, nPos,
672 nUriEnd - nPos),
673 INET_PROT_FILE, eMechanism, eCharset,
674 eStyle);
675 if (!aUri.HasError())
676 {
677 rBegin = nPos;
678 rEnd = nUriEnd;
679 return
680 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
681 }
682 }
683 }
684 else if (eScheme != INET_PROT_NOT_VALID) // 1st
685 {
686 while (rText.GetChar(i++) != ':') ;
687 xub_StrLen nPrefixEnd = i;
688 xub_StrLen nUriEnd = i;
689 while (i != rEnd
690 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
691 if (i != nPrefixEnd && rText.GetChar(i) == '#')
692 {
693 ++i;
694 while (i != rEnd
695 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
696 }
697 if (nUriEnd != nPrefixEnd
698 && (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
699 || rText.GetChar(nUriEnd) == '\\'))
700 {
701 INetURLObject aUri(UniString(rText, nPos,
702 nUriEnd - nPos),
703 INET_PROT_HTTP, eMechanism,
704 eCharset);
705 if (!aUri.HasError())
706 {
707 rBegin = nPos;
708 rEnd = nUriEnd;
709 return
710 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
711 }
712 }
713 }
714
715 // 3rd, 4th:
716 i = nPos;
717 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
718 if (nLabels >= 3
719 && rText.GetChar(nPos + 3) == '.'
720 && (((rText.GetChar(nPos) == 'w'
721 || rText.GetChar(nPos) == 'W')
722 && (rText.GetChar(nPos + 1) == 'w'
723 || rText.GetChar(nPos + 1) == 'W')
724 && (rText.GetChar(nPos + 2) == 'w'
725 || rText.GetChar(nPos + 2) == 'W'))
726 || ((rText.GetChar(nPos) == 'f'
727 || rText.GetChar(nPos) == 'F')
728 && (rText.GetChar(nPos + 1) == 't'
729 || rText.GetChar(nPos + 1) == 'T')
730 && (rText.GetChar(nPos + 2) == 'p'
731 || rText.GetChar(nPos + 2) == 'P'))))
732 // (note that rText.GetChar(nPos + 3) is guaranteed to be
733 // valid)
734 {
735 xub_StrLen nUriEnd = i;
736 if (i != rEnd && rText.GetChar(i) == '/')
737 {
738 nUriEnd = ++i;
739 while (i != rEnd
740 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
741 }
742 if (i != rEnd && rText.GetChar(i) == '#')
743 {
744 ++i;
745 while (i != rEnd
746 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
747 }
748 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
749 || rText.GetChar(nUriEnd) == '\\')
750 {
751 INetURLObject aUri(UniString(rText, nPos,
752 nUriEnd - nPos),
753 INET_PROT_HTTP, eMechanism,
754 eCharset);
755 if (!aUri.HasError())
756 {
757 rBegin = nPos;
758 rEnd = nUriEnd;
759 return
760 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
761 }
762 }
763 }
764
765 if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 3
766 && rText.GetChar(nPos + 1) == ':'
767 && (rText.GetChar(nPos + 2) == '/'
768 || rText.GetChar(nPos + 2) == '\\')) // 7th, 8th
769 {
770 i = nPos + 3;
771 xub_StrLen nUriEnd = i;
772 while (i != rEnd
773 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
774 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
775 {
776 INetURLObject aUri(UniString(rText, nPos,
777 nUriEnd - nPos),
778 INET_PROT_FILE,
779 INetURLObject::ENCODE_ALL,
780 RTL_TEXTENCODING_UTF8,
781 INetURLObject::FSYS_DOS);
782 if (!aUri.HasError())
783 {
784 rBegin = nPos;
785 rEnd = nUriEnd;
786 return
787 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
788 }
789 }
790 }
791 }
792 else if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 2
793 && rText.GetChar(nPos) == '\\'
794 && rText.GetChar(nPos + 1) == '\\') // 6th
795 {
796 xub_StrLen i = nPos + 2;
797 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
798 if (nLabels >= 1 && i != rEnd && rText.GetChar(i) == '\\')
799 {
800 xub_StrLen nUriEnd = ++i;
801 while (i != rEnd
802 && checkWChar(rCharClass, rText, &i, &nUriEnd,
803 true)) ;
804 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
805 {
806 INetURLObject aUri(UniString(rText, nPos,
807 nUriEnd - nPos),
808 INET_PROT_FILE,
809 INetURLObject::ENCODE_ALL,
810 RTL_TEXTENCODING_UTF8,
811 INetURLObject::FSYS_DOS);
812 if (!aUri.HasError())
813 {
814 rBegin = nPos;
815 rEnd = nUriEnd;
816 return
817 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
818 }
819 }
820 }
821 }
822 }
823 if (bBoundary2 && INetMIME::isAtomChar(c)) // 5th
824 {
825 bool bDot = false;
826 for (xub_StrLen i = nPos + 1; i != rEnd; ++i)
827 {
828 sal_Unicode c2 = rText.GetChar(i);
829 if (INetMIME::isAtomChar(c2))
830 bDot = false;
831 else if (bDot)
832 break;
833 else if (c2 == '.')
834 bDot = true;
835 else
836 {
837 if (c2 == '@')
838 {
839 ++i;
840 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
841 if (nLabels >= 1
842 && isBoundary1(rCharClass, rText, i, rEnd))
843 {
844 INetURLObject aUri(UniString(rText, nPos, i - nPos),
845 INET_PROT_MAILTO,
846 INetURLObject::ENCODE_ALL);
847 if (!aUri.HasError())
848 {
849 rBegin = nPos;
850 rEnd = i;
851 return aUri.GetMainURL(
852 INetURLObject::DECODE_TO_IURI);
853 }
854 }
855 }
856 break;
857 }
858 }
859 }
860 bBoundary1 = isBoundary1(rCharClass, rText, nPos, rEnd);
861 bBoundary2 = isBoundary2(rCharClass, rText, nPos, rEnd);
862 }
863 rBegin = rEnd;
864 return UniString();
865 }
866
867 //============================================================================
868 //
869 // removePassword
870 //
871 //============================================================================
872
873 UniString
removePassword(UniString const & rURI,INetURLObject::EncodeMechanism eEncodeMechanism,INetURLObject::DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset)874 URIHelper::removePassword(UniString const & rURI,
875 INetURLObject::EncodeMechanism eEncodeMechanism,
876 INetURLObject::DecodeMechanism eDecodeMechanism,
877 rtl_TextEncoding eCharset)
878 {
879 INetURLObject aObj(rURI, eEncodeMechanism, eCharset);
880 return aObj.HasError() ?
881 rURI :
882 String(aObj.GetURLNoPass(eDecodeMechanism, eCharset));
883 }
884
885 //============================================================================
886 //
887 // queryFSysStyle
888 //
889 //============================================================================
890
queryFSysStyle(UniString const & rFileUrl,bool bAddConvenienceStyles)891 INetURLObject::FSysStyle URIHelper::queryFSysStyle(UniString const & rFileUrl,
892 bool bAddConvenienceStyles)
893 throw (uno::RuntimeException)
894 {
895 ::ucbhelper::ContentBroker const * pBroker = ::ucbhelper::ContentBroker::get();
896 uno::Reference< ucb::XContentProviderManager > xManager;
897 if (pBroker)
898 xManager = pBroker->getContentProviderManagerInterface();
899 uno::Reference< beans::XPropertySet > xProperties;
900 if (xManager.is())
901 xProperties
902 = uno::Reference< beans::XPropertySet >(
903 xManager->queryContentProvider(rFileUrl), uno::UNO_QUERY);
904 sal_Int32 nNotation = ucb::FileSystemNotation::UNKNOWN_NOTATION;
905 if (xProperties.is())
906 try
907 {
908 xProperties->getPropertyValue(rtl::OUString(
909 RTL_CONSTASCII_USTRINGPARAM(
910 "FileSystemNotation")))
911 >>= nNotation;
912 }
913 catch (beans::UnknownPropertyException const &) {}
914 catch (lang::WrappedTargetException const &) {}
915
916 // The following code depends on the fact that the
917 // com::sun::star::ucb::FileSystemNotation constants range from UNKNOWN to
918 // MAC, without any holes. The table below has two entries per notation,
919 // the first is used if bAddConvenienceStyles == false, while the second
920 // is used if bAddConvenienceStyles == true:
921 static INetURLObject::FSysStyle const aMap[][2]
922 = { { INetURLObject::FSysStyle(0),
923 INetURLObject::FSYS_DETECT },
924 // UNKNOWN
925 { INetURLObject::FSYS_UNX,
926 INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
927 | INetURLObject::FSYS_UNX) },
928 // UNIX
929 { INetURLObject::FSYS_DOS,
930 INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
931 | INetURLObject::FSYS_UNX
932 | INetURLObject::FSYS_DOS) },
933 // DOS
934 { INetURLObject::FSYS_MAC,
935 INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
936 | INetURLObject::FSYS_UNX
937 | INetURLObject::FSYS_MAC) } };
938 return aMap[nNotation < ucb::FileSystemNotation::UNKNOWN_NOTATION
939 || nNotation > ucb::FileSystemNotation::MAC_NOTATION ?
940 0 :
941 nNotation
942 - ucb::FileSystemNotation::UNKNOWN_NOTATION]
943 [bAddConvenienceStyles];
944 }
945