/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org. If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/

// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_svl.hxx"
#include <svl/urihelper.hxx>
#include <com/sun/star/beans/XPropertySet.hpp>
#include "com/sun/star/lang/WrappedTargetRuntimeException.hpp"
#include "com/sun/star/lang/XMultiComponentFactory.hpp"
#include "com/sun/star/ucb/Command.hpp"
#include <com/sun/star/ucb/FileSystemNotation.hpp>
#include "com/sun/star/ucb/IllegalIdentifierException.hpp"
#include "com/sun/star/ucb/UnsupportedCommandException.hpp"
#include "com/sun/star/ucb/XCommandEnvironment.hpp"
#include "com/sun/star/ucb/XCommandProcessor.hpp"
#include "com/sun/star/ucb/XContent.hpp"
#include "com/sun/star/ucb/XContentIdentifierFactory.hpp"
#include "com/sun/star/ucb/XContentProvider.hpp"
#include <com/sun/star/ucb/XContentProviderManager.hpp>
#include "com/sun/star/uno/Any.hxx"
#include "com/sun/star/uno/Exception.hpp"
#include "com/sun/star/uno/Reference.hxx"
#include "com/sun/star/uno/RuntimeException.hpp"
#include "com/sun/star/uno/Sequence.hxx"
#include "com/sun/star/uno/XComponentContext.hpp"
#include "com/sun/star/uno/XInterface.hpp"
#include "com/sun/star/uri/UriReferenceFactory.hpp"
#include "com/sun/star/uri/XUriReference.hpp"
#include "com/sun/star/uri/XUriReferenceFactory.hpp"
#include "cppuhelper/exc_hlp.hxx"
#include "comphelper/processfactory.hxx"
#include "osl/diagnose.h"
#include "rtl/ustrbuf.hxx"
#include "rtl/ustring.h"
#include "rtl/ustring.hxx"
#include "sal/types.h"
#include <tools/debug.hxx>
#include <tools/inetmime.hxx>
#include <ucbhelper/contentbroker.hxx>
#include <unotools/charclass.hxx>
#include "rtl/instance.hxx"

namespace unnamed_svl_urihelper {}
using namespace unnamed_svl_urihelper;
    // unnamed namespaces don't work well yet...
70 71 namespace css = com::sun::star; 72 using namespace com::sun::star; 73 74 //============================================================================ 75 // 76 // SmartRel2Abs 77 // 78 //============================================================================ 79 80 namespace unnamed_svl_urihelper { 81 82 inline UniString toUniString(ByteString const & rString) 83 { 84 return UniString(rString, RTL_TEXTENCODING_ISO_8859_1); 85 } 86 87 inline UniString toUniString(UniString const & rString) 88 { 89 return rString; 90 } 91 92 template< typename Str > 93 inline UniString SmartRel2Abs_Impl(INetURLObject const & rTheBaseURIRef, 94 Str const & rTheRelURIRef, 95 Link const & rMaybeFileHdl, 96 bool bCheckFileExists, 97 bool bIgnoreFragment, 98 INetURLObject::EncodeMechanism 99 eEncodeMechanism, 100 INetURLObject::DecodeMechanism 101 eDecodeMechanism, 102 rtl_TextEncoding eCharset, 103 bool bRelativeNonURIs, 104 INetURLObject::FSysStyle eStyle) 105 { 106 // Backwards compatibility: 107 if (rTheRelURIRef.Len() != 0 && rTheRelURIRef.GetChar(0) == '#') 108 return toUniString(rTheRelURIRef); 109 110 INetURLObject aAbsURIRef; 111 if (rTheBaseURIRef.HasError()) 112 aAbsURIRef. 
113 SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle); 114 else 115 { 116 bool bWasAbsolute; 117 aAbsURIRef = rTheBaseURIRef.smartRel2Abs(rTheRelURIRef, 118 bWasAbsolute, 119 bIgnoreFragment, 120 eEncodeMechanism, 121 eCharset, 122 bRelativeNonURIs, 123 eStyle); 124 if (bCheckFileExists 125 && !bWasAbsolute 126 && (aAbsURIRef.GetProtocol() == INET_PROT_FILE)) 127 { 128 INetURLObject aNonFileURIRef; 129 aNonFileURIRef.SetSmartURL(rTheRelURIRef, 130 eEncodeMechanism, 131 eCharset, 132 eStyle); 133 if (!aNonFileURIRef.HasError() 134 && aNonFileURIRef.GetProtocol() != INET_PROT_FILE) 135 { 136 bool bMaybeFile = false; 137 if (rMaybeFileHdl.IsSet()) 138 { 139 UniString aFilePath(toUniString(rTheRelURIRef)); 140 bMaybeFile = rMaybeFileHdl.Call(&aFilePath) != 0; 141 } 142 if (!bMaybeFile) 143 aAbsURIRef = aNonFileURIRef; 144 } 145 } 146 } 147 return aAbsURIRef.GetMainURL(eDecodeMechanism, eCharset); 148 } 149 150 } 151 152 UniString 153 URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef, 154 ByteString const & rTheRelURIRef, 155 Link const & rMaybeFileHdl, 156 bool bCheckFileExists, 157 bool bIgnoreFragment, 158 INetURLObject::EncodeMechanism eEncodeMechanism, 159 INetURLObject::DecodeMechanism eDecodeMechanism, 160 rtl_TextEncoding eCharset, 161 bool bRelativeNonURIs, 162 INetURLObject::FSysStyle eStyle) 163 { 164 return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl, 165 bCheckFileExists, bIgnoreFragment, 166 eEncodeMechanism, eDecodeMechanism, eCharset, 167 bRelativeNonURIs, eStyle); 168 } 169 170 UniString 171 URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef, 172 UniString const & rTheRelURIRef, 173 Link const & rMaybeFileHdl, 174 bool bCheckFileExists, 175 bool bIgnoreFragment, 176 INetURLObject::EncodeMechanism eEncodeMechanism, 177 INetURLObject::DecodeMechanism eDecodeMechanism, 178 rtl_TextEncoding eCharset, 179 bool bRelativeNonURIs, 180 INetURLObject::FSysStyle eStyle) 181 { 182 return 
SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl, 183 bCheckFileExists, bIgnoreFragment, 184 eEncodeMechanism, eDecodeMechanism, eCharset, 185 bRelativeNonURIs, eStyle); 186 } 187 188 //============================================================================ 189 // 190 // SetMaybeFileHdl 191 // 192 //============================================================================ 193 194 namespace { struct MaybeFileHdl : public rtl::Static< Link, MaybeFileHdl > {}; } 195 196 void URIHelper::SetMaybeFileHdl(Link const & rTheMaybeFileHdl) 197 { 198 MaybeFileHdl::get() = rTheMaybeFileHdl; 199 } 200 201 //============================================================================ 202 // 203 // GetMaybeFileHdl 204 // 205 //============================================================================ 206 207 Link URIHelper::GetMaybeFileHdl() 208 { 209 return MaybeFileHdl::get(); 210 } 211 212 namespace { 213 214 bool isAbsoluteHierarchicalUriReference( 215 css::uno::Reference< css::uri::XUriReference > const & uriReference) 216 { 217 return uriReference.is() && uriReference->isAbsolute() 218 && uriReference->isHierarchical() && !uriReference->hasRelativePath(); 219 } 220 221 // To improve performance, assume that if for any prefix URL of a given 222 // hierarchical URL either a UCB content cannot be created, or the UCB content 223 // does not support the getCasePreservingURL command, then this will hold for 224 // any other prefix URL of the given URL, too: 225 enum Result { Success, GeneralFailure, SpecificFailure }; 226 227 Result normalizePrefix( 228 css::uno::Reference< css::ucb::XContentProvider > const & broker, 229 rtl::OUString const & uri, rtl::OUString * normalized) 230 { 231 OSL_ASSERT(broker.is() && normalized != 0); 232 css::uno::Reference< css::ucb::XContent > content; 233 try { 234 content = broker->queryContent( 235 css::uno::Reference< css::ucb::XContentIdentifierFactory >( 236 broker, css::uno::UNO_QUERY_THROW)->createContentIdentifier( 
237 uri)); 238 } catch (css::ucb::IllegalIdentifierException &) {} 239 if (!content.is()) { 240 return GeneralFailure; 241 } 242 try { 243 #if OSL_DEBUG_LEVEL > 0 244 bool ok = 245 #endif 246 (css::uno::Reference< css::ucb::XCommandProcessor >( 247 content, css::uno::UNO_QUERY_THROW)->execute( 248 css::ucb::Command( 249 rtl::OUString( 250 RTL_CONSTASCII_USTRINGPARAM( 251 "getCasePreservingURL")), 252 -1, css::uno::Any()), 253 0, 254 css::uno::Reference< css::ucb::XCommandEnvironment >()) 255 >>= *normalized); 256 OSL_ASSERT(ok); 257 } catch (css::uno::RuntimeException &) { 258 throw; 259 } catch (css::ucb::UnsupportedCommandException &) { 260 return GeneralFailure; 261 } catch (css::uno::Exception &) { 262 return SpecificFailure; 263 } 264 return Success; 265 } 266 267 rtl::OUString normalize( 268 css::uno::Reference< css::ucb::XContentProvider > const & broker, 269 css::uno::Reference< css::uri::XUriReferenceFactory > const & uriFactory, 270 rtl::OUString const & uriReference) 271 { 272 // normalizePrefix can potentially fail (a typically example being a file 273 // URL that denotes a non-existing resource); in such a case, try to 274 // normalize as long a prefix of the given URL as possible (i.e., normalize 275 // all the existing directories within the path): 276 rtl::OUString normalized; 277 sal_Int32 n = uriReference.indexOf('#'); 278 normalized = n == -1 ? uriReference : uriReference.copy(0, n); 279 switch (normalizePrefix(broker, normalized, &normalized)) { 280 case Success: 281 return n == -1 ? 
normalized : normalized + uriReference.copy(n); 282 case GeneralFailure: 283 return uriReference; 284 case SpecificFailure: 285 default: 286 break; 287 } 288 css::uno::Reference< css::uri::XUriReference > ref( 289 uriFactory->parse(uriReference)); 290 if (!isAbsoluteHierarchicalUriReference(ref)) { 291 return uriReference; 292 } 293 sal_Int32 count = ref->getPathSegmentCount(); 294 if (count < 2) { 295 return uriReference; 296 } 297 rtl::OUStringBuffer head(ref->getScheme()); 298 head.append(static_cast< sal_Unicode >(':')); 299 if (ref->hasAuthority()) { 300 head.appendAscii(RTL_CONSTASCII_STRINGPARAM("//")); 301 head.append(ref->getAuthority()); 302 } 303 for (sal_Int32 i = count - 1; i > 0; --i) { 304 rtl::OUStringBuffer buf(head); 305 for (sal_Int32 j = 0; j < i; ++j) { 306 buf.append(static_cast< sal_Unicode >('/')); 307 buf.append(ref->getPathSegment(j)); 308 } 309 normalized = buf.makeStringAndClear(); 310 if (normalizePrefix(broker, normalized, &normalized) != SpecificFailure) 311 { 312 buf.append(normalized); 313 css::uno::Reference< css::uri::XUriReference > preRef( 314 uriFactory->parse(normalized)); 315 if (!isAbsoluteHierarchicalUriReference(preRef)) { 316 // This could only happen if something is inconsistent: 317 break; 318 } 319 sal_Int32 preCount = preRef->getPathSegmentCount(); 320 // normalizePrefix may have added or removed a final slash: 321 if (preCount != i) { 322 if (preCount == i - 1) { 323 buf.append(static_cast< sal_Unicode >('/')); 324 } else if (preCount - 1 == i && buf.getLength() > 0 325 && buf.charAt(buf.getLength() - 1) == '/') 326 { 327 buf.setLength(buf.getLength() - 1); 328 } else { 329 // This could only happen if something is inconsistent: 330 break; 331 } 332 } 333 for (sal_Int32 j = i; j < count; ++j) { 334 buf.append(static_cast< sal_Unicode >('/')); 335 buf.append(ref->getPathSegment(j)); 336 } 337 if (ref->hasQuery()) { 338 buf.append(static_cast< sal_Unicode >('?')); 339 buf.append(ref->getQuery()); 340 } 341 if 
(ref->hasFragment()) { 342 buf.append(static_cast< sal_Unicode >('#')); 343 buf.append(ref->getFragment()); 344 } 345 return buf.makeStringAndClear(); 346 } 347 } 348 return uriReference; 349 } 350 351 } 352 353 css::uno::Reference< css::uri::XUriReference > 354 URIHelper::normalizedMakeRelative( 355 css::uno::Reference< css::uno::XComponentContext > const & context, 356 rtl::OUString const & baseUriReference, rtl::OUString const & uriReference) 357 { 358 OSL_ASSERT(context.is()); 359 css::uno::Reference< css::lang::XMultiComponentFactory > componentFactory( 360 context->getServiceManager()); 361 if (!componentFactory.is()) { 362 throw css::uno::RuntimeException( 363 rtl::OUString( 364 RTL_CONSTASCII_USTRINGPARAM( 365 "component context has no service manager")), 366 css::uno::Reference< css::uno::XInterface >()); 367 } 368 css::uno::Sequence< css::uno::Any > args(2); 369 args[0] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Local")); 370 args[1] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Office")); 371 css::uno::Reference< css::ucb::XContentProvider > broker; 372 try { 373 broker = css::uno::Reference< css::ucb::XContentProvider >( 374 componentFactory->createInstanceWithArgumentsAndContext( 375 rtl::OUString( 376 RTL_CONSTASCII_USTRINGPARAM( 377 "com.sun.star.ucb.UniversalContentBroker")), 378 args, context), 379 css::uno::UNO_QUERY_THROW); 380 } catch (css::uno::RuntimeException &) { 381 throw; 382 } catch (css::uno::Exception &) { 383 css::uno::Any exception(cppu::getCaughtException()); 384 throw css::lang::WrappedTargetRuntimeException( 385 rtl::OUString( 386 RTL_CONSTASCII_USTRINGPARAM( 387 "creating com.sun.star.ucb.UniversalContentBroker failed")), 388 css::uno::Reference< css::uno::XInterface >(), 389 exception); 390 } 391 css::uno::Reference< css::uri::XUriReferenceFactory > uriFactory( 392 css::uri::UriReferenceFactory::create(context)); 393 return uriFactory->makeRelative( 394 uriFactory->parse(normalize(broker, uriFactory, baseUriReference)), 395 
uriFactory->parse(normalize(broker, uriFactory, uriReference)), true, 396 true, false); 397 } 398 399 rtl::OUString URIHelper::simpleNormalizedMakeRelative( 400 rtl::OUString const & baseUriReference, rtl::OUString const & uriReference) 401 { 402 com::sun::star::uno::Reference< com::sun::star::uri::XUriReference > rel( 403 URIHelper::normalizedMakeRelative( 404 com::sun::star::uno::Reference< 405 com::sun::star::uno::XComponentContext >( 406 (com::sun::star::uno::Reference< 407 com::sun::star::beans::XPropertySet >( 408 comphelper::getProcessServiceFactory(), 409 com::sun::star::uno::UNO_QUERY_THROW)-> 410 getPropertyValue( 411 rtl::OUString( 412 RTL_CONSTASCII_USTRINGPARAM("DefaultContext")))), 413 com::sun::star::uno::UNO_QUERY_THROW), 414 baseUriReference, uriReference)); 415 return rel.is() ? rel->getUriReference() : uriReference; 416 } 417 418 //============================================================================ 419 // 420 // FindFirstURLInText 421 // 422 //============================================================================ 423 424 namespace unnamed_svl_urihelper { 425 426 inline xub_StrLen nextChar(UniString const & rStr, xub_StrLen nPos) 427 { 428 return INetMIME::isHighSurrogate(rStr.GetChar(nPos)) 429 && rStr.Len() - nPos >= 2 430 && INetMIME::isLowSurrogate(rStr.GetChar(nPos + 1)) ? 
431 nPos + 2 : nPos + 1; 432 } 433 434 bool isBoundary1(CharClass const & rCharClass, UniString const & rStr, 435 xub_StrLen nPos, xub_StrLen nEnd) 436 { 437 if (nPos == nEnd) 438 return true; 439 if (rCharClass.isLetterNumeric(rStr, nPos)) 440 return false; 441 switch (rStr.GetChar(nPos)) 442 { 443 case '$': 444 case '%': 445 case '&': 446 case '-': 447 case '/': 448 case '@': 449 case '\\': 450 return false; 451 default: 452 return true; 453 } 454 } 455 456 bool isBoundary2(CharClass const & rCharClass, UniString const & rStr, 457 xub_StrLen nPos, xub_StrLen nEnd) 458 { 459 if (nPos == nEnd) 460 return true; 461 if (rCharClass.isLetterNumeric(rStr, nPos)) 462 return false; 463 switch (rStr.GetChar(nPos)) 464 { 465 case '!': 466 case '#': 467 case '$': 468 case '%': 469 case '&': 470 case '\'': 471 case '*': 472 case '+': 473 case '-': 474 case '/': 475 case '=': 476 case '?': 477 case '@': 478 case '^': 479 case '_': 480 case '`': 481 case '{': 482 case '|': 483 case '}': 484 case '~': 485 return false; 486 default: 487 return true; 488 } 489 } 490 491 bool checkWChar(CharClass const & rCharClass, UniString const & rStr, 492 xub_StrLen * pPos, xub_StrLen * pEnd, bool bBackslash = false, 493 bool bPipe = false) 494 { 495 sal_Unicode c = rStr.GetChar(*pPos); 496 if (INetMIME::isUSASCII(c)) 497 { 498 static sal_uInt8 const aMap[128] 499 = { 0, 0, 0, 0, 0, 0, 0, 0, 500 0, 0, 0, 0, 0, 0, 0, 0, 501 0, 0, 0, 0, 0, 0, 0, 0, 502 0, 0, 0, 0, 0, 0, 0, 0, 503 0, 1, 0, 0, 4, 4, 4, 1, // !"#$%&' 504 1, 1, 1, 1, 1, 4, 1, 4, // ()*+,-./ 505 4, 4, 4, 4, 4, 4, 4, 4, // 01234567 506 4, 4, 1, 1, 0, 1, 0, 1, // 89:;<=>? 
507 4, 4, 4, 4, 4, 4, 4, 4, // @ABCDEFG 508 4, 4, 4, 4, 4, 4, 4, 4, // HIJKLMNO 509 4, 4, 4, 4, 4, 4, 4, 4, // PQRSTUVW 510 4, 4, 4, 1, 2, 1, 0, 1, // XYZ[\]^_ 511 0, 4, 4, 4, 4, 4, 4, 4, // `abcdefg 512 4, 4, 4, 4, 4, 4, 4, 4, // hijklmno 513 4, 4, 4, 4, 4, 4, 4, 4, // pqrstuvw 514 4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~ 515 switch (aMap[c]) 516 { 517 default: // not uric 518 return false; 519 520 case 1: // uric 521 ++(*pPos); 522 return true; 523 524 case 2: // "\" 525 if (bBackslash) 526 { 527 *pEnd = ++(*pPos); 528 return true; 529 } 530 else 531 return false; 532 533 case 3: // "|" 534 if (bPipe) 535 { 536 *pEnd = ++(*pPos); 537 return true; 538 } 539 else 540 return false; 541 542 case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see 543 // isBoundary1) 544 *pEnd = ++(*pPos); 545 return true; 546 } 547 } 548 else if (rCharClass.isLetterNumeric(rStr, *pPos)) 549 { 550 *pEnd = *pPos = nextChar(rStr, *pPos); 551 return true; 552 } 553 else 554 return false; 555 } 556 557 sal_uInt32 scanDomain(UniString const & rStr, xub_StrLen * pPos, 558 xub_StrLen nEnd) 559 { 560 sal_Unicode const * pBuffer = rStr.GetBuffer(); 561 sal_Unicode const * p = pBuffer + *pPos; 562 sal_uInt32 nLabels = INetURLObject::scanDomain(p, pBuffer + nEnd, false); 563 *pPos = sal::static_int_cast< xub_StrLen >(p - pBuffer); 564 return nLabels; 565 } 566 567 } 568 569 UniString 570 URIHelper::FindFirstURLInText(UniString const & rText, 571 xub_StrLen & rBegin, 572 xub_StrLen & rEnd, 573 CharClass const & rCharClass, 574 INetURLObject::EncodeMechanism eMechanism, 575 rtl_TextEncoding eCharset, 576 INetURLObject::FSysStyle eStyle) 577 { 578 if (!(rBegin <= rEnd && rEnd <= rText.Len())) 579 return UniString(); 580 581 // Search for the first substring of [rBegin..rEnd[ that matches any of the 582 // following productions (for which the appropriate style bit is set in 583 // eStyle, if applicable). 
584 // 585 // 1st Production (known scheme): 586 // \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar] 587 // \B1 588 // 589 // 2nd Production (file): 590 // \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1 591 // 592 // 3rd Production (ftp): 593 // \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1 594 // 595 // 4th Production (http): 596 // \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1 597 // 598 // 5th Production (mailto): 599 // \B2 local-part "@" domain \B1 600 // 601 // 6th Production (UNC file): 602 // \B1 "\\" domain "\" *(wchar / "\") \B1 603 // 604 // 7th Production (DOS file): 605 // \B1 ALPHA ":\" *(wchar / "\") \B1 606 // 607 // 8th Production (Unix-like DOS file): 608 // \B1 ALPHA ":/" *(wchar / "\") \B1 609 // 610 // The productions use the following auxiliary rules. 611 // 612 // local-part = atom *("." atom) 613 // atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" 614 // / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" 615 // / "~") 616 // domain = label *("." label) 617 // label = alphanum [*(alphanum / "-") alphanum] 618 // alphanum = ALPHA / DIGIT 619 // wchar = <any uric character (ignoring the escaped rule), or "%", or 620 // a letter or digit (according to rCharClass)> 621 // 622 // "\B1" (boundary 1) stands for the beginning or end of the block of text, 623 // or a character that is neither (a) a letter or digit (according to 624 // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\". 625 // (FIXME: What was the rationale for this set of punctuation characters?) 626 // 627 // "\B2" (boundary 2) stands for the beginning or end of the block of text, 628 // or a character that is neither (a) a letter or digit (according to 629 // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-", 630 // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC 631 // 822 <atom> character, or "@" from \B1's set above). 
632 // 633 // Productions 1--4, and 6--8 try to find a maximum-length match, but they 634 // stop at the first <wchar> character that is a "\B1" character which is 635 // only followed by "\B1" characters (taking "\" and "|" characters into 636 // account appropriately). Production 5 simply tries to find a maximum- 637 // length match. 638 // 639 // Productions 1--4 use the given eMechanism and eCharset. Productions 5--9 640 // use ENCODE_ALL. 641 // 642 // Productions 6--9 are only applicable if the FSYS_DOS bit is set in 643 // eStyle. 644 645 bool bBoundary1 = true; 646 bool bBoundary2 = true; 647 for (xub_StrLen nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos)) 648 { 649 sal_Unicode c = rText.GetChar(nPos); 650 if (bBoundary1) 651 { 652 if (INetMIME::isAlpha(c)) 653 { 654 xub_StrLen i = nPos; 655 INetProtocol eScheme 656 = INetURLObject::CompareProtocolScheme(UniString(rText, i, 657 rEnd)); 658 if (eScheme == INET_PROT_FILE) // 2nd 659 { 660 while (rText.GetChar(i++) != ':') ; 661 xub_StrLen nPrefixEnd = i; 662 xub_StrLen nUriEnd = i; 663 while (i != rEnd 664 && checkWChar(rCharClass, rText, &i, &nUriEnd, true, 665 true)) ; 666 if (i != nPrefixEnd && rText.GetChar(i) == '#') 667 { 668 ++i; 669 while (i != rEnd 670 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ; 671 } 672 if (nUriEnd != nPrefixEnd 673 && isBoundary1(rCharClass, rText, nUriEnd, rEnd)) 674 { 675 INetURLObject aUri(UniString(rText, nPos, 676 nUriEnd - nPos), 677 INET_PROT_FILE, eMechanism, eCharset, 678 eStyle); 679 if (!aUri.HasError()) 680 { 681 rBegin = nPos; 682 rEnd = nUriEnd; 683 return 684 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI); 685 } 686 } 687 } 688 else if (eScheme != INET_PROT_NOT_VALID) // 1st 689 { 690 while (rText.GetChar(i++) != ':') ; 691 xub_StrLen nPrefixEnd = i; 692 xub_StrLen nUriEnd = i; 693 while (i != rEnd 694 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ; 695 if (i != nPrefixEnd && rText.GetChar(i) == '#') 696 { 697 ++i; 698 while (i != rEnd 699 && 
checkWChar(rCharClass, rText, &i, &nUriEnd)) ; 700 } 701 if (nUriEnd != nPrefixEnd 702 && (isBoundary1(rCharClass, rText, nUriEnd, rEnd) 703 || rText.GetChar(nUriEnd) == '\\')) 704 { 705 INetURLObject aUri(UniString(rText, nPos, 706 nUriEnd - nPos), 707 INET_PROT_HTTP, eMechanism, 708 eCharset); 709 if (!aUri.HasError()) 710 { 711 rBegin = nPos; 712 rEnd = nUriEnd; 713 return 714 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI); 715 } 716 } 717 } 718 719 // 3rd, 4th: 720 i = nPos; 721 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd); 722 if (nLabels >= 3 723 && rText.GetChar(nPos + 3) == '.' 724 && (((rText.GetChar(nPos) == 'w' 725 || rText.GetChar(nPos) == 'W') 726 && (rText.GetChar(nPos + 1) == 'w' 727 || rText.GetChar(nPos + 1) == 'W') 728 && (rText.GetChar(nPos + 2) == 'w' 729 || rText.GetChar(nPos + 2) == 'W')) 730 || ((rText.GetChar(nPos) == 'f' 731 || rText.GetChar(nPos) == 'F') 732 && (rText.GetChar(nPos + 1) == 't' 733 || rText.GetChar(nPos + 1) == 'T') 734 && (rText.GetChar(nPos + 2) == 'p' 735 || rText.GetChar(nPos + 2) == 'P')))) 736 // (note that rText.GetChar(nPos + 3) is guaranteed to be 737 // valid) 738 { 739 xub_StrLen nUriEnd = i; 740 if (i != rEnd && rText.GetChar(i) == '/') 741 { 742 nUriEnd = ++i; 743 while (i != rEnd 744 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ; 745 } 746 if (i != rEnd && rText.GetChar(i) == '#') 747 { 748 ++i; 749 while (i != rEnd 750 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ; 751 } 752 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd) 753 || rText.GetChar(nUriEnd) == '\\') 754 { 755 INetURLObject aUri(UniString(rText, nPos, 756 nUriEnd - nPos), 757 INET_PROT_HTTP, eMechanism, 758 eCharset); 759 if (!aUri.HasError()) 760 { 761 rBegin = nPos; 762 rEnd = nUriEnd; 763 return 764 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI); 765 } 766 } 767 } 768 769 if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 3 770 && rText.GetChar(nPos + 1) == ':' 771 && (rText.GetChar(nPos + 2) == '/' 772 || rText.GetChar(nPos 
+ 2) == '\\')) // 7th, 8th 773 { 774 i = nPos + 3; 775 xub_StrLen nUriEnd = i; 776 while (i != rEnd 777 && checkWChar(rCharClass, rText, &i, &nUriEnd)) ; 778 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)) 779 { 780 INetURLObject aUri(UniString(rText, nPos, 781 nUriEnd - nPos), 782 INET_PROT_FILE, 783 INetURLObject::ENCODE_ALL, 784 RTL_TEXTENCODING_UTF8, 785 INetURLObject::FSYS_DOS); 786 if (!aUri.HasError()) 787 { 788 rBegin = nPos; 789 rEnd = nUriEnd; 790 return 791 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI); 792 } 793 } 794 } 795 } 796 else if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 2 797 && rText.GetChar(nPos) == '\\' 798 && rText.GetChar(nPos + 1) == '\\') // 6th 799 { 800 xub_StrLen i = nPos + 2; 801 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd); 802 if (nLabels >= 1 && i != rEnd && rText.GetChar(i) == '\\') 803 { 804 xub_StrLen nUriEnd = ++i; 805 while (i != rEnd 806 && checkWChar(rCharClass, rText, &i, &nUriEnd, 807 true)) ; 808 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)) 809 { 810 INetURLObject aUri(UniString(rText, nPos, 811 nUriEnd - nPos), 812 INET_PROT_FILE, 813 INetURLObject::ENCODE_ALL, 814 RTL_TEXTENCODING_UTF8, 815 INetURLObject::FSYS_DOS); 816 if (!aUri.HasError()) 817 { 818 rBegin = nPos; 819 rEnd = nUriEnd; 820 return 821 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI); 822 } 823 } 824 } 825 } 826 } 827 if (bBoundary2 && INetMIME::isAtomChar(c)) // 5th 828 { 829 bool bDot = false; 830 for (xub_StrLen i = nPos + 1; i != rEnd; ++i) 831 { 832 sal_Unicode c2 = rText.GetChar(i); 833 if (INetMIME::isAtomChar(c2)) 834 bDot = false; 835 else if (bDot) 836 break; 837 else if (c2 == '.') 838 bDot = true; 839 else 840 { 841 if (c2 == '@') 842 { 843 ++i; 844 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd); 845 if (nLabels >= 1 846 && isBoundary1(rCharClass, rText, i, rEnd)) 847 { 848 INetURLObject aUri(UniString(rText, nPos, i - nPos), 849 INET_PROT_MAILTO, 850 INetURLObject::ENCODE_ALL); 851 if (!aUri.HasError()) 852 { 
853 rBegin = nPos; 854 rEnd = i; 855 return aUri.GetMainURL( 856 INetURLObject::DECODE_TO_IURI); 857 } 858 } 859 } 860 break; 861 } 862 } 863 } 864 bBoundary1 = isBoundary1(rCharClass, rText, nPos, rEnd); 865 bBoundary2 = isBoundary2(rCharClass, rText, nPos, rEnd); 866 } 867 rBegin = rEnd; 868 return UniString(); 869 } 870 871 //============================================================================ 872 // 873 // removePassword 874 // 875 //============================================================================ 876 877 UniString 878 URIHelper::removePassword(UniString const & rURI, 879 INetURLObject::EncodeMechanism eEncodeMechanism, 880 INetURLObject::DecodeMechanism eDecodeMechanism, 881 rtl_TextEncoding eCharset) 882 { 883 INetURLObject aObj(rURI, eEncodeMechanism, eCharset); 884 return aObj.HasError() ? 885 rURI : 886 String(aObj.GetURLNoPass(eDecodeMechanism, eCharset)); 887 } 888 889 //============================================================================ 890 // 891 // queryFSysStyle 892 // 893 //============================================================================ 894 895 INetURLObject::FSysStyle URIHelper::queryFSysStyle(UniString const & rFileUrl, 896 bool bAddConvenienceStyles) 897 throw (uno::RuntimeException) 898 { 899 ::ucbhelper::ContentBroker const * pBroker = ::ucbhelper::ContentBroker::get(); 900 uno::Reference< ucb::XContentProviderManager > xManager; 901 if (pBroker) 902 xManager = pBroker->getContentProviderManagerInterface(); 903 uno::Reference< beans::XPropertySet > xProperties; 904 if (xManager.is()) 905 xProperties 906 = uno::Reference< beans::XPropertySet >( 907 xManager->queryContentProvider(rFileUrl), uno::UNO_QUERY); 908 sal_Int32 nNotation = ucb::FileSystemNotation::UNKNOWN_NOTATION; 909 if (xProperties.is()) 910 try 911 { 912 xProperties->getPropertyValue(rtl::OUString( 913 RTL_CONSTASCII_USTRINGPARAM( 914 "FileSystemNotation"))) 915 >>= nNotation; 916 } 917 catch (beans::UnknownPropertyException const &) {} 918 
catch (lang::WrappedTargetException const &) {} 919 920 // The following code depends on the fact that the 921 // com::sun::star::ucb::FileSystemNotation constants range from UNKNOWN to 922 // MAC, without any holes. The table below has two entries per notation, 923 // the first is used if bAddConvenienceStyles == false, while the second 924 // is used if bAddConvenienceStyles == true: 925 static INetURLObject::FSysStyle const aMap[][2] 926 = { { INetURLObject::FSysStyle(0), 927 INetURLObject::FSYS_DETECT }, 928 // UNKNOWN 929 { INetURLObject::FSYS_UNX, 930 INetURLObject::FSysStyle(INetURLObject::FSYS_VOS 931 | INetURLObject::FSYS_UNX) }, 932 // UNIX 933 { INetURLObject::FSYS_DOS, 934 INetURLObject::FSysStyle(INetURLObject::FSYS_VOS 935 | INetURLObject::FSYS_UNX 936 | INetURLObject::FSYS_DOS) }, 937 // DOS 938 { INetURLObject::FSYS_MAC, 939 INetURLObject::FSysStyle(INetURLObject::FSYS_VOS 940 | INetURLObject::FSYS_UNX 941 | INetURLObject::FSYS_MAC) } }; 942 return aMap[nNotation < ucb::FileSystemNotation::UNKNOWN_NOTATION 943 || nNotation > ucb::FileSystemNotation::MAC_NOTATION ? 944 0 : 945 nNotation 946 - ucb::FileSystemNotation::UNKNOWN_NOTATION] 947 [bAddConvenienceStyles]; 948 } 949