1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 // MARKER(update_precomp.py): autogen include statement, do not remove 29 #include "precompiled_tools.hxx" 30 31 #include <cstddef> 32 #include <limits> 33 34 #include "rtl/tencinfo.h" 35 #include <tools/datetime.hxx> 36 #include <tools/inetmime.hxx> 37 38 namespace unnamed_tools_inetmime {} using namespace unnamed_tools_inetmime; 39 // unnamed namespaces don't work well yet 40 41 //============================================================================ 42 namespace unnamed_tools_inetmime { 43 44 class Charset 45 { 46 rtl_TextEncoding m_eEncoding; 47 const sal_uInt32 * m_pRanges; 48 49 public: 50 inline Charset(rtl_TextEncoding eTheEncoding, 51 const sal_uInt32 * pTheRanges); 52 53 rtl_TextEncoding getEncoding() const { return m_eEncoding; } 54 55 bool contains(sal_uInt32 nChar) const; 56 }; 57 58 inline Charset::Charset(rtl_TextEncoding eTheEncoding, 59 const sal_uInt32 * pTheRanges): 60 m_eEncoding(eTheEncoding), 61 m_pRanges(pTheRanges) 62 { 63 DBG_ASSERT(m_pRanges, "Charset::Charset(): Bad ranges"); 64 } 65 66 //============================================================================ 67 void appendISO88591(UniString & rText, sal_Char const * pBegin, 68 sal_Char const * pEnd); 69 70 } 71 72 //============================================================================ 73 class INetMIMECharsetList_Impl 74 { 75 struct Node 76 { 77 Charset m_aCharset; 78 bool m_bDisabled; 79 Node * m_pNext; 80 81 inline Node(const Charset & rTheCharset, bool bTheDisabled, 82 Node * pTheNext); 83 }; 84 85 Node * m_pFirst; 86 87 public: 88 INetMIMECharsetList_Impl(): m_pFirst(0) {} 89 90 ~INetMIMECharsetList_Impl(); 91 92 void prepend(const Charset & rCharset) 93 { m_pFirst = new Node(rCharset, false, m_pFirst); } 94 95 void includes(sal_uInt32 nChar); 96 97 rtl_TextEncoding getPreferredEncoding(rtl_TextEncoding eDefault 98 = RTL_TEXTENCODING_DONTKNOW) 99 const; 100 101 void reset(); 102 }; 103 104 inline INetMIMECharsetList_Impl::Node::Node(const Charset & rTheCharset, 105 bool bTheDisabled, 106 Node * pTheNext): 107 m_aCharset(rTheCharset), 108 m_bDisabled(bTheDisabled), 109 m_pNext(pTheNext) 110 {} 111 112 //============================================================================ 113 namespace unnamed_tools_inetmime { 114 115 struct Parameter 116 { 117 Parameter * m_pNext; 118 ByteString m_aAttribute; 119 ByteString m_aCharset; 120 ByteString m_aLanguage; 121 ByteString m_aValue; 122 sal_uInt32 m_nSection; 123 bool m_bExtended; 124 125 inline Parameter(Parameter * pTheNext, ByteString const & rTheAttribute, 126 ByteString const & rTheCharset, 127 ByteString const & rTheLanguage, 128 ByteString const & rTheValue, sal_uInt32 nTheSection, 129 bool bTheExtended); 130 }; 131 132 inline Parameter::Parameter(Parameter * pTheNext, 133 ByteString const & rTheAttribute, 134 ByteString const & rTheCharset, 135 ByteString const & rTheLanguage, 136 ByteString const & rTheValue, 137 sal_uInt32 nTheSection, bool bTheExtended): 138 m_pNext(pTheNext), 139 m_aAttribute(rTheAttribute), 140 m_aCharset(rTheCharset), 141 m_aLanguage(rTheLanguage), 142 m_aValue(rTheValue), 143 m_nSection(nTheSection), 144 m_bExtended(bTheExtended) 145 {} 146 147 //============================================================================ 148 struct ParameterList 149 { 150 Parameter * m_pList; 151 152 ParameterList(): m_pList(0) {} 153 154 inline ~ParameterList(); 155 156 Parameter ** find(ByteString const & rAttribute, sal_uInt32 nSection, 157 bool & rPresent); 158 }; 159 160 inline ParameterList::~ParameterList() 161 { 162 while (m_pList) 163 { 164 Parameter * pNext = m_pList->m_pNext; 165 delete m_pList; 166 m_pList = pNext; 167 } 168 } 169 170 //============================================================================ 171 bool parseParameters(ParameterList const & rInput, 172 INetContentTypeParameterList * pOutput); 173 174 } 175 176 //============================================================================ 177 // 178 // Charset 179 // 180 //============================================================================ 181 182 bool Charset::contains(sal_uInt32 nChar) const 183 { 184 for (const sal_uInt32 * p = m_pRanges;;) 185 { 186 if (nChar < *p++) 187 return false; 188 if (nChar <= *p++) 189 return true; 190 } 191 } 192 193 //============================================================================ 194 // 195 // appendISO88591 196 // 197 //============================================================================ 198 199 namespace unnamed_tools_inetmime { 200 201 void appendISO88591(UniString & rText, sal_Char const * pBegin, 202 sal_Char const * pEnd) 203 { 204 xub_StrLen nLength = static_cast< xub_StrLen >(pEnd - pBegin); 205 sal_Unicode * pBuffer = new sal_Unicode[nLength]; 206 for (sal_Unicode * p = pBuffer; pBegin != pEnd;) 207 *p++ = sal_uChar(*pBegin++); 208 rText.Append(pBuffer, nLength); 209 delete[] pBuffer; 210 } 211 212 } 213 214 //============================================================================ 215 // 216 // INetMIMECharsetList_Impl 217 // 218 //============================================================================ 219 220 INetMIMECharsetList_Impl::~INetMIMECharsetList_Impl() 221 { 222 while (m_pFirst) 223 { 224 Node * pRemove = m_pFirst; 225 m_pFirst = m_pFirst->m_pNext; 226 delete pRemove; 227 } 228 } 229 230 //============================================================================ 231 void INetMIMECharsetList_Impl::includes(sal_uInt32 nChar) 232 { 233 for (Node * p = m_pFirst; p; p = p->m_pNext) 234 if (!(p->m_bDisabled || p->m_aCharset.contains(nChar))) 235 p->m_bDisabled = true; 236 } 237 238 //============================================================================ 239 rtl_TextEncoding 240 INetMIMECharsetList_Impl::getPreferredEncoding(rtl_TextEncoding eDefault) 241 const 242 { 243 for (Node * p = m_pFirst; p; p = p->m_pNext) 244 if (!p->m_bDisabled) 245 return p->m_aCharset.getEncoding(); 246 return eDefault; 247 } 248 249 //============================================================================ 250 void INetMIMECharsetList_Impl::reset() 251 { 252 for (Node * p = m_pFirst; p; p = p->m_pNext) 253 p->m_bDisabled = false; 254 } 255 256 //============================================================================ 257 // 258 // ParameterList 259 // 260 //============================================================================ 261 262 Parameter ** ParameterList::find(ByteString const & rAttribute, 263 sal_uInt32 nSection, bool & rPresent) 264 { 265 Parameter ** p = &m_pList; 266 for (; *p; p = &(*p)->m_pNext) 267 { 268 StringCompare eCompare = rAttribute.CompareTo((*p)->m_aAttribute); 269 if (eCompare == COMPARE_GREATER) 270 break; 271 else if (eCompare == COMPARE_EQUAL) 272 { 273 if (nSection > (*p)->m_nSection) 274 break; 275 else if (nSection == (*p)->m_nSection) 276 { 277 rPresent = true; 278 return p; 279 } 280 } 281 } 282 rPresent = false; 283 return p; 284 } 285 286 //============================================================================ 287 // 288 // parseParameters 289 // 290 //============================================================================ 291 292 namespace unnamed_tools_inetmime { 293 294 bool parseParameters(ParameterList const & rInput, 295 INetContentTypeParameterList * pOutput) 296 { 297 if (pOutput) 298 pOutput->Clear(); 299 300 Parameter * pPrev = 0; 301 for (Parameter * p = rInput.m_pList; p; p = p->m_pNext) 302 { 303 if (p->m_nSection > 0 304 && (!pPrev 305 || pPrev->m_nSection != p->m_nSection - 1 306 || pPrev->m_aAttribute != p->m_aAttribute)) 307 return false; 308 pPrev = p; 309 } 310 311 if (pOutput) 312 for (Parameter * p = rInput.m_pList; p;) 313 { 314 bool bCharset = p->m_aCharset.Len() != 0; 315 rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW; 316 if (bCharset) 317 eEncoding 318 = INetMIME::getCharsetEncoding(p->m_aCharset.GetBuffer(), 319 p->m_aCharset.GetBuffer() 320 + rInput.m_pList-> 321 m_aCharset. 322 Len()); 323 UniString aValue; 324 bool bBadEncoding = false; 325 Parameter * pNext = p; 326 do 327 { 328 sal_Size nSize; 329 sal_Unicode * pUnicode 330 = INetMIME::convertToUnicode(pNext->m_aValue.GetBuffer(), 331 pNext->m_aValue.GetBuffer() 332 + pNext->m_aValue.Len(), 333 bCharset && p->m_bExtended ? 334 eEncoding : 335 RTL_TEXTENCODING_UTF8, 336 nSize); 337 if (!pUnicode && !(bCharset && p->m_bExtended)) 338 pUnicode = INetMIME::convertToUnicode( 339 pNext->m_aValue.GetBuffer(), 340 pNext->m_aValue.GetBuffer() 341 + pNext->m_aValue.Len(), 342 RTL_TEXTENCODING_ISO_8859_1, nSize); 343 if (!pUnicode) 344 { 345 bBadEncoding = true; 346 break; 347 } 348 aValue += UniString(pUnicode, static_cast< xub_StrLen >(nSize)); 349 delete[] pUnicode; 350 pNext = pNext->m_pNext; 351 } 352 while (pNext && pNext->m_nSection > 0); 353 if (bBadEncoding) 354 { 355 aValue.Erase(); 356 for (pNext = p;;) 357 { 358 if (pNext->m_bExtended) 359 for (xub_StrLen i = 0; i < pNext->m_aValue.Len(); ++i) 360 aValue += sal_Unicode( 361 sal_Unicode( 362 sal_uChar(pNext->m_aValue.GetChar(i))) 363 | 0xF800); 364 else 365 for (xub_StrLen i = 0; i < pNext->m_aValue.Len(); ++i) 366 aValue 367 += sal_Unicode(sal_uChar 368 (pNext-> 369 m_aValue.GetChar(i))); 370 pNext = pNext->m_pNext; 371 if (!pNext || pNext->m_nSection == 0) 372 break; 373 }; 374 } 375 pOutput->Insert(new INetContentTypeParameter(p->m_aAttribute, 376 p->m_aCharset, 377 p->m_aLanguage, 378 aValue, 379 !bBadEncoding), 380 LIST_APPEND); 381 p = pNext; 382 } 383 return true; 384 } 385 386 } 387 388 //============================================================================ 389 // 390 // INetMIME 391 // 392 //============================================================================ 393 394 // static 395 bool INetMIME::isAtomChar(sal_uInt32 nChar) 396 { 397 static const bool aMap[128] 398 = { false, false, false, false, false, false, false, false, 399 false, false, false, false, false, false, false, false, 400 false, false, false, false, false, false, false, false, 401 false, false, false, false, false, false, false, false, 402 false, true, false, true, true, true, true, true, // !"#$%&' 403 false, false, true, true, false, true, false, true, //()*+,-./ 404 true, true, true, true, true, true, true, true, //01234567 405 true, true, false, false, false, true, false, true, //89:;<=>? 406 false, true, true, true, true, true, true, true, //@ABCDEFG 407 true, true, true, true, true, true, true, true, //HIJKLMNO 408 true, true, true, true, true, true, true, true, //PQRSTUVW 409 true, true, true, false, false, false, true, true, //XYZ[\]^_ 410 true, true, true, true, true, true, true, true, //`abcdefg 411 true, true, true, true, true, true, true, true, //hijklmno 412 true, true, true, true, true, true, true, true, //pqrstuvw 413 true, true, true, true, true, true, true, false //xyz{|}~ 414 }; 415 return isUSASCII(nChar) && aMap[nChar]; 416 } 417 418 //============================================================================ 419 // static 420 bool INetMIME::isTokenChar(sal_uInt32 nChar) 421 { 422 static const sal_Char aMap[128] 423 = { false, false, false, false, false, false, false, false, 424 false, false, false, false, false, false, false, false, 425 false, false, false, false, false, false, false, false, 426 false, false, false, false, false, false, false, false, 427 false, true, false, true, true, true, true, true, // !"#$%&' 428 false, false, true, true, false, true, true, false, //()*+,-./ 429 true, true, true, true, true, true, true, true, //01234567 430 true, true, false, false, false, false, false, false, //89:;<=>? 431 false, true, true, true, true, true, true, true, //@ABCDEFG 432 true, true, true, true, true, true, true, true, //HIJKLMNO 433 true, true, true, true, true, true, true, true, //PQRSTUVW 434 true, true, true, false, false, false, true, true, //XYZ[\]^_ 435 true, true, true, true, true, true, true, true, //`abcdefg 436 true, true, true, true, true, true, true, true, //hijklmno 437 true, true, true, true, true, true, true, true, //pqrstuvw 438 true, true, true, true, true, true, true, false //xyz{|}~ 439 }; 440 return isUSASCII(nChar) && aMap[nChar]; 441 } 442 443 //============================================================================ 444 // static 445 bool INetMIME::isEncodedWordTokenChar(sal_uInt32 nChar) 446 { 447 static const sal_Char aMap[128] 448 = { false, false, false, false, false, false, false, false, 449 false, false, false, false, false, false, false, false, 450 false, false, false, false, false, false, false, false, 451 false, false, false, false, false, false, false, false, 452 false, true, false, true, true, true, true, true, // !"#$%&' 453 false, false, true, true, false, true, false, false, //()*+,-./ 454 true, true, true, true, true, true, true, true, //01234567 455 true, true, false, false, false, false, false, false, //89:;<=>? 456 false, true, true, true, true, true, true, true, //@ABCDEFG 457 true, true, true, true, true, true, true, true, //HIJKLMNO 458 true, true, true, true, true, true, true, true, //PQRSTUVW 459 true, true, true, false, false, false, true, true, //XYZ[\]^_ 460 true, true, true, true, true, true, true, true, //`abcdefg 461 true, true, true, true, true, true, true, true, //hijklmno 462 true, true, true, true, true, true, true, true, //pqrstuvw 463 true, true, true, true, true, true, true, false //xyz{|}~ 464 }; 465 return isUSASCII(nChar) && aMap[nChar]; 466 } 467 468 //============================================================================ 469 // static 470 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar) 471 { 472 static const sal_Char aMap[128] 473 = { false, false, false, false, false, false, false, false, 474 false, false, false, false, false, false, false, false, 475 false, false, false, false, false, false, false, false, 476 false, false, false, false, false, false, false, false, 477 false, true, false, true, true, false, true, true, // !"#$%&' 478 false, false, false, true, true, true, true, true, //()*+,-./ 479 true, true, true, true, true, true, true, true, //01234567 480 true, true, true, true, true, true, true, true, //89:;<=>? 481 true, true, true, true, true, true, true, true, //@ABCDEFG 482 true, true, true, true, true, true, true, true, //HIJKLMNO 483 true, true, true, true, true, true, true, true, //PQRSTUVW 484 true, true, true, true, false, true, true, true, //XYZ[\]^_ 485 true, true, true, true, true, true, true, true, //`abcdefg 486 true, true, true, true, true, true, true, true, //hijklmno 487 true, true, true, true, true, true, true, true, //pqrstuvw 488 true, true, true, false, true, true, true, false //xyz{|}~ 489 }; 490 return isUSASCII(nChar) && aMap[nChar]; 491 } 492 493 //============================================================================ 494 // static 495 sal_uInt32 INetMIME::getDigit(int nWeight) 496 { 497 DBG_ASSERT(nWeight >= 0 && nWeight < 10, 498 "INetMIME::getDigit(): Bad weight"); 499 500 static const sal_Char aDigits[16] 501 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }; 502 return aDigits[nWeight]; 503 } 504 505 //============================================================================ 506 // static 507 sal_uInt32 INetMIME::getHexDigit(int nWeight) 508 { 509 DBG_ASSERT(nWeight >= 0 && nWeight < 16, 510 "INetMIME::getHexDigit(): Bad weight"); 511 512 static const sal_Char aDigits[16] 513 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 514 'D', 'E', 'F' }; 515 return aDigits[nWeight]; 516 } 517 518 //============================================================================ 519 // static 520 sal_uInt32 INetMIME::getBase64Digit(int nWeight) 521 { 522 DBG_ASSERT(nWeight >= 0 && nWeight < 64, 523 "INetMIME::getBase64Digit(): Bad weight"); 524 525 static const sal_Char aDigits[64] 526 = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 527 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 528 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 529 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 530 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' }; 531 return aDigits[nWeight]; 532 } 533 534 //============================================================================ 535 // static 536 bool INetMIME::equalIgnoreCase(const sal_Char * pBegin1, 537 const sal_Char * pEnd1, 538 const sal_Char * pBegin2, 539 const sal_Char * pEnd2) 540 { 541 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pBegin2 && pBegin2 <= pEnd2, 542 "INetMIME::equalIgnoreCase(): Bad sequences"); 543 544 if (pEnd1 - pBegin1 != pEnd2 - pBegin2) 545 return false; 546 while (pBegin1 != pEnd1) 547 if (toUpperCase(*pBegin1++) != toUpperCase(*pBegin2++)) 548 return false; 549 return true; 550 } 551 552 //============================================================================ 553 // static 554 bool INetMIME::equalIgnoreCase(const sal_Char * pBegin1, 555 const sal_Char * pEnd1, 556 const sal_Char * pString2) 557 { 558 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2, 559 "INetMIME::equalIgnoreCase(): Bad sequences"); 560 561 while (*pString2 != 0) 562 if (pBegin1 == pEnd1 563 || toUpperCase(*pBegin1++) != toUpperCase(*pString2++)) 564 return false; 565 return pBegin1 == pEnd1; 566 } 567 568 //============================================================================ 569 // static 570 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1, 571 const sal_Unicode * pEnd1, 572 const sal_Char * pString2) 573 { 574 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2, 575 "INetMIME::equalIgnoreCase(): Bad sequences"); 576 577 while (*pString2 != 0) 578 if (pBegin1 == pEnd1 579 || toUpperCase(*pBegin1++) != toUpperCase(*pString2++)) 580 return false; 581 return pBegin1 == pEnd1; 582 } 583 584 //============================================================================ 585 // static 586 const sal_Char * INetMIME::skipLinearWhiteSpace(const sal_Char * pBegin, 587 const sal_Char * pEnd) 588 { 589 DBG_ASSERT(pBegin && pBegin <= pEnd, 590 "INetMIME::skipLinearWhiteSpace(): Bad sequence"); 591 592 while (pBegin != pEnd) 593 switch (*pBegin) 594 { 595 case '\t': 596 case ' ': 597 ++pBegin; 598 break; 599 600 case 0x0D: // CR 601 if (startsWithLineFolding(pBegin, pEnd)) 602 pBegin += 3; 603 else 604 return pBegin; 605 break; 606 607 default: 608 return pBegin; 609 } 610 return pBegin; 611 } 612 613 //============================================================================ 614 // static 615 const sal_Unicode * INetMIME::skipLinearWhiteSpace(const sal_Unicode * pBegin, 616 const sal_Unicode * pEnd) 617 { 618 DBG_ASSERT(pBegin && pBegin <= pEnd, 619 "INetMIME::skipLinearWhiteSpace(): Bad sequence"); 620 621 while (pBegin != pEnd) 622 switch (*pBegin) 623 { 624 case '\t': 625 case ' ': 626 ++pBegin; 627 break; 628 629 case 0x0D: // CR 630 if (startsWithLineFolding(pBegin, pEnd)) 631 pBegin += 3; 632 else 633 return pBegin; 634 break; 635 636 default: 637 return pBegin; 638 } 639 return pBegin; 640 } 641 642 //============================================================================ 643 // static 644 const sal_Char * INetMIME::skipComment(const sal_Char * pBegin, 645 const sal_Char * pEnd) 646 { 647 DBG_ASSERT(pBegin && pBegin <= pEnd, 648 "INetMIME::skipComment(): Bad sequence"); 649 650 if (pBegin != pEnd && *pBegin == '(') 651 { 652 sal_uInt32 nLevel = 0; 653 for (const sal_Char * p = pBegin; p != pEnd;) 654 switch (*p++) 655 { 656 case '(': 657 ++nLevel; 658 break; 659 660 case ')': 661 if (--nLevel == 0) 662 return p; 663 break; 664 665 case '\\': 666 if (p != pEnd) 667 ++p; 668 break; 669 } 670 } 671 return pBegin; 672 } 673 674 //============================================================================ 675 // static 676 const sal_Unicode * INetMIME::skipComment(const sal_Unicode * pBegin, 677 const sal_Unicode * pEnd) 678 { 679 DBG_ASSERT(pBegin && pBegin <= pEnd, 680 "INetMIME::skipComment(): Bad sequence"); 681 682 if (pBegin != pEnd && *pBegin == '(') 683 { 684 sal_uInt32 nLevel = 0; 685 for (const sal_Unicode * p = pBegin; p != pEnd;) 686 switch (*p++) 687 { 688 case '(': 689 ++nLevel; 690 break; 691 692 case ')': 693 if (--nLevel == 0) 694 return p; 695 break; 696 697 case '\\': 698 if (p != pEnd) 699 ++p; 700 break; 701 } 702 } 703 return pBegin; 704 } 705 706 //============================================================================ 707 // static 708 const sal_Char * INetMIME::skipLinearWhiteSpaceComment(const sal_Char * 709 pBegin, 710 const sal_Char * pEnd) 711 { 712 DBG_ASSERT(pBegin && pBegin <= pEnd, 713 "INetMIME::skipLinearWhiteSpaceComment(): Bad sequence"); 714 715 while (pBegin != pEnd) 716 switch (*pBegin) 717 { 718 case '\t': 719 case ' ': 720 ++pBegin; 721 break; 722 723 case 0x0D: // CR 724 if (startsWithLineFolding(pBegin, pEnd)) 725 pBegin += 3; 726 else 727 return pBegin; 728 break; 729 730 case '(': 731 { 732 const sal_Char * p = skipComment(pBegin, pEnd); 733 if (p == pBegin) 734 return pBegin; 735 pBegin = p; 736 break; 737 } 738 739 default: 740 return pBegin; 741 } 742 return pBegin; 743 } 744 745 //============================================================================ 746 // static 747 const sal_Unicode * INetMIME::skipLinearWhiteSpaceComment(const sal_Unicode * 748 pBegin, 749 const sal_Unicode * 750 pEnd) 751 { 752 DBG_ASSERT(pBegin && pBegin <= pEnd, 753 "INetMIME::skipLinearWhiteSpaceComment(): Bad sequence"); 754 755 while (pBegin != pEnd) 756 switch (*pBegin) 757 { 758 case '\t': 759 case ' ': 760 ++pBegin; 761 break; 762 763 case 0x0D: // CR 764 if (startsWithLineFolding(pBegin, pEnd)) 765 pBegin += 3; 766 else 767 return pBegin; 768 break; 769 770 case '(': 771 { 772 const sal_Unicode * p = skipComment(pBegin, pEnd); 773 if (p == pBegin) 774 return pBegin; 775 pBegin = p; 776 break; 777 } 778 779 default: 780 return pBegin; 781 } 782 return pBegin; 783 } 784 785 //============================================================================ 786 // static 787 const sal_Char * INetMIME::skipQuotedString(const sal_Char * pBegin, 788 const sal_Char * pEnd) 789 { 790 DBG_ASSERT(pBegin && pBegin <= pEnd, 791 "INetMIME::skipQuotedString(): Bad sequence"); 792 793 if (pBegin != pEnd && *pBegin == '"') 794 for (const sal_Char * p = pBegin + 1; p != pEnd;) 795 switch (*p++) 796 { 797 case 0x0D: // CR 798 if (pEnd - p < 2 || *p++ != 0x0A // LF 799 || !isWhiteSpace(*p++)) 800 return pBegin; 801 break; 802 803 case '"': 804 return p; 805 806 case '\\': 807 if (p != pEnd) 808 ++p; 809 break; 810 } 811 return pBegin; 812 } 813 814 //============================================================================ 815 // static 816 const sal_Unicode * INetMIME::skipQuotedString(const sal_Unicode * pBegin, 817 const sal_Unicode * pEnd) 818 { 819 DBG_ASSERT(pBegin && pBegin <= pEnd, 820 "INetMIME::skipQuotedString(): Bad sequence"); 821 822 if (pBegin != pEnd && *pBegin == '"') 823 for (const sal_Unicode * p = pBegin + 1; p != pEnd;) 824 switch (*p++) 825 { 826 case 0x0D: // CR 827 if (pEnd - p < 2 || *p++ != 0x0A // LF 828 || !isWhiteSpace(*p++)) 829 return pBegin; 830 break; 831 832 case '"': 833 return p; 834 835 case '\\': 836 if (p != pEnd) 837 ++p; 838 break; 839 } 840 return pBegin; 841 } 842 843 //============================================================================ 844 // static 845 const sal_Char * INetMIME::scanAtom(const sal_Char * pBegin, 846 const sal_Char * pEnd) 847 { 848 while (pBegin != pEnd && isAtomChar(*pBegin)) 849 ++pBegin; 850 return pBegin; 851 } 852 853 //============================================================================ 854 // static 855 const sal_Unicode * INetMIME::scanAtom(const sal_Unicode * pBegin, 856 const sal_Unicode * pEnd) 857 { 858 while (pBegin != pEnd && isAtomChar(*pBegin)) 859 ++pBegin; 860 return pBegin; 861 } 862 863 //============================================================================ 864 // static 865 bool INetMIME::scanUnsigned(const sal_Char *& rBegin, const sal_Char * pEnd, 866 bool bLeadingZeroes, sal_uInt32 & rValue) 867 { 868 sal_uInt64 nTheValue = 0; 869 const sal_Char * p = rBegin; 870 for ( ; p != pEnd; ++p) 871 { 872 int nWeight = getWeight(*p); 873 if (nWeight < 0) 874 break; 875 nTheValue = 10 * nTheValue + nWeight; 876 if (nTheValue > std::numeric_limits< sal_uInt32 >::max()) 877 return false; 878 } 879 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1))) 880 return false; 881 rBegin = p; 882 rValue = sal_uInt32(nTheValue); 883 return true; 884 } 885 886 //============================================================================ 887 // static 888 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin, 889 const sal_Unicode * pEnd, bool bLeadingZeroes, 890 sal_uInt32 & rValue) 891 { 892 sal_uInt64 nTheValue = 0; 893 const sal_Unicode * p = rBegin; 894 for ( ; p != pEnd; ++p) 895 { 896 int nWeight = getWeight(*p); 897 if (nWeight < 0) 898 break; 899 nTheValue = 10 * nTheValue + nWeight; 900 if (nTheValue > std::numeric_limits< sal_uInt32 >::max()) 901 return false; 902 } 903 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1))) 904 return false; 905 rBegin = p; 906 rValue = sal_uInt32(nTheValue); 907 return true; 908 } 909 910 //============================================================================ 911 // static 912 bool INetMIME::scanUnsignedHex(const sal_Char *& rBegin, 913 const sal_Char * pEnd, bool bLeadingZeroes, 914 sal_uInt32 & rValue) 915 { 916 sal_uInt64 nTheValue = 0; 917 const sal_Char * p = rBegin; 918 for ( p = rBegin; p != pEnd; ++p) 919 { 920 int nWeight = getHexWeight(*p); 921 if (nWeight < 0) 922 break; 923 nTheValue = nTheValue << 4 | nWeight; 924 if (nTheValue > std::numeric_limits< sal_uInt32 >::max()) 925 return false; 926 } 927 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1))) 928 return false; 929 rBegin = p; 930 rValue = sal_uInt32(nTheValue); 931 return true; 932 } 933 934 //============================================================================ 935 // static 936 bool INetMIME::scanUnsignedHex(const sal_Unicode *& rBegin, 937 const sal_Unicode * pEnd, bool bLeadingZeroes, 938 sal_uInt32 & rValue) 939 { 940 sal_uInt64 nTheValue = 0; 941 const sal_Unicode * p = rBegin; 942 for ( ; p != pEnd; ++p) 943 { 944 int nWeight = getHexWeight(*p); 945 if (nWeight < 0) 946 break; 947 nTheValue = nTheValue << 4 | nWeight; 948 if (nTheValue > std::numeric_limits< sal_uInt32 >::max()) 949 return false; 950 } 951 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1))) 952 return false; 953 rBegin = p; 954 rValue = sal_uInt32(nTheValue); 955 return true; 956 } 957 958 //============================================================================ 959 // static 960 const sal_Char * INetMIME::scanQuotedBlock(const sal_Char * pBegin, 961 const sal_Char * pEnd, 962 sal_uInt32 nOpening, 963 sal_uInt32 nClosing, 964 sal_Size & rLength, 965 bool & rModify) 966 { 967 DBG_ASSERT(pBegin && pBegin <= pEnd, 968 "INetMIME::scanQuotedBlock(): Bad sequence"); 969 970 if (pBegin != pEnd && static_cast< unsigned char >(*pBegin) == nOpening) 971 { 972 ++rLength; 973 ++pBegin; 974 while (pBegin != pEnd) 975 if (static_cast< unsigned char >(*pBegin) == nClosing) 976 { 977 ++rLength; 978 return ++pBegin; 979 } 980 else 981 { 982 sal_uInt32 c = *pBegin++; 983 switch (c) 984 { 985 case 0x0D: // CR 986 if (pBegin != pEnd && *pBegin == 0x0A) // LF 987 if (pEnd - pBegin >= 2 && isWhiteSpace(pBegin[1])) 988 { 989 ++rLength; 990 rModify = true; 991 pBegin += 2; 992 } 993 else 994 { 995 rLength += 3; 996 rModify = true; 997 ++pBegin; 998 } 999 else 1000 ++rLength; 1001 break; 1002 1003 case '\\': 1004 ++rLength; 1005 if (pBegin != pEnd) 1006 { 1007 if (startsWithLineBreak(pBegin, pEnd) 1008 && (pEnd - pBegin < 3 1009 || !isWhiteSpace(pBegin[2]))) 1010 { 1011 rLength += 3; 1012 rModify = true; 1013 pBegin += 2; 1014 } 1015 else 1016 ++pBegin; 1017 } 1018 break; 1019 1020 default: 1021 ++rLength; 1022 if (!isUSASCII(c)) 1023 rModify = true; 1024 break; 1025 } 1026 } 1027 } 1028 return pBegin; 1029 } 1030 1031 //============================================================================ 1032 // static 1033 const sal_Unicode * INetMIME::scanQuotedBlock(const sal_Unicode * pBegin, 1034 const sal_Unicode * pEnd, 1035 sal_uInt32 nOpening, 1036 sal_uInt32 nClosing, 1037 sal_Size & rLength, 1038 bool & rModify) 1039 { 1040 DBG_ASSERT(pBegin && pBegin <= pEnd, 1041 "INetMIME::scanQuotedBlock(): Bad sequence"); 1042 1043 if (pBegin != pEnd && *pBegin == nOpening) 1044 { 1045 ++rLength; 1046 ++pBegin; 1047 while (pBegin != pEnd) 1048 if (*pBegin == nClosing) 1049 { 1050 ++rLength; 1051 return ++pBegin; 1052 } 1053 else 1054 { 1055 sal_uInt32 c = *pBegin++; 1056 switch (c) 1057 { 1058 case 0x0D: // CR 1059 if (pBegin != pEnd && *pBegin == 0x0A) // LF 1060 if (pEnd - pBegin >= 2 && isWhiteSpace(pBegin[1])) 1061 { 1062 ++rLength; 1063 rModify = true; 1064 pBegin += 2; 1065 } 1066 else 1067 { 1068 rLength += 3; 1069 rModify = true; 1070 ++pBegin; 1071 } 1072 else 1073 ++rLength; 1074 break; 1075 1076 case '\\': 1077 ++rLength; 1078 if (pBegin != pEnd) 1079 { 1080 if (startsWithLineBreak(pBegin, pEnd) 1081 && (pEnd - pBegin < 3 1082 || !isWhiteSpace(pBegin[2]))) 1083 { 1084 rLength += 3; 1085 rModify = true; 1086 pBegin += 2; 1087 } 1088 else 1089 ++pBegin; 1090 } 1091 break; 1092 1093 default: 1094 ++rLength; 1095 if (!isUSASCII(c)) 1096 rModify = true; 1097 break; 1098 } 1099 } 1100 } 1101 return pBegin; 1102 } 1103 1104 //============================================================================ 1105 // static 1106 sal_Char const * INetMIME::scanParameters(sal_Char const * pBegin, 1107 sal_Char const * pEnd, 1108 INetContentTypeParameterList * 1109 pParameters) 1110 { 1111 ParameterList aList; 1112 sal_Char const * pParameterBegin = pBegin; 1113 for (sal_Char const * p = pParameterBegin;; pParameterBegin = p) 1114 { 1115 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd); 1116 if (pParameterBegin == pEnd || *pParameterBegin != ';') 1117 break; 1118 p = pParameterBegin + 1; 1119 1120 sal_Char const * pAttributeBegin = skipLinearWhiteSpaceComment(p, 1121 pEnd); 1122 p = pAttributeBegin; 1123 bool bDowncaseAttribute = false; 1124 while (p != pEnd && isTokenChar(*p) && *p != '*') 1125 { 1126 bDowncaseAttribute = bDowncaseAttribute || isUpperCase(*p); 1127 ++p; 1128 } 1129 if (p == pAttributeBegin) 1130 break; 1131 ByteString aAttribute( 1132 pAttributeBegin, static_cast< xub_StrLen >(p - pAttributeBegin)); 1133 if (bDowncaseAttribute) 1134 aAttribute.ToLowerAscii(); 1135 1136 sal_uInt32 nSection = 0; 1137 if (p != pEnd && *p == '*') 1138 { 1139 ++p; 1140 if (p != pEnd && isDigit(*p) 1141 && !scanUnsigned(p, pEnd, false, nSection)) 1142 break; 1143 } 1144 1145 bool bPresent; 1146 Parameter ** pPos = aList.find(aAttribute, nSection, bPresent); 1147 if (bPresent) 1148 break; 1149 1150 bool bExtended = false; 1151 if (p != pEnd && *p == '*') 1152 { 1153 ++p; 1154 bExtended = true; 1155 } 1156 1157 p = skipLinearWhiteSpaceComment(p, pEnd); 1158 1159 if (p == pEnd || *p != '=') 1160 break; 1161 1162 p = skipLinearWhiteSpaceComment(p + 1, pEnd); 1163 1164 ByteString aCharset; 1165 ByteString aLanguage; 1166 ByteString aValue; 1167 if (bExtended) 1168 { 1169 if (nSection == 0) 1170 { 1171 sal_Char const * pCharsetBegin = p; 1172 bool bDowncaseCharset = false; 1173 while (p != pEnd && isTokenChar(*p) && *p != '\'') 1174 { 1175 bDowncaseCharset = bDowncaseCharset || isUpperCase(*p); 1176 ++p; 1177 } 1178 if (p == pCharsetBegin) 1179 break; 1180 if (pParameters) 1181 { 1182 aCharset = ByteString( 1183 pCharsetBegin, 1184 static_cast< xub_StrLen >(p - pCharsetBegin)); 1185 if (bDowncaseCharset) 1186 aCharset.ToLowerAscii(); 1187 } 1188 1189 if (p == pEnd || *p != '\'') 1190 break; 1191 ++p; 1192 1193 sal_Char const * pLanguageBegin = p; 1194 bool bDowncaseLanguage = false; 1195 int nLetters = 0; 1196 for (; p != pEnd; ++p) 1197 if (isAlpha(*p)) 1198 { 1199 if (++nLetters > 8) 1200 break; 1201 bDowncaseLanguage = bDowncaseLanguage 1202 || isUpperCase(*p); 1203 } 1204 else if (*p == '-') 1205 { 1206 if (nLetters == 0) 1207 break; 1208 nLetters = 0; 1209 } 1210 else 1211 break; 1212 if (nLetters == 0 || nLetters > 8) 1213 break; 1214 if (pParameters) 1215 { 1216 aLanguage = ByteString( 1217 pLanguageBegin, 1218 static_cast< xub_StrLen >(p - pLanguageBegin)); 1219 if (bDowncaseLanguage) 1220 aLanguage.ToLowerAscii(); 1221 } 1222 1223 if (p == pEnd || *p != '\'') 1224 break; 1225 ++p; 1226 } 1227 if (pParameters) 1228 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p))) 1229 { 1230 if (*p == '%') 1231 { 1232 if (p + 2 < pEnd) 1233 { 1234 int nWeight1 = getHexWeight(p[1]); 1235 int nWeight2 = getHexWeight(p[2]); 1236 if (nWeight1 >= 0 && nWeight2 >= 0) 1237 { 1238 aValue += sal_Char(nWeight1 << 4 | nWeight2); 1239 p += 3; 1240 continue; 1241 } 1242 } 1243 } 1244 aValue += *p++; 1245 } 1246 else 1247 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p))) 1248 ++p; 1249 } 1250 else if (p != pEnd && *p == '"') 1251 if (pParameters) 1252 { 1253 bool bInvalid = false; 1254 for (++p;;) 1255 { 1256 if (p == pEnd) 1257 { 1258 bInvalid = true; 1259 break; 1260 } 1261 else if (*p == '"') 1262 { 1263 ++p; 1264 break; 1265 } 1266 else if (*p == 0x0D) // CR 1267 { 1268 if (pEnd - p < 3 || p[1] != 0x0A // LF 1269 || !isWhiteSpace(p[2])) 1270 { 1271 bInvalid = true; 1272 break; 1273 } 1274 p += 2; 1275 } 1276 else if (*p == '\\' && ++p == pEnd) 1277 { 1278 bInvalid = true; 1279 break; 1280 } 1281 aValue += *p++; 1282 } 1283 if (bInvalid) 1284 break; 1285 } 1286 else 1287 { 1288 sal_Char const * pStringEnd = skipQuotedString(p, pEnd); 1289 if (p == pStringEnd) 1290 break; 1291 p = pStringEnd; 1292 } 1293 else 1294 { 1295 sal_Char const * pTokenBegin = p; 1296 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p))) 1297 ++p; 1298 if (p == pTokenBegin) 1299 break; 1300 if (pParameters) 1301 aValue = ByteString( 1302 pTokenBegin, static_cast< xub_StrLen >(p - pTokenBegin)); 1303 } 1304 1305 *pPos = new Parameter(*pPos, aAttribute, aCharset, aLanguage, aValue, 1306 nSection, bExtended); 1307 } 1308 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin; 1309 } 1310 1311 //============================================================================ 1312 // static 1313 sal_Unicode const * INetMIME::scanParameters(sal_Unicode const * pBegin, 1314 sal_Unicode const * pEnd, 1315 INetContentTypeParameterList * 1316 pParameters) 1317 { 1318 ParameterList aList; 1319 sal_Unicode const * pParameterBegin = pBegin; 1320 for (sal_Unicode const * p = pParameterBegin;; pParameterBegin = p) 1321 { 1322 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd); 1323 if (pParameterBegin == pEnd || *pParameterBegin != ';') 1324 break; 1325 p = pParameterBegin + 1; 1326 1327 sal_Unicode const * pAttributeBegin 1328 = skipLinearWhiteSpaceComment(p, pEnd); 1329 p = pAttributeBegin; 1330 bool bDowncaseAttribute = false; 1331 while (p != pEnd && isTokenChar(*p) && *p != '*') 1332 { 1333 bDowncaseAttribute = bDowncaseAttribute || isUpperCase(*p); 1334 ++p; 1335 } 1336 if (p == pAttributeBegin) 1337 break; 1338 ByteString aAttribute = ByteString( 1339 pAttributeBegin, static_cast< xub_StrLen >(p - pAttributeBegin), 1340 RTL_TEXTENCODING_ASCII_US); 1341 if (bDowncaseAttribute) 1342 aAttribute.ToLowerAscii(); 1343 1344 sal_uInt32 nSection = 0; 1345 if (p != pEnd && *p == '*') 1346 { 1347 ++p; 1348 if (p != pEnd && isDigit(*p) 1349 && !scanUnsigned(p, pEnd, false, nSection)) 1350 break; 1351 } 1352 1353 bool bPresent; 1354 Parameter ** pPos = aList.find(aAttribute, nSection, bPresent); 1355 if (bPresent) 1356 break; 1357 1358 bool bExtended = false; 1359 if (p != pEnd && *p == '*') 1360 { 1361 ++p; 1362 bExtended = true; 1363 } 1364 1365 p = skipLinearWhiteSpaceComment(p, pEnd); 1366 1367 if (p == pEnd || *p != '=') 1368 break; 1369 1370 p = skipLinearWhiteSpaceComment(p + 1, pEnd); 1371 1372 ByteString aCharset; 1373 ByteString aLanguage; 1374 ByteString aValue; 1375 if (bExtended) 1376 { 1377 if (nSection == 0) 1378 { 1379 sal_Unicode const * pCharsetBegin = p; 1380 bool bDowncaseCharset = false; 1381 while (p != pEnd && isTokenChar(*p) && *p != '\'') 1382 { 1383 bDowncaseCharset = bDowncaseCharset || isUpperCase(*p); 1384 ++p; 1385 } 1386 if (p == pCharsetBegin) 1387 break; 1388 if (pParameters) 1389 { 1390 aCharset = ByteString( 1391 pCharsetBegin, 1392 static_cast< xub_StrLen >(p - pCharsetBegin), 1393 RTL_TEXTENCODING_ASCII_US); 1394 if (bDowncaseCharset) 1395 aCharset.ToLowerAscii(); 1396 } 1397 1398 if (p == pEnd || *p != '\'') 1399 break; 1400 ++p; 1401 1402 sal_Unicode const * pLanguageBegin = p; 1403 bool bDowncaseLanguage = false; 1404 int nLetters = 0; 1405 for (; p != pEnd; ++p) 1406 if (isAlpha(*p)) 1407 { 1408 if (++nLetters > 8) 1409 break; 1410 bDowncaseLanguage = bDowncaseLanguage 1411 || isUpperCase(*p); 1412 } 1413 else if (*p == '-') 1414 { 1415 if (nLetters == 0) 1416 break; 1417 nLetters = 0; 1418 } 1419 else 1420 break; 1421 if (nLetters == 0 || nLetters > 8) 1422 break; 1423 if (pParameters) 1424 { 1425 aLanguage = ByteString( 1426 pLanguageBegin, 1427 static_cast< xub_StrLen >(p - pLanguageBegin), 1428 RTL_TEXTENCODING_ASCII_US); 1429 if (bDowncaseLanguage) 1430 aLanguage.ToLowerAscii(); 1431 } 1432 1433 if (p == pEnd || *p != '\'') 1434 break; 1435 ++p; 1436 } 1437 if (pParameters) 1438 { 1439 INetMIMEStringOutputSink 1440 aSink(0, INetMIMEOutputSink::NO_LINE_LENGTH_LIMIT); 1441 while (p != pEnd) 1442 { 1443 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd); 1444 if (isUSASCII(nChar) && !isTokenChar(nChar)) 1445 break; 1446 if (nChar == '%' && p + 1 < pEnd) 1447 { 1448 int nWeight1 = getHexWeight(p[0]); 1449 int nWeight2 = getHexWeight(p[1]); 1450 if (nWeight1 >= 0 && nWeight2 >= 0) 1451 { 1452 aSink << sal_Char(nWeight1 << 4 | nWeight2); 1453 p += 2; 1454 continue; 1455 } 1456 } 1457 INetMIME::writeUTF8(aSink, nChar); 1458 } 1459 aValue = aSink.takeBuffer(); 1460 } 1461 else 1462 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p))) 1463 ++p; 1464 } 1465 else if (p != pEnd && *p == '"') 1466 if (pParameters) 1467 { 1468 INetMIMEStringOutputSink 1469 aSink(0, INetMIMEOutputSink::NO_LINE_LENGTH_LIMIT); 1470 bool bInvalid = false; 1471 for (++p;;) 1472 { 1473 if (p == pEnd) 1474 { 1475 bInvalid = true; 1476 break; 1477 } 1478 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd); 1479 if (nChar == '"') 1480 break; 1481 else if (nChar == 0x0D) // CR 1482 { 1483 if (pEnd - p < 2 || *p++ != 0x0A // LF 1484 || !isWhiteSpace(*p)) 1485 { 1486 bInvalid = true; 1487 break; 1488 } 1489 nChar = sal_uChar(*p++); 1490 } 1491 else if (nChar == '\\') 1492 { 1493 if (p == pEnd) 1494 { 1495 bInvalid = true; 1496 break; 1497 } 1498 nChar = INetMIME::getUTF32Character(p, pEnd); 1499 } 1500 INetMIME::writeUTF8(aSink, nChar); 1501 } 1502 if (bInvalid) 1503 break; 1504 aValue = aSink.takeBuffer(); 1505 } 1506 else 1507 { 1508 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd); 1509 if (p == pStringEnd) 1510 break; 1511 p = pStringEnd; 1512 } 1513 else 1514 { 1515 sal_Unicode const * pTokenBegin = p; 1516 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p))) 1517 ++p; 1518 if (p == pTokenBegin) 1519 break; 1520 if (pParameters) 1521 aValue = ByteString( 1522 pTokenBegin, static_cast< xub_StrLen >(p - pTokenBegin), 1523 RTL_TEXTENCODING_UTF8); 1524 } 1525 1526 *pPos = new Parameter(*pPos, aAttribute, aCharset, aLanguage, aValue, 1527 nSection, bExtended); 1528 } 1529 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin; 1530 } 1531 1532 //============================================================================ 1533 // static 1534 const sal_Char * INetMIME::getCharsetName(rtl_TextEncoding eEncoding) 1535 { 1536 if (rtl_isOctetTextEncoding(eEncoding)) 1537 { 1538 char const * p = rtl_getMimeCharsetFromTextEncoding(eEncoding); 1539 DBG_ASSERT(p, "INetMIME::getCharsetName(): Unsupported encoding"); 1540 return p; 1541 } 1542 else 1543 switch (eEncoding) 1544 { 1545 case RTL_TEXTENCODING_UCS4: 1546 return "ISO-10646-UCS-4"; 1547 1548 case RTL_TEXTENCODING_UCS2: 1549 return "ISO-10646-UCS-2"; 1550 1551 default: 1552 DBG_ERROR("INetMIME::getCharsetName(): Unsupported encoding"); 1553 return 0; 1554 } 1555 } 1556 1557 //============================================================================ 1558 namespace unnamed_tools_inetmime { 1559 1560 struct EncodingEntry 1561 { 1562 sal_Char const * m_aName; 1563 rtl_TextEncoding m_eEncoding; 1564 }; 1565 1566 //============================================================================ 1567 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/ 1568 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise 1569 // noted: 1570 EncodingEntry const aEncodingMap[] 1571 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US }, 1572 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US }, 1573 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US }, 1574 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US }, 1575 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US }, 1576 { "ASCII", RTL_TEXTENCODING_ASCII_US }, 1577 { "ISO646-US", RTL_TEXTENCODING_ASCII_US }, 1578 { "US", RTL_TEXTENCODING_ASCII_US }, 1579 { "IBM367", RTL_TEXTENCODING_ASCII_US }, 1580 { "CP367", RTL_TEXTENCODING_ASCII_US }, 1581 { "CSASCII", RTL_TEXTENCODING_ASCII_US }, 1582 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 }, 1583 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 }, 1584 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 }, 1585 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 }, 1586 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 }, 1587 { "L1", RTL_TEXTENCODING_ISO_8859_1 }, 1588 { "IBM819", RTL_TEXTENCODING_ISO_8859_1 }, 1589 { "CP819", RTL_TEXTENCODING_ISO_8859_1 }, 1590 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 }, 1591 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 }, 1592 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 }, 1593 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 }, 1594 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 }, 1595 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 }, 1596 { "L2", RTL_TEXTENCODING_ISO_8859_2 }, 1597 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 }, 1598 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 }, 1599 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 }, 1600 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 }, 1601 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 }, 1602 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 }, 1603 { "L3", RTL_TEXTENCODING_ISO_8859_3 }, 1604 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 }, 1605 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 }, 1606 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 }, 1607 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 }, 1608 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 }, 1609 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 }, 1610 { "L4", RTL_TEXTENCODING_ISO_8859_4 }, 1611 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 }, 1612 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 }, 1613 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 }, 1614 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 }, 1615 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 }, 1616 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 }, 1617 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 }, 1618 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 }, 1619 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 }, 1620 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 }, 1621 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 }, 1622 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 }, 1623 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 }, 1624 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 }, 1625 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 }, 1626 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 }, 1627 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 }, 1628 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 }, 1629 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 }, 1630 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 }, 1631 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 }, 1632 { "GREEK", RTL_TEXTENCODING_ISO_8859_7 }, 1633 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 }, 1634 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 }, 1635 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 }, 1636 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 }, 1637 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 }, 1638 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 }, 1639 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 }, 1640 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 }, 1641 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 }, 1642 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 }, 1643 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 }, 1644 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 }, 1645 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 }, 1646 { "L5", RTL_TEXTENCODING_ISO_8859_9 }, 1647 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 }, 1648 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047 1649 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 }, 1650 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047 1651 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN }, 1652 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN }, 1653 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN }, 1654 { "IBM437", RTL_TEXTENCODING_IBM_437 }, 1655 { "CP437", RTL_TEXTENCODING_IBM_437 }, 1656 { "437", RTL_TEXTENCODING_IBM_437 }, 1657 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 }, 1658 { "IBM850", RTL_TEXTENCODING_IBM_850 }, 1659 { "CP850", RTL_TEXTENCODING_IBM_850 }, 1660 { "850", RTL_TEXTENCODING_IBM_850 }, 1661 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 }, 1662 { "IBM860", RTL_TEXTENCODING_IBM_860 }, 1663 { "CP860", RTL_TEXTENCODING_IBM_860 }, 1664 { "860", RTL_TEXTENCODING_IBM_860 }, 1665 { "CSIBM860", RTL_TEXTENCODING_IBM_860 }, 1666 { "IBM861", RTL_TEXTENCODING_IBM_861 }, 1667 { "CP861", RTL_TEXTENCODING_IBM_861 }, 1668 { "861", RTL_TEXTENCODING_IBM_861 }, 1669 { "CP-IS", RTL_TEXTENCODING_IBM_861 }, 1670 { "CSIBM861", RTL_TEXTENCODING_IBM_861 }, 1671 { "IBM863", RTL_TEXTENCODING_IBM_863 }, 1672 { "CP863", RTL_TEXTENCODING_IBM_863 }, 1673 { "863", RTL_TEXTENCODING_IBM_863 }, 1674 { "CSIBM863", RTL_TEXTENCODING_IBM_863 }, 1675 { "IBM865", RTL_TEXTENCODING_IBM_865 }, 1676 { "CP865", RTL_TEXTENCODING_IBM_865 }, 1677 { "865", RTL_TEXTENCODING_IBM_865 }, 1678 { "CSIBM865", RTL_TEXTENCODING_IBM_865 }, 1679 { "IBM775", RTL_TEXTENCODING_IBM_775 }, 1680 { "CP775", RTL_TEXTENCODING_IBM_775 }, 1681 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 }, 1682 { "IBM852", RTL_TEXTENCODING_IBM_852 }, 1683 { "CP852", RTL_TEXTENCODING_IBM_852 }, 1684 { "852", RTL_TEXTENCODING_IBM_852 }, 1685 { "CSPCP852", RTL_TEXTENCODING_IBM_852 }, 1686 { "IBM855", RTL_TEXTENCODING_IBM_855 }, 1687 { "CP855", RTL_TEXTENCODING_IBM_855 }, 1688 { "855", RTL_TEXTENCODING_IBM_855 }, 1689 { "CSIBM855", RTL_TEXTENCODING_IBM_855 }, 1690 { "IBM857", RTL_TEXTENCODING_IBM_857 }, 1691 { "CP857", RTL_TEXTENCODING_IBM_857 }, 1692 { "857", RTL_TEXTENCODING_IBM_857 }, 1693 { "CSIBM857", RTL_TEXTENCODING_IBM_857 }, 1694 { "IBM862", RTL_TEXTENCODING_IBM_862 }, 1695 { "CP862", RTL_TEXTENCODING_IBM_862 }, 1696 { "862", RTL_TEXTENCODING_IBM_862 }, 1697 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 }, 1698 { "IBM864", RTL_TEXTENCODING_IBM_864 }, 1699 { "CP864", RTL_TEXTENCODING_IBM_864 }, 1700 { "CSIBM864", RTL_TEXTENCODING_IBM_864 }, 1701 { "IBM866", RTL_TEXTENCODING_IBM_866 }, 1702 { "CP866", RTL_TEXTENCODING_IBM_866 }, 1703 { "866", RTL_TEXTENCODING_IBM_866 }, 1704 { "CSIBM866", RTL_TEXTENCODING_IBM_866 }, 1705 { "IBM869", RTL_TEXTENCODING_IBM_869 }, 1706 { "CP869", RTL_TEXTENCODING_IBM_869 }, 1707 { "869", RTL_TEXTENCODING_IBM_869 }, 1708 { "CP-GR", RTL_TEXTENCODING_IBM_869 }, 1709 { "CSIBM869", RTL_TEXTENCODING_IBM_869 }, 1710 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 }, 1711 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 }, 1712 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 }, 1713 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 }, 1714 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 }, 1715 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 }, 1716 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 }, 1717 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 }, 1718 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS }, 1719 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS }, 1720 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS }, 1721 { "GB2312", RTL_TEXTENCODING_GB_2312 }, 1722 { "CSGB2312", RTL_TEXTENCODING_GB_2312 }, 1723 { "BIG5", RTL_TEXTENCODING_BIG5 }, 1724 { "CSBIG5", RTL_TEXTENCODING_BIG5 }, 1725 { "EUC-JP", RTL_TEXTENCODING_EUC_JP }, 1726 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE", 1727 RTL_TEXTENCODING_EUC_JP }, 1728 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP }, 1729 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP }, 1730 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP }, 1731 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN }, 1732 { "KOI8-R", RTL_TEXTENCODING_KOI8_R }, 1733 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R }, 1734 { "UTF-7", RTL_TEXTENCODING_UTF7 }, 1735 { "UTF-8", RTL_TEXTENCODING_UTF8 }, 1736 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047 1737 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047 1738 { "EUC-KR", RTL_TEXTENCODING_EUC_KR }, 1739 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR }, 1740 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR }, 1741 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR }, 1742 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 }, 1743 { "CSUCS4", RTL_TEXTENCODING_UCS4 }, 1744 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 }, 1745 { "CSUNICODE", RTL_TEXTENCODING_UCS2 } }; 1746 1747 //============================================================================ 1748 template< typename T > 1749 inline rtl_TextEncoding getCharsetEncoding_Impl(T const * pBegin, 1750 T const * pEnd) 1751 { 1752 for (sal_Size i = 0; i < sizeof aEncodingMap / sizeof (EncodingEntry); 1753 ++i) 1754 if (INetMIME::equalIgnoreCase(pBegin, pEnd, aEncodingMap[i].m_aName)) 1755 return aEncodingMap[i].m_eEncoding; 1756 return RTL_TEXTENCODING_DONTKNOW; 1757 } 1758 1759 } 1760 1761 //============================================================================ 1762 // static 1763 rtl_TextEncoding INetMIME::getCharsetEncoding(sal_Char const * pBegin, 1764 sal_Char const * pEnd) 1765 { 1766 return getCharsetEncoding_Impl(pBegin, pEnd); 1767 } 1768 1769 //============================================================================ 1770 // static 1771 rtl_TextEncoding INetMIME::getCharsetEncoding(sal_Unicode const * pBegin, 1772 sal_Unicode const * pEnd) 1773 { 1774 return getCharsetEncoding_Impl(pBegin, pEnd); 1775 } 1776 1777 //============================================================================ 1778 // static 1779 INetMIMECharsetList_Impl * 1780 INetMIME::createPreferredCharsetList(rtl_TextEncoding eEncoding) 1781 { 1782 static const sal_uInt32 aUSASCIIRanges[] = { 0, 0x7F, sal_uInt32(-1) }; 1783 1784 static const sal_uInt32 aISO88591Ranges[] = { 0, 0xFF, sal_uInt32(-1) }; 1785 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT> version 1786 // 1.0 of 1999 July 27 1787 1788 static const sal_uInt32 aISO88592Ranges[] 1789 = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0, 1790 0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC2, 0xC4, 0xC4, 0xC7, 0xC7, 1791 0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xCE, 0xD3, 0xD4, 0xD6, 0xD7, 1792 0xDA, 0xDA, 0xDC, 0xDD, 0xDF, 0xDF, 0xE1, 0xE2, 0xE4, 0xE4, 1793 0xE7, 0xE7, 0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF3, 0xF4, 1794 0xF6, 0xF7, 0xFA, 0xFA, 0xFC, 0xFD, 0x102, 0x107, 0x10C, 0x111, 1795 0x118, 0x11B, 0x139, 0x13A, 0x13D, 0x13E, 0x141, 0x144, 1796 0x147, 0x148, 0x150, 0x151, 0x154, 0x155, 0x158, 0x15B, 1797 0x15E, 0x165, 0x16E, 0x171, 0x179, 0x17E, 0x2C7, 0x2C7, 1798 0x2D8, 0x2D9, 0x2DB, 0x2DB, 0x2DD, 0x2DD, sal_uInt32(-1) }; 1799 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT> version 1800 // 1.0 of 1999 July 27 1801 1802 static const sal_uInt32 aISO88593Ranges[] 1803 = { 0, 0xA0, 0xA3, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0, 1804 0xB2, 0xB5, 0xB7, 0xB8, 0xBD, 0xBD, 0xC0, 0xC2, 0xC4, 0xC4, 1805 0xC7, 0xCF, 0xD1, 0xD4, 0xD6, 0xD7, 0xD9, 0xDC, 0xDF, 0xE2, 1806 0xE4, 0xE4, 0xE7, 0xEF, 0xF1, 0xF4, 0xF6, 0xF7, 0xF9, 0xFC, 1807 0x108, 0x10B, 0x11C, 0x121, 0x124, 0x127, 0x130, 0x131, 1808 0x134, 0x135, 0x15C, 0x15F, 0x16C, 0x16D, 0x17B, 0x17C, 1809 0x2D8, 0x2D9, sal_uInt32(-1) }; 1810 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT> version 1811 // 1.0 of 1999 July 27 1812 1813 static const sal_uInt32 aISO88594Ranges[] 1814 = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xAF, 0xB0, 1815 0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB, 1816 0xCD, 0xCE, 0xD4, 0xD8, 0xDA, 0xDC, 0xDF, 0xDF, 0xE1, 0xE6, 1817 0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF4, 0xF8, 0xFA, 0xFC, 1818 0x100, 0x101, 0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113, 1819 0x116, 0x119, 0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F, 1820 0x136, 0x138, 0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D, 1821 0x156, 0x157, 0x160, 0x161, 0x166, 0x16B, 0x172, 0x173, 1822 0x17D, 0x17E, 0x2C7, 0x2C7, 0x2D9, 0x2D9, 0x2DB, 0x2DB, 1823 sal_uInt32(-1) }; 1824 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT> version 1825 // 1.0 of 1999 July 27 1826 1827 static const sal_uInt32 aISO88595Ranges[] 1828 = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0x401, 0x40C, 0x40E, 0x44F, 1829 0x451, 0x45C, 0x45E, 0x45F, 0x2116, 0x2116, sal_uInt32(-1) }; 1830 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT> version 1831 // 1.0 of 1999 July 27 1832 1833 static const sal_uInt32 aISO88596Ranges[] 1834 = { 0, 0xA0, 0xA4, 0xA4, 0xAD, 0xAD, 0x60C, 0x60C, 0x61B, 0x61B, 1835 0x61F, 0x61F, 0x621, 0x63A, 0x640, 0x652, sal_uInt32(-1) }; 1836 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT> version 1837 // 1.0 of 1999 July 27 1838 1839 static const sal_uInt32 aISO88597Ranges[] 1840 = { 0, 0xA0, 0xA3, 0xA3, 0xA6, 0xA9, 0xAB, 0xAD, 0xB0, 0xB3, 1841 0xB7, 0xB7, 0xBB, 0xBB, 0xBD, 0xBD, 0x384, 0x386, 0x388, 0x38A, 1842 0x38C, 0x38C, 0x38E, 0x3A1, 0x3A3, 0x3CE, 0x2015, 0x2015, 1843 0x2018, 0x2019, sal_uInt32(-1) }; 1844 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT> version 1845 // 1.0 of 1999 July 27 1846 1847 static const sal_uInt32 aISO88598Ranges[] 1848 = { 0, 0xA0, 0xA2, 0xA9, 0xAB, 0xB9, 0xBB, 0xBE, 0xD7, 0xD7, 1849 0xF7, 0xF7, 0x5D0, 0x5EA, 0x200E, 0x200F, 0x2017, 0x2017, 1850 sal_uInt32(-1) }; 1851 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT> version 1852 // 1.1 of 2000-Jan-03 1853 1854 static const sal_uInt32 aISO88599Ranges[] 1855 = { 0, 0xCF, 0xD1, 0xDC, 0xDF, 0xEF, 0xF1, 0xFC, 0xFF, 0xFF, 1856 0x11E, 0x11F, 0x130, 0x131, 0x15E, 0x15F, sal_uInt32(-1) }; 1857 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT> version 1858 // 1.0 of 1999 July 27 1859 1860 static const sal_uInt32 aISO885910Ranges[] 1861 = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0xB0, 0xB0, 0xB7, 0xB7, 1862 0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xD0, 0xD3, 0xD6, 1863 0xD8, 0xD8, 0xDA, 0xDF, 0xE1, 0xE6, 0xE9, 0xE9, 0xEB, 0xEB, 1864 0xED, 0xF0, 0xF3, 0xF6, 0xF8, 0xF8, 0xFA, 0xFE, 0x100, 0x101, 1865 0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113, 0x116, 0x119, 1866 0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F, 0x136, 0x138, 1867 0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D, 0x160, 0x161, 1868 0x166, 0x16B, 0x172, 0x173, 0x17D, 0x17E, 0x2015, 0x2015, 1869 sal_uInt32(-1) }; 1870 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT> version 1871 // 1.1 of 1999 October 11 1872 1873 static const sal_uInt32 aISO885913Ranges[] 1874 = { 0, 0xA0, 0xA2, 0xA4, 0xA6, 0xA7, 0xA9, 0xA9, 0xAB, 0xAE, 1875 0xB0, 0xB3, 0xB5, 0xB7, 0xB9, 0xB9, 0xBB, 0xBE, 0xC4, 0xC6, 1876 0xC9, 0xC9, 0xD3, 0xD3, 0xD5, 0xD8, 0xDC, 0xDC, 0xDF, 0xDF, 1877 0xE4, 0xE6, 0xE9, 0xE9, 0xF3, 0xF3, 0xF5, 0xF8, 0xFC, 0xFC, 1878 0x100, 0x101, 0x104, 0x107, 0x10C, 0x10D, 0x112, 0x113, 1879 0x116, 0x119, 0x122, 0x123, 0x12A, 0x12B, 0x12E, 0x12F, 1880 0x136, 0x137, 0x13B, 0x13C, 0x141, 0x146, 0x14C, 0x14D, 1881 0x156, 0x157, 0x15A, 0x15B, 0x160, 0x161, 0x16A, 0x16B, 1882 0x172, 0x173, 0x179, 0x17E, 0x2019, 0x2019, 0x201C, 0x201E, 1883 sal_uInt32(-1) }; 1884 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT> version 1885 // 1.0 of 1999 July 27 1886 1887 static const sal_uInt32 aISO885914Ranges[] 1888 = { 0, 0xA0, 0xA3, 0xA3, 0xA7, 0xA7, 0xA9, 0xA9, 0xAD, 0xAE, 1889 0xB6, 0xB6, 0xC0, 0xCF, 0xD1, 0xD6, 0xD8, 0xDD, 0xDF, 0xEF, 1890 0xF1, 0xF6, 0xF8, 0xFD, 0xFF, 0xFF, 0x10A, 0x10B, 0x120, 0x121, 1891 0x174, 0x178, 0x1E02, 0x1E03, 0x1E0A, 0x1E0B, 0x1E1E, 0x1E1F, 1892 0x1E40, 0x1E41, 0x1E56, 0x1E57, 0x1E60, 0x1E61, 0x1E6A, 0x1E6B, 1893 0x1E80, 0x1E85, 0x1EF2, 0x1EF3, sal_uInt32(-1) }; 1894 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT> version 1895 // 1.0 of 1999 July 27 1896 1897 static const sal_uInt32 aISO885915Ranges[] 1898 = { 0, 0xA3, 0xA5, 0xA5, 0xA7, 0xA7, 0xA9, 0xB3, 0xB5, 0xB7, 1899 0xB9, 0xBB, 0xBF, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178, 1900 0x17D, 0x17E, 0x20AC, 0x20AC, sal_uInt32(-1) }; 1901 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT> version 1902 // 1.0 of 1999 July 27 1903 1904 static const sal_uInt32 aKOI8RRanges[] 1905 = { 0, 0x7F, 0xA0, 0xA0, 0xA9, 0xA9, 0xB0, 0xB0, 0xB2, 0xB2, 1906 0xB7, 0xB7, 0xF7, 0xF7, 0x401, 0x401, 0x410, 0x44F, 0x451, 0x451, 1907 0x2219, 0x221A, 0x2248, 0x2248, 0x2264, 0x2265, 0x2320, 0x2321, 1908 0x2500, 0x2500, 0x2502, 0x2502, 0x250C, 0x250C, 0x2510, 0x2510, 1909 0x2514, 0x2514, 0x2518, 0x2518, 0x251C, 0x251C, 0x2524, 0x2524, 1910 0x252C, 0x252C, 0x2534, 0x2534, 0x253C, 0x253C, 0x2550, 0x256C, 1911 0x2580, 0x2580, 0x2584, 0x2584, 0x2588, 0x2588, 0x258C, 0x258C, 1912 0x2590, 0x2593, 0x25A0, 0x25A0, sal_uInt32(-1) }; 1913 // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT> 1914 // version 1.0 of 18 August 1999 1915 1916 #if defined WNT 1917 static const sal_uInt32 aWindows1252Ranges[] 1918 = { 0, 0x7F, 0xA0, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178, 1919 0x17D, 0x17E, 0x192, 0x192, 0x2C6, 0x2C6, 0x2DC, 0x2DC, 1920 0x2013, 0x2014, 0x2018, 0x201A, 0x201C, 0x201E, 0x2020, 0x2022, 1921 0x2026, 0x2026, 0x2030, 0x2030, 0x2039, 0x203A, 0x20AC, 0x20AC, 1922 0x2122, 0x2122, sal_uInt32(-1) }; 1923 // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/ 1924 // CP1252.TXT> version 2.01 of 04/15/98 1925 #endif // WNT 1926 1927 INetMIMECharsetList_Impl * pList = new INetMIMECharsetList_Impl; 1928 switch (eEncoding) 1929 { 1930 case RTL_TEXTENCODING_MS_1252: 1931 #if defined WNT 1932 pList->prepend(Charset(RTL_TEXTENCODING_MS_1252, 1933 aWindows1252Ranges)); 1934 #endif // WNT 1935 case RTL_TEXTENCODING_ISO_8859_1: 1936 case RTL_TEXTENCODING_UTF7: 1937 case RTL_TEXTENCODING_UTF8: 1938 break; 1939 1940 case RTL_TEXTENCODING_ISO_8859_2: 1941 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2, 1942 aISO88592Ranges)); 1943 break; 1944 1945 case RTL_TEXTENCODING_ISO_8859_3: 1946 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_3, 1947 aISO88593Ranges)); 1948 break; 1949 1950 case RTL_TEXTENCODING_ISO_8859_4: 1951 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4, 1952 aISO88594Ranges)); 1953 break; 1954 1955 case RTL_TEXTENCODING_ISO_8859_5: 1956 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5, 1957 aISO88595Ranges)); 1958 break; 1959 1960 case RTL_TEXTENCODING_ISO_8859_6: 1961 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6, 1962 aISO88596Ranges)); 1963 break; 1964 1965 case RTL_TEXTENCODING_ISO_8859_7: 1966 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7, 1967 aISO88597Ranges)); 1968 break; 1969 1970 case RTL_TEXTENCODING_ISO_8859_8: 1971 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8, 1972 aISO88598Ranges)); 1973 break; 1974 1975 case RTL_TEXTENCODING_ISO_8859_9: 1976 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9, 1977 aISO88599Ranges)); 1978 break; 1979 1980 case RTL_TEXTENCODING_ISO_8859_10: 1981 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_10, 1982 aISO885910Ranges)); 1983 break; 1984 1985 case RTL_TEXTENCODING_ISO_8859_13: 1986 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_13, 1987 aISO885913Ranges)); 1988 break; 1989 1990 case RTL_TEXTENCODING_ISO_8859_14: 1991 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_14, 1992 aISO885914Ranges)); 1993 break; 1994 1995 case RTL_TEXTENCODING_ISO_8859_15: 1996 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_15, 1997 aISO885915Ranges)); 1998 break; 1999 2000 case RTL_TEXTENCODING_MS_1250: 2001 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2, 2002 aISO88592Ranges)); 2003 break; 2004 2005 case RTL_TEXTENCODING_MS_1251: 2006 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5, 2007 aISO88595Ranges)); 2008 break; 2009 2010 case RTL_TEXTENCODING_MS_1253: 2011 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7, 2012 aISO88597Ranges)); 2013 break; 2014 2015 case RTL_TEXTENCODING_MS_1254: 2016 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9, 2017 aISO88599Ranges)); 2018 break; 2019 2020 case RTL_TEXTENCODING_MS_1255: 2021 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8, 2022 aISO88598Ranges)); 2023 break; 2024 2025 case RTL_TEXTENCODING_MS_1256: 2026 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6, 2027 aISO88596Ranges)); 2028 break; 2029 2030 case RTL_TEXTENCODING_MS_1257: 2031 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4, 2032 aISO88594Ranges)); 2033 break; 2034 2035 case RTL_TEXTENCODING_KOI8_R: 2036 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5, 2037 aISO88595Ranges)); 2038 pList->prepend(Charset(RTL_TEXTENCODING_KOI8_R, aKOI8RRanges)); 2039 break; 2040 2041 default: //@@@ more cases are missing! 2042 DBG_ERROR("INetMIME::createPreferredCharsetList():" 2043 " Unsupported encoding"); 2044 break; 2045 } 2046 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_1, aISO88591Ranges)); 2047 pList->prepend(Charset(RTL_TEXTENCODING_ASCII_US, aUSASCIIRanges)); 2048 return pList; 2049 } 2050 2051 //============================================================================ 2052 // static 2053 sal_Unicode * INetMIME::convertToUnicode(const sal_Char * pBegin, 2054 const sal_Char * pEnd, 2055 rtl_TextEncoding eEncoding, 2056 sal_Size & rSize) 2057 { 2058 if (eEncoding == RTL_TEXTENCODING_DONTKNOW) 2059 return 0; 2060 rtl_TextToUnicodeConverter hConverter 2061 = rtl_createTextToUnicodeConverter(eEncoding); 2062 rtl_TextToUnicodeContext hContext 2063 = rtl_createTextToUnicodeContext(hConverter); 2064 sal_Unicode * pBuffer; 2065 sal_uInt32 nInfo; 2066 for (sal_Size nBufferSize = pEnd - pBegin;; 2067 nBufferSize += nBufferSize / 3 + 1) 2068 { 2069 pBuffer = new sal_Unicode[nBufferSize]; 2070 sal_Size nSrcCvtBytes; 2071 rSize = rtl_convertTextToUnicode( 2072 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer, 2073 nBufferSize, 2074 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR 2075 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR 2076 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, 2077 &nInfo, &nSrcCvtBytes); 2078 if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL) 2079 break; 2080 delete[] pBuffer; 2081 rtl_resetTextToUnicodeContext(hConverter, hContext); 2082 } 2083 rtl_destroyTextToUnicodeContext(hConverter, hContext); 2084 rtl_destroyTextToUnicodeConverter(hConverter); 2085 if (nInfo != 0) 2086 { 2087 delete[] pBuffer; 2088 pBuffer = 0; 2089 } 2090 return pBuffer; 2091 } 2092 2093 //============================================================================ 2094 // static 2095 sal_Char * INetMIME::convertFromUnicode(const sal_Unicode * pBegin, 2096 const sal_Unicode * pEnd, 2097 rtl_TextEncoding eEncoding, 2098 sal_Size & rSize) 2099 { 2100 if (eEncoding == RTL_TEXTENCODING_DONTKNOW) 2101 return 0; 2102 rtl_UnicodeToTextConverter hConverter 2103 = rtl_createUnicodeToTextConverter(eEncoding); 2104 rtl_UnicodeToTextContext hContext 2105 = rtl_createUnicodeToTextContext(hConverter); 2106 sal_Char * pBuffer; 2107 sal_uInt32 nInfo; 2108 for (sal_Size nBufferSize = pEnd - pBegin;; 2109 nBufferSize += nBufferSize / 3 + 1) 2110 { 2111 pBuffer = new sal_Char[nBufferSize]; 2112 sal_Size nSrcCvtBytes; 2113 rSize = rtl_convertUnicodeToText( 2114 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer, 2115 nBufferSize, 2116 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR 2117 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR 2118 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE 2119 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR, 2120 &nInfo, &nSrcCvtBytes); 2121 if (nInfo != RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) 2122 break; 2123 delete[] pBuffer; 2124 rtl_resetUnicodeToTextContext(hConverter, hContext); 2125 } 2126 rtl_destroyUnicodeToTextContext(hConverter, hContext); 2127 rtl_destroyUnicodeToTextConverter(hConverter); 2128 if (nInfo != 0) 2129 { 2130 delete[] pBuffer; 2131 pBuffer = 0; 2132 } 2133 return pBuffer; 2134 } 2135 2136 //============================================================================ 2137 // static 2138 void INetMIME::writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar) 2139 { 2140 // See RFC 2279 for a discussion of UTF-8. 2141 DBG_ASSERT(nChar < 0x80000000, "INetMIME::writeUTF8(): Bad char"); 2142 2143 if (nChar < 0x80) 2144 rSink << sal_Char(nChar); 2145 else if (nChar < 0x800) 2146 rSink << sal_Char(nChar >> 6 | 0xC0) 2147 << sal_Char((nChar & 0x3F) | 0x80); 2148 else if (nChar < 0x10000) 2149 rSink << sal_Char(nChar >> 12 | 0xE0) 2150 << sal_Char((nChar >> 6 & 0x3F) | 0x80) 2151 << sal_Char((nChar & 0x3F) | 0x80); 2152 else if (nChar < 0x200000) 2153 rSink << sal_Char(nChar >> 18 | 0xF0) 2154 << sal_Char((nChar >> 12 & 0x3F) | 0x80) 2155 << sal_Char((nChar >> 6 & 0x3F) | 0x80) 2156 << sal_Char((nChar & 0x3F) | 0x80); 2157 else if (nChar < 0x4000000) 2158 rSink << sal_Char(nChar >> 24 | 0xF8) 2159 << sal_Char((nChar >> 18 & 0x3F) | 0x80) 2160 << sal_Char((nChar >> 12 & 0x3F) | 0x80) 2161 << sal_Char((nChar >> 6 & 0x3F) | 0x80) 2162 << sal_Char((nChar & 0x3F) | 0x80); 2163 else 2164 rSink << sal_Char(nChar >> 30 | 0xFC) 2165 << sal_Char((nChar >> 24 & 0x3F) | 0x80) 2166 << sal_Char((nChar >> 18 & 0x3F) | 0x80) 2167 << sal_Char((nChar >> 12 & 0x3F) | 0x80) 2168 << sal_Char((nChar >> 6 & 0x3F) | 0x80) 2169 << sal_Char((nChar & 0x3F) | 0x80); 2170 } 2171 2172 //============================================================================ 2173 // static 2174 void INetMIME::writeUnsigned(INetMIMEOutputSink & rSink, sal_uInt32 nValue, 2175 int nMinDigits) 2176 { 2177 sal_Char aBuffer[10]; 2178 // max unsigned 32 bit value (4294967295) has 10 places 2179 sal_Char * p = aBuffer; 2180 for (; nValue > 0; nValue /= 10) 2181 *p++ = sal_Char(getDigit(nValue % 10)); 2182 nMinDigits -= p - aBuffer; 2183 while (nMinDigits-- > 0) 2184 rSink << '0'; 2185 while (p != aBuffer) 2186 rSink << *--p; 2187 } 2188 2189 //============================================================================ 2190 // static 2191 void INetMIME::writeDateTime(INetMIMEOutputSink & rSink, 2192 const DateTime & rUTC) 2193 { 2194 static const sal_Char aDay[7][3] 2195 = { { 'M', 'o', 'n' }, 2196 { 'T', 'u', 'e' }, 2197 { 'W', 'e', 'd' }, 2198 { 'T', 'h', 'u' }, 2199 { 'F', 'r', 'i' }, 2200 { 'S', 'a', 't' }, 2201 { 'S', 'u', 'n' } }; 2202 const sal_Char * pTheDay = aDay[rUTC.GetDayOfWeek()]; 2203 rSink.write(pTheDay, pTheDay + 3); 2204 rSink << ", "; 2205 writeUnsigned(rSink, rUTC.GetDay()); 2206 rSink << ' '; 2207 static const sal_Char aMonth[12][3] 2208 = { { 'J', 'a', 'n' }, 2209 { 'F', 'e', 'b' }, 2210 { 'M', 'a', 'r' }, 2211 { 'A', 'p', 'r' }, 2212 { 'M', 'a', 'y' }, 2213 { 'J', 'u', 'n' }, 2214 { 'J', 'u', 'l' }, 2215 { 'A', 'u', 'g' }, 2216 { 'S', 'e', 'p' }, 2217 { 'O', 'c', 't' }, 2218 { 'N', 'o', 'v' }, 2219 { 'D', 'e', 'c' } }; 2220 const sal_Char * pTheMonth = aMonth[rUTC.GetMonth() - 1]; 2221 rSink.write(pTheMonth, pTheMonth + 3); 2222 rSink << ' '; 2223 writeUnsigned(rSink, rUTC.GetYear()); 2224 rSink << ' '; 2225 writeUnsigned(rSink, rUTC.GetHour(), 2); 2226 rSink << ':'; 2227 writeUnsigned(rSink, rUTC.GetMin(), 2); 2228 rSink << ':'; 2229 writeUnsigned(rSink, rUTC.GetSec(), 2); 2230 rSink << " +0000"; 2231 } 2232 2233 //============================================================================ 2234 // static 2235 void INetMIME::writeHeaderFieldBody(INetMIMEOutputSink & rSink, 2236 HeaderFieldType eType, 2237 const ByteString & rBody, 2238 rtl_TextEncoding ePreferredEncoding, 2239 bool bInitialSpace) 2240 { 2241 writeHeaderFieldBody(rSink, eType, 2242 UniString(rBody, RTL_TEXTENCODING_UTF8), 2243 ePreferredEncoding, bInitialSpace); 2244 } 2245 2246 //============================================================================ 2247 // static 2248 void INetMIME::writeHeaderFieldBody(INetMIMEOutputSink & rSink, 2249 HeaderFieldType eType, 2250 const UniString & rBody, 2251 rtl_TextEncoding ePreferredEncoding, 2252 bool bInitialSpace) 2253 { 2254 if (eType == HEADER_FIELD_TEXT) 2255 { 2256 INetMIMEEncodedWordOutputSink 2257 aOutput(rSink, INetMIMEEncodedWordOutputSink::CONTEXT_TEXT, 2258 bInitialSpace ? 2259 INetMIMEEncodedWordOutputSink::SPACE_ALWAYS : 2260 INetMIMEEncodedWordOutputSink::SPACE_NO, 2261 ePreferredEncoding); 2262 aOutput.write(rBody.GetBuffer(), rBody.GetBuffer() + rBody.Len()); 2263 aOutput.flush(); 2264 } 2265 else 2266 { 2267 enum Brackets { BRACKETS_OUTSIDE, BRACKETS_OPENING, BRACKETS_INSIDE }; 2268 Brackets eBrackets = BRACKETS_OUTSIDE; 2269 2270 const sal_Unicode * pBodyPtr = rBody.GetBuffer(); 2271 const sal_Unicode * pBodyEnd = pBodyPtr + rBody.Len(); 2272 while (pBodyPtr != pBodyEnd) 2273 switch (*pBodyPtr) 2274 { 2275 case '\t': 2276 case ' ': 2277 // A WSP adds to accumulated space: 2278 bInitialSpace = true; 2279 ++pBodyPtr; 2280 break; 2281 2282 case '(': 2283 { 2284 // Write a pending '<' if necessary: 2285 if (eBrackets == BRACKETS_OPENING) 2286 { 2287 if (rSink.getColumn() + (bInitialSpace ? 1 : 0) 2288 >= rSink.getLineLengthLimit()) 2289 rSink << INetMIMEOutputSink::endl << ' '; 2290 else if (bInitialSpace) 2291 rSink << ' '; 2292 rSink << '<'; 2293 bInitialSpace = false; 2294 eBrackets = BRACKETS_INSIDE; 2295 } 2296 2297 // Write the comment, introducing encoded-words where 2298 // necessary: 2299 int nLevel = 0; 2300 INetMIMEEncodedWordOutputSink 2301 aOutput( 2302 rSink, 2303 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT, 2304 INetMIMEEncodedWordOutputSink::SPACE_NO, 2305 ePreferredEncoding); 2306 while (pBodyPtr != pBodyEnd) 2307 switch (*pBodyPtr) 2308 { 2309 case '(': 2310 aOutput.flush(); 2311 if (rSink.getColumn() 2312 + (bInitialSpace ? 1 : 0) 2313 >= rSink.getLineLengthLimit()) 2314 rSink << INetMIMEOutputSink::endl << ' '; 2315 else if (bInitialSpace) 2316 rSink << ' '; 2317 rSink << '('; 2318 bInitialSpace = false; 2319 ++nLevel; 2320 ++pBodyPtr; 2321 break; 2322 2323 case ')': 2324 aOutput.flush(); 2325 if (rSink.getColumn() 2326 >= rSink.getLineLengthLimit()) 2327 rSink << INetMIMEOutputSink::endl << ' '; 2328 rSink << ')'; 2329 ++pBodyPtr; 2330 if (--nLevel == 0) 2331 goto comment_done; 2332 break; 2333 2334 case '\\': 2335 if (++pBodyPtr == pBodyEnd) 2336 break; 2337 default: 2338 aOutput << *pBodyPtr++; 2339 break; 2340 } 2341 comment_done: 2342 break; 2343 } 2344 2345 case '<': 2346 // Write an already pending '<' if necessary: 2347 if (eBrackets == BRACKETS_OPENING) 2348 { 2349 if (rSink.getColumn() + (bInitialSpace ? 1 : 0) 2350 >= rSink.getLineLengthLimit()) 2351 rSink << INetMIMEOutputSink::endl << ' '; 2352 else if (bInitialSpace) 2353 rSink << ' '; 2354 rSink << '<'; 2355 bInitialSpace = false; 2356 } 2357 2358 // Remember this '<' as pending, and open a bracketed 2359 // block: 2360 eBrackets = BRACKETS_OPENING; 2361 ++pBodyPtr; 2362 break; 2363 2364 case '>': 2365 // Write a pending '<' if necessary: 2366 if (eBrackets == BRACKETS_OPENING) 2367 { 2368 if (rSink.getColumn() + (bInitialSpace ? 1 : 0) 2369 >= rSink.getLineLengthLimit()) 2370 rSink << INetMIMEOutputSink::endl << ' '; 2371 else if (bInitialSpace) 2372 rSink << ' '; 2373 rSink << '<'; 2374 bInitialSpace = false; 2375 } 2376 2377 // Write this '>', and close any bracketed block: 2378 if (rSink.getColumn() + (bInitialSpace ? 1 : 0) 2379 >= rSink.getLineLengthLimit()) 2380 rSink << INetMIMEOutputSink::endl << ' '; 2381 else if (bInitialSpace) 2382 rSink << ' '; 2383 rSink << '>'; 2384 bInitialSpace = false; 2385 eBrackets = BRACKETS_OUTSIDE; 2386 ++pBodyPtr; 2387 break; 2388 2389 case ',': 2390 case ':': 2391 case ';': 2392 case '\\': 2393 case ']': 2394 // Write a pending '<' if necessary: 2395 if (eBrackets == BRACKETS_OPENING) 2396 { 2397 if (rSink.getColumn() + (bInitialSpace ? 1 : 0) 2398 >= rSink.getLineLengthLimit()) 2399 rSink << INetMIMEOutputSink::endl << ' '; 2400 else if (bInitialSpace) 2401 rSink << ' '; 2402 rSink << '<'; 2403 bInitialSpace = false; 2404 eBrackets = BRACKETS_INSIDE; 2405 } 2406 2407 // Write this specials: 2408 if (rSink.getColumn() + (bInitialSpace ? 1 : 0) 2409 >= rSink.getLineLengthLimit()) 2410 rSink << INetMIMEOutputSink::endl << ' '; 2411 else if (bInitialSpace) 2412 rSink << ' '; 2413 rSink << sal_Char(*pBodyPtr++); 2414 bInitialSpace = false; 2415 break; 2416 2417 case '\x0D': // CR 2418 // A <CRLF WSP> adds to accumulated space, a <CR> not 2419 // followed by <LF WSP> starts 'junk': 2420 if (startsWithLineFolding(pBodyPtr, pBodyEnd)) 2421 { 2422 bInitialSpace = true; 2423 pBodyPtr += 3; 2424 break; 2425 } 2426 default: 2427 { 2428 // The next token is either one of <"." / "@" / atom / 2429 // quoted-string / domain-literal>, or it's 'junk'; if it 2430 // is not 'junk', it is either a 'phrase' (i.e., it may 2431 // contain encoded-words) or a 'non-phrase' (i.e., it may 2432 // not contain encoded-words): 2433 enum Entity { ENTITY_JUNK, ENTITY_NON_PHRASE, 2434 ENTITY_PHRASE }; 2435 Entity eEntity = ENTITY_JUNK; 2436 switch (*pBodyPtr) 2437 { 2438 case '.': 2439 case '@': 2440 case '[': 2441 // A token of <"." / "@" / domain-literal> always 2442 // starts a 'non-phrase': 2443 eEntity = ENTITY_NON_PHRASE; 2444 break; 2445 2446 default: 2447 if (isUSASCII(*pBodyPtr) 2448 && !isAtomChar(*pBodyPtr)) 2449 { 2450 eEntity = ENTITY_JUNK; 2451 break; 2452 } 2453 case '"': 2454 // A token of <atom / quoted-string> can either be 2455 // a 'phrase' or a 'non-phrase': 2456 switch (eType) 2457 { 2458 case HEADER_FIELD_STRUCTURED: 2459 eEntity = ENTITY_NON_PHRASE; 2460 break; 2461 2462 case HEADER_FIELD_PHRASE: 2463 eEntity = ENTITY_PHRASE; 2464 break; 2465 2466 case HEADER_FIELD_MESSAGE_ID: 2467 // A 'phrase' if and only if outside any 2468 // bracketed block: 2469 eEntity 2470 = eBrackets == BRACKETS_OUTSIDE ? 2471 ENTITY_PHRASE : 2472 ENTITY_NON_PHRASE; 2473 break; 2474 2475 case HEADER_FIELD_ADDRESS: 2476 { 2477 // A 'non-phrase' if and only if, after 2478 // skipping this token and any following 2479 // <linear-white-space> and <comment>s, 2480 // there is no token left, or the next 2481 // token is any of <"." / "@" / ">" / "," 2482 // / ";">, or the next token is <":"> and 2483 // is within a bracketed block: 2484 const sal_Unicode * pLookAhead = pBodyPtr; 2485 if (*pLookAhead == '"') 2486 { 2487 pLookAhead 2488 = skipQuotedString(pLookAhead, 2489 pBodyEnd); 2490 if (pLookAhead == pBodyPtr) 2491 pLookAhead = pBodyEnd; 2492 } 2493 else 2494 while (pLookAhead != pBodyEnd 2495 && (isAtomChar(*pLookAhead) 2496 || !isUSASCII( 2497 *pLookAhead))) 2498 ++pLookAhead; 2499 while (pLookAhead != pBodyEnd) 2500 switch (*pLookAhead) 2501 { 2502 case '\t': 2503 case ' ': 2504 ++pLookAhead; 2505 break; 2506 2507 case '(': 2508 { 2509 const sal_Unicode * pPast 2510 = skipComment(pLookAhead, 2511 pBodyEnd); 2512 pLookAhead 2513 = pPast == pLookAhead ? 2514 pBodyEnd : pPast; 2515 break; 2516 } 2517 2518 case ',': 2519 case '.': 2520 case ';': 2521 case '>': 2522 case '@': 2523 eEntity = ENTITY_NON_PHRASE; 2524 goto entity_determined; 2525 2526 case ':': 2527 eEntity 2528 = eBrackets 2529 == BRACKETS_OUTSIDE ? 2530 ENTITY_PHRASE : 2531 ENTITY_NON_PHRASE; 2532 goto entity_determined; 2533 2534 case '\x0D': // CR 2535 if (startsWithLineFolding( 2536 pLookAhead, pBodyEnd)) 2537 { 2538 pLookAhead += 3; 2539 break; 2540 } 2541 default: 2542 eEntity = ENTITY_PHRASE; 2543 goto entity_determined; 2544 } 2545 eEntity = ENTITY_NON_PHRASE; 2546 entity_determined: 2547 break; 2548 } 2549 2550 case HEADER_FIELD_TEXT: 2551 OSL_ASSERT(false); 2552 break; 2553 } 2554 2555 // In a 'non-phrase', a non-US-ASCII character 2556 // cannot be part of an <atom>, but instead the 2557 // whole entity is 'junk' rather than 'non- 2558 // phrase': 2559 if (eEntity == ENTITY_NON_PHRASE 2560 && !isUSASCII(*pBodyPtr)) 2561 eEntity = ENTITY_JUNK; 2562 break; 2563 } 2564 2565 switch (eEntity) 2566 { 2567 case ENTITY_JUNK: 2568 { 2569 // Write a pending '<' if necessary: 2570 if (eBrackets == BRACKETS_OPENING) 2571 { 2572 if (rSink.getColumn() 2573 + (bInitialSpace ? 1 : 0) 2574 >= rSink.getLineLengthLimit()) 2575 rSink << INetMIMEOutputSink::endl << ' '; 2576 else if (bInitialSpace) 2577 rSink << ' '; 2578 rSink << '<'; 2579 bInitialSpace = false; 2580 eBrackets = BRACKETS_INSIDE; 2581 } 2582 2583 // Calculate the length of in- and output: 2584 const sal_Unicode * pStart = pBodyPtr; 2585 sal_Size nLength = 0; 2586 bool bModify = false; 2587 bool bEnd = false; 2588 while (pBodyPtr != pBodyEnd && !bEnd) 2589 switch (*pBodyPtr) 2590 { 2591 case '\x0D': // CR 2592 if (startsWithLineFolding(pBodyPtr, 2593 pBodyEnd)) 2594 bEnd = true; 2595 else if (startsWithLineBreak( 2596 pBodyPtr, pBodyEnd)) 2597 { 2598 nLength += 3; 2599 bModify = true; 2600 pBodyPtr += 2; 2601 } 2602 else 2603 { 2604 ++nLength; 2605 ++pBodyPtr; 2606 } 2607 break; 2608 2609 case '\t': 2610 case ' ': 2611 bEnd = true; 2612 break; 2613 2614 default: 2615 if (isVisible(*pBodyPtr)) 2616 bEnd = true; 2617 else if (isUSASCII(*pBodyPtr)) 2618 { 2619 ++nLength; 2620 ++pBodyPtr; 2621 } 2622 else 2623 { 2624 nLength += getUTF8OctetCount( 2625 *pBodyPtr++); 2626 bModify = true; 2627 } 2628 break; 2629 } 2630 2631 // Write the output: 2632 if (rSink.getColumn() + (bInitialSpace ? 1 : 0) 2633 + nLength 2634 > rSink.getLineLengthLimit()) 2635 rSink << INetMIMEOutputSink::endl << ' '; 2636 else if (bInitialSpace) 2637 rSink << ' '; 2638 bInitialSpace = false; 2639 if (bModify) 2640 while (pStart != pBodyPtr) 2641 if (startsWithLineBreak(pStart, pBodyPtr)) 2642 { 2643 rSink << "\x0D\\\x0A"; // CR, '\', LF 2644 pStart += 2; 2645 } 2646 else 2647 writeUTF8(rSink, *pStart++); 2648 else 2649 rSink.write(pStart, pBodyPtr); 2650 break; 2651 } 2652 2653 case ENTITY_NON_PHRASE: 2654 { 2655 // Calculate the length of in- and output: 2656 const sal_Unicode * pStart = pBodyPtr; 2657 sal_Size nLength = 0; 2658 bool bBracketedBlock = false; 2659 bool bSymbol = *pStart != '.' && *pStart != '@'; 2660 bool bModify = false; 2661 bool bEnd = false; 2662 while (pBodyPtr != pBodyEnd && !bEnd) 2663 switch (*pBodyPtr) 2664 { 2665 case '\t': 2666 case ' ': 2667 case '\x0D': // CR 2668 { 2669 const sal_Unicode * pLookAhead 2670 = skipLinearWhiteSpace(pBodyPtr, 2671 pBodyEnd); 2672 if (pLookAhead < pBodyEnd 2673 && (bSymbol ? 2674 isAtomChar(*pLookAhead) 2675 || *pLookAhead == '"' 2676 || *pLookAhead == '[' : 2677 *pLookAhead == '.' 2678 || *pLookAhead == '@' 2679 || (*pLookAhead == '>' 2680 && eType 2681 >= HEADER_FIELD_MESSAGE_ID 2682 && eBrackets 2683 == BRACKETS_OPENING))) 2684 { 2685 bModify = true; 2686 pBodyPtr = pLookAhead; 2687 } 2688 else 2689 bEnd = true; 2690 break; 2691 } 2692 2693 case '"': 2694 if (bSymbol) 2695 { 2696 pBodyPtr 2697 = scanQuotedBlock(pBodyPtr, 2698 pBodyEnd, 2699 '"', '"', 2700 nLength, 2701 bModify); 2702 bSymbol = false; 2703 } 2704 else 2705 bEnd = true; 2706 break; 2707 2708 case '[': 2709 if (bSymbol) 2710 { 2711 pBodyPtr 2712 = scanQuotedBlock(pBodyPtr, 2713 pBodyEnd, 2714 '[', ']', 2715 nLength, 2716 bModify); 2717 bSymbol = false; 2718 } 2719 else 2720 bEnd = true; 2721 break; 2722 2723 case '.': 2724 case '@': 2725 if (bSymbol) 2726 bEnd = true; 2727 else 2728 { 2729 ++nLength; 2730 bSymbol = true; 2731 ++pBodyPtr; 2732 } 2733 break; 2734 2735 case '>': 2736 if (eBrackets == BRACKETS_OPENING 2737 && eType 2738 >= HEADER_FIELD_MESSAGE_ID) 2739 { 2740 ++nLength; 2741 bBracketedBlock = true; 2742 ++pBodyPtr; 2743 } 2744 bEnd = true; 2745 break; 2746 2747 default: 2748 if (isAtomChar(*pBodyPtr) && bSymbol) 2749 { 2750 while (pBodyPtr != pBodyEnd 2751 && isAtomChar(*pBodyPtr)) 2752 { 2753 ++nLength; 2754 ++pBodyPtr; 2755 } 2756 bSymbol = false; 2757 } 2758 else 2759 { 2760 if (!isUSASCII(*pBodyPtr)) 2761 bModify = true; 2762 bEnd = true; 2763 } 2764 break; 2765 } 2766 2767 // Write a pending '<' if necessary: 2768 if (eBrackets == BRACKETS_OPENING 2769 && !bBracketedBlock) 2770 { 2771 if (rSink.getColumn() 2772 + (bInitialSpace ? 1 : 0) 2773 >= rSink.getLineLengthLimit()) 2774 rSink << INetMIMEOutputSink::endl << ' '; 2775 else if (bInitialSpace) 2776 rSink << ' '; 2777 rSink << '<'; 2778 bInitialSpace = false; 2779 eBrackets = BRACKETS_INSIDE; 2780 } 2781 2782 // Write the output: 2783 if (rSink.getColumn() + (bInitialSpace ? 1 : 0) 2784 + nLength 2785 > rSink.getLineLengthLimit()) 2786 rSink << INetMIMEOutputSink::endl << ' '; 2787 else if (bInitialSpace) 2788 rSink << ' '; 2789 bInitialSpace = false; 2790 if (bBracketedBlock) 2791 { 2792 rSink << '<'; 2793 eBrackets = BRACKETS_OUTSIDE; 2794 } 2795 if (bModify) 2796 { 2797 enum Mode { MODE_PLAIN, MODE_QUOTED_STRING, 2798 MODE_DOMAIN_LITERAL }; 2799 Mode eMode = MODE_PLAIN; 2800 while (pStart != pBodyPtr) 2801 switch (*pStart) 2802 { 2803 case '\x0D': // CR 2804 if (startsWithLineFolding( 2805 pStart, pBodyPtr)) 2806 { 2807 if (eMode != MODE_PLAIN) 2808 rSink << sal_Char( 2809 pStart[2]); 2810 pStart += 3; 2811 } 2812 else if (startsWithLineBreak( 2813 pStart, pBodyPtr)) 2814 { 2815 rSink << "\x0D\\\x0A"; 2816 // CR, '\', LF 2817 pStart += 2; 2818 } 2819 else 2820 { 2821 rSink << '\x0D'; // CR 2822 ++pStart; 2823 } 2824 break; 2825 2826 case '\t': 2827 case ' ': 2828 if (eMode != MODE_PLAIN) 2829 rSink << sal_Char(*pStart); 2830 ++pStart; 2831 break; 2832 2833 case '"': 2834 if (eMode == MODE_PLAIN) 2835 eMode = MODE_QUOTED_STRING; 2836 else if (eMode 2837 == MODE_QUOTED_STRING) 2838 eMode = MODE_PLAIN; 2839 rSink << '"'; 2840 ++pStart; 2841 break; 2842 2843 case '[': 2844 if (eMode == MODE_PLAIN) 2845 eMode = MODE_DOMAIN_LITERAL; 2846 rSink << '['; 2847 ++pStart; 2848 break; 2849 2850 case ']': 2851 if (eMode == MODE_DOMAIN_LITERAL) 2852 eMode = MODE_PLAIN; 2853 rSink << ']'; 2854 ++pStart; 2855 break; 2856 2857 case '\\': 2858 rSink << '\\'; 2859 if (++pStart < pBodyPtr) 2860 writeUTF8(rSink, *pStart++); 2861 break; 2862 2863 default: 2864 writeUTF8(rSink, *pStart++); 2865 break; 2866 } 2867 } 2868 else 2869 rSink.write(pStart, pBodyPtr); 2870 break; 2871 } 2872 2873 case ENTITY_PHRASE: 2874 { 2875 // Write a pending '<' if necessary: 2876 if (eBrackets == BRACKETS_OPENING) 2877 { 2878 if (rSink.getColumn() 2879 + (bInitialSpace ? 1 : 0) 2880 >= rSink.getLineLengthLimit()) 2881 rSink << INetMIMEOutputSink::endl << ' '; 2882 else if (bInitialSpace) 2883 rSink << ' '; 2884 rSink << '<'; 2885 bInitialSpace = false; 2886 eBrackets = BRACKETS_INSIDE; 2887 } 2888 2889 // Calculate the length of in- and output: 2890 const sal_Unicode * pStart = pBodyPtr; 2891 bool bQuotedString = false; 2892 bool bEnd = false; 2893 while (pBodyPtr != pBodyEnd && !bEnd) 2894 switch (*pBodyPtr) 2895 { 2896 case '\t': 2897 case ' ': 2898 case '\x0D': // CR 2899 if (bQuotedString) 2900 ++pBodyPtr; 2901 else 2902 { 2903 const sal_Unicode * pLookAhead 2904 = skipLinearWhiteSpace( 2905 pBodyPtr, pBodyEnd); 2906 if (pLookAhead != pBodyEnd 2907 && (isAtomChar(*pLookAhead) 2908 || !isUSASCII(*pLookAhead) 2909 || *pLookAhead == '"')) 2910 pBodyPtr = pLookAhead; 2911 else 2912 bEnd = true; 2913 } 2914 break; 2915 2916 case '"': 2917 bQuotedString = !bQuotedString; 2918 ++pBodyPtr; 2919 break; 2920 2921 case '\\': 2922 if (bQuotedString) 2923 { 2924 if (++pBodyPtr != pBodyEnd) 2925 ++pBodyPtr; 2926 } 2927 else 2928 bEnd = true; 2929 break; 2930 2931 default: 2932 if (bQuotedString 2933 || isAtomChar(*pBodyPtr) 2934 || !isUSASCII(*pBodyPtr)) 2935 ++pBodyPtr; 2936 else 2937 bEnd = true; 2938 break; 2939 } 2940 2941 // Write the phrase, introducing encoded-words 2942 // where necessary: 2943 INetMIMEEncodedWordOutputSink 2944 aOutput( 2945 rSink, 2946 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, 2947 bInitialSpace ? 2948 INetMIMEEncodedWordOutputSink::SPACE_ALWAYS : 2949 INetMIMEEncodedWordOutputSink::SPACE_ENCODED, 2950 ePreferredEncoding); 2951 while (pStart != pBodyPtr) 2952 switch (*pStart) 2953 { 2954 case '"': 2955 ++pStart; 2956 break; 2957 2958 case '\\': 2959 if (++pStart != pBodyPtr) 2960 aOutput << *pStart++; 2961 break; 2962 2963 case '\x0D': // CR 2964 pStart += 2; 2965 aOutput << *pStart++; 2966 break; 2967 2968 default: 2969 aOutput << *pStart++; 2970 break; 2971 } 2972 bInitialSpace = aOutput.flush(); 2973 break; 2974 } 2975 } 2976 break; 2977 } 2978 } 2979 } 2980 } 2981 2982 //============================================================================ 2983 // static 2984 bool INetMIME::translateUTF8Char(const sal_Char *& rBegin, 2985 const sal_Char * pEnd, 2986 rtl_TextEncoding eEncoding, 2987 sal_uInt32 & rCharacter) 2988 { 2989 if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80 2990 || static_cast< unsigned char >(*rBegin) >= 0xFE) 2991 return false; 2992 2993 int nCount; 2994 sal_uInt32 nMin; 2995 sal_uInt32 nUCS4; 2996 const sal_Char * p = rBegin; 2997 if (static_cast< unsigned char >(*p) < 0xE0) 2998 { 2999 nCount = 1; 3000 nMin = 0x80; 3001 nUCS4 = static_cast< unsigned char >(*p) & 0x1F; 3002 } 3003 else if (static_cast< unsigned char >(*p) < 0xF0) 3004 { 3005 nCount = 2; 3006 nMin = 0x800; 3007 nUCS4 = static_cast< unsigned char >(*p) & 0xF; 3008 } 3009 else if (static_cast< unsigned char >(*p) < 0xF8) 3010 { 3011 nCount = 3; 3012 nMin = 0x10000; 3013 nUCS4 = static_cast< unsigned char >(*p) & 7; 3014 } 3015 else if (static_cast< unsigned char >(*p) < 0xFC) 3016 { 3017 nCount = 4; 3018 nMin = 0x200000; 3019 nUCS4 = static_cast< unsigned char >(*p) & 3; 3020 } 3021 else 3022 { 3023 nCount = 5; 3024 nMin = 0x4000000; 3025 nUCS4 = static_cast< unsigned char >(*p) & 1; 3026 } 3027 ++p; 3028 3029 for (; nCount-- > 0; ++p) 3030 if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80) 3031 nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F); 3032 else 3033 return false; 3034 3035 if (nUCS4 < nMin || nUCS4 > 0x10FFFF) 3036 return false; 3037 3038 if (eEncoding >= RTL_TEXTENCODING_UCS4) 3039 rCharacter = nUCS4; 3040 else 3041 { 3042 sal_Unicode aUTF16[2]; 3043 const sal_Unicode * pUTF16End = putUTF32Character(aUTF16, nUCS4); 3044 sal_Size nSize; 3045 sal_Char * pBuffer = convertFromUnicode(aUTF16, pUTF16End, eEncoding, 3046 nSize); 3047 if (!pBuffer) 3048 return false; 3049 DBG_ASSERT(nSize == 1, 3050 "INetMIME::translateUTF8Char(): Bad conversion"); 3051 rCharacter = *pBuffer; 3052 delete[] pBuffer; 3053 } 3054 rBegin = p; 3055 return true; 3056 } 3057 3058 //============================================================================ 3059 // static 3060 ByteString INetMIME::decodeUTF8(const ByteString & rText, 3061 rtl_TextEncoding eEncoding) 3062 { 3063 const sal_Char * p = rText.GetBuffer(); 3064 const sal_Char * pEnd = p + rText.Len(); 3065 ByteString sDecoded; 3066 while (p != pEnd) 3067 { 3068 sal_uInt32 nCharacter; 3069 if (translateUTF8Char(p, pEnd, eEncoding, nCharacter)) 3070 sDecoded += sal_Char(nCharacter); 3071 else 3072 sDecoded += sal_Char(*p++); 3073 } 3074 return sDecoded; 3075 } 3076 3077 //============================================================================ 3078 // static 3079 UniString INetMIME::decodeHeaderFieldBody(HeaderFieldType eType, 3080 const ByteString & rBody) 3081 { 3082 // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old 3083 // versions of StarOffice send mails with header fields where encoded 3084 // words can be preceded by '=', ',', '.', '"', or '(', and followed by 3085 // '=', ',', '.', '"', ')', without any required white space in between. 3086 // And there appear to exist some broken mailers that only encode single 3087 // letters within words, like "Appel 3088 // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to 3089 // detect encoded words even when not propperly surrounded by white space. 3090 // 3091 // Non US-ASCII characters in rBody are treated as ISO-8859-1. 3092 // 3093 // encoded-word = "=?" 3094 // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E) 3095 // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?" 3096 // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==") 3097 // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG)) 3098 // "?=" 3099 // 3100 // base64 = ALPHA / DIGIT / "+" / "/" 3101 3102 const sal_Char * pBegin = rBody.GetBuffer(); 3103 const sal_Char * pEnd = pBegin + rBody.Len(); 3104 3105 UniString sDecoded; 3106 const sal_Char * pCopyBegin = pBegin; 3107 3108 /* bool bStartEncodedWord = true; */ 3109 const sal_Char * pWSPBegin = pBegin; 3110 UniString sEncodedText; 3111 bool bQuotedEncodedText = false; 3112 sal_uInt32 nCommentLevel = 0; 3113 3114 for (const sal_Char * p = pBegin; p != pEnd;) 3115 { 3116 if (p != pEnd && *p == '=' /* && bStartEncodedWord */) 3117 { 3118 const sal_Char * q = p + 1; 3119 bool bEncodedWord = q != pEnd && *q++ == '?'; 3120 3121 rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW; 3122 if (bEncodedWord) 3123 { 3124 const sal_Char * pCharsetBegin = q; 3125 const sal_Char * pLanguageBegin = 0; 3126 int nAlphaCount = 0; 3127 for (bool bDone = false; !bDone;) 3128 if (q == pEnd) 3129 { 3130 bEncodedWord = false; 3131 bDone = true; 3132 } 3133 else 3134 { 3135 sal_Char cChar = *q++; 3136 switch (cChar) 3137 { 3138 case '*': 3139 pLanguageBegin = q - 1; 3140 nAlphaCount = 0; 3141 break; 3142 3143 case '-': 3144 if (pLanguageBegin != 0) 3145 { 3146 if (nAlphaCount == 0) 3147 pLanguageBegin = 0; 3148 else 3149 nAlphaCount = 0; 3150 } 3151 break; 3152 3153 case '?': 3154 if (pCharsetBegin == q - 1) 3155 bEncodedWord = false; 3156 else 3157 { 3158 eCharsetEncoding 3159 = getCharsetEncoding( 3160 pCharsetBegin, 3161 pLanguageBegin == 0 3162 || nAlphaCount == 0 ? 3163 q - 1 : pLanguageBegin); 3164 bEncodedWord = isMIMECharsetEncoding( 3165 eCharsetEncoding); 3166 eCharsetEncoding 3167 = translateFromMIME(eCharsetEncoding); 3168 } 3169 bDone = true; 3170 break; 3171 3172 default: 3173 if (pLanguageBegin != 0 3174 && (!isAlpha(cChar) || ++nAlphaCount > 8)) 3175 pLanguageBegin = 0; 3176 break; 3177 } 3178 } 3179 } 3180 3181 bool bEncodingB = false; 3182 if (bEncodedWord) 3183 { 3184 if (q == pEnd) 3185 bEncodedWord = false; 3186 else 3187 { 3188 switch (*q++) 3189 { 3190 case 'B': 3191 case 'b': 3192 bEncodingB = true; 3193 break; 3194 3195 case 'Q': 3196 case 'q': 3197 bEncodingB = false; 3198 break; 3199 3200 default: 3201 bEncodedWord = false; 3202 break; 3203 } 3204 } 3205 } 3206 3207 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?'; 3208 3209 ByteString sText; 3210 if (bEncodedWord) 3211 { 3212 if (bEncodingB) 3213 { 3214 for (bool bDone = false; !bDone;) 3215 { 3216 if (pEnd - q < 4) 3217 { 3218 bEncodedWord = false; 3219 bDone = true; 3220 } 3221 else 3222 { 3223 bool bFinal = false; 3224 int nCount = 3; 3225 sal_uInt32 nValue = 0; 3226 for (int nShift = 18; nShift >= 0; nShift -= 6) 3227 { 3228 int nWeight = getBase64Weight(*q++); 3229 if (nWeight == -2) 3230 { 3231 bEncodedWord = false; 3232 bDone = true; 3233 break; 3234 } 3235 if (nWeight == -1) 3236 { 3237 if (!bFinal) 3238 { 3239 if (nShift >= 12) 3240 { 3241 bEncodedWord = false; 3242 bDone = true; 3243 break; 3244 } 3245 bFinal = true; 3246 nCount = nShift == 6 ? 1 : 2; 3247 } 3248 } 3249 else 3250 nValue |= nWeight << nShift; 3251 } 3252 if (bEncodedWord) 3253 { 3254 for (int nShift = 16; nCount-- > 0; 3255 nShift -= 8) 3256 sText += sal_Char(nValue >> nShift 3257 & 0xFF); 3258 if (*q == '?') 3259 { 3260 ++q; 3261 bDone = true; 3262 } 3263 if (bFinal && !bDone) 3264 { 3265 bEncodedWord = false; 3266 bDone = true; 3267 } 3268 } 3269 } 3270 } 3271 } 3272 else 3273 { 3274 const sal_Char * pEncodedTextBegin = q; 3275 const sal_Char * pEncodedTextCopyBegin = q; 3276 for (bool bDone = false; !bDone;) 3277 if (q == pEnd) 3278 { 3279 bEncodedWord = false; 3280 bDone = true; 3281 } 3282 else 3283 { 3284 sal_uInt32 nChar = *q++; 3285 switch (nChar) 3286 { 3287 case '=': 3288 { 3289 if (pEnd - q < 2) 3290 { 3291 bEncodedWord = false; 3292 bDone = true; 3293 break; 3294 } 3295 int nDigit1 = getHexWeight(q[0]); 3296 int nDigit2 = getHexWeight(q[1]); 3297 if (nDigit1 < 0 || nDigit2 < 0) 3298 { 3299 bEncodedWord = false; 3300 bDone = true; 3301 break; 3302 } 3303 sText += rBody.Copy( 3304 static_cast< xub_StrLen >( 3305 pEncodedTextCopyBegin - pBegin), 3306 static_cast< xub_StrLen >( 3307 q - 1 - pEncodedTextCopyBegin)); 3308 sText += sal_Char(nDigit1 << 4 | nDigit2); 3309 q += 2; 3310 pEncodedTextCopyBegin = q; 3311 break; 3312 } 3313 3314 case '?': 3315 if (q - pEncodedTextBegin > 1) 3316 sText += rBody.Copy( 3317 static_cast< xub_StrLen >( 3318 pEncodedTextCopyBegin - pBegin), 3319 static_cast< xub_StrLen >( 3320 q - 1 - pEncodedTextCopyBegin)); 3321 else 3322 bEncodedWord = false; 3323 bDone = true; 3324 break; 3325 3326 case '_': 3327 sText += rBody.Copy( 3328 static_cast< xub_StrLen >( 3329 pEncodedTextCopyBegin - pBegin), 3330 static_cast< xub_StrLen >( 3331 q - 1 - pEncodedTextCopyBegin)); 3332 sText += ' '; 3333 pEncodedTextCopyBegin = q; 3334 break; 3335 3336 default: 3337 if (!isVisible(nChar)) 3338 { 3339 bEncodedWord = false; 3340 bDone = true; 3341 } 3342 break; 3343 } 3344 } 3345 } 3346 } 3347 3348 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '='; 3349 3350 // if (bEncodedWord && q != pEnd) 3351 // switch (*q) 3352 // { 3353 // case '\t': 3354 // case ' ': 3355 // case '"': 3356 // case ')': 3357 // case ',': 3358 // case '.': 3359 // case '=': 3360 // break; 3361 // 3362 // default: 3363 // bEncodedWord = false; 3364 // break; 3365 // } 3366 3367 sal_Unicode * pUnicodeBuffer = 0; 3368 sal_Size nUnicodeSize = 0; 3369 if (bEncodedWord) 3370 { 3371 pUnicodeBuffer 3372 = convertToUnicode(sText.GetBuffer(), 3373 sText.GetBuffer() + sText.Len(), 3374 eCharsetEncoding, nUnicodeSize); 3375 if (pUnicodeBuffer == 0) 3376 bEncodedWord = false; 3377 } 3378 3379 if (bEncodedWord) 3380 { 3381 appendISO88591(sDecoded, pCopyBegin, pWSPBegin); 3382 if (eType == HEADER_FIELD_TEXT) 3383 sDecoded.Append( 3384 pUnicodeBuffer, 3385 static_cast< xub_StrLen >(nUnicodeSize)); 3386 else if (nCommentLevel == 0) 3387 { 3388 sEncodedText.Append( 3389 pUnicodeBuffer, 3390 static_cast< xub_StrLen >(nUnicodeSize)); 3391 if (!bQuotedEncodedText) 3392 { 3393 const sal_Unicode * pTextPtr = pUnicodeBuffer; 3394 const sal_Unicode * pTextEnd = pTextPtr 3395 + nUnicodeSize; 3396 for (; pTextPtr != pTextEnd; ++pTextPtr) 3397 if (!isEncodedWordTokenChar(*pTextPtr)) 3398 { 3399 bQuotedEncodedText = true; 3400 break; 3401 } 3402 } 3403 } 3404 else 3405 { 3406 const sal_Unicode * pTextPtr = pUnicodeBuffer; 3407 const sal_Unicode * pTextEnd = pTextPtr + nUnicodeSize; 3408 for (; pTextPtr != pTextEnd; ++pTextPtr) 3409 { 3410 switch (*pTextPtr) 3411 { 3412 case '(': 3413 case ')': 3414 case '\\': 3415 case '\x0D': 3416 case '=': 3417 sDecoded += '\\'; 3418 break; 3419 } 3420 sDecoded += *pTextPtr; 3421 } 3422 } 3423 delete[] pUnicodeBuffer; 3424 p = q; 3425 pCopyBegin = p; 3426 3427 pWSPBegin = p; 3428 while (p != pEnd && isWhiteSpace(*p)) 3429 ++p; 3430 /* bStartEncodedWord = p != pWSPBegin; */ 3431 continue; 3432 } 3433 } 3434 3435 if (sEncodedText.Len() != 0) 3436 { 3437 if (bQuotedEncodedText) 3438 { 3439 sDecoded += '"'; 3440 const sal_Unicode * pTextPtr = sEncodedText.GetBuffer(); 3441 const sal_Unicode * pTextEnd = pTextPtr + sEncodedText.Len(); 3442 for (;pTextPtr != pTextEnd; ++pTextPtr) 3443 { 3444 switch (*pTextPtr) 3445 { 3446 case '"': 3447 case '\\': 3448 case '\x0D': 3449 sDecoded += '\\'; 3450 break; 3451 } 3452 sDecoded += *pTextPtr; 3453 } 3454 sDecoded += '"'; 3455 } 3456 else 3457 sDecoded += sEncodedText; 3458 sEncodedText.Erase(); 3459 bQuotedEncodedText = false; 3460 } 3461 3462 if (p == pEnd) 3463 break; 3464 3465 switch (*p++) 3466 { 3467 // case '\t': 3468 // case ' ': 3469 // case ',': 3470 // case '.': 3471 // case '=': 3472 // bStartEncodedWord = true; 3473 // break; 3474 3475 case '"': 3476 if (eType != HEADER_FIELD_TEXT && nCommentLevel == 0) 3477 { 3478 const sal_Char * pQuotedStringEnd 3479 = skipQuotedString(p - 1, pEnd); 3480 p = pQuotedStringEnd == p - 1 ? pEnd : pQuotedStringEnd; 3481 } 3482 /* bStartEncodedWord = true; */ 3483 break; 3484 3485 case '(': 3486 if (eType != HEADER_FIELD_TEXT) 3487 ++nCommentLevel; 3488 /* bStartEncodedWord = true; */ 3489 break; 3490 3491 case ')': 3492 if (nCommentLevel > 0) 3493 --nCommentLevel; 3494 /* bStartEncodedWord = false; */ 3495 break; 3496 3497 default: 3498 { 3499 const sal_Char * pUTF8Begin = p - 1; 3500 const sal_Char * pUTF8End = pUTF8Begin; 3501 sal_uInt32 nCharacter; 3502 if (translateUTF8Char(pUTF8End, pEnd, RTL_TEXTENCODING_UCS4, 3503 nCharacter)) 3504 { 3505 appendISO88591(sDecoded, pCopyBegin, p - 1); 3506 sal_Unicode aUTF16Buf[2]; 3507 xub_StrLen nUTF16Len = static_cast< xub_StrLen >( 3508 putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf); 3509 sDecoded.Append(aUTF16Buf, nUTF16Len); 3510 p = pUTF8End; 3511 pCopyBegin = p; 3512 } 3513 /* bStartEncodedWord = false; */ 3514 break; 3515 } 3516 } 3517 pWSPBegin = p; 3518 } 3519 3520 appendISO88591(sDecoded, pCopyBegin, pEnd); 3521 return sDecoded; 3522 } 3523 3524 //============================================================================ 3525 // 3526 // INetMIMEOutputSink 3527 // 3528 //============================================================================ 3529 3530 // virtual 3531 sal_Size INetMIMEOutputSink::writeSequence(const sal_Char * pSequence) 3532 { 3533 sal_Size nLength = rtl_str_getLength(pSequence); 3534 writeSequence(pSequence, pSequence + nLength); 3535 return nLength; 3536 } 3537 3538 //============================================================================ 3539 // virtual 3540 void INetMIMEOutputSink::writeSequence(const sal_uInt32 * pBegin, 3541 const sal_uInt32 * pEnd) 3542 { 3543 DBG_ASSERT(pBegin && pBegin <= pEnd, 3544 "INetMIMEOutputSink::writeSequence(): Bad sequence"); 3545 3546 sal_Char * pBufferBegin = new sal_Char[pEnd - pBegin]; 3547 sal_Char * pBufferEnd = pBufferBegin; 3548 while (pBegin != pEnd) 3549 { 3550 DBG_ASSERT(*pBegin < 256, 3551 "INetMIMEOutputSink::writeSequence(): Bad octet"); 3552 *pBufferEnd++ = sal_Char(*pBegin++); 3553 } 3554 writeSequence(pBufferBegin, pBufferEnd); 3555 delete[] pBufferBegin; 3556 } 3557 3558 //============================================================================ 3559 // virtual 3560 void INetMIMEOutputSink::writeSequence(const sal_Unicode * pBegin, 3561 const sal_Unicode * pEnd) 3562 { 3563 DBG_ASSERT(pBegin && pBegin <= pEnd, 3564 "INetMIMEOutputSink::writeSequence(): Bad sequence"); 3565 3566 sal_Char * pBufferBegin = new sal_Char[pEnd - pBegin]; 3567 sal_Char * pBufferEnd = pBufferBegin; 3568 while (pBegin != pEnd) 3569 { 3570 DBG_ASSERT(*pBegin < 256, 3571 "INetMIMEOutputSink::writeSequence(): Bad octet"); 3572 *pBufferEnd++ = sal_Char(*pBegin++); 3573 } 3574 writeSequence(pBufferBegin, pBufferEnd); 3575 delete[] pBufferBegin; 3576 } 3577 3578 //============================================================================ 3579 // virtual 3580 ErrCode INetMIMEOutputSink::getError() const 3581 { 3582 return ERRCODE_NONE; 3583 } 3584 3585 //============================================================================ 3586 void INetMIMEOutputSink::writeLineEnd() 3587 { 3588 static const sal_Char aCRLF[2] = { 0x0D, 0x0A }; 3589 writeSequence(aCRLF, aCRLF + 2); 3590 m_nColumn = 0; 3591 } 3592 3593 //============================================================================ 3594 // 3595 // INetMIMEStringOutputSink 3596 // 3597 //============================================================================ 3598 3599 // virtual 3600 void INetMIMEStringOutputSink::writeSequence(const sal_Char * pBegin, 3601 const sal_Char * pEnd) 3602 { 3603 DBG_ASSERT(pBegin && pBegin <= pEnd, 3604 "INetMIMEStringOutputSink::writeSequence(): Bad sequence"); 3605 3606 m_bOverflow = m_bOverflow 3607 || pEnd - pBegin > STRING_MAXLEN - m_aBuffer.Len(); 3608 if (!m_bOverflow) 3609 m_aBuffer.Append(pBegin, static_cast< xub_StrLen >(pEnd - pBegin)); 3610 } 3611 3612 //============================================================================ 3613 // virtual 3614 ErrCode INetMIMEStringOutputSink::getError() const 3615 { 3616 return m_bOverflow ? ERRCODE_IO_OUTOFMEMORY : ERRCODE_NONE; 3617 } 3618 3619 //============================================================================ 3620 // 3621 // INetMIMEUnicodeOutputSink 3622 // 3623 //============================================================================ 3624 3625 // virtual 3626 void INetMIMEUnicodeOutputSink::writeSequence(const sal_Char * pBegin, 3627 const sal_Char * pEnd) 3628 { 3629 DBG_ASSERT(pBegin && pBegin <= pEnd, 3630 "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence"); 3631 3632 sal_Unicode * pBufferBegin = new sal_Unicode[pEnd - pBegin]; 3633 sal_Unicode * pBufferEnd = pBufferBegin; 3634 while (pBegin != pEnd) 3635 *pBufferEnd++ = sal_uChar(*pBegin++); 3636 writeSequence(pBufferBegin, pBufferEnd); 3637 delete[] pBufferBegin; 3638 } 3639 3640 //============================================================================ 3641 // virtual 3642 void INetMIMEUnicodeOutputSink::writeSequence(const sal_uInt32 * pBegin, 3643 const sal_uInt32 * pEnd) 3644 { 3645 DBG_ASSERT(pBegin && pBegin <= pEnd, 3646 "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence"); 3647 3648 sal_Unicode * pBufferBegin = new sal_Unicode[pEnd - pBegin]; 3649 sal_Unicode * pBufferEnd = pBufferBegin; 3650 while (pBegin != pEnd) 3651 { 3652 DBG_ASSERT(*pBegin < 256, 3653 "INetMIMEOutputSink::writeSequence(): Bad octet"); 3654 *pBufferEnd++ = sal_Unicode(*pBegin++); 3655 } 3656 writeSequence(pBufferBegin, pBufferEnd); 3657 delete[] pBufferBegin; 3658 } 3659 3660 //============================================================================ 3661 // virtual 3662 void INetMIMEUnicodeOutputSink::writeSequence(const sal_Unicode * pBegin, 3663 const sal_Unicode * pEnd) 3664 { 3665 DBG_ASSERT(pBegin && pBegin <= pEnd, 3666 "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence"); 3667 3668 m_bOverflow = m_bOverflow 3669 || pEnd - pBegin > STRING_MAXLEN - m_aBuffer.Len(); 3670 if (!m_bOverflow) 3671 m_aBuffer.Append(pBegin, static_cast< xub_StrLen >(pEnd - pBegin)); 3672 } 3673 3674 //============================================================================ 3675 // virtual 3676 ErrCode INetMIMEUnicodeOutputSink::getError() const 3677 { 3678 return m_bOverflow ? ERRCODE_IO_OUTOFMEMORY : ERRCODE_NONE; 3679 } 3680 3681 //============================================================================ 3682 // 3683 // INetMIMEEncodedWordOutputSink 3684 // 3685 //============================================================================ 3686 3687 static const sal_Char aEscape[128] 3688 = { INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x00 3689 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x01 3690 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x02 3691 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x03 3692 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x04 3693 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x05 3694 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x06 3695 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x07 3696 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x08 3697 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x09 3698 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0A 3699 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0B 3700 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0C 3701 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0D 3702 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0E 3703 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0F 3704 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x10 3705 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x11 3706 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x12 3707 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x13 3708 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x14 3709 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x15 3710 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x16 3711 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x17 3712 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x18 3713 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x19 3714 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1A 3715 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1B 3716 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1C 3717 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1D 3718 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1E 3719 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1F 3720 0, // ' ' 3721 0, // '!' 3722 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '"' 3723 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '#' 3724 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '$' 3725 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '%' 3726 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '&' 3727 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ''' 3728 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '(' 3729 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ')' 3730 0, // '*' 3731 0, // '+' 3732 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ',' 3733 0, // '-' 3734 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '.' 3735 0, // '/' 3736 0, // '0' 3737 0, // '1' 3738 0, // '2' 3739 0, // '3' 3740 0, // '4' 3741 0, // '5' 3742 0, // '6' 3743 0, // '7' 3744 0, // '8' 3745 0, // '9' 3746 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ':' 3747 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ';' 3748 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '<' 3749 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '=' 3750 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '>' 3751 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '?' 3752 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '@' 3753 0, // 'A' 3754 0, // 'B' 3755 0, // 'C' 3756 0, // 'D' 3757 0, // 'E' 3758 0, // 'F' 3759 0, // 'G' 3760 0, // 'H' 3761 0, // 'I' 3762 0, // 'J' 3763 0, // 'K' 3764 0, // 'L' 3765 0, // 'M' 3766 0, // 'N' 3767 0, // 'O' 3768 0, // 'P' 3769 0, // 'Q' 3770 0, // 'R' 3771 0, // 'S' 3772 0, // 'T' 3773 0, // 'U' 3774 0, // 'V' 3775 0, // 'W' 3776 0, // 'X' 3777 0, // 'Y' 3778 0, // 'Z' 3779 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '[' 3780 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '\' 3781 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ']' 3782 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '^' 3783 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '_' 3784 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '`' 3785 0, // 'a' 3786 0, // 'b' 3787 0, // 'c' 3788 0, // 'd' 3789 0, // 'e' 3790 0, // 'f' 3791 0, // 'g' 3792 0, // 'h' 3793 0, // 'i' 3794 0, // 'j' 3795 0, // 'k' 3796 0, // 'l' 3797 0, // 'm' 3798 0, // 'n' 3799 0, // 'o' 3800 0, // 'p' 3801 0, // 'q' 3802 0, // 'r' 3803 0, // 's' 3804 0, // 't' 3805 0, // 'u' 3806 0, // 'v' 3807 0, // 'w' 3808 0, // 'x' 3809 0, // 'y' 3810 0, // 'z' 3811 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '{' 3812 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '|' 3813 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '}' 3814 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '~' 3815 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE }; // DEL 3816 3817 inline bool 3818 INetMIMEEncodedWordOutputSink::needsEncodedWordEscape(sal_uInt32 nChar) const 3819 { 3820 return !INetMIME::isUSASCII(nChar) || aEscape[nChar] & m_eContext; 3821 } 3822 3823 //============================================================================ 3824 void INetMIMEEncodedWordOutputSink::finish(bool bWriteTrailer) 3825 { 3826 if (m_eInitialSpace == SPACE_ALWAYS && m_nExtraSpaces == 0) 3827 m_nExtraSpaces = 1; 3828 3829 if (m_eEncodedWordState == STATE_SECOND_EQUALS) 3830 { 3831 // If the text is already an encoded word, copy it verbatim: 3832 sal_uInt32 nSize = m_pBufferEnd - m_pBuffer; 3833 switch (m_ePrevCoding) 3834 { 3835 case CODING_QUOTED: 3836 m_rSink << '"'; 3837 case CODING_NONE: 3838 if (m_eInitialSpace == SPACE_ENCODED && m_nExtraSpaces == 0) 3839 m_nExtraSpaces = 1; 3840 for (; m_nExtraSpaces > 1; --m_nExtraSpaces) 3841 { 3842 if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit()) 3843 m_rSink << INetMIMEOutputSink::endl; 3844 m_rSink << ' '; 3845 } 3846 if (m_nExtraSpaces == 1) 3847 { 3848 if (m_rSink.getColumn() + nSize 3849 >= m_rSink.getLineLengthLimit()) 3850 m_rSink << INetMIMEOutputSink::endl; 3851 m_rSink << ' '; 3852 } 3853 break; 3854 3855 case CODING_ENCODED: 3856 { 3857 const sal_Char * pCharsetName 3858 = INetMIME::getCharsetName(m_ePrevMIMEEncoding); 3859 while (m_nExtraSpaces-- > 0) 3860 { 3861 if (m_rSink.getColumn() 3862 > m_rSink.getLineLengthLimit() - 3) 3863 m_rSink << "?=" << INetMIMEOutputSink::endl << " =?" 3864 << pCharsetName << "?Q?"; 3865 m_rSink << '_'; 3866 } 3867 m_rSink << "?="; 3868 } 3869 case CODING_ENCODED_TERMINATED: 3870 if (m_rSink.getColumn() + nSize 3871 > m_rSink.getLineLengthLimit() - 1) 3872 m_rSink << INetMIMEOutputSink::endl; 3873 m_rSink << ' '; 3874 break; 3875 } 3876 m_rSink.write(m_pBuffer, m_pBufferEnd); 3877 m_eCoding = CODING_ENCODED_TERMINATED; 3878 } 3879 else 3880 { 3881 // If the text itself is too long to fit into a single line, make it 3882 // into multiple encoded words: 3883 switch (m_eCoding) 3884 { 3885 case CODING_NONE: 3886 if (m_nExtraSpaces == 0) 3887 { 3888 DBG_ASSERT(m_ePrevCoding == CODING_NONE 3889 || m_pBuffer == m_pBufferEnd, 3890 "INetMIMEEncodedWordOutputSink::finish():" 3891 " Bad state"); 3892 if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer) 3893 > m_rSink.getLineLengthLimit()) 3894 m_eCoding = CODING_ENCODED; 3895 } 3896 else 3897 { 3898 OSL_ASSERT(m_pBufferEnd >= m_pBuffer); 3899 if (static_cast< std::size_t >(m_pBufferEnd - m_pBuffer) 3900 > m_rSink.getLineLengthLimit() - 1) 3901 { 3902 m_eCoding = CODING_ENCODED; 3903 } 3904 } 3905 break; 3906 3907 case CODING_QUOTED: 3908 if (m_nExtraSpaces == 0) 3909 { 3910 DBG_ASSERT(m_ePrevCoding == CODING_NONE, 3911 "INetMIMEEncodedWordOutputSink::finish():" 3912 " Bad state"); 3913 if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer) 3914 + m_nQuotedEscaped 3915 > m_rSink.getLineLengthLimit() - 2) 3916 m_eCoding = CODING_ENCODED; 3917 } 3918 else if ((m_pBufferEnd - m_pBuffer) + m_nQuotedEscaped 3919 > m_rSink.getLineLengthLimit() - 3) 3920 m_eCoding = CODING_ENCODED; 3921 break; 3922 3923 default: 3924 break; 3925 } 3926 3927 switch (m_eCoding) 3928 { 3929 case CODING_NONE: 3930 switch (m_ePrevCoding) 3931 { 3932 case CODING_QUOTED: 3933 if (m_rSink.getColumn() + m_nExtraSpaces 3934 + (m_pBufferEnd - m_pBuffer) 3935 < m_rSink.getLineLengthLimit()) 3936 m_eCoding = CODING_QUOTED; 3937 else 3938 m_rSink << '"'; 3939 break; 3940 3941 case CODING_ENCODED: 3942 m_rSink << "?="; 3943 break; 3944 3945 default: 3946 break; 3947 } 3948 for (; m_nExtraSpaces > 1; --m_nExtraSpaces) 3949 { 3950 if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit()) 3951 m_rSink << INetMIMEOutputSink::endl; 3952 m_rSink << ' '; 3953 } 3954 if (m_nExtraSpaces == 1) 3955 { 3956 if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer) 3957 >= m_rSink.getLineLengthLimit()) 3958 m_rSink << INetMIMEOutputSink::endl; 3959 m_rSink << ' '; 3960 } 3961 m_rSink.write(m_pBuffer, m_pBufferEnd); 3962 if (m_eCoding == CODING_QUOTED && bWriteTrailer) 3963 { 3964 m_rSink << '"'; 3965 m_eCoding = CODING_NONE; 3966 } 3967 break; 3968 3969 case CODING_QUOTED: 3970 { 3971 bool bInsertLeadingQuote = true; 3972 sal_uInt32 nSize = (m_pBufferEnd - m_pBuffer) 3973 + m_nQuotedEscaped + 2; 3974 switch (m_ePrevCoding) 3975 { 3976 case CODING_QUOTED: 3977 if (m_rSink.getColumn() + m_nExtraSpaces + nSize - 1 3978 < m_rSink.getLineLengthLimit()) 3979 { 3980 bInsertLeadingQuote = false; 3981 --nSize; 3982 } 3983 else 3984 m_rSink << '"'; 3985 break; 3986 3987 case CODING_ENCODED: 3988 m_rSink << "?="; 3989 break; 3990 3991 default: 3992 break; 3993 } 3994 for (; m_nExtraSpaces > 1; --m_nExtraSpaces) 3995 { 3996 if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit()) 3997 m_rSink << INetMIMEOutputSink::endl; 3998 m_rSink << ' '; 3999 } 4000 if (m_nExtraSpaces == 1) 4001 { 4002 if (m_rSink.getColumn() + nSize 4003 >= m_rSink.getLineLengthLimit()) 4004 m_rSink << INetMIMEOutputSink::endl; 4005 m_rSink << ' '; 4006 } 4007 if (bInsertLeadingQuote) 4008 m_rSink << '"'; 4009 for (const sal_Unicode * p = m_pBuffer; p != m_pBufferEnd; 4010 ++p) 4011 { 4012 if (INetMIME::needsQuotedStringEscape(*p)) 4013 m_rSink << '\\'; 4014 m_rSink << sal_Char(*p); 4015 } 4016 if (bWriteTrailer) 4017 { 4018 m_rSink << '"'; 4019 m_eCoding = CODING_NONE; 4020 } 4021 break; 4022 } 4023 4024 case CODING_ENCODED: 4025 { 4026 rtl_TextEncoding eCharsetEncoding 4027 = m_pEncodingList-> 4028 getPreferredEncoding(RTL_TEXTENCODING_UTF8); 4029 rtl_TextEncoding eMIMEEncoding 4030 = INetMIME::translateToMIME(eCharsetEncoding); 4031 4032 // The non UTF-8 code will only work for stateless single byte 4033 // character encodings (see also below): 4034 sal_Char * pTargetBuffer = NULL; 4035 sal_Size nTargetSize = 0; 4036 sal_uInt32 nSize; 4037 if (eMIMEEncoding == RTL_TEXTENCODING_UTF8) 4038 { 4039 nSize = 0; 4040 for (sal_Unicode const * p = m_pBuffer; 4041 p != m_pBufferEnd;) 4042 { 4043 sal_uInt32 nUTF32 4044 = INetMIME::getUTF32Character(p, m_pBufferEnd); 4045 nSize += needsEncodedWordEscape(nUTF32) ? 4046 3 * INetMIME::getUTF8OctetCount(nUTF32) : 4047 1; 4048 // only US-ASCII characters (that are converted to 4049 // a single byte by UTF-8) need no encoded word 4050 // escapes... 4051 } 4052 } 4053 else 4054 { 4055 rtl_UnicodeToTextConverter hConverter 4056 = rtl_createUnicodeToTextConverter(eCharsetEncoding); 4057 rtl_UnicodeToTextContext hContext 4058 = rtl_createUnicodeToTextContext(hConverter); 4059 for (sal_Size nBufferSize = m_pBufferEnd - m_pBuffer;; 4060 nBufferSize += nBufferSize / 3 + 1) 4061 { 4062 pTargetBuffer = new sal_Char[nBufferSize]; 4063 sal_uInt32 nInfo; 4064 sal_Size nSrcCvtBytes; 4065 nTargetSize 4066 = rtl_convertUnicodeToText( 4067 hConverter, hContext, m_pBuffer, 4068 m_pBufferEnd - m_pBuffer, pTargetBuffer, 4069 nBufferSize, 4070 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE 4071 | RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE, 4072 &nInfo, &nSrcCvtBytes); 4073 if (!(nInfo 4074 & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) 4075 break; 4076 delete[] pTargetBuffer; 4077 pTargetBuffer = NULL; 4078 rtl_resetUnicodeToTextContext(hConverter, hContext); 4079 } 4080 rtl_destroyUnicodeToTextContext(hConverter, hContext); 4081 rtl_destroyUnicodeToTextConverter(hConverter); 4082 4083 nSize = nTargetSize; 4084 for (sal_Size k = 0; k < nTargetSize; ++k) 4085 if (needsEncodedWordEscape(sal_uChar( 4086 pTargetBuffer[k]))) 4087 nSize += 2; 4088 } 4089 4090 const sal_Char * pCharsetName 4091 = INetMIME::getCharsetName(eMIMEEncoding); 4092 sal_uInt32 nWrapperSize = rtl_str_getLength(pCharsetName) + 7; 4093 // '=?', '?Q?', '?=' 4094 4095 switch (m_ePrevCoding) 4096 { 4097 case CODING_QUOTED: 4098 m_rSink << '"'; 4099 case CODING_NONE: 4100 if (m_eInitialSpace == SPACE_ENCODED 4101 && m_nExtraSpaces == 0) 4102 m_nExtraSpaces = 1; 4103 nSize += nWrapperSize; 4104 for (; m_nExtraSpaces > 1; --m_nExtraSpaces) 4105 { 4106 if (m_rSink.getColumn() 4107 >= m_rSink.getLineLengthLimit()) 4108 m_rSink << INetMIMEOutputSink::endl; 4109 m_rSink << ' '; 4110 } 4111 if (m_nExtraSpaces == 1) 4112 { 4113 if (m_rSink.getColumn() + nSize 4114 >= m_rSink.getLineLengthLimit()) 4115 m_rSink << INetMIMEOutputSink::endl; 4116 m_rSink << ' '; 4117 } 4118 m_rSink << "=?" << pCharsetName << "?Q?"; 4119 break; 4120 4121 case CODING_ENCODED: 4122 if (m_ePrevMIMEEncoding != eMIMEEncoding 4123 || m_rSink.getColumn() + m_nExtraSpaces + nSize 4124 > m_rSink.getLineLengthLimit() - 2) 4125 { 4126 m_rSink << "?="; 4127 if (m_rSink.getColumn() + nWrapperSize 4128 + m_nExtraSpaces + nSize 4129 > m_rSink.getLineLengthLimit() - 1) 4130 m_rSink << INetMIMEOutputSink::endl; 4131 m_rSink << " =?" << pCharsetName << "?Q?"; 4132 } 4133 while (m_nExtraSpaces-- > 0) 4134 { 4135 if (m_rSink.getColumn() 4136 > m_rSink.getLineLengthLimit() - 3) 4137 m_rSink << "?=" << INetMIMEOutputSink::endl 4138 << " =?" << pCharsetName << "?Q?"; 4139 m_rSink << '_'; 4140 } 4141 break; 4142 4143 case CODING_ENCODED_TERMINATED: 4144 if (m_rSink.getColumn() + nWrapperSize 4145 + m_nExtraSpaces + nSize 4146 > m_rSink.getLineLengthLimit() - 1) 4147 m_rSink << INetMIMEOutputSink::endl; 4148 m_rSink << " =?" << pCharsetName << "?Q?"; 4149 while (m_nExtraSpaces-- > 0) 4150 { 4151 if (m_rSink.getColumn() 4152 > m_rSink.getLineLengthLimit() - 3) 4153 m_rSink << "?=" << INetMIMEOutputSink::endl 4154 << " =?" << pCharsetName << "?Q?"; 4155 m_rSink << '_'; 4156 } 4157 break; 4158 } 4159 4160 // The non UTF-8 code will only work for stateless single byte 4161 // character encodings (see also above): 4162 if (eMIMEEncoding == RTL_TEXTENCODING_UTF8) 4163 { 4164 bool bInitial = true; 4165 for (sal_Unicode const * p = m_pBuffer; 4166 p != m_pBufferEnd;) 4167 { 4168 sal_uInt32 nUTF32 4169 = INetMIME::getUTF32Character(p, m_pBufferEnd); 4170 bool bEscape = needsEncodedWordEscape(nUTF32); 4171 sal_uInt32 nWidth 4172 = bEscape ? 4173 3 * INetMIME::getUTF8OctetCount(nUTF32) : 1; 4174 // only US-ASCII characters (that are converted to 4175 // a single byte by UTF-8) need no encoded word 4176 // escapes... 4177 if (!bInitial 4178 && m_rSink.getColumn() + nWidth + 2 4179 > m_rSink.getLineLengthLimit()) 4180 m_rSink << "?=" << INetMIMEOutputSink::endl 4181 << " =?" << pCharsetName << "?Q?"; 4182 if (bEscape) 4183 { 4184 DBG_ASSERT( 4185 nUTF32 < 0x10FFFF, 4186 "INetMIMEEncodedWordOutputSink::finish():" 4187 " Bad char"); 4188 if (nUTF32 < 0x80) 4189 INetMIME::writeEscapeSequence(m_rSink, 4190 nUTF32); 4191 else if (nUTF32 < 0x800) 4192 { 4193 INetMIME::writeEscapeSequence(m_rSink, 4194 (nUTF32 >> 6) 4195 | 0xC0); 4196 INetMIME::writeEscapeSequence(m_rSink, 4197 (nUTF32 & 0x3F) 4198 | 0x80); 4199 } 4200 else if (nUTF32 < 0x10000) 4201 { 4202 INetMIME::writeEscapeSequence(m_rSink, 4203 (nUTF32 >> 12) 4204 | 0xE0); 4205 INetMIME::writeEscapeSequence(m_rSink, 4206 ((nUTF32 >> 6) 4207 & 0x3F) 4208 | 0x80); 4209 INetMIME::writeEscapeSequence(m_rSink, 4210 (nUTF32 & 0x3F) 4211 | 0x80); 4212 } 4213 else 4214 { 4215 INetMIME::writeEscapeSequence(m_rSink, 4216 (nUTF32 >> 18) 4217 | 0xF0); 4218 INetMIME::writeEscapeSequence(m_rSink, 4219 ((nUTF32 >> 12) 4220 & 0x3F) 4221 | 0x80); 4222 INetMIME::writeEscapeSequence(m_rSink, 4223 ((nUTF32 >> 6) 4224 & 0x3F) 4225 | 0x80); 4226 INetMIME::writeEscapeSequence(m_rSink, 4227 (nUTF32 & 0x3F) 4228 | 0x80); 4229 } 4230 } 4231 else 4232 m_rSink << sal_Char(nUTF32); 4233 bInitial = false; 4234 } 4235 } 4236 else 4237 { 4238 for (sal_Size k = 0; k < nTargetSize; ++k) 4239 { 4240 sal_uInt32 nUCS4 = sal_uChar(pTargetBuffer[k]); 4241 bool bEscape = needsEncodedWordEscape(nUCS4); 4242 if (k > 0 4243 && m_rSink.getColumn() + (bEscape ? 5 : 3) 4244 > m_rSink.getLineLengthLimit()) 4245 m_rSink << "?=" << INetMIMEOutputSink::endl 4246 << " =?" << pCharsetName << "?Q?"; 4247 if (bEscape) 4248 INetMIME::writeEscapeSequence(m_rSink, nUCS4); 4249 else 4250 m_rSink << sal_Char(nUCS4); 4251 } 4252 delete[] pTargetBuffer; 4253 } 4254 4255 if (bWriteTrailer) 4256 { 4257 m_rSink << "?="; 4258 m_eCoding = CODING_ENCODED_TERMINATED; 4259 } 4260 4261 m_ePrevMIMEEncoding = eMIMEEncoding; 4262 break; 4263 } 4264 4265 default: 4266 OSL_ASSERT(false); 4267 break; 4268 } 4269 } 4270 4271 m_eInitialSpace = SPACE_NO; 4272 m_nExtraSpaces = 0; 4273 m_pEncodingList->reset(); 4274 m_pBufferEnd = m_pBuffer; 4275 m_ePrevCoding = m_eCoding; 4276 m_eCoding = CODING_NONE; 4277 m_nQuotedEscaped = 0; 4278 m_eEncodedWordState = STATE_INITIAL; 4279 } 4280 4281 //============================================================================ 4282 INetMIMEEncodedWordOutputSink::~INetMIMEEncodedWordOutputSink() 4283 { 4284 rtl_freeMemory(m_pBuffer); 4285 delete m_pEncodingList; 4286 } 4287 4288 //============================================================================ 4289 INetMIMEEncodedWordOutputSink & 4290 INetMIMEEncodedWordOutputSink::operator <<(sal_uInt32 nChar) 4291 { 4292 if (nChar == ' ') 4293 { 4294 if (m_pBufferEnd != m_pBuffer) 4295 finish(false); 4296 ++m_nExtraSpaces; 4297 } 4298 else 4299 { 4300 // Check for an already encoded word: 4301 switch (m_eEncodedWordState) 4302 { 4303 case STATE_INITIAL: 4304 if (nChar == '=') 4305 m_eEncodedWordState = STATE_FIRST_EQUALS; 4306 else 4307 m_eEncodedWordState = STATE_BAD; 4308 break; 4309 4310 case STATE_FIRST_EQUALS: 4311 if (nChar == '?') 4312 m_eEncodedWordState = STATE_FIRST_EQUALS; 4313 else 4314 m_eEncodedWordState = STATE_BAD; 4315 break; 4316 4317 case STATE_FIRST_QUESTION: 4318 if (INetMIME::isEncodedWordTokenChar(nChar)) 4319 m_eEncodedWordState = STATE_CHARSET; 4320 else 4321 m_eEncodedWordState = STATE_BAD; 4322 break; 4323 4324 case STATE_CHARSET: 4325 if (nChar == '?') 4326 m_eEncodedWordState = STATE_SECOND_QUESTION; 4327 else if (!INetMIME::isEncodedWordTokenChar(nChar)) 4328 m_eEncodedWordState = STATE_BAD; 4329 break; 4330 4331 case STATE_SECOND_QUESTION: 4332 if (nChar == 'B' || nChar == 'Q' 4333 || nChar == 'b' || nChar == 'q') 4334 m_eEncodedWordState = STATE_ENCODING; 4335 else 4336 m_eEncodedWordState = STATE_BAD; 4337 break; 4338 4339 case STATE_ENCODING: 4340 if (nChar == '?') 4341 m_eEncodedWordState = STATE_THIRD_QUESTION; 4342 else 4343 m_eEncodedWordState = STATE_BAD; 4344 break; 4345 4346 case STATE_THIRD_QUESTION: 4347 if (INetMIME::isVisible(nChar) && nChar != '?') 4348 m_eEncodedWordState = STATE_ENCODED_TEXT; 4349 else 4350 m_eEncodedWordState = STATE_BAD; 4351 break; 4352 4353 case STATE_ENCODED_TEXT: 4354 if (nChar == '?') 4355 m_eEncodedWordState = STATE_FOURTH_QUESTION; 4356 else if (!INetMIME::isVisible(nChar)) 4357 m_eEncodedWordState = STATE_BAD; 4358 break; 4359 4360 case STATE_FOURTH_QUESTION: 4361 if (nChar == '=') 4362 m_eEncodedWordState = STATE_SECOND_EQUALS; 4363 else 4364 m_eEncodedWordState = STATE_BAD; 4365 break; 4366 4367 case STATE_SECOND_EQUALS: 4368 m_eEncodedWordState = STATE_BAD; 4369 break; 4370 4371 case STATE_BAD: 4372 break; 4373 } 4374 4375 // Update encoding: 4376 m_pEncodingList->includes(nChar); 4377 4378 // Update coding: 4379 enum { TENQ = 1, // CONTEXT_TEXT, CODING_ENCODED 4380 CENQ = 2, // CONTEXT_COMMENT, CODING_ENCODED 4381 PQTD = 4, // CONTEXT_PHRASE, CODING_QUOTED 4382 PENQ = 8 }; // CONTEXT_PHRASE, CODING_ENCODED 4383 static const sal_Char aMinimal[128] 4384 = { TENQ | CENQ | PENQ, // 0x00 4385 TENQ | CENQ | PENQ, // 0x01 4386 TENQ | CENQ | PENQ, // 0x02 4387 TENQ | CENQ | PENQ, // 0x03 4388 TENQ | CENQ | PENQ, // 0x04 4389 TENQ | CENQ | PENQ, // 0x05 4390 TENQ | CENQ | PENQ, // 0x06 4391 TENQ | CENQ | PENQ, // 0x07 4392 TENQ | CENQ | PENQ, // 0x08 4393 TENQ | CENQ | PENQ, // 0x09 4394 TENQ | CENQ | PENQ, // 0x0A 4395 TENQ | CENQ | PENQ, // 0x0B 4396 TENQ | CENQ | PENQ, // 0x0C 4397 TENQ | CENQ | PENQ, // 0x0D 4398 TENQ | CENQ | PENQ, // 0x0E 4399 TENQ | CENQ | PENQ, // 0x0F 4400 TENQ | CENQ | PENQ, // 0x10 4401 TENQ | CENQ | PENQ, // 0x11 4402 TENQ | CENQ | PENQ, // 0x12 4403 TENQ | CENQ | PENQ, // 0x13 4404 TENQ | CENQ | PENQ, // 0x14 4405 TENQ | CENQ | PENQ, // 0x15 4406 TENQ | CENQ | PENQ, // 0x16 4407 TENQ | CENQ | PENQ, // 0x17 4408 TENQ | CENQ | PENQ, // 0x18 4409 TENQ | CENQ | PENQ, // 0x19 4410 TENQ | CENQ | PENQ, // 0x1A 4411 TENQ | CENQ | PENQ, // 0x1B 4412 TENQ | CENQ | PENQ, // 0x1C 4413 TENQ | CENQ | PENQ, // 0x1D 4414 TENQ | CENQ | PENQ, // 0x1E 4415 TENQ | CENQ | PENQ, // 0x1F 4416 0, // ' ' 4417 0, // '!' 4418 PQTD , // '"' 4419 0, // '#' 4420 0, // '$' 4421 0, // '%' 4422 0, // '&' 4423 0, // ''' 4424 CENQ | PQTD , // '(' 4425 CENQ | PQTD , // ')' 4426 0, // '*' 4427 0, // '+' 4428 PQTD , // ',' 4429 0, // '-' 4430 PQTD , // '.' 4431 0, // '/' 4432 0, // '0' 4433 0, // '1' 4434 0, // '2' 4435 0, // '3' 4436 0, // '4' 4437 0, // '5' 4438 0, // '6' 4439 0, // '7' 4440 0, // '8' 4441 0, // '9' 4442 PQTD , // ':' 4443 PQTD , // ';' 4444 PQTD , // '<' 4445 0, // '=' 4446 PQTD , // '>' 4447 0, // '?' 4448 PQTD , // '@' 4449 0, // 'A' 4450 0, // 'B' 4451 0, // 'C' 4452 0, // 'D' 4453 0, // 'E' 4454 0, // 'F' 4455 0, // 'G' 4456 0, // 'H' 4457 0, // 'I' 4458 0, // 'J' 4459 0, // 'K' 4460 0, // 'L' 4461 0, // 'M' 4462 0, // 'N' 4463 0, // 'O' 4464 0, // 'P' 4465 0, // 'Q' 4466 0, // 'R' 4467 0, // 'S' 4468 0, // 'T' 4469 0, // 'U' 4470 0, // 'V' 4471 0, // 'W' 4472 0, // 'X' 4473 0, // 'Y' 4474 0, // 'Z' 4475 PQTD , // '[' 4476 CENQ | PQTD , // '\' 4477 PQTD , // ']' 4478 0, // '^' 4479 0, // '_' 4480 0, // '`' 4481 0, // 'a' 4482 0, // 'b' 4483 0, // 'c' 4484 0, // 'd' 4485 0, // 'e' 4486 0, // 'f' 4487 0, // 'g' 4488 0, // 'h' 4489 0, // 'i' 4490 0, // 'j' 4491 0, // 'k' 4492 0, // 'l' 4493 0, // 'm' 4494 0, // 'n' 4495 0, // 'o' 4496 0, // 'p' 4497 0, // 'q' 4498 0, // 'r' 4499 0, // 's' 4500 0, // 't' 4501 0, // 'u' 4502 0, // 'v' 4503 0, // 'w' 4504 0, // 'x' 4505 0, // 'y' 4506 0, // 'z' 4507 0, // '{' 4508 0, // '|' 4509 0, // '}' 4510 0, // '~' 4511 TENQ | CENQ | PENQ }; // DEL 4512 Coding eNewCoding = !INetMIME::isUSASCII(nChar) ? CODING_ENCODED : 4513 m_eContext == CONTEXT_PHRASE ? 4514 Coding(aMinimal[nChar] >> 2) : 4515 aMinimal[nChar] & m_eContext ? CODING_ENCODED : 4516 CODING_NONE; 4517 if (eNewCoding > m_eCoding) 4518 m_eCoding = eNewCoding; 4519 if (m_eCoding == CODING_QUOTED 4520 && INetMIME::needsQuotedStringEscape(nChar)) 4521 ++m_nQuotedEscaped; 4522 4523 // Append to buffer: 4524 if (sal_uInt32(m_pBufferEnd - m_pBuffer) == m_nBufferSize) 4525 { 4526 m_pBuffer 4527 = static_cast< sal_Unicode * >( 4528 rtl_reallocateMemory(m_pBuffer, 4529 (m_nBufferSize + BUFFER_SIZE) 4530 * sizeof (sal_Unicode))); 4531 m_pBufferEnd = m_pBuffer + m_nBufferSize; 4532 m_nBufferSize += BUFFER_SIZE; 4533 } 4534 *m_pBufferEnd++ = sal_Unicode(nChar); 4535 } 4536 return *this; 4537 } 4538 4539 //============================================================================ 4540 // 4541 // INetContentTypeParameterList 4542 // 4543 //============================================================================ 4544 4545 void INetContentTypeParameterList::Clear() 4546 { 4547 while (Count() > 0) 4548 delete static_cast< INetContentTypeParameter * >(Remove(Count() - 1)); 4549 } 4550 4551 //============================================================================ 4552 const INetContentTypeParameter * 4553 INetContentTypeParameterList::find(const ByteString & rAttribute) const 4554 { 4555 for (sal_uIntPtr i = 0; i < Count(); ++i) 4556 { 4557 const INetContentTypeParameter * pParameter = GetObject(i); 4558 if (pParameter->m_sAttribute.EqualsIgnoreCaseAscii(rAttribute)) 4559 return pParameter; 4560 } 4561 return 0; 4562 } 4563 4564