1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_tools.hxx"
26
27 #include <cstddef>
28 #include <limits>
29
30 #include "rtl/tencinfo.h"
31 #include <tools/datetime.hxx>
32 #include <tools/inetmime.hxx>
33
34 namespace unnamed_tools_inetmime {} using namespace unnamed_tools_inetmime;
35 // unnamed namespaces don't work well yet
36
37 //============================================================================
38 namespace unnamed_tools_inetmime {
39
40 class Charset
41 {
42 rtl_TextEncoding m_eEncoding;
43 const sal_uInt32 * m_pRanges;
44
45 public:
46 inline Charset(rtl_TextEncoding eTheEncoding,
47 const sal_uInt32 * pTheRanges);
48
getEncoding() const49 rtl_TextEncoding getEncoding() const { return m_eEncoding; }
50
51 bool contains(sal_uInt32 nChar) const;
52 };
53
Charset(rtl_TextEncoding eTheEncoding,const sal_uInt32 * pTheRanges)54 inline Charset::Charset(rtl_TextEncoding eTheEncoding,
55 const sal_uInt32 * pTheRanges):
56 m_eEncoding(eTheEncoding),
57 m_pRanges(pTheRanges)
58 {
59 DBG_ASSERT(m_pRanges, "Charset::Charset(): Bad ranges");
60 }
61
62 //============================================================================
63 void appendISO88591(UniString & rText, sal_Char const * pBegin,
64 sal_Char const * pEnd);
65
66 }
67
68 //============================================================================
69 class INetMIMECharsetList_Impl
70 {
71 struct Node
72 {
73 Charset m_aCharset;
74 bool m_bDisabled;
75 Node * m_pNext;
76
77 inline Node(const Charset & rTheCharset, bool bTheDisabled,
78 Node * pTheNext);
79 };
80
81 Node * m_pFirst;
82
83 public:
INetMIMECharsetList_Impl()84 INetMIMECharsetList_Impl(): m_pFirst(0) {}
85
86 ~INetMIMECharsetList_Impl();
87
prepend(const Charset & rCharset)88 void prepend(const Charset & rCharset)
89 { m_pFirst = new Node(rCharset, false, m_pFirst); }
90
91 void includes(sal_uInt32 nChar);
92
93 rtl_TextEncoding getPreferredEncoding(rtl_TextEncoding eDefault
94 = RTL_TEXTENCODING_DONTKNOW)
95 const;
96
97 void reset();
98 };
99
Node(const Charset & rTheCharset,bool bTheDisabled,Node * pTheNext)100 inline INetMIMECharsetList_Impl::Node::Node(const Charset & rTheCharset,
101 bool bTheDisabled,
102 Node * pTheNext):
103 m_aCharset(rTheCharset),
104 m_bDisabled(bTheDisabled),
105 m_pNext(pTheNext)
106 {}
107
108 //============================================================================
109 namespace unnamed_tools_inetmime {
110
111 struct Parameter
112 {
113 Parameter * m_pNext;
114 ByteString m_aAttribute;
115 ByteString m_aCharset;
116 ByteString m_aLanguage;
117 ByteString m_aValue;
118 sal_uInt32 m_nSection;
119 bool m_bExtended;
120
121 inline Parameter(Parameter * pTheNext, ByteString const & rTheAttribute,
122 ByteString const & rTheCharset,
123 ByteString const & rTheLanguage,
124 ByteString const & rTheValue, sal_uInt32 nTheSection,
125 bool bTheExtended);
126 };
127
Parameter(Parameter * pTheNext,ByteString const & rTheAttribute,ByteString const & rTheCharset,ByteString const & rTheLanguage,ByteString const & rTheValue,sal_uInt32 nTheSection,bool bTheExtended)128 inline Parameter::Parameter(Parameter * pTheNext,
129 ByteString const & rTheAttribute,
130 ByteString const & rTheCharset,
131 ByteString const & rTheLanguage,
132 ByteString const & rTheValue,
133 sal_uInt32 nTheSection, bool bTheExtended):
134 m_pNext(pTheNext),
135 m_aAttribute(rTheAttribute),
136 m_aCharset(rTheCharset),
137 m_aLanguage(rTheLanguage),
138 m_aValue(rTheValue),
139 m_nSection(nTheSection),
140 m_bExtended(bTheExtended)
141 {}
142
143 //============================================================================
144 struct ParameterList
145 {
146 Parameter * m_pList;
147
ParameterListunnamed_tools_inetmime::ParameterList148 ParameterList(): m_pList(0) {}
149
150 inline ~ParameterList();
151
152 Parameter ** find(ByteString const & rAttribute, sal_uInt32 nSection,
153 bool & rPresent);
154 };
155
~ParameterList()156 inline ParameterList::~ParameterList()
157 {
158 while (m_pList)
159 {
160 Parameter * pNext = m_pList->m_pNext;
161 delete m_pList;
162 m_pList = pNext;
163 }
164 }
165
166 //============================================================================
167 bool parseParameters(ParameterList const & rInput,
168 INetContentTypeParameterList * pOutput);
169
170 }
171
172 //============================================================================
173 //
174 // Charset
175 //
176 //============================================================================
177
contains(sal_uInt32 nChar) const178 bool Charset::contains(sal_uInt32 nChar) const
179 {
180 for (const sal_uInt32 * p = m_pRanges;;)
181 {
182 if (nChar < *p++)
183 return false;
184 if (nChar <= *p++)
185 return true;
186 }
187 }
188
189 //============================================================================
190 //
191 // appendISO88591
192 //
193 //============================================================================
194
195 namespace unnamed_tools_inetmime {
196
appendISO88591(UniString & rText,sal_Char const * pBegin,sal_Char const * pEnd)197 void appendISO88591(UniString & rText, sal_Char const * pBegin,
198 sal_Char const * pEnd)
199 {
200 xub_StrLen nLength = static_cast< xub_StrLen >(pEnd - pBegin);
201 sal_Unicode * pBuffer = new sal_Unicode[nLength];
202 for (sal_Unicode * p = pBuffer; pBegin != pEnd;)
203 *p++ = sal_uChar(*pBegin++);
204 rText.Append(pBuffer, nLength);
205 delete[] pBuffer;
206 }
207
208 }
209
210 //============================================================================
211 //
212 // INetMIMECharsetList_Impl
213 //
214 //============================================================================
215
~INetMIMECharsetList_Impl()216 INetMIMECharsetList_Impl::~INetMIMECharsetList_Impl()
217 {
218 while (m_pFirst)
219 {
220 Node * pRemove = m_pFirst;
221 m_pFirst = m_pFirst->m_pNext;
222 delete pRemove;
223 }
224 }
225
226 //============================================================================
includes(sal_uInt32 nChar)227 void INetMIMECharsetList_Impl::includes(sal_uInt32 nChar)
228 {
229 for (Node * p = m_pFirst; p; p = p->m_pNext)
230 if (!(p->m_bDisabled || p->m_aCharset.contains(nChar)))
231 p->m_bDisabled = true;
232 }
233
234 //============================================================================
235 rtl_TextEncoding
getPreferredEncoding(rtl_TextEncoding eDefault) const236 INetMIMECharsetList_Impl::getPreferredEncoding(rtl_TextEncoding eDefault)
237 const
238 {
239 for (Node * p = m_pFirst; p; p = p->m_pNext)
240 if (!p->m_bDisabled)
241 return p->m_aCharset.getEncoding();
242 return eDefault;
243 }
244
245 //============================================================================
reset()246 void INetMIMECharsetList_Impl::reset()
247 {
248 for (Node * p = m_pFirst; p; p = p->m_pNext)
249 p->m_bDisabled = false;
250 }
251
252 //============================================================================
253 //
254 // ParameterList
255 //
256 //============================================================================
257
find(ByteString const & rAttribute,sal_uInt32 nSection,bool & rPresent)258 Parameter ** ParameterList::find(ByteString const & rAttribute,
259 sal_uInt32 nSection, bool & rPresent)
260 {
261 Parameter ** p = &m_pList;
262 for (; *p; p = &(*p)->m_pNext)
263 {
264 StringCompare eCompare = rAttribute.CompareTo((*p)->m_aAttribute);
265 if (eCompare == COMPARE_GREATER)
266 break;
267 else if (eCompare == COMPARE_EQUAL)
268 {
269 if (nSection > (*p)->m_nSection)
270 break;
271 else if (nSection == (*p)->m_nSection)
272 {
273 rPresent = true;
274 return p;
275 }
276 }
277 }
278 rPresent = false;
279 return p;
280 }
281
282 //============================================================================
283 //
284 // parseParameters
285 //
286 //============================================================================
287
288 namespace unnamed_tools_inetmime {
289
parseParameters(ParameterList const & rInput,INetContentTypeParameterList * pOutput)290 bool parseParameters(ParameterList const & rInput,
291 INetContentTypeParameterList * pOutput)
292 {
293 if (pOutput)
294 pOutput->Clear();
295
296 Parameter * pPrev = 0;
297 for (Parameter * p = rInput.m_pList; p; p = p->m_pNext)
298 {
299 if (p->m_nSection > 0
300 && (!pPrev
301 || pPrev->m_nSection != p->m_nSection - 1
302 || pPrev->m_aAttribute != p->m_aAttribute))
303 return false;
304 pPrev = p;
305 }
306
307 if (pOutput)
308 for (Parameter * p = rInput.m_pList; p;)
309 {
310 bool bCharset = p->m_aCharset.Len() != 0;
311 rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
312 if (bCharset)
313 eEncoding
314 = INetMIME::getCharsetEncoding(p->m_aCharset.GetBuffer(),
315 p->m_aCharset.GetBuffer()
316 + rInput.m_pList->
317 m_aCharset.
318 Len());
319 UniString aValue;
320 bool bBadEncoding = false;
321 Parameter * pNext = p;
322 do
323 {
324 sal_Size nSize;
325 sal_Unicode * pUnicode
326 = INetMIME::convertToUnicode(pNext->m_aValue.GetBuffer(),
327 pNext->m_aValue.GetBuffer()
328 + pNext->m_aValue.Len(),
329 bCharset && p->m_bExtended ?
330 eEncoding :
331 RTL_TEXTENCODING_UTF8,
332 nSize);
333 if (!pUnicode && !(bCharset && p->m_bExtended))
334 pUnicode = INetMIME::convertToUnicode(
335 pNext->m_aValue.GetBuffer(),
336 pNext->m_aValue.GetBuffer()
337 + pNext->m_aValue.Len(),
338 RTL_TEXTENCODING_ISO_8859_1, nSize);
339 if (!pUnicode)
340 {
341 bBadEncoding = true;
342 break;
343 }
344 aValue += UniString(pUnicode, static_cast< xub_StrLen >(nSize));
345 delete[] pUnicode;
346 pNext = pNext->m_pNext;
347 }
348 while (pNext && pNext->m_nSection > 0);
349 if (bBadEncoding)
350 {
351 aValue.Erase();
352 for (pNext = p;;)
353 {
354 if (pNext->m_bExtended)
355 for (xub_StrLen i = 0; i < pNext->m_aValue.Len(); ++i)
356 aValue += sal_Unicode(
357 sal_Unicode(
358 sal_uChar(pNext->m_aValue.GetChar(i)))
359 | 0xF800);
360 else
361 for (xub_StrLen i = 0; i < pNext->m_aValue.Len(); ++i)
362 aValue
363 += sal_Unicode(sal_uChar
364 (pNext->
365 m_aValue.GetChar(i)));
366 pNext = pNext->m_pNext;
367 if (!pNext || pNext->m_nSection == 0)
368 break;
369 };
370 }
371 pOutput->Insert(new INetContentTypeParameter(p->m_aAttribute,
372 p->m_aCharset,
373 p->m_aLanguage,
374 aValue,
375 !bBadEncoding),
376 LIST_APPEND);
377 p = pNext;
378 }
379 return true;
380 }
381
382 }
383
384 //============================================================================
385 //
386 // INetMIME
387 //
388 //============================================================================
389
390 // static
isAtomChar(sal_uInt32 nChar)391 bool INetMIME::isAtomChar(sal_uInt32 nChar)
392 {
393 static const bool aMap[128]
394 = { false, false, false, false, false, false, false, false,
395 false, false, false, false, false, false, false, false,
396 false, false, false, false, false, false, false, false,
397 false, false, false, false, false, false, false, false,
398 false, true, false, true, true, true, true, true, // !"#$%&'
399 false, false, true, true, false, true, false, true, //()*+,-./
400 true, true, true, true, true, true, true, true, //01234567
401 true, true, false, false, false, true, false, true, //89:;<=>?
402 false, true, true, true, true, true, true, true, //@ABCDEFG
403 true, true, true, true, true, true, true, true, //HIJKLMNO
404 true, true, true, true, true, true, true, true, //PQRSTUVW
405 true, true, true, false, false, false, true, true, //XYZ[\]^_
406 true, true, true, true, true, true, true, true, //`abcdefg
407 true, true, true, true, true, true, true, true, //hijklmno
408 true, true, true, true, true, true, true, true, //pqrstuvw
409 true, true, true, true, true, true, true, false //xyz{|}~
410 };
411 return isUSASCII(nChar) && aMap[nChar];
412 }
413
414 //============================================================================
415 // static
isTokenChar(sal_uInt32 nChar)416 bool INetMIME::isTokenChar(sal_uInt32 nChar)
417 {
418 static const sal_Char aMap[128]
419 = { false, false, false, false, false, false, false, false,
420 false, false, false, false, false, false, false, false,
421 false, false, false, false, false, false, false, false,
422 false, false, false, false, false, false, false, false,
423 false, true, false, true, true, true, true, true, // !"#$%&'
424 false, false, true, true, false, true, true, false, //()*+,-./
425 true, true, true, true, true, true, true, true, //01234567
426 true, true, false, false, false, false, false, false, //89:;<=>?
427 false, true, true, true, true, true, true, true, //@ABCDEFG
428 true, true, true, true, true, true, true, true, //HIJKLMNO
429 true, true, true, true, true, true, true, true, //PQRSTUVW
430 true, true, true, false, false, false, true, true, //XYZ[\]^_
431 true, true, true, true, true, true, true, true, //`abcdefg
432 true, true, true, true, true, true, true, true, //hijklmno
433 true, true, true, true, true, true, true, true, //pqrstuvw
434 true, true, true, true, true, true, true, false //xyz{|}~
435 };
436 return isUSASCII(nChar) && aMap[nChar];
437 }
438
439 //============================================================================
440 // static
isEncodedWordTokenChar(sal_uInt32 nChar)441 bool INetMIME::isEncodedWordTokenChar(sal_uInt32 nChar)
442 {
443 static const sal_Char aMap[128]
444 = { false, false, false, false, false, false, false, false,
445 false, false, false, false, false, false, false, false,
446 false, false, false, false, false, false, false, false,
447 false, false, false, false, false, false, false, false,
448 false, true, false, true, true, true, true, true, // !"#$%&'
449 false, false, true, true, false, true, false, false, //()*+,-./
450 true, true, true, true, true, true, true, true, //01234567
451 true, true, false, false, false, false, false, false, //89:;<=>?
452 false, true, true, true, true, true, true, true, //@ABCDEFG
453 true, true, true, true, true, true, true, true, //HIJKLMNO
454 true, true, true, true, true, true, true, true, //PQRSTUVW
455 true, true, true, false, false, false, true, true, //XYZ[\]^_
456 true, true, true, true, true, true, true, true, //`abcdefg
457 true, true, true, true, true, true, true, true, //hijklmno
458 true, true, true, true, true, true, true, true, //pqrstuvw
459 true, true, true, true, true, true, true, false //xyz{|}~
460 };
461 return isUSASCII(nChar) && aMap[nChar];
462 }
463
464 //============================================================================
465 // static
isIMAPAtomChar(sal_uInt32 nChar)466 bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
467 {
468 static const sal_Char aMap[128]
469 = { false, false, false, false, false, false, false, false,
470 false, false, false, false, false, false, false, false,
471 false, false, false, false, false, false, false, false,
472 false, false, false, false, false, false, false, false,
473 false, true, false, true, true, false, true, true, // !"#$%&'
474 false, false, false, true, true, true, true, true, //()*+,-./
475 true, true, true, true, true, true, true, true, //01234567
476 true, true, true, true, true, true, true, true, //89:;<=>?
477 true, true, true, true, true, true, true, true, //@ABCDEFG
478 true, true, true, true, true, true, true, true, //HIJKLMNO
479 true, true, true, true, true, true, true, true, //PQRSTUVW
480 true, true, true, true, false, true, true, true, //XYZ[\]^_
481 true, true, true, true, true, true, true, true, //`abcdefg
482 true, true, true, true, true, true, true, true, //hijklmno
483 true, true, true, true, true, true, true, true, //pqrstuvw
484 true, true, true, false, true, true, true, false //xyz{|}~
485 };
486 return isUSASCII(nChar) && aMap[nChar];
487 }
488
489 //============================================================================
490 // static
getDigit(int nWeight)491 sal_uInt32 INetMIME::getDigit(int nWeight)
492 {
493 DBG_ASSERT(nWeight >= 0 && nWeight < 10,
494 "INetMIME::getDigit(): Bad weight");
495
496 static const sal_Char aDigits[16]
497 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
498 return aDigits[nWeight];
499 }
500
501 //============================================================================
502 // static
getHexDigit(int nWeight)503 sal_uInt32 INetMIME::getHexDigit(int nWeight)
504 {
505 DBG_ASSERT(nWeight >= 0 && nWeight < 16,
506 "INetMIME::getHexDigit(): Bad weight");
507
508 static const sal_Char aDigits[16]
509 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
510 'D', 'E', 'F' };
511 return aDigits[nWeight];
512 }
513
514 //============================================================================
515 // static
getBase64Digit(int nWeight)516 sal_uInt32 INetMIME::getBase64Digit(int nWeight)
517 {
518 DBG_ASSERT(nWeight >= 0 && nWeight < 64,
519 "INetMIME::getBase64Digit(): Bad weight");
520
521 static const sal_Char aDigits[64]
522 = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
523 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
524 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
525 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
526 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' };
527 return aDigits[nWeight];
528 }
529
530 //============================================================================
531 // static
equalIgnoreCase(const sal_Char * pBegin1,const sal_Char * pEnd1,const sal_Char * pBegin2,const sal_Char * pEnd2)532 bool INetMIME::equalIgnoreCase(const sal_Char * pBegin1,
533 const sal_Char * pEnd1,
534 const sal_Char * pBegin2,
535 const sal_Char * pEnd2)
536 {
537 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pBegin2 && pBegin2 <= pEnd2,
538 "INetMIME::equalIgnoreCase(): Bad sequences");
539
540 if (pEnd1 - pBegin1 != pEnd2 - pBegin2)
541 return false;
542 while (pBegin1 != pEnd1)
543 if (toUpperCase(*pBegin1++) != toUpperCase(*pBegin2++))
544 return false;
545 return true;
546 }
547
548 //============================================================================
549 // static
equalIgnoreCase(const sal_Char * pBegin1,const sal_Char * pEnd1,const sal_Char * pString2)550 bool INetMIME::equalIgnoreCase(const sal_Char * pBegin1,
551 const sal_Char * pEnd1,
552 const sal_Char * pString2)
553 {
554 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
555 "INetMIME::equalIgnoreCase(): Bad sequences");
556
557 while (*pString2 != 0)
558 if (pBegin1 == pEnd1
559 || toUpperCase(*pBegin1++) != toUpperCase(*pString2++))
560 return false;
561 return pBegin1 == pEnd1;
562 }
563
564 //============================================================================
565 // static
equalIgnoreCase(const sal_Unicode * pBegin1,const sal_Unicode * pEnd1,const sal_Char * pString2)566 bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
567 const sal_Unicode * pEnd1,
568 const sal_Char * pString2)
569 {
570 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
571 "INetMIME::equalIgnoreCase(): Bad sequences");
572
573 while (*pString2 != 0)
574 if (pBegin1 == pEnd1
575 || toUpperCase(*pBegin1++) != toUpperCase(*pString2++))
576 return false;
577 return pBegin1 == pEnd1;
578 }
579
580 //============================================================================
581 // static
/** Skips tabs, spaces and line foldings at the start of [pBegin, pEnd).

    @return  A pointer to the first character that is neither a tab, a
    space nor the start of a line folding (as recognized by
    startsWithLineFolding()), or pEnd if there is none.
 */
const sal_Char * INetMIME::skipLinearWhiteSpace(const sal_Char * pBegin,
                                                const sal_Char * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "INetMIME::skipLinearWhiteSpace(): Bad sequence");

    while (pBegin != pEnd)
    {
        if (*pBegin == '\t' || *pBegin == ' ')
            ++pBegin;
        else if (*pBegin == 0x0D // CR
                 && startsWithLineFolding(pBegin, pEnd))
            pBegin += 3; // the folding is three characters long
        else
            break;
    }
    return pBegin;
}
608
609 //============================================================================
610 // static
/** Skips tabs, spaces and line foldings at the start of [pBegin, pEnd).

    @return  A pointer to the first character that is neither a tab, a
    space nor the start of a line folding (as recognized by
    startsWithLineFolding()), or pEnd if there is none.
 */
const sal_Unicode * INetMIME::skipLinearWhiteSpace(const sal_Unicode * pBegin,
                                                   const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "INetMIME::skipLinearWhiteSpace(): Bad sequence");

    while (pBegin != pEnd)
    {
        if (*pBegin == '\t' || *pBegin == ' ')
            ++pBegin;
        else if (*pBegin == 0x0D // CR
                 && startsWithLineFolding(pBegin, pEnd))
            pBegin += 3; // the folding is three characters long
        else
            break;
    }
    return pBegin;
}
637
638 //============================================================================
639 // static
/** Skips a parenthesized, possibly nested comment at the start of
    [pBegin, pEnd).

    A backslash hides the character following it, so escaped parentheses do
    not take part in the nesting.

    @return  A pointer past the parenthesis that closes the comment, or
    pBegin if the range does not start with '(' or the comment is not
    closed before pEnd.
 */
const sal_Char * INetMIME::skipComment(const sal_Char * pBegin,
                                       const sal_Char * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "INetMIME::skipComment(): Bad sequence");

    if (pBegin == pEnd || *pBegin != '(')
        return pBegin;

    sal_uInt32 nDepth = 0;
    const sal_Char * pScan = pBegin;
    while (pScan != pEnd)
    {
        const sal_Char cCurrent = *pScan++;
        if (cCurrent == '(')
            ++nDepth;
        else if (cCurrent == ')')
        {
            if (--nDepth == 0)
                return pScan;
        }
        else if (cCurrent == '\\' && pScan != pEnd)
            ++pScan;
    }
    return pBegin;
}
669
670 //============================================================================
671 // static
/** Skips a parenthesized, possibly nested comment at the start of
    [pBegin, pEnd).

    A backslash hides the character following it, so escaped parentheses do
    not take part in the nesting.

    @return  A pointer past the parenthesis that closes the comment, or
    pBegin if the range does not start with '(' or the comment is not
    closed before pEnd.
 */
const sal_Unicode * INetMIME::skipComment(const sal_Unicode * pBegin,
                                          const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "INetMIME::skipComment(): Bad sequence");

    if (pBegin == pEnd || *pBegin != '(')
        return pBegin;

    sal_uInt32 nDepth = 0;
    const sal_Unicode * pScan = pBegin;
    while (pScan != pEnd)
    {
        const sal_Unicode cCurrent = *pScan++;
        if (cCurrent == '(')
            ++nDepth;
        else if (cCurrent == ')')
        {
            if (--nDepth == 0)
                return pScan;
        }
        else if (cCurrent == '\\' && pScan != pEnd)
            ++pScan;
    }
    return pBegin;
}
701
702 //============================================================================
703 // static
/** Skips any mix of tabs, spaces, line foldings and comments at the start
    of [pBegin, pEnd).

    @return  A pointer to the first character that cannot be skipped (this
    includes a '(' that does not start a complete comment), or pEnd if
    everything was skipped.
 */
const sal_Char * INetMIME::skipLinearWhiteSpaceComment(const sal_Char *
                                                           pBegin,
                                                       const sal_Char * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "INetMIME::skipLinearWhiteSpaceComment(): Bad sequence");

    while (pBegin != pEnd)
    {
        if (*pBegin == '\t' || *pBegin == ' ')
        {
            ++pBegin;
        }
        else if (*pBegin == 0x0D) // CR
        {
            if (!startsWithLineFolding(pBegin, pEnd))
                return pBegin;
            pBegin += 3;
        }
        else if (*pBegin == '(')
        {
            const sal_Char * pAfterComment = skipComment(pBegin, pEnd);
            // skipComment() hands back its argument if it skipped nothing.
            if (pAfterComment == pBegin)
                return pBegin;
            pBegin = pAfterComment;
        }
        else
            return pBegin;
    }
    return pBegin;
}
740
741 //============================================================================
742 // static
/** Skips any mix of tabs, spaces, line foldings and comments at the start
    of [pBegin, pEnd).

    @return  A pointer to the first character that cannot be skipped (this
    includes a '(' that does not start a complete comment), or pEnd if
    everything was skipped.
 */
const sal_Unicode * INetMIME::skipLinearWhiteSpaceComment(const sal_Unicode *
                                                              pBegin,
                                                          const sal_Unicode *
                                                              pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "INetMIME::skipLinearWhiteSpaceComment(): Bad sequence");

    while (pBegin != pEnd)
    {
        if (*pBegin == '\t' || *pBegin == ' ')
        {
            ++pBegin;
        }
        else if (*pBegin == 0x0D) // CR
        {
            if (!startsWithLineFolding(pBegin, pEnd))
                return pBegin;
            pBegin += 3;
        }
        else if (*pBegin == '(')
        {
            const sal_Unicode * pAfterComment = skipComment(pBegin, pEnd);
            // skipComment() hands back its argument if it skipped nothing.
            if (pAfterComment == pBegin)
                return pBegin;
            pBegin = pAfterComment;
        }
        else
            return pBegin;
    }
    return pBegin;
}
780
781 //============================================================================
782 // static
/** Skips a double-quoted string at the start of [pBegin, pEnd).

    Inside the string a backslash hides the following character, and a CR
    is only accepted if it is followed by LF and a white space character.

    @return  A pointer past the closing quote, or pBegin if the range does
    not start with '"', contains an unacceptable CR, or is not closed
    before pEnd.
 */
const sal_Char * INetMIME::skipQuotedString(const sal_Char * pBegin,
                                            const sal_Char * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "INetMIME::skipQuotedString(): Bad sequence");

    if (pBegin == pEnd || *pBegin != '"')
        return pBegin;

    const sal_Char * pScan = pBegin + 1;
    while (pScan != pEnd)
    {
        const sal_Char cCurrent = *pScan++;
        if (cCurrent == '"')
            return pScan;
        if (cCurrent == '\\')
        {
            if (pScan != pEnd)
                ++pScan;
        }
        else if (cCurrent == 0x0D) // CR
        {
            if (pEnd - pScan < 2 || pScan[0] != 0x0A // LF
                || !isWhiteSpace(pScan[1]))
                return pBegin;
            pScan += 2;
        }
    }
    return pBegin;
}
809
810 //============================================================================
811 // static
/** Skips a double-quoted string at the start of [pBegin, pEnd).

    Inside the string a backslash hides the following character, and a CR
    is only accepted if it is followed by LF and a white space character.

    @return  A pointer past the closing quote, or pBegin if the range does
    not start with '"', contains an unacceptable CR, or is not closed
    before pEnd.
 */
const sal_Unicode * INetMIME::skipQuotedString(const sal_Unicode * pBegin,
                                               const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "INetMIME::skipQuotedString(): Bad sequence");

    if (pBegin == pEnd || *pBegin != '"')
        return pBegin;

    const sal_Unicode * pScan = pBegin + 1;
    while (pScan != pEnd)
    {
        const sal_Unicode cCurrent = *pScan++;
        if (cCurrent == '"')
            return pScan;
        if (cCurrent == '\\')
        {
            if (pScan != pEnd)
                ++pScan;
        }
        else if (cCurrent == 0x0D) // CR
        {
            if (pEnd - pScan < 2 || pScan[0] != 0x0A // LF
                || !isWhiteSpace(pScan[1]))
                return pBegin;
            pScan += 2;
        }
    }
    return pBegin;
}
838
839 //============================================================================
840 // static
/** Finds the end of the run of atom characters that starts at pBegin.

    @return  A pointer to the first character in [pBegin, pEnd) that
    isAtomChar() rejects, or pEnd if there is none.
 */
const sal_Char * INetMIME::scanAtom(const sal_Char * pBegin,
                                    const sal_Char * pEnd)
{
    const sal_Char * pScan = pBegin;
    for (; pScan != pEnd; ++pScan)
        if (!isAtomChar(*pScan))
            break;
    return pScan;
}
848
849 //============================================================================
850 // static
/** Finds the end of the run of atom characters that starts at pBegin.

    @return  A pointer to the first character in [pBegin, pEnd) that
    isAtomChar() rejects, or pEnd if there is none.
 */
const sal_Unicode * INetMIME::scanAtom(const sal_Unicode * pBegin,
                                       const sal_Unicode * pEnd)
{
    const sal_Unicode * pScan = pBegin;
    for (; pScan != pEnd; ++pScan)
        if (!isAtomChar(*pScan))
            break;
    return pScan;
}
858
859 //============================================================================
860 // static
scanUnsigned(const sal_Char * & rBegin,const sal_Char * pEnd,bool bLeadingZeroes,sal_uInt32 & rValue)861 bool INetMIME::scanUnsigned(const sal_Char *& rBegin, const sal_Char * pEnd,
862 bool bLeadingZeroes, sal_uInt32 & rValue)
863 {
864 sal_uInt64 nTheValue = 0;
865 const sal_Char * p = rBegin;
866 for ( ; p != pEnd; ++p)
867 {
868 int nWeight = getWeight(*p);
869 if (nWeight < 0)
870 break;
871 nTheValue = 10 * nTheValue + nWeight;
872 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
873 return false;
874 }
875 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
876 return false;
877 rBegin = p;
878 rValue = sal_uInt32(nTheValue);
879 return true;
880 }
881
882 //============================================================================
883 // static
scanUnsigned(const sal_Unicode * & rBegin,const sal_Unicode * pEnd,bool bLeadingZeroes,sal_uInt32 & rValue)884 bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
885 const sal_Unicode * pEnd, bool bLeadingZeroes,
886 sal_uInt32 & rValue)
887 {
888 sal_uInt64 nTheValue = 0;
889 const sal_Unicode * p = rBegin;
890 for ( ; p != pEnd; ++p)
891 {
892 int nWeight = getWeight(*p);
893 if (nWeight < 0)
894 break;
895 nTheValue = 10 * nTheValue + nWeight;
896 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
897 return false;
898 }
899 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
900 return false;
901 rBegin = p;
902 rValue = sal_uInt32(nTheValue);
903 return true;
904 }
905
906 //============================================================================
907 // static
scanUnsignedHex(const sal_Char * & rBegin,const sal_Char * pEnd,bool bLeadingZeroes,sal_uInt32 & rValue)908 bool INetMIME::scanUnsignedHex(const sal_Char *& rBegin,
909 const sal_Char * pEnd, bool bLeadingZeroes,
910 sal_uInt32 & rValue)
911 {
912 sal_uInt64 nTheValue = 0;
913 const sal_Char * p = rBegin;
914 for ( p = rBegin; p != pEnd; ++p)
915 {
916 int nWeight = getHexWeight(*p);
917 if (nWeight < 0)
918 break;
919 nTheValue = nTheValue << 4 | nWeight;
920 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
921 return false;
922 }
923 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
924 return false;
925 rBegin = p;
926 rValue = sal_uInt32(nTheValue);
927 return true;
928 }
929
930 //============================================================================
931 // static
scanUnsignedHex(const sal_Unicode * & rBegin,const sal_Unicode * pEnd,bool bLeadingZeroes,sal_uInt32 & rValue)932 bool INetMIME::scanUnsignedHex(const sal_Unicode *& rBegin,
933 const sal_Unicode * pEnd, bool bLeadingZeroes,
934 sal_uInt32 & rValue)
935 {
936 sal_uInt64 nTheValue = 0;
937 const sal_Unicode * p = rBegin;
938 for ( ; p != pEnd; ++p)
939 {
940 int nWeight = getHexWeight(*p);
941 if (nWeight < 0)
942 break;
943 nTheValue = nTheValue << 4 | nWeight;
944 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
945 return false;
946 }
947 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
948 return false;
949 rBegin = p;
950 rValue = sal_uInt32(nTheValue);
951 return true;
952 }
953
954 //============================================================================
955 // static
/** Scans a block delimited by nOpening and nClosing at the start of
    [pBegin, pEnd).

    @param rLength  Incremented while scanning: by one for each delimiter,
    each backslash, each ordinary character, each CR not followed by LF and
    each CR LF that is followed by white space; by three for any other
    CR LF, including one that follows a backslash.  Presumably the length
    of a normalized form of the block -- confirm with callers.

    @param rModify  Set to true if a CR LF sequence or a character rejected
    by isUSASCII() is met; never reset to false.

    @return  A pointer past the closing delimiter, pEnd if the block is not
    closed, or pBegin if the range does not start with nOpening.
 */
const sal_Char * INetMIME::scanQuotedBlock(const sal_Char * pBegin,
                                           const sal_Char * pEnd,
                                           sal_uInt32 nOpening,
                                           sal_uInt32 nClosing,
                                           sal_Size & rLength,
                                           bool & rModify)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "INetMIME::scanQuotedBlock(): Bad sequence");

    if (pBegin == pEnd || static_cast< unsigned char >(*pBegin) != nOpening)
        return pBegin;

    ++rLength;
    ++pBegin;
    while (pBegin != pEnd)
    {
        if (static_cast< unsigned char >(*pBegin) == nClosing)
        {
            ++rLength;
            return ++pBegin;
        }

        sal_uInt32 c = *pBegin++;
        if (c == 0x0D) // CR
        {
            if (pBegin == pEnd || *pBegin != 0x0A) // no LF follows
                ++rLength;
            else if (pEnd - pBegin >= 2 && isWhiteSpace(pBegin[1]))
            {
                // CR LF followed by white space
                ++rLength;
                rModify = true;
                pBegin += 2;
            }
            else
            {
                // CR LF not followed by white space
                rLength += 3;
                rModify = true;
                ++pBegin;
            }
        }
        else if (c == '\\')
        {
            ++rLength;
            if (pBegin != pEnd)
            {
                if (startsWithLineBreak(pBegin, pEnd)
                    && (pEnd - pBegin < 3 || !isWhiteSpace(pBegin[2])))
                {
                    rLength += 3;
                    rModify = true;
                    pBegin += 2;
                }
                else
                    ++pBegin;
            }
        }
        else
        {
            ++rLength;
            if (!isUSASCII(c))
                rModify = true;
        }
    }
    return pBegin;
}
1026
1027 //============================================================================
1028 // static
/** Scans a block delimited by nOpening and nClosing at the start of
    [pBegin, pEnd).

    @param rLength  Incremented while scanning: by one for each delimiter,
    each backslash, each ordinary character, each CR not followed by LF and
    each CR LF that is followed by white space; by three for any other
    CR LF, including one that follows a backslash.  Presumably the length
    of a normalized form of the block -- confirm with callers.

    @param rModify  Set to true if a CR LF sequence or a character rejected
    by isUSASCII() is met; never reset to false.

    @return  A pointer past the closing delimiter, pEnd if the block is not
    closed, or pBegin if the range does not start with nOpening.
 */
const sal_Unicode * INetMIME::scanQuotedBlock(const sal_Unicode * pBegin,
                                              const sal_Unicode * pEnd,
                                              sal_uInt32 nOpening,
                                              sal_uInt32 nClosing,
                                              sal_Size & rLength,
                                              bool & rModify)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "INetMIME::scanQuotedBlock(): Bad sequence");

    if (pBegin == pEnd || *pBegin != nOpening)
        return pBegin;

    ++rLength;
    ++pBegin;
    while (pBegin != pEnd)
    {
        if (*pBegin == nClosing)
        {
            ++rLength;
            return ++pBegin;
        }

        sal_uInt32 c = *pBegin++;
        if (c == 0x0D) // CR
        {
            if (pBegin == pEnd || *pBegin != 0x0A) // no LF follows
                ++rLength;
            else if (pEnd - pBegin >= 2 && isWhiteSpace(pBegin[1]))
            {
                // CR LF followed by white space
                ++rLength;
                rModify = true;
                pBegin += 2;
            }
            else
            {
                // CR LF not followed by white space
                rLength += 3;
                rModify = true;
                ++pBegin;
            }
        }
        else if (c == '\\')
        {
            ++rLength;
            if (pBegin != pEnd)
            {
                if (startsWithLineBreak(pBegin, pEnd)
                    && (pEnd - pBegin < 3 || !isWhiteSpace(pBegin[2])))
                {
                    rLength += 3;
                    rModify = true;
                    pBegin += 2;
                }
                else
                    ++pBegin;
            }
        }
        else
        {
            ++rLength;
            if (!isUSASCII(c))
                rModify = true;
        }
    }
    return pBegin;
}
1099
1100 //============================================================================
1101 // static
scanParameters(sal_Char const * pBegin,sal_Char const * pEnd,INetContentTypeParameterList * pParameters)1102 sal_Char const * INetMIME::scanParameters(sal_Char const * pBegin,
1103 sal_Char const * pEnd,
1104 INetContentTypeParameterList *
1105 pParameters)
1106 {
1107 ParameterList aList;
1108 sal_Char const * pParameterBegin = pBegin;
1109 for (sal_Char const * p = pParameterBegin;; pParameterBegin = p)
1110 {
1111 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
1112 if (pParameterBegin == pEnd || *pParameterBegin != ';')
1113 break;
1114 p = pParameterBegin + 1;
1115
1116 sal_Char const * pAttributeBegin = skipLinearWhiteSpaceComment(p,
1117 pEnd);
1118 p = pAttributeBegin;
1119 bool bDowncaseAttribute = false;
1120 while (p != pEnd && isTokenChar(*p) && *p != '*')
1121 {
1122 bDowncaseAttribute = bDowncaseAttribute || isUpperCase(*p);
1123 ++p;
1124 }
1125 if (p == pAttributeBegin)
1126 break;
1127 ByteString aAttribute(
1128 pAttributeBegin, static_cast< xub_StrLen >(p - pAttributeBegin));
1129 if (bDowncaseAttribute)
1130 aAttribute.ToLowerAscii();
1131
1132 sal_uInt32 nSection = 0;
1133 if (p != pEnd && *p == '*')
1134 {
1135 ++p;
1136 if (p != pEnd && isDigit(*p)
1137 && !scanUnsigned(p, pEnd, false, nSection))
1138 break;
1139 }
1140
1141 bool bPresent;
1142 Parameter ** pPos = aList.find(aAttribute, nSection, bPresent);
1143 if (bPresent)
1144 break;
1145
1146 bool bExtended = false;
1147 if (p != pEnd && *p == '*')
1148 {
1149 ++p;
1150 bExtended = true;
1151 }
1152
1153 p = skipLinearWhiteSpaceComment(p, pEnd);
1154
1155 if (p == pEnd || *p != '=')
1156 break;
1157
1158 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
1159
1160 ByteString aCharset;
1161 ByteString aLanguage;
1162 ByteString aValue;
1163 if (bExtended)
1164 {
1165 if (nSection == 0)
1166 {
1167 sal_Char const * pCharsetBegin = p;
1168 bool bDowncaseCharset = false;
1169 while (p != pEnd && isTokenChar(*p) && *p != '\'')
1170 {
1171 bDowncaseCharset = bDowncaseCharset || isUpperCase(*p);
1172 ++p;
1173 }
1174 if (p == pCharsetBegin)
1175 break;
1176 if (pParameters)
1177 {
1178 aCharset = ByteString(
1179 pCharsetBegin,
1180 static_cast< xub_StrLen >(p - pCharsetBegin));
1181 if (bDowncaseCharset)
1182 aCharset.ToLowerAscii();
1183 }
1184
1185 if (p == pEnd || *p != '\'')
1186 break;
1187 ++p;
1188
1189 sal_Char const * pLanguageBegin = p;
1190 bool bDowncaseLanguage = false;
1191 int nLetters = 0;
1192 for (; p != pEnd; ++p)
1193 if (isAlpha(*p))
1194 {
1195 if (++nLetters > 8)
1196 break;
1197 bDowncaseLanguage = bDowncaseLanguage
1198 || isUpperCase(*p);
1199 }
1200 else if (*p == '-')
1201 {
1202 if (nLetters == 0)
1203 break;
1204 nLetters = 0;
1205 }
1206 else
1207 break;
1208 if (nLetters == 0 || nLetters > 8)
1209 break;
1210 if (pParameters)
1211 {
1212 aLanguage = ByteString(
1213 pLanguageBegin,
1214 static_cast< xub_StrLen >(p - pLanguageBegin));
1215 if (bDowncaseLanguage)
1216 aLanguage.ToLowerAscii();
1217 }
1218
1219 if (p == pEnd || *p != '\'')
1220 break;
1221 ++p;
1222 }
1223 if (pParameters)
1224 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1225 {
1226 if (*p == '%')
1227 {
1228 if (p + 2 < pEnd)
1229 {
1230 int nWeight1 = getHexWeight(p[1]);
1231 int nWeight2 = getHexWeight(p[2]);
1232 if (nWeight1 >= 0 && nWeight2 >= 0)
1233 {
1234 aValue += sal_Char(nWeight1 << 4 | nWeight2);
1235 p += 3;
1236 continue;
1237 }
1238 }
1239 }
1240 aValue += *p++;
1241 }
1242 else
1243 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1244 ++p;
1245 }
1246 else if (p != pEnd && *p == '"')
1247 if (pParameters)
1248 {
1249 bool bInvalid = false;
1250 for (++p;;)
1251 {
1252 if (p == pEnd)
1253 {
1254 bInvalid = true;
1255 break;
1256 }
1257 else if (*p == '"')
1258 {
1259 ++p;
1260 break;
1261 }
1262 else if (*p == 0x0D) // CR
1263 {
1264 if (pEnd - p < 3 || p[1] != 0x0A // LF
1265 || !isWhiteSpace(p[2]))
1266 {
1267 bInvalid = true;
1268 break;
1269 }
1270 p += 2;
1271 }
1272 else if (*p == '\\' && ++p == pEnd)
1273 {
1274 bInvalid = true;
1275 break;
1276 }
1277 aValue += *p++;
1278 }
1279 if (bInvalid)
1280 break;
1281 }
1282 else
1283 {
1284 sal_Char const * pStringEnd = skipQuotedString(p, pEnd);
1285 if (p == pStringEnd)
1286 break;
1287 p = pStringEnd;
1288 }
1289 else
1290 {
1291 sal_Char const * pTokenBegin = p;
1292 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1293 ++p;
1294 if (p == pTokenBegin)
1295 break;
1296 if (pParameters)
1297 aValue = ByteString(
1298 pTokenBegin, static_cast< xub_StrLen >(p - pTokenBegin));
1299 }
1300
1301 *pPos = new Parameter(*pPos, aAttribute, aCharset, aLanguage, aValue,
1302 nSection, bExtended);
1303 }
1304 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
1305 }
1306
1307 //============================================================================
1308 // static
scanParameters(sal_Unicode const * pBegin,sal_Unicode const * pEnd,INetContentTypeParameterList * pParameters)1309 sal_Unicode const * INetMIME::scanParameters(sal_Unicode const * pBegin,
1310 sal_Unicode const * pEnd,
1311 INetContentTypeParameterList *
1312 pParameters)
1313 {
1314 ParameterList aList;
1315 sal_Unicode const * pParameterBegin = pBegin;
1316 for (sal_Unicode const * p = pParameterBegin;; pParameterBegin = p)
1317 {
1318 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
1319 if (pParameterBegin == pEnd || *pParameterBegin != ';')
1320 break;
1321 p = pParameterBegin + 1;
1322
1323 sal_Unicode const * pAttributeBegin
1324 = skipLinearWhiteSpaceComment(p, pEnd);
1325 p = pAttributeBegin;
1326 bool bDowncaseAttribute = false;
1327 while (p != pEnd && isTokenChar(*p) && *p != '*')
1328 {
1329 bDowncaseAttribute = bDowncaseAttribute || isUpperCase(*p);
1330 ++p;
1331 }
1332 if (p == pAttributeBegin)
1333 break;
1334 ByteString aAttribute = ByteString(
1335 pAttributeBegin, static_cast< xub_StrLen >(p - pAttributeBegin),
1336 RTL_TEXTENCODING_ASCII_US);
1337 if (bDowncaseAttribute)
1338 aAttribute.ToLowerAscii();
1339
1340 sal_uInt32 nSection = 0;
1341 if (p != pEnd && *p == '*')
1342 {
1343 ++p;
1344 if (p != pEnd && isDigit(*p)
1345 && !scanUnsigned(p, pEnd, false, nSection))
1346 break;
1347 }
1348
1349 bool bPresent;
1350 Parameter ** pPos = aList.find(aAttribute, nSection, bPresent);
1351 if (bPresent)
1352 break;
1353
1354 bool bExtended = false;
1355 if (p != pEnd && *p == '*')
1356 {
1357 ++p;
1358 bExtended = true;
1359 }
1360
1361 p = skipLinearWhiteSpaceComment(p, pEnd);
1362
1363 if (p == pEnd || *p != '=')
1364 break;
1365
1366 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
1367
1368 ByteString aCharset;
1369 ByteString aLanguage;
1370 ByteString aValue;
1371 if (bExtended)
1372 {
1373 if (nSection == 0)
1374 {
1375 sal_Unicode const * pCharsetBegin = p;
1376 bool bDowncaseCharset = false;
1377 while (p != pEnd && isTokenChar(*p) && *p != '\'')
1378 {
1379 bDowncaseCharset = bDowncaseCharset || isUpperCase(*p);
1380 ++p;
1381 }
1382 if (p == pCharsetBegin)
1383 break;
1384 if (pParameters)
1385 {
1386 aCharset = ByteString(
1387 pCharsetBegin,
1388 static_cast< xub_StrLen >(p - pCharsetBegin),
1389 RTL_TEXTENCODING_ASCII_US);
1390 if (bDowncaseCharset)
1391 aCharset.ToLowerAscii();
1392 }
1393
1394 if (p == pEnd || *p != '\'')
1395 break;
1396 ++p;
1397
1398 sal_Unicode const * pLanguageBegin = p;
1399 bool bDowncaseLanguage = false;
1400 int nLetters = 0;
1401 for (; p != pEnd; ++p)
1402 if (isAlpha(*p))
1403 {
1404 if (++nLetters > 8)
1405 break;
1406 bDowncaseLanguage = bDowncaseLanguage
1407 || isUpperCase(*p);
1408 }
1409 else if (*p == '-')
1410 {
1411 if (nLetters == 0)
1412 break;
1413 nLetters = 0;
1414 }
1415 else
1416 break;
1417 if (nLetters == 0 || nLetters > 8)
1418 break;
1419 if (pParameters)
1420 {
1421 aLanguage = ByteString(
1422 pLanguageBegin,
1423 static_cast< xub_StrLen >(p - pLanguageBegin),
1424 RTL_TEXTENCODING_ASCII_US);
1425 if (bDowncaseLanguage)
1426 aLanguage.ToLowerAscii();
1427 }
1428
1429 if (p == pEnd || *p != '\'')
1430 break;
1431 ++p;
1432 }
1433 if (pParameters)
1434 {
1435 INetMIMEStringOutputSink
1436 aSink(0, INetMIMEOutputSink::NO_LINE_LENGTH_LIMIT);
1437 while (p != pEnd)
1438 {
1439 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
1440 if (isUSASCII(nChar) && !isTokenChar(nChar))
1441 break;
1442 if (nChar == '%' && p + 1 < pEnd)
1443 {
1444 int nWeight1 = getHexWeight(p[0]);
1445 int nWeight2 = getHexWeight(p[1]);
1446 if (nWeight1 >= 0 && nWeight2 >= 0)
1447 {
1448 aSink << sal_Char(nWeight1 << 4 | nWeight2);
1449 p += 2;
1450 continue;
1451 }
1452 }
1453 INetMIME::writeUTF8(aSink, nChar);
1454 }
1455 aValue = aSink.takeBuffer();
1456 }
1457 else
1458 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1459 ++p;
1460 }
1461 else if (p != pEnd && *p == '"')
1462 if (pParameters)
1463 {
1464 INetMIMEStringOutputSink
1465 aSink(0, INetMIMEOutputSink::NO_LINE_LENGTH_LIMIT);
1466 bool bInvalid = false;
1467 for (++p;;)
1468 {
1469 if (p == pEnd)
1470 {
1471 bInvalid = true;
1472 break;
1473 }
1474 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
1475 if (nChar == '"')
1476 break;
1477 else if (nChar == 0x0D) // CR
1478 {
1479 if (pEnd - p < 2 || *p++ != 0x0A // LF
1480 || !isWhiteSpace(*p))
1481 {
1482 bInvalid = true;
1483 break;
1484 }
1485 nChar = sal_uChar(*p++);
1486 }
1487 else if (nChar == '\\')
1488 {
1489 if (p == pEnd)
1490 {
1491 bInvalid = true;
1492 break;
1493 }
1494 nChar = INetMIME::getUTF32Character(p, pEnd);
1495 }
1496 INetMIME::writeUTF8(aSink, nChar);
1497 }
1498 if (bInvalid)
1499 break;
1500 aValue = aSink.takeBuffer();
1501 }
1502 else
1503 {
1504 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
1505 if (p == pStringEnd)
1506 break;
1507 p = pStringEnd;
1508 }
1509 else
1510 {
1511 sal_Unicode const * pTokenBegin = p;
1512 while (p != pEnd && (isTokenChar(*p) || !isUSASCII(*p)))
1513 ++p;
1514 if (p == pTokenBegin)
1515 break;
1516 if (pParameters)
1517 aValue = ByteString(
1518 pTokenBegin, static_cast< xub_StrLen >(p - pTokenBegin),
1519 RTL_TEXTENCODING_UTF8);
1520 }
1521
1522 *pPos = new Parameter(*pPos, aAttribute, aCharset, aLanguage, aValue,
1523 nSection, bExtended);
1524 }
1525 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
1526 }
1527
1528 //============================================================================
1529 // static
getCharsetName(rtl_TextEncoding eEncoding)1530 const sal_Char * INetMIME::getCharsetName(rtl_TextEncoding eEncoding)
1531 {
1532 if (rtl_isOctetTextEncoding(eEncoding))
1533 {
1534 char const * p = rtl_getMimeCharsetFromTextEncoding(eEncoding);
1535 DBG_ASSERT(p, "INetMIME::getCharsetName(): Unsupported encoding");
1536 return p;
1537 }
1538 else
1539 switch (eEncoding)
1540 {
1541 case RTL_TEXTENCODING_UCS4:
1542 return "ISO-10646-UCS-4";
1543
1544 case RTL_TEXTENCODING_UCS2:
1545 return "ISO-10646-UCS-2";
1546
1547 default:
1548 DBG_ERROR("INetMIME::getCharsetName(): Unsupported encoding");
1549 return 0;
1550 }
1551 }
1552
1553 //============================================================================
1554 namespace unnamed_tools_inetmime {
1555
1556 struct EncodingEntry
1557 {
1558 sal_Char const * m_aName;
1559 rtl_TextEncoding m_eEncoding;
1560 };
1561
1562 //============================================================================
1563 // The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
1564 // assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
1565 // noted:
1566 EncodingEntry const aEncodingMap[]
1567 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
1568 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
1569 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
1570 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
1571 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
1572 { "ASCII", RTL_TEXTENCODING_ASCII_US },
1573 { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
1574 { "US", RTL_TEXTENCODING_ASCII_US },
1575 { "IBM367", RTL_TEXTENCODING_ASCII_US },
1576 { "CP367", RTL_TEXTENCODING_ASCII_US },
1577 { "CSASCII", RTL_TEXTENCODING_ASCII_US },
1578 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
1579 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
1580 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
1581 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
1582 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
1583 { "L1", RTL_TEXTENCODING_ISO_8859_1 },
1584 { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
1585 { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
1586 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
1587 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
1588 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
1589 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
1590 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
1591 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
1592 { "L2", RTL_TEXTENCODING_ISO_8859_2 },
1593 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
1594 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
1595 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
1596 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
1597 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
1598 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
1599 { "L3", RTL_TEXTENCODING_ISO_8859_3 },
1600 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
1601 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
1602 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
1603 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
1604 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
1605 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
1606 { "L4", RTL_TEXTENCODING_ISO_8859_4 },
1607 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
1608 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
1609 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
1610 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
1611 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
1612 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
1613 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
1614 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
1615 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
1616 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
1617 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
1618 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
1619 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
1620 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
1621 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
1622 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
1623 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
1624 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
1625 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
1626 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
1627 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
1628 { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
1629 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
1630 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
1631 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
1632 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
1633 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
1634 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
1635 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
1636 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
1637 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
1638 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
1639 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
1640 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
1641 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
1642 { "L5", RTL_TEXTENCODING_ISO_8859_9 },
1643 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
1644 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
1645 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
1646 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
1647 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
1648 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
1649 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
1650 { "IBM437", RTL_TEXTENCODING_IBM_437 },
1651 { "CP437", RTL_TEXTENCODING_IBM_437 },
1652 { "437", RTL_TEXTENCODING_IBM_437 },
1653 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
1654 { "IBM850", RTL_TEXTENCODING_IBM_850 },
1655 { "CP850", RTL_TEXTENCODING_IBM_850 },
1656 { "850", RTL_TEXTENCODING_IBM_850 },
1657 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
1658 { "IBM860", RTL_TEXTENCODING_IBM_860 },
1659 { "CP860", RTL_TEXTENCODING_IBM_860 },
1660 { "860", RTL_TEXTENCODING_IBM_860 },
1661 { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
1662 { "IBM861", RTL_TEXTENCODING_IBM_861 },
1663 { "CP861", RTL_TEXTENCODING_IBM_861 },
1664 { "861", RTL_TEXTENCODING_IBM_861 },
1665 { "CP-IS", RTL_TEXTENCODING_IBM_861 },
1666 { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
1667 { "IBM863", RTL_TEXTENCODING_IBM_863 },
1668 { "CP863", RTL_TEXTENCODING_IBM_863 },
1669 { "863", RTL_TEXTENCODING_IBM_863 },
1670 { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
1671 { "IBM865", RTL_TEXTENCODING_IBM_865 },
1672 { "CP865", RTL_TEXTENCODING_IBM_865 },
1673 { "865", RTL_TEXTENCODING_IBM_865 },
1674 { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
1675 { "IBM775", RTL_TEXTENCODING_IBM_775 },
1676 { "CP775", RTL_TEXTENCODING_IBM_775 },
1677 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
1678 { "IBM852", RTL_TEXTENCODING_IBM_852 },
1679 { "CP852", RTL_TEXTENCODING_IBM_852 },
1680 { "852", RTL_TEXTENCODING_IBM_852 },
1681 { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
1682 { "IBM855", RTL_TEXTENCODING_IBM_855 },
1683 { "CP855", RTL_TEXTENCODING_IBM_855 },
1684 { "855", RTL_TEXTENCODING_IBM_855 },
1685 { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
1686 { "IBM857", RTL_TEXTENCODING_IBM_857 },
1687 { "CP857", RTL_TEXTENCODING_IBM_857 },
1688 { "857", RTL_TEXTENCODING_IBM_857 },
1689 { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
1690 { "IBM862", RTL_TEXTENCODING_IBM_862 },
1691 { "CP862", RTL_TEXTENCODING_IBM_862 },
1692 { "862", RTL_TEXTENCODING_IBM_862 },
1693 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
1694 { "IBM864", RTL_TEXTENCODING_IBM_864 },
1695 { "CP864", RTL_TEXTENCODING_IBM_864 },
1696 { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
1697 { "IBM866", RTL_TEXTENCODING_IBM_866 },
1698 { "CP866", RTL_TEXTENCODING_IBM_866 },
1699 { "866", RTL_TEXTENCODING_IBM_866 },
1700 { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
1701 { "IBM869", RTL_TEXTENCODING_IBM_869 },
1702 { "CP869", RTL_TEXTENCODING_IBM_869 },
1703 { "869", RTL_TEXTENCODING_IBM_869 },
1704 { "CP-GR", RTL_TEXTENCODING_IBM_869 },
1705 { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
1706 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
1707 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
1708 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
1709 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
1710 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
1711 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
1712 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
1713 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
1714 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
1715 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
1716 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
1717 { "GB2312", RTL_TEXTENCODING_GB_2312 },
1718 { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
1719 { "BIG5", RTL_TEXTENCODING_BIG5 },
1720 { "CSBIG5", RTL_TEXTENCODING_BIG5 },
1721 { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
1722 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
1723 RTL_TEXTENCODING_EUC_JP },
1724 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
1725 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
1726 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
1727 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
1728 { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
1729 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
1730 { "UTF-7", RTL_TEXTENCODING_UTF7 },
1731 { "UTF-8", RTL_TEXTENCODING_UTF8 },
1732 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
1733 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
1734 { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
1735 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
1736 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
1737 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
1738 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
1739 { "CSUCS4", RTL_TEXTENCODING_UCS4 },
1740 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
1741 { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
1742
1743 //============================================================================
1744 template< typename T >
getCharsetEncoding_Impl(T const * pBegin,T const * pEnd)1745 inline rtl_TextEncoding getCharsetEncoding_Impl(T const * pBegin,
1746 T const * pEnd)
1747 {
1748 for (sal_Size i = 0; i < sizeof aEncodingMap / sizeof (EncodingEntry);
1749 ++i)
1750 if (INetMIME::equalIgnoreCase(pBegin, pEnd, aEncodingMap[i].m_aName))
1751 return aEncodingMap[i].m_eEncoding;
1752 return RTL_TEXTENCODING_DONTKNOW;
1753 }
1754
1755 }
1756
1757 //============================================================================
1758 // static
getCharsetEncoding(sal_Char const * pBegin,sal_Char const * pEnd)1759 rtl_TextEncoding INetMIME::getCharsetEncoding(sal_Char const * pBegin,
1760 sal_Char const * pEnd)
1761 {
1762 return getCharsetEncoding_Impl(pBegin, pEnd);
1763 }
1764
1765 //============================================================================
1766 // static
getCharsetEncoding(sal_Unicode const * pBegin,sal_Unicode const * pEnd)1767 rtl_TextEncoding INetMIME::getCharsetEncoding(sal_Unicode const * pBegin,
1768 sal_Unicode const * pEnd)
1769 {
1770 return getCharsetEncoding_Impl(pBegin, pEnd);
1771 }
1772
1773 //============================================================================
1774 // static
1775 INetMIMECharsetList_Impl *
createPreferredCharsetList(rtl_TextEncoding eEncoding)1776 INetMIME::createPreferredCharsetList(rtl_TextEncoding eEncoding)
1777 {
1778 static const sal_uInt32 aUSASCIIRanges[] = { 0, 0x7F, sal_uInt32(-1) };
1779
1780 static const sal_uInt32 aISO88591Ranges[] = { 0, 0xFF, sal_uInt32(-1) };
1781 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT> version
1782 // 1.0 of 1999 July 27
1783
1784 static const sal_uInt32 aISO88592Ranges[]
1785 = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0,
1786 0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC2, 0xC4, 0xC4, 0xC7, 0xC7,
1787 0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xCE, 0xD3, 0xD4, 0xD6, 0xD7,
1788 0xDA, 0xDA, 0xDC, 0xDD, 0xDF, 0xDF, 0xE1, 0xE2, 0xE4, 0xE4,
1789 0xE7, 0xE7, 0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF3, 0xF4,
1790 0xF6, 0xF7, 0xFA, 0xFA, 0xFC, 0xFD, 0x102, 0x107, 0x10C, 0x111,
1791 0x118, 0x11B, 0x139, 0x13A, 0x13D, 0x13E, 0x141, 0x144,
1792 0x147, 0x148, 0x150, 0x151, 0x154, 0x155, 0x158, 0x15B,
1793 0x15E, 0x165, 0x16E, 0x171, 0x179, 0x17E, 0x2C7, 0x2C7,
1794 0x2D8, 0x2D9, 0x2DB, 0x2DB, 0x2DD, 0x2DD, sal_uInt32(-1) };
1795 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT> version
1796 // 1.0 of 1999 July 27
1797
1798 static const sal_uInt32 aISO88593Ranges[]
1799 = { 0, 0xA0, 0xA3, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xB0, 0xB0,
1800 0xB2, 0xB5, 0xB7, 0xB8, 0xBD, 0xBD, 0xC0, 0xC2, 0xC4, 0xC4,
1801 0xC7, 0xCF, 0xD1, 0xD4, 0xD6, 0xD7, 0xD9, 0xDC, 0xDF, 0xE2,
1802 0xE4, 0xE4, 0xE7, 0xEF, 0xF1, 0xF4, 0xF6, 0xF7, 0xF9, 0xFC,
1803 0x108, 0x10B, 0x11C, 0x121, 0x124, 0x127, 0x130, 0x131,
1804 0x134, 0x135, 0x15C, 0x15F, 0x16C, 0x16D, 0x17B, 0x17C,
1805 0x2D8, 0x2D9, sal_uInt32(-1) };
1806 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT> version
1807 // 1.0 of 1999 July 27
1808
1809 static const sal_uInt32 aISO88594Ranges[]
1810 = { 0, 0xA0, 0xA4, 0xA4, 0xA7, 0xA8, 0xAD, 0xAD, 0xAF, 0xB0,
1811 0xB4, 0xB4, 0xB8, 0xB8, 0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB,
1812 0xCD, 0xCE, 0xD4, 0xD8, 0xDA, 0xDC, 0xDF, 0xDF, 0xE1, 0xE6,
1813 0xE9, 0xE9, 0xEB, 0xEB, 0xED, 0xEE, 0xF4, 0xF8, 0xFA, 0xFC,
1814 0x100, 0x101, 0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113,
1815 0x116, 0x119, 0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F,
1816 0x136, 0x138, 0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D,
1817 0x156, 0x157, 0x160, 0x161, 0x166, 0x16B, 0x172, 0x173,
1818 0x17D, 0x17E, 0x2C7, 0x2C7, 0x2D9, 0x2D9, 0x2DB, 0x2DB,
1819 sal_uInt32(-1) };
1820 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT> version
1821 // 1.0 of 1999 July 27
1822
1823 static const sal_uInt32 aISO88595Ranges[]
1824 = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0x401, 0x40C, 0x40E, 0x44F,
1825 0x451, 0x45C, 0x45E, 0x45F, 0x2116, 0x2116, sal_uInt32(-1) };
1826 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT> version
1827 // 1.0 of 1999 July 27
1828
1829 static const sal_uInt32 aISO88596Ranges[]
1830 = { 0, 0xA0, 0xA4, 0xA4, 0xAD, 0xAD, 0x60C, 0x60C, 0x61B, 0x61B,
1831 0x61F, 0x61F, 0x621, 0x63A, 0x640, 0x652, sal_uInt32(-1) };
1832 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT> version
1833 // 1.0 of 1999 July 27
1834
1835 static const sal_uInt32 aISO88597Ranges[]
1836 = { 0, 0xA0, 0xA3, 0xA3, 0xA6, 0xA9, 0xAB, 0xAD, 0xB0, 0xB3,
1837 0xB7, 0xB7, 0xBB, 0xBB, 0xBD, 0xBD, 0x384, 0x386, 0x388, 0x38A,
1838 0x38C, 0x38C, 0x38E, 0x3A1, 0x3A3, 0x3CE, 0x2015, 0x2015,
1839 0x2018, 0x2019, sal_uInt32(-1) };
1840 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT> version
1841 // 1.0 of 1999 July 27
1842
1843 static const sal_uInt32 aISO88598Ranges[]
1844 = { 0, 0xA0, 0xA2, 0xA9, 0xAB, 0xB9, 0xBB, 0xBE, 0xD7, 0xD7,
1845 0xF7, 0xF7, 0x5D0, 0x5EA, 0x200E, 0x200F, 0x2017, 0x2017,
1846 sal_uInt32(-1) };
1847 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT> version
1848 // 1.1 of 2000-Jan-03
1849
1850 static const sal_uInt32 aISO88599Ranges[]
1851 = { 0, 0xCF, 0xD1, 0xDC, 0xDF, 0xEF, 0xF1, 0xFC, 0xFF, 0xFF,
1852 0x11E, 0x11F, 0x130, 0x131, 0x15E, 0x15F, sal_uInt32(-1) };
1853 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT> version
1854 // 1.0 of 1999 July 27
1855
1856 static const sal_uInt32 aISO885910Ranges[]
1857 = { 0, 0xA0, 0xA7, 0xA7, 0xAD, 0xAD, 0xB0, 0xB0, 0xB7, 0xB7,
1858 0xC1, 0xC6, 0xC9, 0xC9, 0xCB, 0xCB, 0xCD, 0xD0, 0xD3, 0xD6,
1859 0xD8, 0xD8, 0xDA, 0xDF, 0xE1, 0xE6, 0xE9, 0xE9, 0xEB, 0xEB,
1860 0xED, 0xF0, 0xF3, 0xF6, 0xF8, 0xF8, 0xFA, 0xFE, 0x100, 0x101,
1861 0x104, 0x105, 0x10C, 0x10D, 0x110, 0x113, 0x116, 0x119,
1862 0x122, 0x123, 0x128, 0x12B, 0x12E, 0x12F, 0x136, 0x138,
1863 0x13B, 0x13C, 0x145, 0x146, 0x14A, 0x14D, 0x160, 0x161,
1864 0x166, 0x16B, 0x172, 0x173, 0x17D, 0x17E, 0x2015, 0x2015,
1865 sal_uInt32(-1) };
1866 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT> version
1867 // 1.1 of 1999 October 11
1868
1869 static const sal_uInt32 aISO885913Ranges[]
1870 = { 0, 0xA0, 0xA2, 0xA4, 0xA6, 0xA7, 0xA9, 0xA9, 0xAB, 0xAE,
1871 0xB0, 0xB3, 0xB5, 0xB7, 0xB9, 0xB9, 0xBB, 0xBE, 0xC4, 0xC6,
1872 0xC9, 0xC9, 0xD3, 0xD3, 0xD5, 0xD8, 0xDC, 0xDC, 0xDF, 0xDF,
1873 0xE4, 0xE6, 0xE9, 0xE9, 0xF3, 0xF3, 0xF5, 0xF8, 0xFC, 0xFC,
1874 0x100, 0x101, 0x104, 0x107, 0x10C, 0x10D, 0x112, 0x113,
1875 0x116, 0x119, 0x122, 0x123, 0x12A, 0x12B, 0x12E, 0x12F,
1876 0x136, 0x137, 0x13B, 0x13C, 0x141, 0x146, 0x14C, 0x14D,
1877 0x156, 0x157, 0x15A, 0x15B, 0x160, 0x161, 0x16A, 0x16B,
1878 0x172, 0x173, 0x179, 0x17E, 0x2019, 0x2019, 0x201C, 0x201E,
1879 sal_uInt32(-1) };
1880 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT> version
1881 // 1.0 of 1999 July 27
1882
1883 static const sal_uInt32 aISO885914Ranges[]
1884 = { 0, 0xA0, 0xA3, 0xA3, 0xA7, 0xA7, 0xA9, 0xA9, 0xAD, 0xAE,
1885 0xB6, 0xB6, 0xC0, 0xCF, 0xD1, 0xD6, 0xD8, 0xDD, 0xDF, 0xEF,
1886 0xF1, 0xF6, 0xF8, 0xFD, 0xFF, 0xFF, 0x10A, 0x10B, 0x120, 0x121,
1887 0x174, 0x178, 0x1E02, 0x1E03, 0x1E0A, 0x1E0B, 0x1E1E, 0x1E1F,
1888 0x1E40, 0x1E41, 0x1E56, 0x1E57, 0x1E60, 0x1E61, 0x1E6A, 0x1E6B,
1889 0x1E80, 0x1E85, 0x1EF2, 0x1EF3, sal_uInt32(-1) };
1890 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT> version
1891 // 1.0 of 1999 July 27
1892
1893 static const sal_uInt32 aISO885915Ranges[]
1894 = { 0, 0xA3, 0xA5, 0xA5, 0xA7, 0xA7, 0xA9, 0xB3, 0xB5, 0xB7,
1895 0xB9, 0xBB, 0xBF, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178,
1896 0x17D, 0x17E, 0x20AC, 0x20AC, sal_uInt32(-1) };
1897 // <ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT> version
1898 // 1.0 of 1999 July 27
1899
1900 static const sal_uInt32 aKOI8RRanges[]
1901 = { 0, 0x7F, 0xA0, 0xA0, 0xA9, 0xA9, 0xB0, 0xB0, 0xB2, 0xB2,
1902 0xB7, 0xB7, 0xF7, 0xF7, 0x401, 0x401, 0x410, 0x44F, 0x451, 0x451,
1903 0x2219, 0x221A, 0x2248, 0x2248, 0x2264, 0x2265, 0x2320, 0x2321,
1904 0x2500, 0x2500, 0x2502, 0x2502, 0x250C, 0x250C, 0x2510, 0x2510,
1905 0x2514, 0x2514, 0x2518, 0x2518, 0x251C, 0x251C, 0x2524, 0x2524,
1906 0x252C, 0x252C, 0x2534, 0x2534, 0x253C, 0x253C, 0x2550, 0x256C,
1907 0x2580, 0x2580, 0x2584, 0x2584, 0x2588, 0x2588, 0x258C, 0x258C,
1908 0x2590, 0x2593, 0x25A0, 0x25A0, sal_uInt32(-1) };
1909 // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT>
1910 // version 1.0 of 18 August 1999
1911
1912 #if defined WNT
1913 static const sal_uInt32 aWindows1252Ranges[]
1914 = { 0, 0x7F, 0xA0, 0xFF, 0x152, 0x153, 0x160, 0x161, 0x178, 0x178,
1915 0x17D, 0x17E, 0x192, 0x192, 0x2C6, 0x2C6, 0x2DC, 0x2DC,
1916 0x2013, 0x2014, 0x2018, 0x201A, 0x201C, 0x201E, 0x2020, 0x2022,
1917 0x2026, 0x2026, 0x2030, 0x2030, 0x2039, 0x203A, 0x20AC, 0x20AC,
1918 0x2122, 0x2122, sal_uInt32(-1) };
1919 // <ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/
1920 // CP1252.TXT> version 2.01 of 04/15/98
1921 #endif // WNT
1922
1923 INetMIMECharsetList_Impl * pList = new INetMIMECharsetList_Impl;
1924 switch (eEncoding)
1925 {
1926 case RTL_TEXTENCODING_MS_1252:
1927 #if defined WNT
1928 pList->prepend(Charset(RTL_TEXTENCODING_MS_1252,
1929 aWindows1252Ranges));
1930 #endif // WNT
1931 case RTL_TEXTENCODING_ISO_8859_1:
1932 case RTL_TEXTENCODING_UTF7:
1933 case RTL_TEXTENCODING_UTF8:
1934 break;
1935
1936 case RTL_TEXTENCODING_ISO_8859_2:
1937 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2,
1938 aISO88592Ranges));
1939 break;
1940
1941 case RTL_TEXTENCODING_ISO_8859_3:
1942 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_3,
1943 aISO88593Ranges));
1944 break;
1945
1946 case RTL_TEXTENCODING_ISO_8859_4:
1947 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4,
1948 aISO88594Ranges));
1949 break;
1950
1951 case RTL_TEXTENCODING_ISO_8859_5:
1952 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5,
1953 aISO88595Ranges));
1954 break;
1955
1956 case RTL_TEXTENCODING_ISO_8859_6:
1957 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6,
1958 aISO88596Ranges));
1959 break;
1960
1961 case RTL_TEXTENCODING_ISO_8859_7:
1962 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7,
1963 aISO88597Ranges));
1964 break;
1965
1966 case RTL_TEXTENCODING_ISO_8859_8:
1967 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8,
1968 aISO88598Ranges));
1969 break;
1970
1971 case RTL_TEXTENCODING_ISO_8859_9:
1972 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9,
1973 aISO88599Ranges));
1974 break;
1975
1976 case RTL_TEXTENCODING_ISO_8859_10:
1977 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_10,
1978 aISO885910Ranges));
1979 break;
1980
1981 case RTL_TEXTENCODING_ISO_8859_13:
1982 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_13,
1983 aISO885913Ranges));
1984 break;
1985
1986 case RTL_TEXTENCODING_ISO_8859_14:
1987 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_14,
1988 aISO885914Ranges));
1989 break;
1990
1991 case RTL_TEXTENCODING_ISO_8859_15:
1992 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_15,
1993 aISO885915Ranges));
1994 break;
1995
1996 case RTL_TEXTENCODING_MS_1250:
1997 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_2,
1998 aISO88592Ranges));
1999 break;
2000
2001 case RTL_TEXTENCODING_MS_1251:
2002 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5,
2003 aISO88595Ranges));
2004 break;
2005
2006 case RTL_TEXTENCODING_MS_1253:
2007 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_7,
2008 aISO88597Ranges));
2009 break;
2010
2011 case RTL_TEXTENCODING_MS_1254:
2012 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_9,
2013 aISO88599Ranges));
2014 break;
2015
2016 case RTL_TEXTENCODING_MS_1255:
2017 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_8,
2018 aISO88598Ranges));
2019 break;
2020
2021 case RTL_TEXTENCODING_MS_1256:
2022 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_6,
2023 aISO88596Ranges));
2024 break;
2025
2026 case RTL_TEXTENCODING_MS_1257:
2027 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_4,
2028 aISO88594Ranges));
2029 break;
2030
2031 case RTL_TEXTENCODING_KOI8_R:
2032 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_5,
2033 aISO88595Ranges));
2034 pList->prepend(Charset(RTL_TEXTENCODING_KOI8_R, aKOI8RRanges));
2035 break;
2036
2037 default: //@@@ more cases are missing!
2038 DBG_ERROR("INetMIME::createPreferredCharsetList():"
2039 " Unsupported encoding");
2040 break;
2041 }
2042 pList->prepend(Charset(RTL_TEXTENCODING_ISO_8859_1, aISO88591Ranges));
2043 pList->prepend(Charset(RTL_TEXTENCODING_ASCII_US, aUSASCIIRanges));
2044 return pList;
2045 }
2046
2047 //============================================================================
2048 // static
convertToUnicode(const sal_Char * pBegin,const sal_Char * pEnd,rtl_TextEncoding eEncoding,sal_Size & rSize)2049 sal_Unicode * INetMIME::convertToUnicode(const sal_Char * pBegin,
2050 const sal_Char * pEnd,
2051 rtl_TextEncoding eEncoding,
2052 sal_Size & rSize)
2053 {
2054 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
2055 return 0;
2056 rtl_TextToUnicodeConverter hConverter
2057 = rtl_createTextToUnicodeConverter(eEncoding);
2058 rtl_TextToUnicodeContext hContext
2059 = rtl_createTextToUnicodeContext(hConverter);
2060 sal_Unicode * pBuffer;
2061 sal_uInt32 nInfo;
2062 for (sal_Size nBufferSize = pEnd - pBegin;;
2063 nBufferSize += nBufferSize / 3 + 1)
2064 {
2065 pBuffer = new sal_Unicode[nBufferSize];
2066 sal_Size nSrcCvtBytes;
2067 rSize = rtl_convertTextToUnicode(
2068 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer,
2069 nBufferSize,
2070 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
2071 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
2072 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
2073 &nInfo, &nSrcCvtBytes);
2074 if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
2075 break;
2076 delete[] pBuffer;
2077 rtl_resetTextToUnicodeContext(hConverter, hContext);
2078 }
2079 rtl_destroyTextToUnicodeContext(hConverter, hContext);
2080 rtl_destroyTextToUnicodeConverter(hConverter);
2081 if (nInfo != 0)
2082 {
2083 delete[] pBuffer;
2084 pBuffer = 0;
2085 }
2086 return pBuffer;
2087 }
2088
2089 //============================================================================
2090 // static
convertFromUnicode(const sal_Unicode * pBegin,const sal_Unicode * pEnd,rtl_TextEncoding eEncoding,sal_Size & rSize)2091 sal_Char * INetMIME::convertFromUnicode(const sal_Unicode * pBegin,
2092 const sal_Unicode * pEnd,
2093 rtl_TextEncoding eEncoding,
2094 sal_Size & rSize)
2095 {
2096 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
2097 return 0;
2098 rtl_UnicodeToTextConverter hConverter
2099 = rtl_createUnicodeToTextConverter(eEncoding);
2100 rtl_UnicodeToTextContext hContext
2101 = rtl_createUnicodeToTextContext(hConverter);
2102 sal_Char * pBuffer;
2103 sal_uInt32 nInfo;
2104 for (sal_Size nBufferSize = pEnd - pBegin;;
2105 nBufferSize += nBufferSize / 3 + 1)
2106 {
2107 pBuffer = new sal_Char[nBufferSize];
2108 sal_Size nSrcCvtBytes;
2109 rSize = rtl_convertUnicodeToText(
2110 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer,
2111 nBufferSize,
2112 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
2113 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
2114 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
2115 | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR,
2116 &nInfo, &nSrcCvtBytes);
2117 if (nInfo != RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)
2118 break;
2119 delete[] pBuffer;
2120 rtl_resetUnicodeToTextContext(hConverter, hContext);
2121 }
2122 rtl_destroyUnicodeToTextContext(hConverter, hContext);
2123 rtl_destroyUnicodeToTextConverter(hConverter);
2124 if (nInfo != 0)
2125 {
2126 delete[] pBuffer;
2127 pBuffer = 0;
2128 }
2129 return pBuffer;
2130 }
2131
2132 //============================================================================
2133 // static
writeUTF8(INetMIMEOutputSink & rSink,sal_uInt32 nChar)2134 void INetMIME::writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar)
2135 {
2136 // See RFC 2279 for a discussion of UTF-8.
2137 DBG_ASSERT(nChar < 0x80000000, "INetMIME::writeUTF8(): Bad char");
2138
2139 if (nChar < 0x80)
2140 rSink << sal_Char(nChar);
2141 else if (nChar < 0x800)
2142 rSink << sal_Char(nChar >> 6 | 0xC0)
2143 << sal_Char((nChar & 0x3F) | 0x80);
2144 else if (nChar < 0x10000)
2145 rSink << sal_Char(nChar >> 12 | 0xE0)
2146 << sal_Char((nChar >> 6 & 0x3F) | 0x80)
2147 << sal_Char((nChar & 0x3F) | 0x80);
2148 else if (nChar < 0x200000)
2149 rSink << sal_Char(nChar >> 18 | 0xF0)
2150 << sal_Char((nChar >> 12 & 0x3F) | 0x80)
2151 << sal_Char((nChar >> 6 & 0x3F) | 0x80)
2152 << sal_Char((nChar & 0x3F) | 0x80);
2153 else if (nChar < 0x4000000)
2154 rSink << sal_Char(nChar >> 24 | 0xF8)
2155 << sal_Char((nChar >> 18 & 0x3F) | 0x80)
2156 << sal_Char((nChar >> 12 & 0x3F) | 0x80)
2157 << sal_Char((nChar >> 6 & 0x3F) | 0x80)
2158 << sal_Char((nChar & 0x3F) | 0x80);
2159 else
2160 rSink << sal_Char(nChar >> 30 | 0xFC)
2161 << sal_Char((nChar >> 24 & 0x3F) | 0x80)
2162 << sal_Char((nChar >> 18 & 0x3F) | 0x80)
2163 << sal_Char((nChar >> 12 & 0x3F) | 0x80)
2164 << sal_Char((nChar >> 6 & 0x3F) | 0x80)
2165 << sal_Char((nChar & 0x3F) | 0x80);
2166 }
2167
2168 //============================================================================
2169 // static
writeUnsigned(INetMIMEOutputSink & rSink,sal_uInt32 nValue,int nMinDigits)2170 void INetMIME::writeUnsigned(INetMIMEOutputSink & rSink, sal_uInt32 nValue,
2171 int nMinDigits)
2172 {
2173 sal_Char aBuffer[10];
2174 // max unsigned 32 bit value (4294967295) has 10 places
2175 sal_Char * p = aBuffer;
2176 for (; nValue > 0; nValue /= 10)
2177 *p++ = sal_Char(getDigit(nValue % 10));
2178 nMinDigits -= p - aBuffer;
2179 while (nMinDigits-- > 0)
2180 rSink << '0';
2181 while (p != aBuffer)
2182 rSink << *--p;
2183 }
2184
2185 //============================================================================
2186 // static
writeDateTime(INetMIMEOutputSink & rSink,const DateTime & rUTC)2187 void INetMIME::writeDateTime(INetMIMEOutputSink & rSink,
2188 const DateTime & rUTC)
2189 {
2190 static const sal_Char aDay[7][3]
2191 = { { 'M', 'o', 'n' },
2192 { 'T', 'u', 'e' },
2193 { 'W', 'e', 'd' },
2194 { 'T', 'h', 'u' },
2195 { 'F', 'r', 'i' },
2196 { 'S', 'a', 't' },
2197 { 'S', 'u', 'n' } };
2198 const sal_Char * pTheDay = aDay[rUTC.GetDayOfWeek()];
2199 rSink.write(pTheDay, pTheDay + 3);
2200 rSink << ", ";
2201 writeUnsigned(rSink, rUTC.GetDay());
2202 rSink << ' ';
2203 static const sal_Char aMonth[12][3]
2204 = { { 'J', 'a', 'n' },
2205 { 'F', 'e', 'b' },
2206 { 'M', 'a', 'r' },
2207 { 'A', 'p', 'r' },
2208 { 'M', 'a', 'y' },
2209 { 'J', 'u', 'n' },
2210 { 'J', 'u', 'l' },
2211 { 'A', 'u', 'g' },
2212 { 'S', 'e', 'p' },
2213 { 'O', 'c', 't' },
2214 { 'N', 'o', 'v' },
2215 { 'D', 'e', 'c' } };
2216 const sal_Char * pTheMonth = aMonth[rUTC.GetMonth() - 1];
2217 rSink.write(pTheMonth, pTheMonth + 3);
2218 rSink << ' ';
2219 writeUnsigned(rSink, rUTC.GetYear());
2220 rSink << ' ';
2221 writeUnsigned(rSink, rUTC.GetHour(), 2);
2222 rSink << ':';
2223 writeUnsigned(rSink, rUTC.GetMin(), 2);
2224 rSink << ':';
2225 writeUnsigned(rSink, rUTC.GetSec(), 2);
2226 rSink << " +0000";
2227 }
2228
2229 //============================================================================
2230 // static
writeHeaderFieldBody(INetMIMEOutputSink & rSink,HeaderFieldType eType,const ByteString & rBody,rtl_TextEncoding ePreferredEncoding,bool bInitialSpace)2231 void INetMIME::writeHeaderFieldBody(INetMIMEOutputSink & rSink,
2232 HeaderFieldType eType,
2233 const ByteString & rBody,
2234 rtl_TextEncoding ePreferredEncoding,
2235 bool bInitialSpace)
2236 {
2237 writeHeaderFieldBody(rSink, eType,
2238 UniString(rBody, RTL_TEXTENCODING_UTF8),
2239 ePreferredEncoding, bInitialSpace);
2240 }
2241
2242 //============================================================================
2243 // static
writeHeaderFieldBody(INetMIMEOutputSink & rSink,HeaderFieldType eType,const UniString & rBody,rtl_TextEncoding ePreferredEncoding,bool bInitialSpace)2244 void INetMIME::writeHeaderFieldBody(INetMIMEOutputSink & rSink,
2245 HeaderFieldType eType,
2246 const UniString & rBody,
2247 rtl_TextEncoding ePreferredEncoding,
2248 bool bInitialSpace)
2249 {
2250 if (eType == HEADER_FIELD_TEXT)
2251 {
2252 INetMIMEEncodedWordOutputSink
2253 aOutput(rSink, INetMIMEEncodedWordOutputSink::CONTEXT_TEXT,
2254 bInitialSpace ?
2255 INetMIMEEncodedWordOutputSink::SPACE_ALWAYS :
2256 INetMIMEEncodedWordOutputSink::SPACE_NO,
2257 ePreferredEncoding);
2258 aOutput.write(rBody.GetBuffer(), rBody.GetBuffer() + rBody.Len());
2259 aOutput.flush();
2260 }
2261 else
2262 {
2263 enum Brackets { BRACKETS_OUTSIDE, BRACKETS_OPENING, BRACKETS_INSIDE };
2264 Brackets eBrackets = BRACKETS_OUTSIDE;
2265
2266 const sal_Unicode * pBodyPtr = rBody.GetBuffer();
2267 const sal_Unicode * pBodyEnd = pBodyPtr + rBody.Len();
2268 while (pBodyPtr != pBodyEnd)
2269 switch (*pBodyPtr)
2270 {
2271 case '\t':
2272 case ' ':
2273 // A WSP adds to accumulated space:
2274 bInitialSpace = true;
2275 ++pBodyPtr;
2276 break;
2277
2278 case '(':
2279 {
2280 // Write a pending '<' if necessary:
2281 if (eBrackets == BRACKETS_OPENING)
2282 {
2283 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2284 >= rSink.getLineLengthLimit())
2285 rSink << INetMIMEOutputSink::endl << ' ';
2286 else if (bInitialSpace)
2287 rSink << ' ';
2288 rSink << '<';
2289 bInitialSpace = false;
2290 eBrackets = BRACKETS_INSIDE;
2291 }
2292
2293 // Write the comment, introducing encoded-words where
2294 // necessary:
2295 int nLevel = 0;
2296 INetMIMEEncodedWordOutputSink
2297 aOutput(
2298 rSink,
2299 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT,
2300 INetMIMEEncodedWordOutputSink::SPACE_NO,
2301 ePreferredEncoding);
2302 while (pBodyPtr != pBodyEnd)
2303 switch (*pBodyPtr)
2304 {
2305 case '(':
2306 aOutput.flush();
2307 if (rSink.getColumn()
2308 + (bInitialSpace ? 1 : 0)
2309 >= rSink.getLineLengthLimit())
2310 rSink << INetMIMEOutputSink::endl << ' ';
2311 else if (bInitialSpace)
2312 rSink << ' ';
2313 rSink << '(';
2314 bInitialSpace = false;
2315 ++nLevel;
2316 ++pBodyPtr;
2317 break;
2318
2319 case ')':
2320 aOutput.flush();
2321 if (rSink.getColumn()
2322 >= rSink.getLineLengthLimit())
2323 rSink << INetMIMEOutputSink::endl << ' ';
2324 rSink << ')';
2325 ++pBodyPtr;
2326 if (--nLevel == 0)
2327 goto comment_done;
2328 break;
2329
2330 case '\\':
2331 if (++pBodyPtr == pBodyEnd)
2332 break;
2333 default:
2334 aOutput << *pBodyPtr++;
2335 break;
2336 }
2337 comment_done:
2338 break;
2339 }
2340
2341 case '<':
2342 // Write an already pending '<' if necessary:
2343 if (eBrackets == BRACKETS_OPENING)
2344 {
2345 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2346 >= rSink.getLineLengthLimit())
2347 rSink << INetMIMEOutputSink::endl << ' ';
2348 else if (bInitialSpace)
2349 rSink << ' ';
2350 rSink << '<';
2351 bInitialSpace = false;
2352 }
2353
2354 // Remember this '<' as pending, and open a bracketed
2355 // block:
2356 eBrackets = BRACKETS_OPENING;
2357 ++pBodyPtr;
2358 break;
2359
2360 case '>':
2361 // Write a pending '<' if necessary:
2362 if (eBrackets == BRACKETS_OPENING)
2363 {
2364 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2365 >= rSink.getLineLengthLimit())
2366 rSink << INetMIMEOutputSink::endl << ' ';
2367 else if (bInitialSpace)
2368 rSink << ' ';
2369 rSink << '<';
2370 bInitialSpace = false;
2371 }
2372
2373 // Write this '>', and close any bracketed block:
2374 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2375 >= rSink.getLineLengthLimit())
2376 rSink << INetMIMEOutputSink::endl << ' ';
2377 else if (bInitialSpace)
2378 rSink << ' ';
2379 rSink << '>';
2380 bInitialSpace = false;
2381 eBrackets = BRACKETS_OUTSIDE;
2382 ++pBodyPtr;
2383 break;
2384
2385 case ',':
2386 case ':':
2387 case ';':
2388 case '\\':
2389 case ']':
2390 // Write a pending '<' if necessary:
2391 if (eBrackets == BRACKETS_OPENING)
2392 {
2393 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2394 >= rSink.getLineLengthLimit())
2395 rSink << INetMIMEOutputSink::endl << ' ';
2396 else if (bInitialSpace)
2397 rSink << ' ';
2398 rSink << '<';
2399 bInitialSpace = false;
2400 eBrackets = BRACKETS_INSIDE;
2401 }
2402
2403 // Write this specials:
2404 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2405 >= rSink.getLineLengthLimit())
2406 rSink << INetMIMEOutputSink::endl << ' ';
2407 else if (bInitialSpace)
2408 rSink << ' ';
2409 rSink << sal_Char(*pBodyPtr++);
2410 bInitialSpace = false;
2411 break;
2412
2413 case '\x0D': // CR
2414 // A <CRLF WSP> adds to accumulated space, a <CR> not
2415 // followed by <LF WSP> starts 'junk':
2416 if (startsWithLineFolding(pBodyPtr, pBodyEnd))
2417 {
2418 bInitialSpace = true;
2419 pBodyPtr += 3;
2420 break;
2421 }
2422 default:
2423 {
2424 // The next token is either one of <"." / "@" / atom /
2425 // quoted-string / domain-literal>, or it's 'junk'; if it
2426 // is not 'junk', it is either a 'phrase' (i.e., it may
2427 // contain encoded-words) or a 'non-phrase' (i.e., it may
2428 // not contain encoded-words):
2429 enum Entity { ENTITY_JUNK, ENTITY_NON_PHRASE,
2430 ENTITY_PHRASE };
2431 Entity eEntity = ENTITY_JUNK;
2432 switch (*pBodyPtr)
2433 {
2434 case '.':
2435 case '@':
2436 case '[':
2437 // A token of <"." / "@" / domain-literal> always
2438 // starts a 'non-phrase':
2439 eEntity = ENTITY_NON_PHRASE;
2440 break;
2441
2442 default:
2443 if (isUSASCII(*pBodyPtr)
2444 && !isAtomChar(*pBodyPtr))
2445 {
2446 eEntity = ENTITY_JUNK;
2447 break;
2448 }
2449 case '"':
2450 // A token of <atom / quoted-string> can either be
2451 // a 'phrase' or a 'non-phrase':
2452 switch (eType)
2453 {
2454 case HEADER_FIELD_STRUCTURED:
2455 eEntity = ENTITY_NON_PHRASE;
2456 break;
2457
2458 case HEADER_FIELD_PHRASE:
2459 eEntity = ENTITY_PHRASE;
2460 break;
2461
2462 case HEADER_FIELD_MESSAGE_ID:
2463 // A 'phrase' if and only if outside any
2464 // bracketed block:
2465 eEntity
2466 = eBrackets == BRACKETS_OUTSIDE ?
2467 ENTITY_PHRASE :
2468 ENTITY_NON_PHRASE;
2469 break;
2470
2471 case HEADER_FIELD_ADDRESS:
2472 {
2473 // A 'non-phrase' if and only if, after
2474 // skipping this token and any following
2475 // <linear-white-space> and <comment>s,
2476 // there is no token left, or the next
2477 // token is any of <"." / "@" / ">" / ","
2478 // / ";">, or the next token is <":"> and
2479 // is within a bracketed block:
2480 const sal_Unicode * pLookAhead = pBodyPtr;
2481 if (*pLookAhead == '"')
2482 {
2483 pLookAhead
2484 = skipQuotedString(pLookAhead,
2485 pBodyEnd);
2486 if (pLookAhead == pBodyPtr)
2487 pLookAhead = pBodyEnd;
2488 }
2489 else
2490 while (pLookAhead != pBodyEnd
2491 && (isAtomChar(*pLookAhead)
2492 || !isUSASCII(
2493 *pLookAhead)))
2494 ++pLookAhead;
2495 while (pLookAhead != pBodyEnd)
2496 switch (*pLookAhead)
2497 {
2498 case '\t':
2499 case ' ':
2500 ++pLookAhead;
2501 break;
2502
2503 case '(':
2504 {
2505 const sal_Unicode * pPast
2506 = skipComment(pLookAhead,
2507 pBodyEnd);
2508 pLookAhead
2509 = pPast == pLookAhead ?
2510 pBodyEnd : pPast;
2511 break;
2512 }
2513
2514 case ',':
2515 case '.':
2516 case ';':
2517 case '>':
2518 case '@':
2519 eEntity = ENTITY_NON_PHRASE;
2520 goto entity_determined;
2521
2522 case ':':
2523 eEntity
2524 = eBrackets
2525 == BRACKETS_OUTSIDE ?
2526 ENTITY_PHRASE :
2527 ENTITY_NON_PHRASE;
2528 goto entity_determined;
2529
2530 case '\x0D': // CR
2531 if (startsWithLineFolding(
2532 pLookAhead, pBodyEnd))
2533 {
2534 pLookAhead += 3;
2535 break;
2536 }
2537 default:
2538 eEntity = ENTITY_PHRASE;
2539 goto entity_determined;
2540 }
2541 eEntity = ENTITY_NON_PHRASE;
2542 entity_determined:
2543 break;
2544 }
2545
2546 case HEADER_FIELD_TEXT:
2547 OSL_ASSERT(false);
2548 break;
2549 }
2550
2551 // In a 'non-phrase', a non-US-ASCII character
2552 // cannot be part of an <atom>, but instead the
2553 // whole entity is 'junk' rather than 'non-
2554 // phrase':
2555 if (eEntity == ENTITY_NON_PHRASE
2556 && !isUSASCII(*pBodyPtr))
2557 eEntity = ENTITY_JUNK;
2558 break;
2559 }
2560
2561 switch (eEntity)
2562 {
2563 case ENTITY_JUNK:
2564 {
2565 // Write a pending '<' if necessary:
2566 if (eBrackets == BRACKETS_OPENING)
2567 {
2568 if (rSink.getColumn()
2569 + (bInitialSpace ? 1 : 0)
2570 >= rSink.getLineLengthLimit())
2571 rSink << INetMIMEOutputSink::endl << ' ';
2572 else if (bInitialSpace)
2573 rSink << ' ';
2574 rSink << '<';
2575 bInitialSpace = false;
2576 eBrackets = BRACKETS_INSIDE;
2577 }
2578
2579 // Calculate the length of in- and output:
2580 const sal_Unicode * pStart = pBodyPtr;
2581 sal_Size nLength = 0;
2582 bool bModify = false;
2583 bool bEnd = false;
2584 while (pBodyPtr != pBodyEnd && !bEnd)
2585 switch (*pBodyPtr)
2586 {
2587 case '\x0D': // CR
2588 if (startsWithLineFolding(pBodyPtr,
2589 pBodyEnd))
2590 bEnd = true;
2591 else if (startsWithLineBreak(
2592 pBodyPtr, pBodyEnd))
2593 {
2594 nLength += 3;
2595 bModify = true;
2596 pBodyPtr += 2;
2597 }
2598 else
2599 {
2600 ++nLength;
2601 ++pBodyPtr;
2602 }
2603 break;
2604
2605 case '\t':
2606 case ' ':
2607 bEnd = true;
2608 break;
2609
2610 default:
2611 if (isVisible(*pBodyPtr))
2612 bEnd = true;
2613 else if (isUSASCII(*pBodyPtr))
2614 {
2615 ++nLength;
2616 ++pBodyPtr;
2617 }
2618 else
2619 {
2620 nLength += getUTF8OctetCount(
2621 *pBodyPtr++);
2622 bModify = true;
2623 }
2624 break;
2625 }
2626
2627 // Write the output:
2628 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2629 + nLength
2630 > rSink.getLineLengthLimit())
2631 rSink << INetMIMEOutputSink::endl << ' ';
2632 else if (bInitialSpace)
2633 rSink << ' ';
2634 bInitialSpace = false;
2635 if (bModify)
2636 while (pStart != pBodyPtr)
2637 if (startsWithLineBreak(pStart, pBodyPtr))
2638 {
2639 rSink << "\x0D\\\x0A"; // CR, '\', LF
2640 pStart += 2;
2641 }
2642 else
2643 writeUTF8(rSink, *pStart++);
2644 else
2645 rSink.write(pStart, pBodyPtr);
2646 break;
2647 }
2648
2649 case ENTITY_NON_PHRASE:
2650 {
2651 // Calculate the length of in- and output:
2652 const sal_Unicode * pStart = pBodyPtr;
2653 sal_Size nLength = 0;
2654 bool bBracketedBlock = false;
2655 bool bSymbol = *pStart != '.' && *pStart != '@';
2656 bool bModify = false;
2657 bool bEnd = false;
2658 while (pBodyPtr != pBodyEnd && !bEnd)
2659 switch (*pBodyPtr)
2660 {
2661 case '\t':
2662 case ' ':
2663 case '\x0D': // CR
2664 {
2665 const sal_Unicode * pLookAhead
2666 = skipLinearWhiteSpace(pBodyPtr,
2667 pBodyEnd);
2668 if (pLookAhead < pBodyEnd
2669 && (bSymbol ?
2670 isAtomChar(*pLookAhead)
2671 || *pLookAhead == '"'
2672 || *pLookAhead == '[' :
2673 *pLookAhead == '.'
2674 || *pLookAhead == '@'
2675 || (*pLookAhead == '>'
2676 && eType
2677 >= HEADER_FIELD_MESSAGE_ID
2678 && eBrackets
2679 == BRACKETS_OPENING)))
2680 {
2681 bModify = true;
2682 pBodyPtr = pLookAhead;
2683 }
2684 else
2685 bEnd = true;
2686 break;
2687 }
2688
2689 case '"':
2690 if (bSymbol)
2691 {
2692 pBodyPtr
2693 = scanQuotedBlock(pBodyPtr,
2694 pBodyEnd,
2695 '"', '"',
2696 nLength,
2697 bModify);
2698 bSymbol = false;
2699 }
2700 else
2701 bEnd = true;
2702 break;
2703
2704 case '[':
2705 if (bSymbol)
2706 {
2707 pBodyPtr
2708 = scanQuotedBlock(pBodyPtr,
2709 pBodyEnd,
2710 '[', ']',
2711 nLength,
2712 bModify);
2713 bSymbol = false;
2714 }
2715 else
2716 bEnd = true;
2717 break;
2718
2719 case '.':
2720 case '@':
2721 if (bSymbol)
2722 bEnd = true;
2723 else
2724 {
2725 ++nLength;
2726 bSymbol = true;
2727 ++pBodyPtr;
2728 }
2729 break;
2730
2731 case '>':
2732 if (eBrackets == BRACKETS_OPENING
2733 && eType
2734 >= HEADER_FIELD_MESSAGE_ID)
2735 {
2736 ++nLength;
2737 bBracketedBlock = true;
2738 ++pBodyPtr;
2739 }
2740 bEnd = true;
2741 break;
2742
2743 default:
2744 if (isAtomChar(*pBodyPtr) && bSymbol)
2745 {
2746 while (pBodyPtr != pBodyEnd
2747 && isAtomChar(*pBodyPtr))
2748 {
2749 ++nLength;
2750 ++pBodyPtr;
2751 }
2752 bSymbol = false;
2753 }
2754 else
2755 {
2756 if (!isUSASCII(*pBodyPtr))
2757 bModify = true;
2758 bEnd = true;
2759 }
2760 break;
2761 }
2762
2763 // Write a pending '<' if necessary:
2764 if (eBrackets == BRACKETS_OPENING
2765 && !bBracketedBlock)
2766 {
2767 if (rSink.getColumn()
2768 + (bInitialSpace ? 1 : 0)
2769 >= rSink.getLineLengthLimit())
2770 rSink << INetMIMEOutputSink::endl << ' ';
2771 else if (bInitialSpace)
2772 rSink << ' ';
2773 rSink << '<';
2774 bInitialSpace = false;
2775 eBrackets = BRACKETS_INSIDE;
2776 }
2777
2778 // Write the output:
2779 if (rSink.getColumn() + (bInitialSpace ? 1 : 0)
2780 + nLength
2781 > rSink.getLineLengthLimit())
2782 rSink << INetMIMEOutputSink::endl << ' ';
2783 else if (bInitialSpace)
2784 rSink << ' ';
2785 bInitialSpace = false;
2786 if (bBracketedBlock)
2787 {
2788 rSink << '<';
2789 eBrackets = BRACKETS_OUTSIDE;
2790 }
2791 if (bModify)
2792 {
2793 enum Mode { MODE_PLAIN, MODE_QUOTED_STRING,
2794 MODE_DOMAIN_LITERAL };
2795 Mode eMode = MODE_PLAIN;
2796 while (pStart != pBodyPtr)
2797 switch (*pStart)
2798 {
2799 case '\x0D': // CR
2800 if (startsWithLineFolding(
2801 pStart, pBodyPtr))
2802 {
2803 if (eMode != MODE_PLAIN)
2804 rSink << sal_Char(
2805 pStart[2]);
2806 pStart += 3;
2807 }
2808 else if (startsWithLineBreak(
2809 pStart, pBodyPtr))
2810 {
2811 rSink << "\x0D\\\x0A";
2812 // CR, '\', LF
2813 pStart += 2;
2814 }
2815 else
2816 {
2817 rSink << '\x0D'; // CR
2818 ++pStart;
2819 }
2820 break;
2821
2822 case '\t':
2823 case ' ':
2824 if (eMode != MODE_PLAIN)
2825 rSink << sal_Char(*pStart);
2826 ++pStart;
2827 break;
2828
2829 case '"':
2830 if (eMode == MODE_PLAIN)
2831 eMode = MODE_QUOTED_STRING;
2832 else if (eMode
2833 == MODE_QUOTED_STRING)
2834 eMode = MODE_PLAIN;
2835 rSink << '"';
2836 ++pStart;
2837 break;
2838
2839 case '[':
2840 if (eMode == MODE_PLAIN)
2841 eMode = MODE_DOMAIN_LITERAL;
2842 rSink << '[';
2843 ++pStart;
2844 break;
2845
2846 case ']':
2847 if (eMode == MODE_DOMAIN_LITERAL)
2848 eMode = MODE_PLAIN;
2849 rSink << ']';
2850 ++pStart;
2851 break;
2852
2853 case '\\':
2854 rSink << '\\';
2855 if (++pStart < pBodyPtr)
2856 writeUTF8(rSink, *pStart++);
2857 break;
2858
2859 default:
2860 writeUTF8(rSink, *pStart++);
2861 break;
2862 }
2863 }
2864 else
2865 rSink.write(pStart, pBodyPtr);
2866 break;
2867 }
2868
2869 case ENTITY_PHRASE:
2870 {
2871 // Write a pending '<' if necessary:
2872 if (eBrackets == BRACKETS_OPENING)
2873 {
2874 if (rSink.getColumn()
2875 + (bInitialSpace ? 1 : 0)
2876 >= rSink.getLineLengthLimit())
2877 rSink << INetMIMEOutputSink::endl << ' ';
2878 else if (bInitialSpace)
2879 rSink << ' ';
2880 rSink << '<';
2881 bInitialSpace = false;
2882 eBrackets = BRACKETS_INSIDE;
2883 }
2884
2885 // Calculate the length of in- and output:
2886 const sal_Unicode * pStart = pBodyPtr;
2887 bool bQuotedString = false;
2888 bool bEnd = false;
2889 while (pBodyPtr != pBodyEnd && !bEnd)
2890 switch (*pBodyPtr)
2891 {
2892 case '\t':
2893 case ' ':
2894 case '\x0D': // CR
2895 if (bQuotedString)
2896 ++pBodyPtr;
2897 else
2898 {
2899 const sal_Unicode * pLookAhead
2900 = skipLinearWhiteSpace(
2901 pBodyPtr, pBodyEnd);
2902 if (pLookAhead != pBodyEnd
2903 && (isAtomChar(*pLookAhead)
2904 || !isUSASCII(*pLookAhead)
2905 || *pLookAhead == '"'))
2906 pBodyPtr = pLookAhead;
2907 else
2908 bEnd = true;
2909 }
2910 break;
2911
2912 case '"':
2913 bQuotedString = !bQuotedString;
2914 ++pBodyPtr;
2915 break;
2916
2917 case '\\':
2918 if (bQuotedString)
2919 {
2920 if (++pBodyPtr != pBodyEnd)
2921 ++pBodyPtr;
2922 }
2923 else
2924 bEnd = true;
2925 break;
2926
2927 default:
2928 if (bQuotedString
2929 || isAtomChar(*pBodyPtr)
2930 || !isUSASCII(*pBodyPtr))
2931 ++pBodyPtr;
2932 else
2933 bEnd = true;
2934 break;
2935 }
2936
2937 // Write the phrase, introducing encoded-words
2938 // where necessary:
2939 INetMIMEEncodedWordOutputSink
2940 aOutput(
2941 rSink,
2942 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE,
2943 bInitialSpace ?
2944 INetMIMEEncodedWordOutputSink::SPACE_ALWAYS :
2945 INetMIMEEncodedWordOutputSink::SPACE_ENCODED,
2946 ePreferredEncoding);
2947 while (pStart != pBodyPtr)
2948 switch (*pStart)
2949 {
2950 case '"':
2951 ++pStart;
2952 break;
2953
2954 case '\\':
2955 if (++pStart != pBodyPtr)
2956 aOutput << *pStart++;
2957 break;
2958
2959 case '\x0D': // CR
2960 pStart += 2;
2961 aOutput << *pStart++;
2962 break;
2963
2964 default:
2965 aOutput << *pStart++;
2966 break;
2967 }
2968 bInitialSpace = aOutput.flush();
2969 break;
2970 }
2971 }
2972 break;
2973 }
2974 }
2975 }
2976 }
2977
2978 //============================================================================
2979 // static
translateUTF8Char(const sal_Char * & rBegin,const sal_Char * pEnd,rtl_TextEncoding eEncoding,sal_uInt32 & rCharacter)2980 bool INetMIME::translateUTF8Char(const sal_Char *& rBegin,
2981 const sal_Char * pEnd,
2982 rtl_TextEncoding eEncoding,
2983 sal_uInt32 & rCharacter)
2984 {
2985 if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
2986 || static_cast< unsigned char >(*rBegin) >= 0xFE)
2987 return false;
2988
2989 int nCount;
2990 sal_uInt32 nMin;
2991 sal_uInt32 nUCS4;
2992 const sal_Char * p = rBegin;
2993 if (static_cast< unsigned char >(*p) < 0xE0)
2994 {
2995 nCount = 1;
2996 nMin = 0x80;
2997 nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
2998 }
2999 else if (static_cast< unsigned char >(*p) < 0xF0)
3000 {
3001 nCount = 2;
3002 nMin = 0x800;
3003 nUCS4 = static_cast< unsigned char >(*p) & 0xF;
3004 }
3005 else if (static_cast< unsigned char >(*p) < 0xF8)
3006 {
3007 nCount = 3;
3008 nMin = 0x10000;
3009 nUCS4 = static_cast< unsigned char >(*p) & 7;
3010 }
3011 else if (static_cast< unsigned char >(*p) < 0xFC)
3012 {
3013 nCount = 4;
3014 nMin = 0x200000;
3015 nUCS4 = static_cast< unsigned char >(*p) & 3;
3016 }
3017 else
3018 {
3019 nCount = 5;
3020 nMin = 0x4000000;
3021 nUCS4 = static_cast< unsigned char >(*p) & 1;
3022 }
3023 ++p;
3024
3025 for (; nCount-- > 0; ++p)
3026 if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
3027 nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
3028 else
3029 return false;
3030
3031 if (nUCS4 < nMin || nUCS4 > 0x10FFFF)
3032 return false;
3033
3034 if (eEncoding >= RTL_TEXTENCODING_UCS4)
3035 rCharacter = nUCS4;
3036 else
3037 {
3038 sal_Unicode aUTF16[2];
3039 const sal_Unicode * pUTF16End = putUTF32Character(aUTF16, nUCS4);
3040 sal_Size nSize;
3041 sal_Char * pBuffer = convertFromUnicode(aUTF16, pUTF16End, eEncoding,
3042 nSize);
3043 if (!pBuffer)
3044 return false;
3045 DBG_ASSERT(nSize == 1,
3046 "INetMIME::translateUTF8Char(): Bad conversion");
3047 rCharacter = *pBuffer;
3048 delete[] pBuffer;
3049 }
3050 rBegin = p;
3051 return true;
3052 }
3053
3054 //============================================================================
3055 // static
// Returns a copy of rText in which every octet sequence accepted by
// translateUTF8Char() is replaced by the single octet it translates to in
// eEncoding.  All other octets are copied unchanged.
ByteString INetMIME::decodeUTF8(const ByteString & rText,
                                rtl_TextEncoding eEncoding)
{
    ByteString aResult;
    const sal_Char * pCurrent = rText.GetBuffer();
    const sal_Char * const pStop = pCurrent + rText.Len();
    while (pCurrent != pStop)
    {
        sal_uInt32 nTranslated;
        if (!translateUTF8Char(pCurrent, pStop, eEncoding, nTranslated))
        {
            // Not translatable: pass this octet through and go on with the
            // next one.
            aResult += sal_Char(*pCurrent++);
            continue;
        }
        // translateUTF8Char() has already advanced pCurrent.
        aResult += sal_Char(nTranslated);
    }
    return aResult;
}
3072
3073 //============================================================================
3074 // static
decodeHeaderFieldBody(HeaderFieldType eType,const ByteString & rBody)3075 UniString INetMIME::decodeHeaderFieldBody(HeaderFieldType eType,
3076 const ByteString & rBody)
3077 {
3078 // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
3079 // versions of StarOffice send mails with header fields where encoded
3080 // words can be preceded by '=', ',', '.', '"', or '(', and followed by
3081 // '=', ',', '.', '"', ')', without any required white space in between.
3082 // And there appear to exist some broken mailers that only encode single
3083 // letters within words, like "Appel
3084 // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
    // detect encoded words even when not properly surrounded by white space.
3086 //
3087 // Non US-ASCII characters in rBody are treated as ISO-8859-1.
3088 //
3089 // encoded-word = "=?"
    //   1*(%x21 / %x23-27 / %x2A-2B / %x2D / %x30-39 / %x41-5A / %x5E-7E)
3091 // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
3092 // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
3093 // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
3094 // "?="
3095 //
3096 // base64 = ALPHA / DIGIT / "+" / "/"
3097
3098 const sal_Char * pBegin = rBody.GetBuffer();
3099 const sal_Char * pEnd = pBegin + rBody.Len();
3100
3101 UniString sDecoded;
3102 const sal_Char * pCopyBegin = pBegin;
3103
3104 /* bool bStartEncodedWord = true; */
3105 const sal_Char * pWSPBegin = pBegin;
3106 UniString sEncodedText;
3107 bool bQuotedEncodedText = false;
3108 sal_uInt32 nCommentLevel = 0;
3109
3110 for (const sal_Char * p = pBegin; p != pEnd;)
3111 {
3112 if (p != pEnd && *p == '=' /* && bStartEncodedWord */)
3113 {
3114 const sal_Char * q = p + 1;
3115 bool bEncodedWord = q != pEnd && *q++ == '?';
3116
3117 rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
3118 if (bEncodedWord)
3119 {
3120 const sal_Char * pCharsetBegin = q;
3121 const sal_Char * pLanguageBegin = 0;
3122 int nAlphaCount = 0;
3123 for (bool bDone = false; !bDone;)
3124 if (q == pEnd)
3125 {
3126 bEncodedWord = false;
3127 bDone = true;
3128 }
3129 else
3130 {
3131 sal_Char cChar = *q++;
3132 switch (cChar)
3133 {
3134 case '*':
3135 pLanguageBegin = q - 1;
3136 nAlphaCount = 0;
3137 break;
3138
3139 case '-':
3140 if (pLanguageBegin != 0)
3141 {
3142 if (nAlphaCount == 0)
3143 pLanguageBegin = 0;
3144 else
3145 nAlphaCount = 0;
3146 }
3147 break;
3148
3149 case '?':
3150 if (pCharsetBegin == q - 1)
3151 bEncodedWord = false;
3152 else
3153 {
3154 eCharsetEncoding
3155 = getCharsetEncoding(
3156 pCharsetBegin,
3157 pLanguageBegin == 0
3158 || nAlphaCount == 0 ?
3159 q - 1 : pLanguageBegin);
3160 bEncodedWord = isMIMECharsetEncoding(
3161 eCharsetEncoding);
3162 eCharsetEncoding
3163 = translateFromMIME(eCharsetEncoding);
3164 }
3165 bDone = true;
3166 break;
3167
3168 default:
3169 if (pLanguageBegin != 0
3170 && (!isAlpha(cChar) || ++nAlphaCount > 8))
3171 pLanguageBegin = 0;
3172 break;
3173 }
3174 }
3175 }
3176
3177 bool bEncodingB = false;
3178 if (bEncodedWord)
3179 {
3180 if (q == pEnd)
3181 bEncodedWord = false;
3182 else
3183 {
3184 switch (*q++)
3185 {
3186 case 'B':
3187 case 'b':
3188 bEncodingB = true;
3189 break;
3190
3191 case 'Q':
3192 case 'q':
3193 bEncodingB = false;
3194 break;
3195
3196 default:
3197 bEncodedWord = false;
3198 break;
3199 }
3200 }
3201 }
3202
3203 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
3204
3205 ByteString sText;
3206 if (bEncodedWord)
3207 {
3208 if (bEncodingB)
3209 {
3210 for (bool bDone = false; !bDone;)
3211 {
3212 if (pEnd - q < 4)
3213 {
3214 bEncodedWord = false;
3215 bDone = true;
3216 }
3217 else
3218 {
3219 bool bFinal = false;
3220 int nCount = 3;
3221 sal_uInt32 nValue = 0;
3222 for (int nShift = 18; nShift >= 0; nShift -= 6)
3223 {
3224 int nWeight = getBase64Weight(*q++);
3225 if (nWeight == -2)
3226 {
3227 bEncodedWord = false;
3228 bDone = true;
3229 break;
3230 }
3231 if (nWeight == -1)
3232 {
3233 if (!bFinal)
3234 {
3235 if (nShift >= 12)
3236 {
3237 bEncodedWord = false;
3238 bDone = true;
3239 break;
3240 }
3241 bFinal = true;
3242 nCount = nShift == 6 ? 1 : 2;
3243 }
3244 }
3245 else
3246 nValue |= nWeight << nShift;
3247 }
3248 if (bEncodedWord)
3249 {
3250 for (int nShift = 16; nCount-- > 0;
3251 nShift -= 8)
3252 sText += sal_Char(nValue >> nShift
3253 & 0xFF);
3254 if (*q == '?')
3255 {
3256 ++q;
3257 bDone = true;
3258 }
3259 if (bFinal && !bDone)
3260 {
3261 bEncodedWord = false;
3262 bDone = true;
3263 }
3264 }
3265 }
3266 }
3267 }
3268 else
3269 {
3270 const sal_Char * pEncodedTextBegin = q;
3271 const sal_Char * pEncodedTextCopyBegin = q;
3272 for (bool bDone = false; !bDone;)
3273 if (q == pEnd)
3274 {
3275 bEncodedWord = false;
3276 bDone = true;
3277 }
3278 else
3279 {
3280 sal_uInt32 nChar = *q++;
3281 switch (nChar)
3282 {
3283 case '=':
3284 {
3285 if (pEnd - q < 2)
3286 {
3287 bEncodedWord = false;
3288 bDone = true;
3289 break;
3290 }
3291 int nDigit1 = getHexWeight(q[0]);
3292 int nDigit2 = getHexWeight(q[1]);
3293 if (nDigit1 < 0 || nDigit2 < 0)
3294 {
3295 bEncodedWord = false;
3296 bDone = true;
3297 break;
3298 }
3299 sText += rBody.Copy(
3300 static_cast< xub_StrLen >(
3301 pEncodedTextCopyBegin - pBegin),
3302 static_cast< xub_StrLen >(
3303 q - 1 - pEncodedTextCopyBegin));
3304 sText += sal_Char(nDigit1 << 4 | nDigit2);
3305 q += 2;
3306 pEncodedTextCopyBegin = q;
3307 break;
3308 }
3309
3310 case '?':
3311 if (q - pEncodedTextBegin > 1)
3312 sText += rBody.Copy(
3313 static_cast< xub_StrLen >(
3314 pEncodedTextCopyBegin - pBegin),
3315 static_cast< xub_StrLen >(
3316 q - 1 - pEncodedTextCopyBegin));
3317 else
3318 bEncodedWord = false;
3319 bDone = true;
3320 break;
3321
3322 case '_':
3323 sText += rBody.Copy(
3324 static_cast< xub_StrLen >(
3325 pEncodedTextCopyBegin - pBegin),
3326 static_cast< xub_StrLen >(
3327 q - 1 - pEncodedTextCopyBegin));
3328 sText += ' ';
3329 pEncodedTextCopyBegin = q;
3330 break;
3331
3332 default:
3333 if (!isVisible(nChar))
3334 {
3335 bEncodedWord = false;
3336 bDone = true;
3337 }
3338 break;
3339 }
3340 }
3341 }
3342 }
3343
3344 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
3345
3346 // if (bEncodedWord && q != pEnd)
3347 // switch (*q)
3348 // {
3349 // case '\t':
3350 // case ' ':
3351 // case '"':
3352 // case ')':
3353 // case ',':
3354 // case '.':
3355 // case '=':
3356 // break;
3357 //
3358 // default:
3359 // bEncodedWord = false;
3360 // break;
3361 // }
3362
3363 sal_Unicode * pUnicodeBuffer = 0;
3364 sal_Size nUnicodeSize = 0;
3365 if (bEncodedWord)
3366 {
3367 pUnicodeBuffer
3368 = convertToUnicode(sText.GetBuffer(),
3369 sText.GetBuffer() + sText.Len(),
3370 eCharsetEncoding, nUnicodeSize);
3371 if (pUnicodeBuffer == 0)
3372 bEncodedWord = false;
3373 }
3374
3375 if (bEncodedWord)
3376 {
3377 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
3378 if (eType == HEADER_FIELD_TEXT)
3379 sDecoded.Append(
3380 pUnicodeBuffer,
3381 static_cast< xub_StrLen >(nUnicodeSize));
3382 else if (nCommentLevel == 0)
3383 {
3384 sEncodedText.Append(
3385 pUnicodeBuffer,
3386 static_cast< xub_StrLen >(nUnicodeSize));
3387 if (!bQuotedEncodedText)
3388 {
3389 const sal_Unicode * pTextPtr = pUnicodeBuffer;
3390 const sal_Unicode * pTextEnd = pTextPtr
3391 + nUnicodeSize;
3392 for (; pTextPtr != pTextEnd; ++pTextPtr)
3393 if (!isEncodedWordTokenChar(*pTextPtr))
3394 {
3395 bQuotedEncodedText = true;
3396 break;
3397 }
3398 }
3399 }
3400 else
3401 {
3402 const sal_Unicode * pTextPtr = pUnicodeBuffer;
3403 const sal_Unicode * pTextEnd = pTextPtr + nUnicodeSize;
3404 for (; pTextPtr != pTextEnd; ++pTextPtr)
3405 {
3406 switch (*pTextPtr)
3407 {
3408 case '(':
3409 case ')':
3410 case '\\':
3411 case '\x0D':
3412 case '=':
3413 sDecoded += '\\';
3414 break;
3415 }
3416 sDecoded += *pTextPtr;
3417 }
3418 }
3419 delete[] pUnicodeBuffer;
3420 p = q;
3421 pCopyBegin = p;
3422
3423 pWSPBegin = p;
3424 while (p != pEnd && isWhiteSpace(*p))
3425 ++p;
3426 /* bStartEncodedWord = p != pWSPBegin; */
3427 continue;
3428 }
3429 }
3430
3431 if (sEncodedText.Len() != 0)
3432 {
3433 if (bQuotedEncodedText)
3434 {
3435 sDecoded += '"';
3436 const sal_Unicode * pTextPtr = sEncodedText.GetBuffer();
3437 const sal_Unicode * pTextEnd = pTextPtr + sEncodedText.Len();
3438 for (;pTextPtr != pTextEnd; ++pTextPtr)
3439 {
3440 switch (*pTextPtr)
3441 {
3442 case '"':
3443 case '\\':
3444 case '\x0D':
3445 sDecoded += '\\';
3446 break;
3447 }
3448 sDecoded += *pTextPtr;
3449 }
3450 sDecoded += '"';
3451 }
3452 else
3453 sDecoded += sEncodedText;
3454 sEncodedText.Erase();
3455 bQuotedEncodedText = false;
3456 }
3457
3458 if (p == pEnd)
3459 break;
3460
3461 switch (*p++)
3462 {
3463 // case '\t':
3464 // case ' ':
3465 // case ',':
3466 // case '.':
3467 // case '=':
3468 // bStartEncodedWord = true;
3469 // break;
3470
3471 case '"':
3472 if (eType != HEADER_FIELD_TEXT && nCommentLevel == 0)
3473 {
3474 const sal_Char * pQuotedStringEnd
3475 = skipQuotedString(p - 1, pEnd);
3476 p = pQuotedStringEnd == p - 1 ? pEnd : pQuotedStringEnd;
3477 }
3478 /* bStartEncodedWord = true; */
3479 break;
3480
3481 case '(':
3482 if (eType != HEADER_FIELD_TEXT)
3483 ++nCommentLevel;
3484 /* bStartEncodedWord = true; */
3485 break;
3486
3487 case ')':
3488 if (nCommentLevel > 0)
3489 --nCommentLevel;
3490 /* bStartEncodedWord = false; */
3491 break;
3492
3493 default:
3494 {
3495 const sal_Char * pUTF8Begin = p - 1;
3496 const sal_Char * pUTF8End = pUTF8Begin;
3497 sal_uInt32 nCharacter;
3498 if (translateUTF8Char(pUTF8End, pEnd, RTL_TEXTENCODING_UCS4,
3499 nCharacter))
3500 {
3501 appendISO88591(sDecoded, pCopyBegin, p - 1);
3502 sal_Unicode aUTF16Buf[2];
3503 xub_StrLen nUTF16Len = static_cast< xub_StrLen >(
3504 putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf);
3505 sDecoded.Append(aUTF16Buf, nUTF16Len);
3506 p = pUTF8End;
3507 pCopyBegin = p;
3508 }
3509 /* bStartEncodedWord = false; */
3510 break;
3511 }
3512 }
3513 pWSPBegin = p;
3514 }
3515
3516 appendISO88591(sDecoded, pCopyBegin, pEnd);
3517 return sDecoded;
3518 }
3519
3520 //============================================================================
3521 //
3522 // INetMIMEOutputSink
3523 //
3524 //============================================================================
3525
3526 // virtual
writeSequence(const sal_Char * pSequence)3527 sal_Size INetMIMEOutputSink::writeSequence(const sal_Char * pSequence)
3528 {
3529 sal_Size nLength = rtl_str_getLength(pSequence);
3530 writeSequence(pSequence, pSequence + nLength);
3531 return nLength;
3532 }
3533
3534 //============================================================================
3535 // virtual
writeSequence(const sal_uInt32 * pBegin,const sal_uInt32 * pEnd)3536 void INetMIMEOutputSink::writeSequence(const sal_uInt32 * pBegin,
3537 const sal_uInt32 * pEnd)
3538 {
3539 DBG_ASSERT(pBegin && pBegin <= pEnd,
3540 "INetMIMEOutputSink::writeSequence(): Bad sequence");
3541
3542 sal_Char * pBufferBegin = new sal_Char[pEnd - pBegin];
3543 sal_Char * pBufferEnd = pBufferBegin;
3544 while (pBegin != pEnd)
3545 {
3546 DBG_ASSERT(*pBegin < 256,
3547 "INetMIMEOutputSink::writeSequence(): Bad octet");
3548 *pBufferEnd++ = sal_Char(*pBegin++);
3549 }
3550 writeSequence(pBufferBegin, pBufferEnd);
3551 delete[] pBufferBegin;
3552 }
3553
3554 //============================================================================
3555 // virtual
writeSequence(const sal_Unicode * pBegin,const sal_Unicode * pEnd)3556 void INetMIMEOutputSink::writeSequence(const sal_Unicode * pBegin,
3557 const sal_Unicode * pEnd)
3558 {
3559 DBG_ASSERT(pBegin && pBegin <= pEnd,
3560 "INetMIMEOutputSink::writeSequence(): Bad sequence");
3561
3562 sal_Char * pBufferBegin = new sal_Char[pEnd - pBegin];
3563 sal_Char * pBufferEnd = pBufferBegin;
3564 while (pBegin != pEnd)
3565 {
3566 DBG_ASSERT(*pBegin < 256,
3567 "INetMIMEOutputSink::writeSequence(): Bad octet");
3568 *pBufferEnd++ = sal_Char(*pBegin++);
3569 }
3570 writeSequence(pBufferBegin, pBufferEnd);
3571 delete[] pBufferBegin;
3572 }
3573
3574 //============================================================================
3575 // virtual
getError() const3576 ErrCode INetMIMEOutputSink::getError() const
3577 {
3578 return ERRCODE_NONE;
3579 }
3580
3581 //============================================================================
writeLineEnd()3582 void INetMIMEOutputSink::writeLineEnd()
3583 {
3584 static const sal_Char aCRLF[2] = { 0x0D, 0x0A };
3585 writeSequence(aCRLF, aCRLF + 2);
3586 m_nColumn = 0;
3587 }
3588
3589 //============================================================================
3590 //
3591 // INetMIMEStringOutputSink
3592 //
3593 //============================================================================
3594
3595 // virtual
writeSequence(const sal_Char * pBegin,const sal_Char * pEnd)3596 void INetMIMEStringOutputSink::writeSequence(const sal_Char * pBegin,
3597 const sal_Char * pEnd)
3598 {
3599 DBG_ASSERT(pBegin && pBegin <= pEnd,
3600 "INetMIMEStringOutputSink::writeSequence(): Bad sequence");
3601
3602 m_bOverflow = m_bOverflow
3603 || pEnd - pBegin > STRING_MAXLEN - m_aBuffer.Len();
3604 if (!m_bOverflow)
3605 m_aBuffer.Append(pBegin, static_cast< xub_StrLen >(pEnd - pBegin));
3606 }
3607
3608 //============================================================================
3609 // virtual
getError() const3610 ErrCode INetMIMEStringOutputSink::getError() const
3611 {
3612 return m_bOverflow ? ERRCODE_IO_OUTOFMEMORY : ERRCODE_NONE;
3613 }
3614
3615 //============================================================================
3616 //
3617 // INetMIMEUnicodeOutputSink
3618 //
3619 //============================================================================
3620
3621 // virtual
writeSequence(const sal_Char * pBegin,const sal_Char * pEnd)3622 void INetMIMEUnicodeOutputSink::writeSequence(const sal_Char * pBegin,
3623 const sal_Char * pEnd)
3624 {
3625 DBG_ASSERT(pBegin && pBegin <= pEnd,
3626 "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence");
3627
3628 sal_Unicode * pBufferBegin = new sal_Unicode[pEnd - pBegin];
3629 sal_Unicode * pBufferEnd = pBufferBegin;
3630 while (pBegin != pEnd)
3631 *pBufferEnd++ = sal_uChar(*pBegin++);
3632 writeSequence(pBufferBegin, pBufferEnd);
3633 delete[] pBufferBegin;
3634 }
3635
3636 //============================================================================
3637 // virtual
writeSequence(const sal_uInt32 * pBegin,const sal_uInt32 * pEnd)3638 void INetMIMEUnicodeOutputSink::writeSequence(const sal_uInt32 * pBegin,
3639 const sal_uInt32 * pEnd)
3640 {
3641 DBG_ASSERT(pBegin && pBegin <= pEnd,
3642 "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence");
3643
3644 sal_Unicode * pBufferBegin = new sal_Unicode[pEnd - pBegin];
3645 sal_Unicode * pBufferEnd = pBufferBegin;
3646 while (pBegin != pEnd)
3647 {
3648 DBG_ASSERT(*pBegin < 256,
3649 "INetMIMEOutputSink::writeSequence(): Bad octet");
3650 *pBufferEnd++ = sal_Unicode(*pBegin++);
3651 }
3652 writeSequence(pBufferBegin, pBufferEnd);
3653 delete[] pBufferBegin;
3654 }
3655
3656 //============================================================================
3657 // virtual
writeSequence(const sal_Unicode * pBegin,const sal_Unicode * pEnd)3658 void INetMIMEUnicodeOutputSink::writeSequence(const sal_Unicode * pBegin,
3659 const sal_Unicode * pEnd)
3660 {
3661 DBG_ASSERT(pBegin && pBegin <= pEnd,
3662 "INetMIMEUnicodeOutputSink::writeSequence(): Bad sequence");
3663
3664 m_bOverflow = m_bOverflow
3665 || pEnd - pBegin > STRING_MAXLEN - m_aBuffer.Len();
3666 if (!m_bOverflow)
3667 m_aBuffer.Append(pBegin, static_cast< xub_StrLen >(pEnd - pBegin));
3668 }
3669
3670 //============================================================================
3671 // virtual
getError() const3672 ErrCode INetMIMEUnicodeOutputSink::getError() const
3673 {
3674 return m_bOverflow ? ERRCODE_IO_OUTOFMEMORY : ERRCODE_NONE;
3675 }
3676
3677 //============================================================================
3678 //
3679 // INetMIMEEncodedWordOutputSink
3680 //
3681 //============================================================================
3682
3683 static const sal_Char aEscape[128]
3684 = { INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x00
3685 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x01
3686 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x02
3687 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x03
3688 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x04
3689 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x05
3690 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x06
3691 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x07
3692 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x08
3693 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x09
3694 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0A
3695 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0B
3696 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0C
3697 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0D
3698 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0E
3699 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x0F
3700 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x10
3701 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x11
3702 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x12
3703 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x13
3704 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x14
3705 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x15
3706 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x16
3707 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x17
3708 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x18
3709 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x19
3710 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1A
3711 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1B
3712 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1C
3713 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1D
3714 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1E
3715 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // 0x1F
3716 0, // ' '
3717 0, // '!'
3718 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '"'
3719 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '#'
3720 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '$'
3721 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '%'
3722 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '&'
3723 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '''
3724 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '('
3725 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ')'
3726 0, // '*'
3727 0, // '+'
3728 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ','
3729 0, // '-'
3730 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '.'
3731 0, // '/'
3732 0, // '0'
3733 0, // '1'
3734 0, // '2'
3735 0, // '3'
3736 0, // '4'
3737 0, // '5'
3738 0, // '6'
3739 0, // '7'
3740 0, // '8'
3741 0, // '9'
3742 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ':'
3743 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ';'
3744 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '<'
3745 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '='
3746 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '>'
3747 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '?'
3748 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '@'
3749 0, // 'A'
3750 0, // 'B'
3751 0, // 'C'
3752 0, // 'D'
3753 0, // 'E'
3754 0, // 'F'
3755 0, // 'G'
3756 0, // 'H'
3757 0, // 'I'
3758 0, // 'J'
3759 0, // 'K'
3760 0, // 'L'
3761 0, // 'M'
3762 0, // 'N'
3763 0, // 'O'
3764 0, // 'P'
3765 0, // 'Q'
3766 0, // 'R'
3767 0, // 'S'
3768 0, // 'T'
3769 0, // 'U'
3770 0, // 'V'
3771 0, // 'W'
3772 0, // 'X'
3773 0, // 'Y'
3774 0, // 'Z'
3775 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '['
3776 INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '\'
3777 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // ']'
3778 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '^'
3779 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '_'
3780 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '`'
3781 0, // 'a'
3782 0, // 'b'
3783 0, // 'c'
3784 0, // 'd'
3785 0, // 'e'
3786 0, // 'f'
3787 0, // 'g'
3788 0, // 'h'
3789 0, // 'i'
3790 0, // 'j'
3791 0, // 'k'
3792 0, // 'l'
3793 0, // 'm'
3794 0, // 'n'
3795 0, // 'o'
3796 0, // 'p'
3797 0, // 'q'
3798 0, // 'r'
3799 0, // 's'
3800 0, // 't'
3801 0, // 'u'
3802 0, // 'v'
3803 0, // 'w'
3804 0, // 'x'
3805 0, // 'y'
3806 0, // 'z'
3807 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '{'
3808 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '|'
3809 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '}'
3810 INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE, // '~'
3811 INetMIMEEncodedWordOutputSink::CONTEXT_TEXT | INetMIMEEncodedWordOutputSink::CONTEXT_COMMENT | INetMIMEEncodedWordOutputSink::CONTEXT_PHRASE }; // DEL
3812
3813 inline bool
needsEncodedWordEscape(sal_uInt32 nChar) const3814 INetMIMEEncodedWordOutputSink::needsEncodedWordEscape(sal_uInt32 nChar) const
3815 {
3816 return !INetMIME::isUSASCII(nChar) || aEscape[nChar] & m_eContext;
3817 }
3818
3819 //============================================================================
finish(bool bWriteTrailer)3820 void INetMIMEEncodedWordOutputSink::finish(bool bWriteTrailer)
3821 {
3822 if (m_eInitialSpace == SPACE_ALWAYS && m_nExtraSpaces == 0)
3823 m_nExtraSpaces = 1;
3824
3825 if (m_eEncodedWordState == STATE_SECOND_EQUALS)
3826 {
3827 // If the text is already an encoded word, copy it verbatim:
3828 sal_uInt32 nSize = m_pBufferEnd - m_pBuffer;
3829 switch (m_ePrevCoding)
3830 {
3831 case CODING_QUOTED:
3832 m_rSink << '"';
3833 case CODING_NONE:
3834 if (m_eInitialSpace == SPACE_ENCODED && m_nExtraSpaces == 0)
3835 m_nExtraSpaces = 1;
3836 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
3837 {
3838 if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
3839 m_rSink << INetMIMEOutputSink::endl;
3840 m_rSink << ' ';
3841 }
3842 if (m_nExtraSpaces == 1)
3843 {
3844 if (m_rSink.getColumn() + nSize
3845 >= m_rSink.getLineLengthLimit())
3846 m_rSink << INetMIMEOutputSink::endl;
3847 m_rSink << ' ';
3848 }
3849 break;
3850
3851 case CODING_ENCODED:
3852 {
3853 const sal_Char * pCharsetName
3854 = INetMIME::getCharsetName(m_ePrevMIMEEncoding);
3855 while (m_nExtraSpaces-- > 0)
3856 {
3857 if (m_rSink.getColumn()
3858 > m_rSink.getLineLengthLimit() - 3)
3859 m_rSink << "?=" << INetMIMEOutputSink::endl << " =?"
3860 << pCharsetName << "?Q?";
3861 m_rSink << '_';
3862 }
3863 m_rSink << "?=";
3864 }
3865 case CODING_ENCODED_TERMINATED:
3866 if (m_rSink.getColumn() + nSize
3867 > m_rSink.getLineLengthLimit() - 1)
3868 m_rSink << INetMIMEOutputSink::endl;
3869 m_rSink << ' ';
3870 break;
3871 }
3872 m_rSink.write(m_pBuffer, m_pBufferEnd);
3873 m_eCoding = CODING_ENCODED_TERMINATED;
3874 }
3875 else
3876 {
3877 // If the text itself is too long to fit into a single line, make it
3878 // into multiple encoded words:
3879 switch (m_eCoding)
3880 {
3881 case CODING_NONE:
3882 if (m_nExtraSpaces == 0)
3883 {
3884 DBG_ASSERT(m_ePrevCoding == CODING_NONE
3885 || m_pBuffer == m_pBufferEnd,
3886 "INetMIMEEncodedWordOutputSink::finish():"
3887 " Bad state");
3888 if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer)
3889 > m_rSink.getLineLengthLimit())
3890 m_eCoding = CODING_ENCODED;
3891 }
3892 else
3893 {
3894 OSL_ASSERT(m_pBufferEnd >= m_pBuffer);
3895 if (static_cast< std::size_t >(m_pBufferEnd - m_pBuffer)
3896 > m_rSink.getLineLengthLimit() - 1)
3897 {
3898 m_eCoding = CODING_ENCODED;
3899 }
3900 }
3901 break;
3902
3903 case CODING_QUOTED:
3904 if (m_nExtraSpaces == 0)
3905 {
3906 DBG_ASSERT(m_ePrevCoding == CODING_NONE,
3907 "INetMIMEEncodedWordOutputSink::finish():"
3908 " Bad state");
3909 if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer)
3910 + m_nQuotedEscaped
3911 > m_rSink.getLineLengthLimit() - 2)
3912 m_eCoding = CODING_ENCODED;
3913 }
3914 else if ((m_pBufferEnd - m_pBuffer) + m_nQuotedEscaped
3915 > m_rSink.getLineLengthLimit() - 3)
3916 m_eCoding = CODING_ENCODED;
3917 break;
3918
3919 default:
3920 break;
3921 }
3922
3923 switch (m_eCoding)
3924 {
3925 case CODING_NONE:
3926 switch (m_ePrevCoding)
3927 {
3928 case CODING_QUOTED:
3929 if (m_rSink.getColumn() + m_nExtraSpaces
3930 + (m_pBufferEnd - m_pBuffer)
3931 < m_rSink.getLineLengthLimit())
3932 m_eCoding = CODING_QUOTED;
3933 else
3934 m_rSink << '"';
3935 break;
3936
3937 case CODING_ENCODED:
3938 m_rSink << "?=";
3939 break;
3940
3941 default:
3942 break;
3943 }
3944 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
3945 {
3946 if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
3947 m_rSink << INetMIMEOutputSink::endl;
3948 m_rSink << ' ';
3949 }
3950 if (m_nExtraSpaces == 1)
3951 {
3952 if (m_rSink.getColumn() + (m_pBufferEnd - m_pBuffer)
3953 >= m_rSink.getLineLengthLimit())
3954 m_rSink << INetMIMEOutputSink::endl;
3955 m_rSink << ' ';
3956 }
3957 m_rSink.write(m_pBuffer, m_pBufferEnd);
3958 if (m_eCoding == CODING_QUOTED && bWriteTrailer)
3959 {
3960 m_rSink << '"';
3961 m_eCoding = CODING_NONE;
3962 }
3963 break;
3964
3965 case CODING_QUOTED:
3966 {
3967 bool bInsertLeadingQuote = true;
3968 sal_uInt32 nSize = (m_pBufferEnd - m_pBuffer)
3969 + m_nQuotedEscaped + 2;
3970 switch (m_ePrevCoding)
3971 {
3972 case CODING_QUOTED:
3973 if (m_rSink.getColumn() + m_nExtraSpaces + nSize - 1
3974 < m_rSink.getLineLengthLimit())
3975 {
3976 bInsertLeadingQuote = false;
3977 --nSize;
3978 }
3979 else
3980 m_rSink << '"';
3981 break;
3982
3983 case CODING_ENCODED:
3984 m_rSink << "?=";
3985 break;
3986
3987 default:
3988 break;
3989 }
3990 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
3991 {
3992 if (m_rSink.getColumn() >= m_rSink.getLineLengthLimit())
3993 m_rSink << INetMIMEOutputSink::endl;
3994 m_rSink << ' ';
3995 }
3996 if (m_nExtraSpaces == 1)
3997 {
3998 if (m_rSink.getColumn() + nSize
3999 >= m_rSink.getLineLengthLimit())
4000 m_rSink << INetMIMEOutputSink::endl;
4001 m_rSink << ' ';
4002 }
4003 if (bInsertLeadingQuote)
4004 m_rSink << '"';
4005 for (const sal_Unicode * p = m_pBuffer; p != m_pBufferEnd;
4006 ++p)
4007 {
4008 if (INetMIME::needsQuotedStringEscape(*p))
4009 m_rSink << '\\';
4010 m_rSink << sal_Char(*p);
4011 }
4012 if (bWriteTrailer)
4013 {
4014 m_rSink << '"';
4015 m_eCoding = CODING_NONE;
4016 }
4017 break;
4018 }
4019
4020 case CODING_ENCODED:
4021 {
4022 rtl_TextEncoding eCharsetEncoding
4023 = m_pEncodingList->
4024 getPreferredEncoding(RTL_TEXTENCODING_UTF8);
4025 rtl_TextEncoding eMIMEEncoding
4026 = INetMIME::translateToMIME(eCharsetEncoding);
4027
4028 // The non UTF-8 code will only work for stateless single byte
4029 // character encodings (see also below):
4030 sal_Char * pTargetBuffer = NULL;
4031 sal_Size nTargetSize = 0;
4032 sal_uInt32 nSize;
4033 if (eMIMEEncoding == RTL_TEXTENCODING_UTF8)
4034 {
4035 nSize = 0;
4036 for (sal_Unicode const * p = m_pBuffer;
4037 p != m_pBufferEnd;)
4038 {
4039 sal_uInt32 nUTF32
4040 = INetMIME::getUTF32Character(p, m_pBufferEnd);
4041 nSize += needsEncodedWordEscape(nUTF32) ?
4042 3 * INetMIME::getUTF8OctetCount(nUTF32) :
4043 1;
4044 // only US-ASCII characters (that are converted to
4045 // a single byte by UTF-8) need no encoded word
4046 // escapes...
4047 }
4048 }
4049 else
4050 {
4051 rtl_UnicodeToTextConverter hConverter
4052 = rtl_createUnicodeToTextConverter(eCharsetEncoding);
4053 rtl_UnicodeToTextContext hContext
4054 = rtl_createUnicodeToTextContext(hConverter);
4055 for (sal_Size nBufferSize = m_pBufferEnd - m_pBuffer;;
4056 nBufferSize += nBufferSize / 3 + 1)
4057 {
4058 pTargetBuffer = new sal_Char[nBufferSize];
4059 sal_uInt32 nInfo;
4060 sal_Size nSrcCvtBytes;
4061 nTargetSize
4062 = rtl_convertUnicodeToText(
4063 hConverter, hContext, m_pBuffer,
4064 m_pBufferEnd - m_pBuffer, pTargetBuffer,
4065 nBufferSize,
4066 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE
4067 | RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE,
4068 &nInfo, &nSrcCvtBytes);
4069 if (!(nInfo
4070 & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
4071 break;
4072 delete[] pTargetBuffer;
4073 pTargetBuffer = NULL;
4074 rtl_resetUnicodeToTextContext(hConverter, hContext);
4075 }
4076 rtl_destroyUnicodeToTextContext(hConverter, hContext);
4077 rtl_destroyUnicodeToTextConverter(hConverter);
4078
4079 nSize = nTargetSize;
4080 for (sal_Size k = 0; k < nTargetSize; ++k)
4081 if (needsEncodedWordEscape(sal_uChar(
4082 pTargetBuffer[k])))
4083 nSize += 2;
4084 }
4085
4086 const sal_Char * pCharsetName
4087 = INetMIME::getCharsetName(eMIMEEncoding);
4088 sal_uInt32 nWrapperSize = rtl_str_getLength(pCharsetName) + 7;
4089 // '=?', '?Q?', '?='
4090
4091 switch (m_ePrevCoding)
4092 {
4093 case CODING_QUOTED:
4094 m_rSink << '"';
4095 case CODING_NONE:
4096 if (m_eInitialSpace == SPACE_ENCODED
4097 && m_nExtraSpaces == 0)
4098 m_nExtraSpaces = 1;
4099 nSize += nWrapperSize;
4100 for (; m_nExtraSpaces > 1; --m_nExtraSpaces)
4101 {
4102 if (m_rSink.getColumn()
4103 >= m_rSink.getLineLengthLimit())
4104 m_rSink << INetMIMEOutputSink::endl;
4105 m_rSink << ' ';
4106 }
4107 if (m_nExtraSpaces == 1)
4108 {
4109 if (m_rSink.getColumn() + nSize
4110 >= m_rSink.getLineLengthLimit())
4111 m_rSink << INetMIMEOutputSink::endl;
4112 m_rSink << ' ';
4113 }
4114 m_rSink << "=?" << pCharsetName << "?Q?";
4115 break;
4116
4117 case CODING_ENCODED:
4118 if (m_ePrevMIMEEncoding != eMIMEEncoding
4119 || m_rSink.getColumn() + m_nExtraSpaces + nSize
4120 > m_rSink.getLineLengthLimit() - 2)
4121 {
4122 m_rSink << "?=";
4123 if (m_rSink.getColumn() + nWrapperSize
4124 + m_nExtraSpaces + nSize
4125 > m_rSink.getLineLengthLimit() - 1)
4126 m_rSink << INetMIMEOutputSink::endl;
4127 m_rSink << " =?" << pCharsetName << "?Q?";
4128 }
4129 while (m_nExtraSpaces-- > 0)
4130 {
4131 if (m_rSink.getColumn()
4132 > m_rSink.getLineLengthLimit() - 3)
4133 m_rSink << "?=" << INetMIMEOutputSink::endl
4134 << " =?" << pCharsetName << "?Q?";
4135 m_rSink << '_';
4136 }
4137 break;
4138
4139 case CODING_ENCODED_TERMINATED:
4140 if (m_rSink.getColumn() + nWrapperSize
4141 + m_nExtraSpaces + nSize
4142 > m_rSink.getLineLengthLimit() - 1)
4143 m_rSink << INetMIMEOutputSink::endl;
4144 m_rSink << " =?" << pCharsetName << "?Q?";
4145 while (m_nExtraSpaces-- > 0)
4146 {
4147 if (m_rSink.getColumn()
4148 > m_rSink.getLineLengthLimit() - 3)
4149 m_rSink << "?=" << INetMIMEOutputSink::endl
4150 << " =?" << pCharsetName << "?Q?";
4151 m_rSink << '_';
4152 }
4153 break;
4154 }
4155
4156 // The non UTF-8 code will only work for stateless single byte
4157 // character encodings (see also above):
4158 if (eMIMEEncoding == RTL_TEXTENCODING_UTF8)
4159 {
4160 bool bInitial = true;
4161 for (sal_Unicode const * p = m_pBuffer;
4162 p != m_pBufferEnd;)
4163 {
4164 sal_uInt32 nUTF32
4165 = INetMIME::getUTF32Character(p, m_pBufferEnd);
4166 bool bEscape = needsEncodedWordEscape(nUTF32);
4167 sal_uInt32 nWidth
4168 = bEscape ?
4169 3 * INetMIME::getUTF8OctetCount(nUTF32) : 1;
4170 // only US-ASCII characters (that are converted to
4171 // a single byte by UTF-8) need no encoded word
4172 // escapes...
4173 if (!bInitial
4174 && m_rSink.getColumn() + nWidth + 2
4175 > m_rSink.getLineLengthLimit())
4176 m_rSink << "?=" << INetMIMEOutputSink::endl
4177 << " =?" << pCharsetName << "?Q?";
4178 if (bEscape)
4179 {
4180 DBG_ASSERT(
4181 nUTF32 < 0x10FFFF,
4182 "INetMIMEEncodedWordOutputSink::finish():"
4183 " Bad char");
4184 if (nUTF32 < 0x80)
4185 INetMIME::writeEscapeSequence(m_rSink,
4186 nUTF32);
4187 else if (nUTF32 < 0x800)
4188 {
4189 INetMIME::writeEscapeSequence(m_rSink,
4190 (nUTF32 >> 6)
4191 | 0xC0);
4192 INetMIME::writeEscapeSequence(m_rSink,
4193 (nUTF32 & 0x3F)
4194 | 0x80);
4195 }
4196 else if (nUTF32 < 0x10000)
4197 {
4198 INetMIME::writeEscapeSequence(m_rSink,
4199 (nUTF32 >> 12)
4200 | 0xE0);
4201 INetMIME::writeEscapeSequence(m_rSink,
4202 ((nUTF32 >> 6)
4203 & 0x3F)
4204 | 0x80);
4205 INetMIME::writeEscapeSequence(m_rSink,
4206 (nUTF32 & 0x3F)
4207 | 0x80);
4208 }
4209 else
4210 {
4211 INetMIME::writeEscapeSequence(m_rSink,
4212 (nUTF32 >> 18)
4213 | 0xF0);
4214 INetMIME::writeEscapeSequence(m_rSink,
4215 ((nUTF32 >> 12)
4216 & 0x3F)
4217 | 0x80);
4218 INetMIME::writeEscapeSequence(m_rSink,
4219 ((nUTF32 >> 6)
4220 & 0x3F)
4221 | 0x80);
4222 INetMIME::writeEscapeSequence(m_rSink,
4223 (nUTF32 & 0x3F)
4224 | 0x80);
4225 }
4226 }
4227 else
4228 m_rSink << sal_Char(nUTF32);
4229 bInitial = false;
4230 }
4231 }
4232 else
4233 {
4234 for (sal_Size k = 0; k < nTargetSize; ++k)
4235 {
4236 sal_uInt32 nUCS4 = sal_uChar(pTargetBuffer[k]);
4237 bool bEscape = needsEncodedWordEscape(nUCS4);
4238 if (k > 0
4239 && m_rSink.getColumn() + (bEscape ? 5 : 3)
4240 > m_rSink.getLineLengthLimit())
4241 m_rSink << "?=" << INetMIMEOutputSink::endl
4242 << " =?" << pCharsetName << "?Q?";
4243 if (bEscape)
4244 INetMIME::writeEscapeSequence(m_rSink, nUCS4);
4245 else
4246 m_rSink << sal_Char(nUCS4);
4247 }
4248 delete[] pTargetBuffer;
4249 }
4250
4251 if (bWriteTrailer)
4252 {
4253 m_rSink << "?=";
4254 m_eCoding = CODING_ENCODED_TERMINATED;
4255 }
4256
4257 m_ePrevMIMEEncoding = eMIMEEncoding;
4258 break;
4259 }
4260
4261 default:
4262 OSL_ASSERT(false);
4263 break;
4264 }
4265 }
4266
4267 m_eInitialSpace = SPACE_NO;
4268 m_nExtraSpaces = 0;
4269 m_pEncodingList->reset();
4270 m_pBufferEnd = m_pBuffer;
4271 m_ePrevCoding = m_eCoding;
4272 m_eCoding = CODING_NONE;
4273 m_nQuotedEscaped = 0;
4274 m_eEncodedWordState = STATE_INITIAL;
4275 }
4276
4277 //============================================================================
~INetMIMEEncodedWordOutputSink()4278 INetMIMEEncodedWordOutputSink::~INetMIMEEncodedWordOutputSink()
4279 {
4280 rtl_freeMemory(m_pBuffer);
4281 delete m_pEncodingList;
4282 }
4283
4284 //============================================================================
4285 INetMIMEEncodedWordOutputSink &
operator <<(sal_uInt32 nChar)4286 INetMIMEEncodedWordOutputSink::operator <<(sal_uInt32 nChar)
4287 {
4288 if (nChar == ' ')
4289 {
4290 if (m_pBufferEnd != m_pBuffer)
4291 finish(false);
4292 ++m_nExtraSpaces;
4293 }
4294 else
4295 {
4296 // Check for an already encoded word:
4297 switch (m_eEncodedWordState)
4298 {
4299 case STATE_INITIAL:
4300 if (nChar == '=')
4301 m_eEncodedWordState = STATE_FIRST_EQUALS;
4302 else
4303 m_eEncodedWordState = STATE_BAD;
4304 break;
4305
4306 case STATE_FIRST_EQUALS:
4307 if (nChar == '?')
4308 m_eEncodedWordState = STATE_FIRST_EQUALS;
4309 else
4310 m_eEncodedWordState = STATE_BAD;
4311 break;
4312
4313 case STATE_FIRST_QUESTION:
4314 if (INetMIME::isEncodedWordTokenChar(nChar))
4315 m_eEncodedWordState = STATE_CHARSET;
4316 else
4317 m_eEncodedWordState = STATE_BAD;
4318 break;
4319
4320 case STATE_CHARSET:
4321 if (nChar == '?')
4322 m_eEncodedWordState = STATE_SECOND_QUESTION;
4323 else if (!INetMIME::isEncodedWordTokenChar(nChar))
4324 m_eEncodedWordState = STATE_BAD;
4325 break;
4326
4327 case STATE_SECOND_QUESTION:
4328 if (nChar == 'B' || nChar == 'Q'
4329 || nChar == 'b' || nChar == 'q')
4330 m_eEncodedWordState = STATE_ENCODING;
4331 else
4332 m_eEncodedWordState = STATE_BAD;
4333 break;
4334
4335 case STATE_ENCODING:
4336 if (nChar == '?')
4337 m_eEncodedWordState = STATE_THIRD_QUESTION;
4338 else
4339 m_eEncodedWordState = STATE_BAD;
4340 break;
4341
4342 case STATE_THIRD_QUESTION:
4343 if (INetMIME::isVisible(nChar) && nChar != '?')
4344 m_eEncodedWordState = STATE_ENCODED_TEXT;
4345 else
4346 m_eEncodedWordState = STATE_BAD;
4347 break;
4348
4349 case STATE_ENCODED_TEXT:
4350 if (nChar == '?')
4351 m_eEncodedWordState = STATE_FOURTH_QUESTION;
4352 else if (!INetMIME::isVisible(nChar))
4353 m_eEncodedWordState = STATE_BAD;
4354 break;
4355
4356 case STATE_FOURTH_QUESTION:
4357 if (nChar == '=')
4358 m_eEncodedWordState = STATE_SECOND_EQUALS;
4359 else
4360 m_eEncodedWordState = STATE_BAD;
4361 break;
4362
4363 case STATE_SECOND_EQUALS:
4364 m_eEncodedWordState = STATE_BAD;
4365 break;
4366
4367 case STATE_BAD:
4368 break;
4369 }
4370
4371 // Update encoding:
4372 m_pEncodingList->includes(nChar);
4373
4374 // Update coding:
4375 enum { TENQ = 1, // CONTEXT_TEXT, CODING_ENCODED
4376 CENQ = 2, // CONTEXT_COMMENT, CODING_ENCODED
4377 PQTD = 4, // CONTEXT_PHRASE, CODING_QUOTED
4378 PENQ = 8 }; // CONTEXT_PHRASE, CODING_ENCODED
4379 static const sal_Char aMinimal[128]
4380 = { TENQ | CENQ | PENQ, // 0x00
4381 TENQ | CENQ | PENQ, // 0x01
4382 TENQ | CENQ | PENQ, // 0x02
4383 TENQ | CENQ | PENQ, // 0x03
4384 TENQ | CENQ | PENQ, // 0x04
4385 TENQ | CENQ | PENQ, // 0x05
4386 TENQ | CENQ | PENQ, // 0x06
4387 TENQ | CENQ | PENQ, // 0x07
4388 TENQ | CENQ | PENQ, // 0x08
4389 TENQ | CENQ | PENQ, // 0x09
4390 TENQ | CENQ | PENQ, // 0x0A
4391 TENQ | CENQ | PENQ, // 0x0B
4392 TENQ | CENQ | PENQ, // 0x0C
4393 TENQ | CENQ | PENQ, // 0x0D
4394 TENQ | CENQ | PENQ, // 0x0E
4395 TENQ | CENQ | PENQ, // 0x0F
4396 TENQ | CENQ | PENQ, // 0x10
4397 TENQ | CENQ | PENQ, // 0x11
4398 TENQ | CENQ | PENQ, // 0x12
4399 TENQ | CENQ | PENQ, // 0x13
4400 TENQ | CENQ | PENQ, // 0x14
4401 TENQ | CENQ | PENQ, // 0x15
4402 TENQ | CENQ | PENQ, // 0x16
4403 TENQ | CENQ | PENQ, // 0x17
4404 TENQ | CENQ | PENQ, // 0x18
4405 TENQ | CENQ | PENQ, // 0x19
4406 TENQ | CENQ | PENQ, // 0x1A
4407 TENQ | CENQ | PENQ, // 0x1B
4408 TENQ | CENQ | PENQ, // 0x1C
4409 TENQ | CENQ | PENQ, // 0x1D
4410 TENQ | CENQ | PENQ, // 0x1E
4411 TENQ | CENQ | PENQ, // 0x1F
4412 0, // ' '
4413 0, // '!'
4414 PQTD , // '"'
4415 0, // '#'
4416 0, // '$'
4417 0, // '%'
4418 0, // '&'
4419 0, // '''
4420 CENQ | PQTD , // '('
4421 CENQ | PQTD , // ')'
4422 0, // '*'
4423 0, // '+'
4424 PQTD , // ','
4425 0, // '-'
4426 PQTD , // '.'
4427 0, // '/'
4428 0, // '0'
4429 0, // '1'
4430 0, // '2'
4431 0, // '3'
4432 0, // '4'
4433 0, // '5'
4434 0, // '6'
4435 0, // '7'
4436 0, // '8'
4437 0, // '9'
4438 PQTD , // ':'
4439 PQTD , // ';'
4440 PQTD , // '<'
4441 0, // '='
4442 PQTD , // '>'
4443 0, // '?'
4444 PQTD , // '@'
4445 0, // 'A'
4446 0, // 'B'
4447 0, // 'C'
4448 0, // 'D'
4449 0, // 'E'
4450 0, // 'F'
4451 0, // 'G'
4452 0, // 'H'
4453 0, // 'I'
4454 0, // 'J'
4455 0, // 'K'
4456 0, // 'L'
4457 0, // 'M'
4458 0, // 'N'
4459 0, // 'O'
4460 0, // 'P'
4461 0, // 'Q'
4462 0, // 'R'
4463 0, // 'S'
4464 0, // 'T'
4465 0, // 'U'
4466 0, // 'V'
4467 0, // 'W'
4468 0, // 'X'
4469 0, // 'Y'
4470 0, // 'Z'
4471 PQTD , // '['
4472 CENQ | PQTD , // '\'
4473 PQTD , // ']'
4474 0, // '^'
4475 0, // '_'
4476 0, // '`'
4477 0, // 'a'
4478 0, // 'b'
4479 0, // 'c'
4480 0, // 'd'
4481 0, // 'e'
4482 0, // 'f'
4483 0, // 'g'
4484 0, // 'h'
4485 0, // 'i'
4486 0, // 'j'
4487 0, // 'k'
4488 0, // 'l'
4489 0, // 'm'
4490 0, // 'n'
4491 0, // 'o'
4492 0, // 'p'
4493 0, // 'q'
4494 0, // 'r'
4495 0, // 's'
4496 0, // 't'
4497 0, // 'u'
4498 0, // 'v'
4499 0, // 'w'
4500 0, // 'x'
4501 0, // 'y'
4502 0, // 'z'
4503 0, // '{'
4504 0, // '|'
4505 0, // '}'
4506 0, // '~'
4507 TENQ | CENQ | PENQ }; // DEL
4508 Coding eNewCoding = !INetMIME::isUSASCII(nChar) ? CODING_ENCODED :
4509 m_eContext == CONTEXT_PHRASE ?
4510 Coding(aMinimal[nChar] >> 2) :
4511 aMinimal[nChar] & m_eContext ? CODING_ENCODED :
4512 CODING_NONE;
4513 if (eNewCoding > m_eCoding)
4514 m_eCoding = eNewCoding;
4515 if (m_eCoding == CODING_QUOTED
4516 && INetMIME::needsQuotedStringEscape(nChar))
4517 ++m_nQuotedEscaped;
4518
4519 // Append to buffer:
4520 if (sal_uInt32(m_pBufferEnd - m_pBuffer) == m_nBufferSize)
4521 {
4522 m_pBuffer
4523 = static_cast< sal_Unicode * >(
4524 rtl_reallocateMemory(m_pBuffer,
4525 (m_nBufferSize + BUFFER_SIZE)
4526 * sizeof (sal_Unicode)));
4527 m_pBufferEnd = m_pBuffer + m_nBufferSize;
4528 m_nBufferSize += BUFFER_SIZE;
4529 }
4530 *m_pBufferEnd++ = sal_Unicode(nChar);
4531 }
4532 return *this;
4533 }
4534
4535 //============================================================================
4536 //
4537 // INetContentTypeParameterList
4538 //
4539 //============================================================================
4540
Clear()4541 void INetContentTypeParameterList::Clear()
4542 {
4543 while (Count() > 0)
4544 delete static_cast< INetContentTypeParameter * >(Remove(Count() - 1));
4545 }
4546
4547 //============================================================================
4548 const INetContentTypeParameter *
find(const ByteString & rAttribute) const4549 INetContentTypeParameterList::find(const ByteString & rAttribute) const
4550 {
4551 for (sal_uIntPtr i = 0; i < Count(); ++i)
4552 {
4553 const INetContentTypeParameter * pParameter = GetObject(i);
4554 if (pParameter->m_sAttribute.EqualsIgnoreCaseAscii(rAttribute))
4555 return pParameter;
4556 }
4557 return 0;
4558 }
4559
4560