1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #include "tenchelp.h" 29 #include "unichars.h" 30 #include "rtl/textcvt.h" 31 32 /* ======================================================================= */ 33 34 /* DBCS to Unicode conversion routine use a lead table for the first byte, */ 35 /* where we determine the trail table or for single byte chars the unicode */ 36 /* value. We have for all lead byte a separate table, because we can */ 37 /* then share many tables for diffrent charset encodings. */ 38 39 /* ======================================================================= */ 40 41 sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void* pContext, 42 const sal_Char* pSrcBuf, sal_Size nSrcBytes, 43 sal_Unicode* pDestBuf, sal_Size nDestChars, 44 sal_uInt32 nFlags, sal_uInt32* pInfo, 45 sal_Size* pSrcCvtBytes ) 46 { 47 sal_uChar cLead; 48 sal_uChar cTrail; 49 sal_Unicode cConv; 50 const ImplDBCSToUniLeadTab* pLeadEntry; 51 const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData; 52 const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab; 53 sal_Unicode* pEndDestBuf; 54 const sal_Char* pEndSrcBuf; 55 56 (void) pContext; /* unused */ 57 58 *pInfo = 0; 59 pEndDestBuf = pDestBuf+nDestChars; 60 pEndSrcBuf = pSrcBuf+nSrcBytes; 61 while ( pSrcBuf < pEndSrcBuf ) 62 { 63 cLead = (sal_uChar)*pSrcBuf; 64 65 /* get entry for the lead byte */ 66 pLeadEntry = pLeadTab+cLead; 67 68 /* SingleByte char? */ 69 if (pLeadEntry->mpToUniTrailTab == NULL 70 || cLead < pConvertData->mnLeadStart 71 || cLead > pConvertData->mnLeadEnd) 72 { 73 cConv = pLeadEntry->mnUniChar; 74 if ( !cConv && (cLead != 0) ) 75 { 76 *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED; 77 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR ) 78 { 79 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 80 break; 81 } 82 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE ) 83 { 84 pSrcBuf++; 85 continue; 86 } 87 else 88 cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags); 89 } 90 } 91 else 92 { 93 /* Source buffer to small */ 94 if ( pSrcBuf +1 == pEndSrcBuf ) 95 { 96 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 97 break; 98 } 99 100 pSrcBuf++; 101 cTrail = (sal_uChar)*pSrcBuf; 102 if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) ) 103 cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart]; 104 else 105 cConv = 0; 106 107 if ( !cConv ) 108 { 109 /* EUDC Ranges */ 110 sal_uInt16 i; 111 const ImplDBCSEUDCData* pEUDCTab = pConvertData->mpEUDCTab; 112 for ( i = 0; i < pConvertData->mnEUDCCount; i++ ) 113 { 114 if ( (cLead >= pEUDCTab->mnLeadStart) && 115 (cLead <= pEUDCTab->mnLeadEnd) ) 116 { 117 sal_uInt16 nTrailCount = 0; 118 if ( (cTrail >= pEUDCTab->mnTrail1Start) && 119 (cTrail <= pEUDCTab->mnTrail1End) ) 120 { 121 cConv = pEUDCTab->mnUniStart+ 122 ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+ 123 (cTrail-pEUDCTab->mnTrail1Start); 124 break; 125 } 126 else 127 { 128 nTrailCount = pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1; 129 if ( (pEUDCTab->mnTrailCount >= 2) && 130 (cTrail >= pEUDCTab->mnTrail2Start) && 131 (cTrail <= pEUDCTab->mnTrail2End) ) 132 { 133 cConv = pEUDCTab->mnUniStart+ 134 ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+ 135 nTrailCount+ 136 (cTrail-pEUDCTab->mnTrail2Start); 137 break; 138 } 139 else 140 { 141 nTrailCount = pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1; 142 if ( (pEUDCTab->mnTrailCount >= 3) && 143 (cTrail >= pEUDCTab->mnTrail3Start) && 144 (cTrail <= pEUDCTab->mnTrail3End) ) 145 { 146 cConv = pEUDCTab->mnUniStart+ 147 ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+ 148 nTrailCount+ 149 (cTrail-pEUDCTab->mnTrail3Start); 150 break; 151 } 152 } 153 } 154 } 155 156 pEUDCTab++; 157 } 158 159 if ( !cConv ) 160 { 161 /* Wir vergleichen den kompletten Trailbereich den wir */ 162 /* definieren, der normalerweise groesser sein kann als */ 163 /* der definierte. Dies machen wir, damit Erweiterungen von */ 164 /* uns nicht beruecksichtigten Encodings so weit wie */ 165 /* moeglich auch richtig zu behandeln, das double byte */ 166 /* characters auch als ein einzelner Character behandelt */ 167 /* wird. */ 168 if (cLead < pConvertData->mnLeadStart 169 || cLead > pConvertData->mnLeadEnd 170 || cTrail < pConvertData->mnTrailStart 171 || cTrail > pConvertData->mnTrailEnd) 172 { 173 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; 174 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) 175 { 176 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 177 break; 178 } 179 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) 180 { 181 pSrcBuf++; 182 continue; 183 } 184 else 185 cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 186 } 187 else 188 { 189 *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED; 190 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR ) 191 { 192 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 193 break; 194 } 195 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE ) 196 { 197 pSrcBuf++; 198 continue; 199 } 200 else 201 cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 202 } 203 } 204 } 205 } 206 207 if ( pDestBuf == pEndDestBuf ) 208 { 209 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 210 break; 211 } 212 213 *pDestBuf = cConv; 214 pDestBuf++; 215 pSrcBuf++; 216 } 217 218 *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); 219 return (nDestChars - (pEndDestBuf-pDestBuf)); 220 } 221 222 /* ----------------------------------------------------------------------- */ 223 224 sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext, 225 const sal_Unicode* pSrcBuf, sal_Size nSrcChars, 226 sal_Char* pDestBuf, sal_Size nDestBytes, 227 sal_uInt32 nFlags, sal_uInt32* pInfo, 228 sal_Size* pSrcCvtChars ) 229 { 230 sal_uInt16 cConv; 231 sal_Unicode c; 232 sal_uChar nHighChar; 233 sal_uChar nLowChar; 234 const ImplUniToDBCSHighTab* pHighEntry; 235 const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData; 236 const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab; 237 sal_Char* pEndDestBuf; 238 const sal_Unicode* pEndSrcBuf; 239 240 sal_Bool bCheckRange = (pConvertData->mnLeadStart != 0 241 || pConvertData->mnLeadEnd != 0xFF); 242 /* this statement has the effect that this extra check is only done for 243 EUC-KR, which uses the MS-949 tables, but does not support the full 244 range of MS-949 */ 245 246 (void) pContext; /* unused */ 247 248 *pInfo = 0; 249 pEndDestBuf = pDestBuf+nDestBytes; 250 pEndSrcBuf = pSrcBuf+nSrcChars; 251 while ( pSrcBuf < pEndSrcBuf ) 252 { 253 c = *pSrcBuf; 254 nHighChar = (sal_uChar)((c >> 8) & 0xFF); 255 nLowChar = (sal_uChar)(c & 0xFF); 256 257 /* get entry for the high byte */ 258 pHighEntry = pHighTab+nHighChar; 259 260 /* is low byte in the table range */ 261 if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) ) 262 { 263 cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart]; 264 if (bCheckRange && cConv > 0x7F 265 && ((cConv >> 8) < pConvertData->mnLeadStart 266 || (cConv >> 8) > pConvertData->mnLeadEnd 267 || (cConv & 0xFF) < pConvertData->mnTrailStart 268 || (cConv & 0xFF) > pConvertData->mnTrailEnd)) 269 cConv = 0; 270 } 271 else 272 cConv = 0; 273 274 if (cConv == 0 && c != 0) 275 { 276 /* Map to EUDC ranges: */ 277 ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab; 278 sal_uInt32 i; 279 for (i = 0; i < pConvertData->mnEUDCCount; ++i) 280 { 281 if (c >= pEUDCTab->mnUniStart && c <= pEUDCTab->mnUniEnd) 282 { 283 sal_uInt32 nIndex = c - pEUDCTab->mnUniStart; 284 sal_uInt32 nLeadOff 285 = nIndex / pEUDCTab->mnTrailRangeCount; 286 sal_uInt32 nTrailOff 287 = nIndex % pEUDCTab->mnTrailRangeCount; 288 sal_uInt32 nSize; 289 cConv = (sal_uInt16) 290 ((pEUDCTab->mnLeadStart + nLeadOff) << 8); 291 nSize 292 = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1; 293 if (nTrailOff < nSize) 294 { 295 cConv |= pEUDCTab->mnTrail1Start + nTrailOff; 296 break; 297 } 298 nTrailOff -= nSize; 299 nSize 300 = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1; 301 if (nTrailOff < nSize) 302 { 303 cConv |= pEUDCTab->mnTrail2Start + nTrailOff; 304 break; 305 } 306 nTrailOff -= nSize; 307 cConv |= pEUDCTab->mnTrail3Start + nTrailOff; 308 break; 309 } 310 pEUDCTab++; 311 } 312 313 /* FIXME 314 * SB: Not sure why this is in here. Plus, it does not work as 315 * intended when (c & 0xFF) == 0, because the next !cConv check 316 * will then think c has not yet been converted... 317 */ 318 if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START 319 && c <= RTL_TEXTCVT_BYTE_PRIVATE_END) 320 { 321 if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 ) 322 cConv = (sal_Char)(sal_uChar)(c & 0xFF); 323 } 324 } 325 326 if ( !cConv ) 327 { 328 if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) 329 { 330 /* !!! */ 331 } 332 333 if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR ) 334 { 335 /* !!! */ 336 } 337 338 /* Handle undefined and surrogates characters */ 339 /* (all surrogates characters are undefined) */ 340 if (ImplHandleUndefinedUnicodeToTextChar(pData, 341 &pSrcBuf, 342 pEndSrcBuf, 343 &pDestBuf, 344 pEndDestBuf, 345 nFlags, 346 pInfo)) 347 continue; 348 else 349 break; 350 } 351 352 /* SingleByte */ 353 if ( !(cConv & 0xFF00) ) 354 { 355 if ( pDestBuf == pEndDestBuf ) 356 { 357 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 358 break; 359 } 360 361 *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); 362 pDestBuf++; 363 } 364 else 365 { 366 if ( pDestBuf+1 >= pEndDestBuf ) 367 { 368 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 369 break; 370 } 371 372 *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF); 373 pDestBuf++; 374 *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); 375 pDestBuf++; 376 } 377 378 pSrcBuf++; 379 } 380 381 *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); 382 return (nDestBytes - (pEndDestBuf-pDestBuf)); 383 } 384 385 /* ======================================================================= */ 386 387 #define JIS_EUC_LEAD_OFF 0x80 388 #define JIS_EUC_TRAIL_OFF 0x80 389 390 /* ----------------------------------------------------------------------- */ 391 392 sal_Size ImplEUCJPToUnicode( const ImplTextConverterData* pData, 393 void* pContext, 394 const sal_Char* pSrcBuf, sal_Size nSrcBytes, 395 sal_Unicode* pDestBuf, sal_Size nDestChars, 396 sal_uInt32 nFlags, sal_uInt32* pInfo, 397 sal_Size* pSrcCvtBytes ) 398 { 399 sal_uChar c; 400 sal_uChar cLead = '\0'; 401 sal_uChar cTrail = '\0'; 402 sal_Unicode cConv; 403 const ImplDBCSToUniLeadTab* pLeadEntry; 404 const ImplDBCSToUniLeadTab* pLeadTab; 405 const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData; 406 sal_Unicode* pEndDestBuf; 407 const sal_Char* pEndSrcBuf; 408 409 (void) pContext; /* unused */ 410 411 *pInfo = 0; 412 pEndDestBuf = pDestBuf+nDestChars; 413 pEndSrcBuf = pSrcBuf+nSrcBytes; 414 while ( pSrcBuf < pEndSrcBuf ) 415 { 416 c = (sal_uChar)*pSrcBuf; 417 418 /* ASCII */ 419 if ( c <= 0x7F ) 420 cConv = c; 421 else 422 { 423 /* SS2 - Half-width katakana */ 424 /* 8E + A1-DF */ 425 if ( c == 0x8E ) 426 { 427 /* Source buffer to small */ 428 if ( pSrcBuf + 1 == pEndSrcBuf ) 429 { 430 *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 431 break; 432 } 433 434 pSrcBuf++; 435 c = (sal_uChar)*pSrcBuf; 436 if ( (c >= 0xA1) && (c <= 0xDF) ) 437 cConv = 0xFF61+(c-0xA1); 438 else 439 { 440 cConv = 0; 441 cLead = 0x8E; 442 cTrail = c; 443 } 444 } 445 else 446 { 447 /* SS3 - JIS 0212-1990 */ 448 /* 8F + A1-FE + A1-FE */ 449 if ( c == 0x8F ) 450 { 451 /* Source buffer to small */ 452 if (pEndSrcBuf - pSrcBuf < 3) 453 { 454 *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 455 break; 456 } 457 458 pSrcBuf++; 459 cLead = (sal_uChar)*pSrcBuf; 460 pSrcBuf++; 461 cTrail = (sal_uChar)*pSrcBuf; 462 pLeadTab = pConvertData->mpJIS0212ToUniLeadTab; 463 } 464 /* CodeSet 2 JIS 0208-1997 */ 465 /* A1-FE + A1-FE */ 466 else 467 { 468 /* Source buffer to small */ 469 if ( pSrcBuf + 1 == pEndSrcBuf ) 470 { 471 *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 472 break; 473 } 474 475 cLead = c; 476 pSrcBuf++; 477 cTrail = (sal_uChar)*pSrcBuf; 478 pLeadTab = pConvertData->mpJIS0208ToUniLeadTab; 479 } 480 481 /* Undefined Range */ 482 if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) ) 483 cConv = 0; 484 else 485 { 486 cLead -= JIS_EUC_LEAD_OFF; 487 cTrail -= JIS_EUC_TRAIL_OFF; 488 pLeadEntry = pLeadTab+cLead; 489 if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) ) 490 cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart]; 491 else 492 cConv = 0; 493 } 494 } 495 496 if ( !cConv ) 497 { 498 /* Wir vergleichen den kompletten Trailbereich den wir */ 499 /* definieren, der normalerweise groesser sein kann als */ 500 /* der definierte. Dies machen wir, damit Erweiterungen von */ 501 /* uns nicht beruecksichtigten Encodings so weit wie */ 502 /* moeglich auch richtig zu behandeln, das double byte */ 503 /* characters auch als ein einzelner Character behandelt */ 504 /* wird. */ 505 if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) ) 506 { 507 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID; 508 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR ) 509 { 510 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 511 break; 512 } 513 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE ) 514 { 515 pSrcBuf++; 516 continue; 517 } 518 else 519 cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 520 } 521 else 522 { 523 *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED; 524 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR ) 525 { 526 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR; 527 break; 528 } 529 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE ) 530 { 531 pSrcBuf++; 532 continue; 533 } 534 else 535 cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 536 } 537 } 538 } 539 540 if ( pDestBuf == pEndDestBuf ) 541 { 542 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 543 break; 544 } 545 546 *pDestBuf = cConv; 547 pDestBuf++; 548 pSrcBuf++; 549 } 550 551 *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf); 552 return (nDestChars - (pEndDestBuf-pDestBuf)); 553 } 554 555 /* ----------------------------------------------------------------------- */ 556 557 sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData, 558 void* pContext, 559 const sal_Unicode* pSrcBuf, sal_Size nSrcChars, 560 sal_Char* pDestBuf, sal_Size nDestBytes, 561 sal_uInt32 nFlags, sal_uInt32* pInfo, 562 sal_Size* pSrcCvtChars ) 563 { 564 sal_uInt32 cConv; 565 sal_Unicode c; 566 sal_uChar nHighChar; 567 sal_uChar nLowChar; 568 const ImplUniToDBCSHighTab* pHighEntry; 569 const ImplUniToDBCSHighTab* pHighTab; 570 const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData; 571 sal_Char* pEndDestBuf; 572 const sal_Unicode* pEndSrcBuf; 573 574 (void) pContext; /* unused */ 575 576 *pInfo = 0; 577 pEndDestBuf = pDestBuf+nDestBytes; 578 pEndSrcBuf = pSrcBuf+nSrcChars; 579 while ( pSrcBuf < pEndSrcBuf ) 580 { 581 c = *pSrcBuf; 582 583 /* ASCII */ 584 if ( c <= 0x7F ) 585 cConv = c; 586 /* Half-width katakana */ 587 else if ( (c >= 0xFF61) && (c <= 0xFF9F) ) 588 cConv = 0x8E00+0xA1+(c-0xFF61); 589 else 590 { 591 nHighChar = (sal_uChar)((c >> 8) & 0xFF); 592 nLowChar = (sal_uChar)(c & 0xFF); 593 594 /* JIS 0208 */ 595 pHighTab = pConvertData->mpUniToJIS0208HighTab; 596 pHighEntry = pHighTab+nHighChar; 597 if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) ) 598 { 599 cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart]; 600 if (cConv != 0) 601 cConv |= 0x8080; 602 } 603 else 604 cConv = 0; 605 606 /* JIS 0212 */ 607 if ( !cConv ) 608 { 609 pHighTab = pConvertData->mpUniToJIS0212HighTab; 610 pHighEntry = pHighTab+nHighChar; 611 if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) ) 612 { 613 cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart]; 614 if (cConv != 0) 615 cConv |= 0x8F8080; 616 } 617 618 if ( !cConv ) 619 { 620 if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE ) 621 { 622 /* !!! */ 623 } 624 625 if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR ) 626 { 627 /* !!! */ 628 } 629 630 /* Handle undefined and surrogates characters */ 631 /* (all surrogates characters are undefined) */ 632 if (ImplHandleUndefinedUnicodeToTextChar(pData, 633 &pSrcBuf, 634 pEndSrcBuf, 635 &pDestBuf, 636 pEndDestBuf, 637 nFlags, 638 pInfo)) 639 continue; 640 else 641 break; 642 } 643 } 644 } 645 646 /* SingleByte */ 647 if ( !(cConv & 0xFFFF00) ) 648 { 649 if ( pDestBuf == pEndDestBuf ) 650 { 651 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 652 break; 653 } 654 655 *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); 656 pDestBuf++; 657 } 658 /* DoubleByte */ 659 else if ( !(cConv & 0xFF0000) ) 660 { 661 if ( pDestBuf+1 >= pEndDestBuf ) 662 { 663 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 664 break; 665 } 666 667 *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF); 668 pDestBuf++; 669 *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); 670 pDestBuf++; 671 } 672 else 673 { 674 if ( pDestBuf+2 >= pEndDestBuf ) 675 { 676 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 677 break; 678 } 679 680 *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 16) & 0xFF); 681 pDestBuf++; 682 *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF); 683 pDestBuf++; 684 *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF); 685 pDestBuf++; 686 } 687 688 pSrcBuf++; 689 } 690 691 *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf); 692 return (nDestBytes - (pEndDestBuf-pDestBuf)); 693 } 694