1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 #include "tenchelp.h"
25 #include "unichars.h"
26 #include "rtl/textcvt.h"
27
28 /* ======================================================================= */
29
/* The DBCS to Unicode conversion routines use a lead table indexed by the */
/* first byte. Its entry gives either the trail table or, for single-byte  */
/* characters, the Unicode value directly. There is a separate trail table */
/* for every lead byte, because many of these tables can then be shared    */
/* between different charset encodings.                                    */
34
35 /* ======================================================================= */
36
/* Converts a double-byte (DBCS) encoded byte string to Unicode.            */
/*                                                                         */
/* Every source byte is looked up in the converter's lead-byte table. A    */
/* byte whose entry has no trail table, or which lies outside the          */
/* converter's [mnLeadStart, mnLeadEnd] range, is converted as a           */
/* single-byte character; otherwise it is combined with the byte that      */
/* follows it. Double-byte sequences the trail table does not map are      */
/* tried against the converter's EUDC ranges before they are reported as   */
/* invalid or undefined.                                                   */
/*                                                                         */
/* pData        converter tables, accessed as ImplDBCSConvertData          */
/* pContext     unused                                                     */
/* pSrcBuf      source bytes                                               */
/* nSrcBytes    number of source bytes                                     */
/* pDestBuf     destination buffer                                         */
/* nDestChars   capacity of pDestBuf in sal_Unicode units                  */
/* nFlags       RTL_TEXTTOUNICODE_FLAGS_* selecting how undefined, invalid */
/*              and multi-byte-undefined input is handled (error, ignore,  */
/*              or substitute a character)                                 */
/* pInfo        receives RTL_TEXTTOUNICODE_INFO_* bits; reset to 0 first   */
/* pSrcCvtBytes receives the number of source bytes consumed               */
/*                                                                         */
/* Returns the number of sal_Unicode values written to pDestBuf.           */
sal_Size ImplDBCSToUnicode( const ImplTextConverterData* pData, void* pContext,
                            const sal_Char* pSrcBuf, sal_Size nSrcBytes,
                            sal_Unicode* pDestBuf, sal_Size nDestChars,
                            sal_uInt32 nFlags, sal_uInt32* pInfo,
                            sal_Size* pSrcCvtBytes )
{
    sal_uChar                   cLead;
    sal_uChar                   cTrail;
    sal_Unicode                 cConv;
    const ImplDBCSToUniLeadTab* pLeadEntry;
    const ImplDBCSConvertData*  pConvertData = (const ImplDBCSConvertData*)pData;
    const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab;
    sal_Unicode*                pEndDestBuf;
    const sal_Char*             pEndSrcBuf;
    const sal_Char*             pCharStart;

    (void) pContext; /* unused */

    *pInfo = 0;
    pEndDestBuf = pDestBuf+nDestChars;
    pEndSrcBuf  = pSrcBuf+nSrcBytes;
    while ( pSrcBuf < pEndSrcBuf )
    {
        /* Remember where the current character starts: if it cannot be */
        /* stored it has to be handed back to the caller unconsumed.    */
        pCharStart = pSrcBuf;
        cLead = (sal_uChar)*pSrcBuf;

        /* get entry for the lead byte */
        pLeadEntry = pLeadTab+cLead;

        /* SingleByte char? */
        if (pLeadEntry->mpToUniTrailTab == NULL
            || cLead < pConvertData->mnLeadStart
            || cLead > pConvertData->mnLeadEnd)
        {
            cConv = pLeadEntry->mnUniChar;
            /* a zero mapping is only legitimate for the NUL byte itself */
            if ( !cConv && (cLead != 0) )
            {
                *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED;
                if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR )
                {
                    *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                    break;
                }
                else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE )
                {
                    pSrcBuf++;
                    continue;
                }
                else
                    cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags);
            }
        }
        else
        {
            /* Source buffer too small: the trail byte is missing */
            if ( pSrcBuf+1 == pEndSrcBuf )
            {
                *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
                break;
            }

            pSrcBuf++;
            cTrail = (sal_uChar)*pSrcBuf;
            if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
                cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
            else
                cConv = 0;

            if ( !cConv )
            {
                /* EUDC Ranges */
                sal_uInt16              i;
                const ImplDBCSEUDCData* pEUDCTab = pConvertData->mpEUDCTab;
                for ( i = 0; i < pConvertData->mnEUDCCount; i++ )
                {
                    if ( (cLead >= pEUDCTab->mnLeadStart) &&
                         (cLead <= pEUDCTab->mnLeadEnd) )
                    {
                        /* number of trail positions that precede the */
                        /* trail range currently being tested         */
                        sal_uInt16 nTrailCount = 0;
                        if ( (cTrail >= pEUDCTab->mnTrail1Start) &&
                             (cTrail <= pEUDCTab->mnTrail1End) )
                        {
                            cConv = pEUDCTab->mnUniStart+
                                    ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
                                    (cTrail-pEUDCTab->mnTrail1Start);
                            break;
                        }
                        else
                        {
                            nTrailCount = pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1;
                            if ( (pEUDCTab->mnTrailCount >= 2) &&
                                 (cTrail >= pEUDCTab->mnTrail2Start) &&
                                 (cTrail <= pEUDCTab->mnTrail2End) )
                            {
                                cConv = pEUDCTab->mnUniStart+
                                        ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
                                        nTrailCount+
                                        (cTrail-pEUDCTab->mnTrail2Start);
                                break;
                            }
                            else
                            {
                                /* The third range follows the first AND   */
                                /* the second one, so both sizes have to   */
                                /* be accumulated. ImplUnicodeToDBCS       */
                                /* subtracts both sizes before it uses     */
                                /* mnTrail3Start, and this must mirror it. */
                                nTrailCount = (sal_uInt16)(nTrailCount +
                                    (pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1));
                                if ( (pEUDCTab->mnTrailCount >= 3) &&
                                     (cTrail >= pEUDCTab->mnTrail3Start) &&
                                     (cTrail <= pEUDCTab->mnTrail3End) )
                                {
                                    cConv = pEUDCTab->mnUniStart+
                                            ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
                                            nTrailCount+
                                            (cTrail-pEUDCTab->mnTrail3Start);
                                    break;
                                }
                            }
                        }
                    }

                    pEUDCTab++;
                }

                if ( !cConv )
                {
                    /* We compare against the complete lead/trail range   */
                    /* declared for the converter, which may be larger    */
                    /* than the range that is actually mapped. This way   */
                    /* extensions of the encoding that we do not know     */
                    /* about are handled as well as possible: such a      */
                    /* double-byte sequence is still treated as ONE       */
                    /* (undefined) character instead of as invalid input. */
                    if (cLead < pConvertData->mnLeadStart
                        || cLead > pConvertData->mnLeadEnd
                        || cTrail < pConvertData->mnTrailStart
                        || cTrail > pConvertData->mnTrailEnd)
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
                        if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
                        {
                            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                            break;
                        }
                        else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
                        {
                            pSrcBuf++;
                            continue;
                        }
                        else
                            cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
                    }
                    else
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
                        if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
                        {
                            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                            break;
                        }
                        else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
                        {
                            pSrcBuf++;
                            continue;
                        }
                        else
                            cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
                    }
                }
            }
        }

        if ( pDestBuf == pEndDestBuf )
        {
            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
            /* Nothing was stored for this character, so do not report  */
            /* its lead byte as consumed. This converter keeps no state */
            /* in pContext, so a caller resuming at *pSrcCvtBytes must  */
            /* start on a character boundary.                           */
            pSrcBuf = pCharStart;
            break;
        }

        *pDestBuf = cConv;
        pDestBuf++;
        pSrcBuf++;
    }

    *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
    return (nDestChars - (pEndDestBuf-pDestBuf));
}
217
218 /* ----------------------------------------------------------------------- */
219
/* Converts Unicode to a double-byte (DBCS) encoded byte string.            */
/*                                                                         */
/* The high byte of every source value selects an entry of the converter's */
/* high-byte table and the low byte indexes that entry's table. Values     */
/* without a table mapping are then tried against the converter's EUDC     */
/* ranges and, if RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 is set, against   */
/* the RTL_TEXTCVT_BYTE_PRIVATE_START..END range. Whatever is still        */
/* unmapped is passed to ImplHandleUndefinedUnicodeToTextChar.             */
/*                                                                         */
/* pData        converter tables, accessed as ImplDBCSConvertData          */
/* pContext     unused                                                     */
/* pSrcBuf      source sal_Unicode values                                  */
/* nSrcChars    number of source values                                    */
/* pDestBuf     destination buffer                                         */
/* nDestBytes   capacity of pDestBuf in bytes                              */
/* nFlags       RTL_UNICODETOTEXT_FLAGS_*                                  */
/* pInfo        receives RTL_UNICODETOTEXT_INFO_* bits; reset to 0 first   */
/* pSrcCvtChars receives the number of source values consumed              */
/*                                                                         */
/* Returns the number of bytes written to pDestBuf.                        */
sal_Size ImplUnicodeToDBCS( const ImplTextConverterData* pData, void* pContext,
                            const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
                            sal_Char* pDestBuf, sal_Size nDestBytes,
                            sal_uInt32 nFlags, sal_uInt32* pInfo,
                            sal_Size* pSrcCvtChars )
{
    sal_uInt16                  cConv;
    sal_Unicode                 c;
    sal_uChar                   nHighChar;
    sal_uChar                   nLowChar;
    const ImplUniToDBCSHighTab* pHighEntry;
    const ImplDBCSConvertData*  pConvertData = (const ImplDBCSConvertData*)pData;
    const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab;
    sal_Char*                   pEndDestBuf;
    const sal_Unicode*          pEndSrcBuf;

    sal_Bool bCheckRange = (pConvertData->mnLeadStart != 0
                            || pConvertData->mnLeadEnd != 0xFF);
        /* this statement has the effect that this extra check is only done for
           EUC-KR, which uses the MS-949 tables, but does not support the full
           range of MS-949 */

    (void) pContext; /* unused */

    *pInfo = 0;
    pEndDestBuf = pDestBuf+nDestBytes;
    pEndSrcBuf  = pSrcBuf+nSrcChars;
    while ( pSrcBuf < pEndSrcBuf )
    {
        c = *pSrcBuf;
        nHighChar = (sal_uChar)((c >> 8) & 0xFF);
        nLowChar = (sal_uChar)(c & 0xFF);

        /* get entry for the high byte */
        pHighEntry = pHighTab+nHighChar;

        /* is low byte in the table range */
        if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
        {
            cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
            /* drop double-byte results that lie outside the lead/trail */
            /* range this converter declares                            */
            if (bCheckRange && cConv > 0x7F
                && ((cConv >> 8) < pConvertData->mnLeadStart
                    || (cConv >> 8) > pConvertData->mnLeadEnd
                    || (cConv & 0xFF) < pConvertData->mnTrailStart
                    || (cConv & 0xFF) > pConvertData->mnTrailEnd))
                cConv = 0;
        }
        else
            cConv = 0;

        if (cConv == 0 && c != 0)
        {
            /* Map to EUDC ranges: */
            ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab;
            sal_uInt32 i;
            for (i = 0; i < pConvertData->mnEUDCCount; ++i)
            {
                if (c >= pEUDCTab->mnUniStart && c <= pEUDCTab->mnUniEnd)
                {
                    /* split the offset inside the EUDC block into a lead */
                    /* offset and a position within the trail ranges      */
                    sal_uInt32 nIndex = c - pEUDCTab->mnUniStart;
                    sal_uInt32 nLeadOff
                        = nIndex / pEUDCTab->mnTrailRangeCount;
                    sal_uInt32 nTrailOff
                        = nIndex % pEUDCTab->mnTrailRangeCount;
                    sal_uInt32 nSize;
                    cConv = (sal_uInt16)
                        ((pEUDCTab->mnLeadStart + nLeadOff) << 8);
                    nSize
                        = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1;
                    if (nTrailOff < nSize)
                    {
                        cConv |= pEUDCTab->mnTrail1Start + nTrailOff;
                        break;
                    }
                    nTrailOff -= nSize;
                    nSize
                        = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1;
                    if (nTrailOff < nSize)
                    {
                        cConv |= pEUDCTab->mnTrail2Start + nTrailOff;
                        break;
                    }
                    nTrailOff -= nSize;
                    cConv |= pEUDCTab->mnTrail3Start + nTrailOff;
                    break;
                }
                pEUDCTab++;
            }

            /* FIXME
             * SB: Not sure why this is in here.  Plus, it does not work as
             * intended when (c & 0xFF) == 0, because the next !cConv check
             * will then think c has not yet been converted...
             */
            if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START
                && c <= RTL_TEXTCVT_BYTE_PRIVATE_END)
            {
                /* Store the low byte zero-extended. Going through sal_Char */
                /* would sign-extend bytes >= 0x80 to 0xFFxx where plain    */
                /* char is signed, and the value would then be written as a */
                /* double-byte character below.                             */
                if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
                    cConv = (sal_uInt16)(c & 0xFF);
            }
        }

        if ( !cConv )
        {
            /* Handle undefined and surrogates characters */
            /* (all surrogates characters are undefined) */
            /* The helper is handed the addresses of both buffer cursors, */
            /* so it is presumably the one advancing them; this loop does */
            /* not advance pSrcBuf itself on this path.                   */
            if (ImplHandleUndefinedUnicodeToTextChar(pData,
                                                     &pSrcBuf,
                                                     pEndSrcBuf,
                                                     &pDestBuf,
                                                     pEndDestBuf,
                                                     nFlags,
                                                     pInfo))
                continue;
            else
                break;
        }

        /* SingleByte */
        if ( !(cConv & 0xFF00) )
        {
            if ( pDestBuf == pEndDestBuf )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
            pDestBuf++;
        }
        else
        {
            /* Compare the remaining space instead of forming pDestBuf+1, */
            /* which may point beyond one-past-the-end of the buffer.     */
            if ( pEndDestBuf-pDestBuf < 2 )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF);
            pDestBuf++;
            *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
            pDestBuf++;
        }

        pSrcBuf++;
    }

    *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
    return (nDestBytes - (pEndDestBuf-pDestBuf));
}
380
381 /* ======================================================================= */
382
383 #define JIS_EUC_LEAD_OFF 0x80
384 #define JIS_EUC_TRAIL_OFF 0x80
385
386 /* ----------------------------------------------------------------------- */
387
ImplEUCJPToUnicode(const ImplTextConverterData * pData,void * pContext,const sal_Char * pSrcBuf,sal_Size nSrcBytes,sal_Unicode * pDestBuf,sal_Size nDestChars,sal_uInt32 nFlags,sal_uInt32 * pInfo,sal_Size * pSrcCvtBytes)388 sal_Size ImplEUCJPToUnicode( const ImplTextConverterData* pData,
389 void* pContext,
390 const sal_Char* pSrcBuf, sal_Size nSrcBytes,
391 sal_Unicode* pDestBuf, sal_Size nDestChars,
392 sal_uInt32 nFlags, sal_uInt32* pInfo,
393 sal_Size* pSrcCvtBytes )
394 {
395 sal_uChar c;
396 sal_uChar cLead = '\0';
397 sal_uChar cTrail = '\0';
398 sal_Unicode cConv;
399 const ImplDBCSToUniLeadTab* pLeadEntry;
400 const ImplDBCSToUniLeadTab* pLeadTab;
401 const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData;
402 sal_Unicode* pEndDestBuf;
403 const sal_Char* pEndSrcBuf;
404
405 (void) pContext; /* unused */
406
407 *pInfo = 0;
408 pEndDestBuf = pDestBuf+nDestChars;
409 pEndSrcBuf = pSrcBuf+nSrcBytes;
410 while ( pSrcBuf < pEndSrcBuf )
411 {
412 c = (sal_uChar)*pSrcBuf;
413
414 /* ASCII */
415 if ( c <= 0x7F )
416 cConv = c;
417 else
418 {
419 /* SS2 - Half-width katakana */
420 /* 8E + A1-DF */
421 if ( c == 0x8E )
422 {
423 /* Source buffer to small */
424 if ( pSrcBuf + 1 == pEndSrcBuf )
425 {
426 *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
427 break;
428 }
429
430 pSrcBuf++;
431 c = (sal_uChar)*pSrcBuf;
432 if ( (c >= 0xA1) && (c <= 0xDF) )
433 cConv = 0xFF61+(c-0xA1);
434 else
435 {
436 cConv = 0;
437 cLead = 0x8E;
438 cTrail = c;
439 }
440 }
441 else
442 {
443 /* SS3 - JIS 0212-1990 */
444 /* 8F + A1-FE + A1-FE */
445 if ( c == 0x8F )
446 {
447 /* Source buffer to small */
448 if (pEndSrcBuf - pSrcBuf < 3)
449 {
450 *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
451 break;
452 }
453
454 pSrcBuf++;
455 cLead = (sal_uChar)*pSrcBuf;
456 pSrcBuf++;
457 cTrail = (sal_uChar)*pSrcBuf;
458 pLeadTab = pConvertData->mpJIS0212ToUniLeadTab;
459 }
460 /* CodeSet 2 JIS 0208-1997 */
461 /* A1-FE + A1-FE */
462 else
463 {
464 /* Source buffer to small */
465 if ( pSrcBuf + 1 == pEndSrcBuf )
466 {
467 *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
468 break;
469 }
470
471 cLead = c;
472 pSrcBuf++;
473 cTrail = (sal_uChar)*pSrcBuf;
474 pLeadTab = pConvertData->mpJIS0208ToUniLeadTab;
475 }
476
477 /* Undefined Range */
478 if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
479 cConv = 0;
480 else
481 {
482 cLead -= JIS_EUC_LEAD_OFF;
483 cTrail -= JIS_EUC_TRAIL_OFF;
484 pLeadEntry = pLeadTab+cLead;
485 if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
486 cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
487 else
488 cConv = 0;
489 }
490 }
491
492 if ( !cConv )
493 {
494 /* Wir vergleichen den kompletten Trailbereich den wir */
495 /* definieren, der normalerweise groesser sein kann als */
496 /* der definierte. Dies machen wir, damit Erweiterungen von */
497 /* uns nicht beruecksichtigten Encodings so weit wie */
498 /* moeglich auch richtig zu behandeln, das double byte */
499 /* characters auch als ein einzelner Character behandelt */
500 /* wird. */
501 if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
502 {
503 *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
504 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
505 {
506 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
507 break;
508 }
509 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
510 {
511 pSrcBuf++;
512 continue;
513 }
514 else
515 cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
516 }
517 else
518 {
519 *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
520 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
521 {
522 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
523 break;
524 }
525 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
526 {
527 pSrcBuf++;
528 continue;
529 }
530 else
531 cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
532 }
533 }
534 }
535
536 if ( pDestBuf == pEndDestBuf )
537 {
538 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
539 break;
540 }
541
542 *pDestBuf = cConv;
543 pDestBuf++;
544 pSrcBuf++;
545 }
546
547 *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
548 return (nDestChars - (pEndDestBuf-pDestBuf));
549 }
550
551 /* ----------------------------------------------------------------------- */
552
/* Converts Unicode to an EUC-JP encoded byte string.                       */
/*                                                                         */
/* Output forms:                                                           */
/*   0000-007F   one byte, copied unchanged                                */
/*   FF61-FF9F   two bytes: 8E followed by A1 + offset                     */
/*   otherwise   the mpUniToJIS0208HighTab result with 0x8080 or'ed in     */
/*               (two bytes) or, failing that, the                         */
/*               mpUniToJIS0212HighTab result with 0x8F8080 or'ed in       */
/*               (three bytes)                                             */
/* Values mapped by neither table are passed to                            */
/* ImplHandleUndefinedUnicodeToTextChar.                                   */
/*                                                                         */
/* pData        converter tables, accessed as ImplEUCJPConvertData         */
/* pContext     unused                                                     */
/* pSrcBuf      source sal_Unicode values                                  */
/* nSrcChars    number of source values                                    */
/* pDestBuf     destination buffer                                         */
/* nDestBytes   capacity of pDestBuf in bytes                              */
/* nFlags       RTL_UNICODETOTEXT_FLAGS_*, handed on to the helper         */
/* pInfo        receives RTL_UNICODETOTEXT_INFO_* bits; reset to 0 first   */
/* pSrcCvtChars receives the number of source values consumed              */
/*                                                                         */
/* Returns the number of bytes written to pDestBuf.                        */
sal_Size ImplUnicodeToEUCJP( const ImplTextConverterData* pData,
                             void* pContext,
                             const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
                             sal_Char* pDestBuf, sal_Size nDestBytes,
                             sal_uInt32 nFlags, sal_uInt32* pInfo,
                             sal_Size* pSrcCvtChars )
{
    sal_uInt32                  cConv;
    sal_Unicode                 c;
    sal_uChar                   nHighChar;
    sal_uChar                   nLowChar;
    const ImplUniToDBCSHighTab* pHighEntry;
    const ImplUniToDBCSHighTab* pHighTab;
    const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData;
    sal_Char*                   pEndDestBuf;
    const sal_Unicode*          pEndSrcBuf;

    (void) pContext; /* unused */

    *pInfo = 0;
    pEndDestBuf = pDestBuf+nDestBytes;
    pEndSrcBuf  = pSrcBuf+nSrcChars;
    while ( pSrcBuf < pEndSrcBuf )
    {
        c = *pSrcBuf;

        /* ASCII */
        if ( c <= 0x7F )
            cConv = c;
        /* Half-width katakana */
        else if ( (c >= 0xFF61) && (c <= 0xFF9F) )
            cConv = 0x8E00+0xA1+(c-0xFF61);
        else
        {
            nHighChar = (sal_uChar)((c >> 8) & 0xFF);
            nLowChar = (sal_uChar)(c & 0xFF);

            /* JIS 0208 */
            pHighTab = pConvertData->mpUniToJIS0208HighTab;
            pHighEntry = pHighTab+nHighChar;
            if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
            {
                cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
                if (cConv != 0)
                    cConv |= 0x8080;
            }
            else
                cConv = 0;

            /* JIS 0212 */
            if ( !cConv )
            {
                pHighTab = pConvertData->mpUniToJIS0212HighTab;
                pHighEntry = pHighTab+nHighChar;
                if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
                {
                    cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
                    if (cConv != 0)
                        cConv |= 0x8F8080;
                }

                if ( !cConv )
                {
                    /* Handle undefined and surrogates characters */
                    /* (all surrogates characters are undefined) */
                    /* The helper is handed the addresses of both buffer  */
                    /* cursors, so it is presumably the one advancing     */
                    /* them; this loop does not advance pSrcBuf itself on */
                    /* this path.                                         */
                    if (ImplHandleUndefinedUnicodeToTextChar(pData,
                                                             &pSrcBuf,
                                                             pEndSrcBuf,
                                                             &pDestBuf,
                                                             pEndDestBuf,
                                                             nFlags,
                                                             pInfo))
                        continue;
                    else
                        break;
                }
            }
        }

        /* The remaining space is compared below instead of forming    */
        /* pDestBuf+1 / pDestBuf+2, which may point beyond             */
        /* one-past-the-end of the destination buffer.                 */

        /* SingleByte */
        if ( !(cConv & 0xFFFF00) )
        {
            if ( pDestBuf == pEndDestBuf )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
            pDestBuf++;
        }
        /* DoubleByte */
        else if ( !(cConv & 0xFF0000) )
        {
            if ( pEndDestBuf-pDestBuf < 2 )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF);
            pDestBuf++;
            *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
            pDestBuf++;
        }
        /* TripleByte */
        else
        {
            if ( pEndDestBuf-pDestBuf < 3 )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 16) & 0xFF);
            pDestBuf++;
            *pDestBuf = (sal_Char)(sal_uChar)((cConv >> 8) & 0xFF);
            pDestBuf++;
            *pDestBuf = (sal_Char)(sal_uChar)(cConv & 0xFF);
            pDestBuf++;
        }

        pSrcBuf++;
    }

    *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
    return (nDestBytes - (pEndDestBuf-pDestBuf));
}
690