1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 #include "convertiso2022jp.h"
25 #include "context.h"
26 #include "converter.h"
27 #include "tenchelp.h"
28 #include "unichars.h"
29 #include "rtl/alloc.h"
30 #include "rtl/textcvt.h"
31 #include "sal/types.h"
32
/*
 * Decoder states for the ISO-2022-JP to Unicode direction.
 *
 * The numeric order is significant: ImplConvertIso2022JpToUnicode treats
 * every state greater than IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208 as "in the
 * middle of a multi-byte or escape sequence" when the input runs out.
 */
typedef enum
{
    /* ASCII is designated (initial state, selected again by ESC ( B) */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII = 0,
    /* JIS-Roman is designated (selected by ESC ( J) */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN = 1,
    /* JIS X 0208 is designated (ESC $ @ or ESC $ B); next byte is a first byte */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208 = 2,
    /* first byte of a JIS X 0208 pair has been stored; second byte expected */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2 = 3,
    /* ESC has been read */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC = 4,
    /* ESC ( has been read */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN = 5,
    /* ESC $ has been read */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR = 6
} ImplIso2022JpToUnicodeState;
43
44 typedef struct
45 {
46 ImplIso2022JpToUnicodeState m_eState;
47 sal_uInt32 m_nRow;
48 } ImplIso2022JpToUnicodeContext;
49
50 typedef struct
51 {
52 sal_Unicode m_nHighSurrogate;
53 sal_Bool m_b0208;
54 } ImplUnicodeToIso2022JpContext;
55
ImplCreateIso2022JpToUnicodeContext(void)56 void * ImplCreateIso2022JpToUnicodeContext(void)
57 {
58 void * pContext
59 = rtl_allocateMemory(sizeof (ImplIso2022JpToUnicodeContext));
60 ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState
61 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
62 return pContext;
63 }
64
ImplResetIso2022JpToUnicodeContext(void * pContext)65 void ImplResetIso2022JpToUnicodeContext(void * pContext)
66 {
67 if (pContext)
68 ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState
69 = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
70 }
71
/*
 * Convert ISO-2022-JP bytes to UTF-16 code units.
 *
 * pData         converter data; read as ImplIso2022JpConverterData to reach
 *               the JIS X 0208 to Unicode table
 * pContext      optional ImplIso2022JpToUnicodeContext; when non-null the
 *               decoder state is loaded from it on entry and stored on exit
 * pSrcBuf       source bytes
 * nSrcBytes     number of source bytes
 * pDestBuf      destination buffer
 * nDestChars    capacity of pDestBuf in sal_Unicode units
 * nFlags        RTL_TEXTTOUNICODE_FLAGS_* bits; handed on to
 *               ImplHandleBadInputTextToUnicodeConversion, and the FLUSH bit
 *               is examined here when the input ends inside a sequence
 * pInfo         if non-null, receives the accumulated RTL_TEXTTOUNICODE_INFO_*
 *               bits
 * pSrcCvtBytes  if non-null, receives the number of source bytes consumed
 *
 * Returns the number of sal_Unicode units written to pDestBuf.
 */
sal_Size ImplConvertIso2022JpToUnicode(ImplTextConverterData const * pData,
                                       void * pContext,
                                       sal_Char const * pSrcBuf,
                                       sal_Size nSrcBytes,
                                       sal_Unicode * pDestBuf,
                                       sal_Size nDestChars,
                                       sal_uInt32 nFlags,
                                       sal_uInt32 * pInfo,
                                       sal_Size * pSrcCvtBytes)
{
    ImplIso2022JpConverterData const * pTables
        = (ImplIso2022JpConverterData const *) pData;
    ImplDBCSToUniLeadTab const * pLeadTab = pTables->m_pJisX0208ToUnicodeData;
    ImplIso2022JpToUnicodeContext * pSaved
        = (ImplIso2022JpToUnicodeContext *) pContext;
    ImplIso2022JpToUnicodeState eState
        = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
    sal_uInt32 nRow = 0;
    sal_uInt32 nInfo = 0;
    sal_Size nConsumed = 0;
    sal_Unicode * pOut = pDestBuf;
    sal_Unicode * pOutEnd = pDestBuf + nDestChars;

    /* Resume where the previous call stopped. */
    if (pSaved)
    {
        eState = pSaved->m_eState;
        nRow = pSaved->m_nRow;
    }

    /* "continue" counts the current byte as consumed; "break" does not. */
    for (; nConsumed < nSrcBytes; ++nConsumed)
    {
        sal_Bool bUndefined = sal_True;
        sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;

        switch (eState)
        {
        case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII:
            if (nChar == 0x1B) /* ESC */
            {
                eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
            }
            else if (nChar >= 0x80)
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            else if (pOut == pOutEnd)
            {
                goto no_output;
            }
            else
            {
                *pOut++ = (sal_Unicode) nChar;
            }
            break;

        case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN:
            if (nChar == 0x1B) /* ESC */
            {
                eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
            }
            else if (nChar >= 0x80)
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            else if (pOut == pOutEnd)
            {
                goto no_output;
            }
            else
            {
                /* JIS-Roman differs from ASCII in two positions. */
                if (nChar == 0x5C) /* \ */
                {
                    nChar = 0xA5; /* YEN SIGN */
                }
                else if (nChar == 0x7E) /* ~ */
                {
                    nChar = 0xAF; /* MACRON */
                }
                *pOut++ = (sal_Unicode) nChar;
            }
            break;

        case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208:
            if (nChar == 0x1B) /* ESC */
            {
                eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
            }
            else if (nChar >= 0x21 && nChar <= 0x7E)
            {
                /* Remember the first byte until the second one arrives. */
                nRow = nChar;
                eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2;
            }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2:
            if (nChar < 0x21 || nChar > 0x7E)
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            {
                sal_uInt16 nUnicode = 0;
                sal_uInt32 nFirst = pLeadTab[nRow].mnTrailStart;

                if (nChar >= nFirst && nChar <= pLeadTab[nRow].mnTrailEnd)
                {
                    nUnicode = pLeadTab[nRow].mpToUniTrailTab[nChar - nFirst];
                }
                /* A zero table entry (or no entry at all) is reported as an
                   undefined character: bUndefined is still sal_True here. */
                if (nUnicode == 0)
                {
                    goto bad_input;
                }
                if (pOut == pOutEnd)
                {
                    goto no_output;
                }
                *pOut++ = (sal_Unicode) nUnicode;
                eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208;
            }
            break;

        case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC:
            if (nChar == 0x24) /* $ */
            {
                eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR;
            }
            else if (nChar == 0x28) /* ( */
            {
                eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN;
            }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN:
            if (nChar == 0x42) /* B */
            {
                eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
            }
            else if (nChar == 0x4A) /* J */
            {
                eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN;
            }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;

        case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR:
            if (nChar == 0x40 /* @ */ || nChar == 0x42 /* B */)
            {
                eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208;
            }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
            break;
        }
        continue;

    bad_input:
        switch (ImplHandleBadInputTextToUnicodeConversion(
                    bUndefined, sal_True, 0, nFlags, &pOut, pOutEnd, &nInfo))
        {
        case IMPL_BAD_INPUT_STOP:
            eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        break;

    no_output:
        /* Un-read the byte that could not be stored and stop. */
        --pSrcBuf;
        nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
        break;
    }

    /* The input ended inside a two-byte character or an escape sequence
       (all such states sort after ..._STATE_0208) without another problem
       having been reported already. */
    if (eState > IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
        && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
                         | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
               == 0)
    {
        if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
        {
            nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
        }
        else
        {
            switch (ImplHandleBadInputTextToUnicodeConversion(
                        sal_False, sal_True, 0, nFlags, &pOut, pOutEnd,
                        &nInfo))
            {
            case IMPL_BAD_INPUT_STOP:
            case IMPL_BAD_INPUT_CONTINUE:
                eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
                break;

            case IMPL_BAD_INPUT_NO_OUTPUT:
                nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                break;
            }
        }
    }

    if (pSaved)
    {
        pSaved->m_eState = eState;
        pSaved->m_nRow = nRow;
    }
    if (pInfo)
    {
        *pInfo = nInfo;
    }
    if (pSrcCvtBytes)
    {
        *pSrcCvtBytes = nConsumed;
    }

    return pOut - pDestBuf;
}
298
ImplCreateUnicodeToIso2022JpContext(void)299 void * ImplCreateUnicodeToIso2022JpContext(void)
300 {
301 void * pContext
302 = rtl_allocateMemory(sizeof (ImplUnicodeToIso2022JpContext));
303 ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate = 0;
304 ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208 = sal_False;
305 return pContext;
306 }
307
ImplResetUnicodeToIso2022JpContext(void * pContext)308 void ImplResetUnicodeToIso2022JpContext(void * pContext)
309 {
310 if (pContext)
311 {
312 ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate = 0;
313 ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208 = sal_False;
314 }
315 }
316
/*
 * Convert UTF-16 code units to ISO-2022-JP bytes.
 *
 * Characters below 0x80 (other than ESC, which is rejected) are written as
 * single bytes after switching to ASCII with ESC ( B if necessary; all other
 * characters are looked up in the Unicode to JIS X 0208 table and written as
 * two bytes after switching with ESC $ B if necessary.
 *
 * pData         converter data; read as ImplIso2022JpConverterData to reach
 *               the Unicode to JIS X 0208 table
 * pContext      optional ImplUnicodeToIso2022JpContext; when non-null the
 *               pending high surrogate and the 0208 flag are loaded from it
 *               on entry and stored on exit
 * pSrcBuf       source UTF-16 code units
 * nSrcChars     number of source code units
 * pDestBuf      destination buffer
 * nDestBytes    capacity of pDestBuf in bytes
 * nFlags        RTL_UNICODETOTEXT_FLAGS_* bits; handed on to
 *               ImplHandleBadInputUnicodeToTextConversion, and the FLUSH bit
 *               is examined here after the loop
 * pInfo         if non-null, receives the accumulated RTL_UNICODETOTEXT_INFO_*
 *               bits
 * pSrcCvtChars  if non-null, receives the number of source units consumed
 *
 * Returns the number of bytes written to pDestBuf.
 */
sal_Size ImplConvertUnicodeToIso2022Jp(ImplTextConverterData const * pData,
                                       void * pContext,
                                       sal_Unicode const * pSrcBuf,
                                       sal_Size nSrcChars,
                                       sal_Char * pDestBuf,
                                       sal_Size nDestBytes,
                                       sal_uInt32 nFlags,
                                       sal_uInt32 * pInfo,
                                       sal_Size * pSrcCvtChars)
{
    ImplIso2022JpConverterData const * pTables
        = (ImplIso2022JpConverterData const *) pData;
    ImplUniToDBCSHighTab const * pHighTab = pTables->m_pUnicodeToJisX0208Data;
    ImplUnicodeToIso2022JpContext * pSaved
        = (ImplUnicodeToIso2022JpContext *) pContext;
    sal_Unicode nHighSurrogate = 0;
    sal_Bool b0208 = sal_False;
    sal_uInt32 nInfo = 0;
    sal_Size nConsumed = 0;
    sal_Char * pOut = pDestBuf;
    sal_Char * pOutEnd = pDestBuf + nDestBytes;
    sal_Bool bWritten;

    /* Resume where the previous call stopped. */
    if (pSaved)
    {
        nHighSurrogate = pSaved->m_nHighSurrogate;
        b0208 = pSaved->m_b0208;
    }

    /* "continue" counts the current unit as consumed; "break" does not. */
    for (; nConsumed < nSrcChars; ++nConsumed)
    {
        sal_Bool bUndefined = sal_True;
        sal_uInt32 nChar = *pSrcBuf++;

        if (nHighSurrogate != 0)
        {
            /* A stored high surrogate must be completed by a low one. */
            if (ImplIsLowSurrogate(nChar))
            {
                nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
            }
            else
            {
                bUndefined = sal_False;
                goto bad_input;
            }
        }
        else if (ImplIsHighSurrogate(nChar))
        {
            nHighSurrogate = (sal_Unicode) nChar;
            continue;
        }

        if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
        {
            bUndefined = sal_False;
            goto bad_input;
        }

        if (nChar == 0x1B) /* ESC is never passed through */
        {
            goto bad_input;
        }
        else if (nChar < 0x80)
        {
            /* Single-byte output (this includes LF and CR): leave JIS X 0208
               first if it is currently selected. */
            if (b0208)
            {
                if (pOutEnd - pOut < 3)
                {
                    goto no_output;
                }
                *pOut++ = 0x1B; /* ESC */
                *pOut++ = 0x28; /* ( */
                *pOut++ = 0x42; /* B */
                b0208 = sal_False;
            }
            if (pOut == pOutEnd)
            {
                goto no_output;
            }
            *pOut++ = (sal_Char) nChar;
        }
        else
        {
            sal_uInt16 nBytes = 0;
            sal_uInt32 nHigh = nChar >> 8;

            if (nHigh < 0x100)
            {
                ImplUniToDBCSHighTab const * pRow = pHighTab + nHigh;
                sal_uInt32 nLow = nChar & 0xFF;
                sal_uInt32 nFirst = pRow->mnLowStart;

                if (nLow >= nFirst && nLow <= pRow->mnLowEnd)
                {
                    nBytes = pRow->mpToUniTrailTab[nLow - nFirst];
                    if (nBytes == 0)
                    {
                        /* For some reason, the tables in tcvtjp4.tab do not
                           include these two conversions: */
                        if (nChar == 0xA5) /* YEN SIGN */
                        {
                            nBytes = 0x216F;
                        }
                        else if (nChar == 0xAF) /* MACRON */
                        {
                            nBytes = 0x2131;
                        }
                    }
                }
            }

            if (nBytes == 0)
            {
                goto bad_input;
            }

            /* Two-byte output: select JIS X 0208 first if necessary. */
            if (!b0208)
            {
                if (pOutEnd - pOut < 3)
                {
                    goto no_output;
                }
                *pOut++ = 0x1B; /* ESC */
                *pOut++ = 0x24; /* $ */
                *pOut++ = 0x42; /* B */
                b0208 = sal_True;
            }
            if (pOutEnd - pOut < 2)
            {
                goto no_output;
            }
            *pOut++ = (sal_Char) (nBytes >> 8);
            *pOut++ = (sal_Char) (nBytes & 0xFF);
        }
        nHighSurrogate = 0;
        continue;

    bad_input:
        /* The ESC ( B sequence is offered to the handler only while
           JIS X 0208 is selected (length 3), otherwise with length 0. */
        switch (ImplHandleBadInputUnicodeToTextConversion(
                    bUndefined,
                    nChar,
                    nFlags,
                    &pOut,
                    pOutEnd,
                    &nInfo,
                    "\x1B(B",
                    b0208 ? 3 : 0,
                    &bWritten))
        {
        case IMPL_BAD_INPUT_STOP:
            nHighSurrogate = 0;
            break;

        case IMPL_BAD_INPUT_CONTINUE:
            if (bWritten)
            {
                b0208 = sal_False;
            }
            nHighSurrogate = 0;
            continue;

        case IMPL_BAD_INPUT_NO_OUTPUT:
            goto no_output;
        }
        break;

    no_output:
        /* Un-read the unit that could not be stored and stop. */
        --pSrcBuf;
        nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
        break;
    }

    if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
                      | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
            == 0)
    {
        sal_Bool bFlush = sal_True;

        if (nHighSurrogate != 0)
        {
            /* NOTE(review): SRCBUFFERTOSMALL is reported here when FLUSH
               *is* set, the opposite polarity of the corresponding test in
               ImplConvertIso2022JpToUnicode -- confirm this is intended. */
            if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
            {
                nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
            }
            else
            {
                switch (ImplHandleBadInputUnicodeToTextConversion(
                            sal_False,
                            0,
                            nFlags,
                            &pOut,
                            pOutEnd,
                            &nInfo,
                            "\x1B(B",
                            b0208 ? 3 : 0,
                            &bWritten))
                {
                case IMPL_BAD_INPUT_STOP:
                    nHighSurrogate = 0;
                    bFlush = sal_False;
                    break;

                case IMPL_BAD_INPUT_CONTINUE:
                    if (bWritten)
                    {
                        b0208 = sal_False;
                    }
                    nHighSurrogate = 0;
                    break;

                case IMPL_BAD_INPUT_NO_OUTPUT:
                    nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                    break;
                }
            }
        }

        /* On flush, switch the output back to ASCII if it was left in
           JIS X 0208. */
        if (bFlush
            && b0208
            && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
        {
            if (pOutEnd - pOut >= 3)
            {
                *pOut++ = 0x1B; /* ESC */
                *pOut++ = 0x28; /* ( */
                *pOut++ = 0x42; /* B */
                b0208 = sal_False;
            }
            else
            {
                nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
            }
        }
    }

    if (pSaved)
    {
        pSaved->m_nHighSurrogate = nHighSurrogate;
        pSaved->m_b0208 = b0208;
    }
    if (pInfo)
    {
        *pInfo = nInfo;
    }
    if (pSrcCvtChars)
    {
        *pSrcCvtChars = nConsumed;
    }

    return pOut - pDestBuf;
}
566