xref: /aoo42x/main/sal/textenc/convertiso2022jp.c (revision cdf0e10c)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir #include "convertiso2022jp.h"
29*cdf0e10cSrcweir #include "context.h"
30*cdf0e10cSrcweir #include "converter.h"
31*cdf0e10cSrcweir #include "tenchelp.h"
32*cdf0e10cSrcweir #include "unichars.h"
33*cdf0e10cSrcweir #include "rtl/alloc.h"
34*cdf0e10cSrcweir #include "rtl/textcvt.h"
35*cdf0e10cSrcweir #include "sal/types.h"
36*cdf0e10cSrcweir 
/*
 * States of the ISO-2022-JP decoder.
 *
 * The numeric order matters: the decoder treats every state greater than
 * IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208 as "in the middle of a multi-byte
 * character or escape sequence" when it reaches the end of its input.
 */
typedef enum
{
    /* Complete states: a new character or escape sequence may start. */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII = 0,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN = 1,
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208 = 2,

    /* Incomplete states: more input bytes are expected. */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2 = 3,     /* first byte seen */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC = 4,        /* ESC seen */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN = 5, /* ESC ( seen */
    IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR = 6  /* ESC $ seen */
} ImplIso2022JpToUnicodeState;
47*cdf0e10cSrcweir 
48*cdf0e10cSrcweir typedef struct
49*cdf0e10cSrcweir {
50*cdf0e10cSrcweir     ImplIso2022JpToUnicodeState m_eState;
51*cdf0e10cSrcweir     sal_uInt32 m_nRow;
52*cdf0e10cSrcweir } ImplIso2022JpToUnicodeContext;
53*cdf0e10cSrcweir 
54*cdf0e10cSrcweir typedef struct
55*cdf0e10cSrcweir {
56*cdf0e10cSrcweir     sal_Unicode m_nHighSurrogate;
57*cdf0e10cSrcweir     sal_Bool m_b0208;
58*cdf0e10cSrcweir } ImplUnicodeToIso2022JpContext;
59*cdf0e10cSrcweir 
60*cdf0e10cSrcweir void * ImplCreateIso2022JpToUnicodeContext(void)
61*cdf0e10cSrcweir {
62*cdf0e10cSrcweir     void * pContext
63*cdf0e10cSrcweir         = rtl_allocateMemory(sizeof (ImplIso2022JpToUnicodeContext));
64*cdf0e10cSrcweir     ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState
65*cdf0e10cSrcweir         = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
66*cdf0e10cSrcweir     return pContext;
67*cdf0e10cSrcweir }
68*cdf0e10cSrcweir 
69*cdf0e10cSrcweir void ImplResetIso2022JpToUnicodeContext(void * pContext)
70*cdf0e10cSrcweir {
71*cdf0e10cSrcweir     if (pContext)
72*cdf0e10cSrcweir         ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState
73*cdf0e10cSrcweir             = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
74*cdf0e10cSrcweir }
75*cdf0e10cSrcweir 
76*cdf0e10cSrcweir sal_Size ImplConvertIso2022JpToUnicode(ImplTextConverterData const * pData,
77*cdf0e10cSrcweir                                        void * pContext,
78*cdf0e10cSrcweir                                        sal_Char const * pSrcBuf,
79*cdf0e10cSrcweir                                        sal_Size nSrcBytes,
80*cdf0e10cSrcweir                                        sal_Unicode * pDestBuf,
81*cdf0e10cSrcweir                                        sal_Size nDestChars,
82*cdf0e10cSrcweir                                        sal_uInt32 nFlags,
83*cdf0e10cSrcweir                                        sal_uInt32 * pInfo,
84*cdf0e10cSrcweir                                        sal_Size * pSrcCvtBytes)
85*cdf0e10cSrcweir {
86*cdf0e10cSrcweir     ImplDBCSToUniLeadTab const * pJisX0208Data
87*cdf0e10cSrcweir         = ((ImplIso2022JpConverterData const *) pData)->
88*cdf0e10cSrcweir               m_pJisX0208ToUnicodeData;
89*cdf0e10cSrcweir     ImplIso2022JpToUnicodeState eState
90*cdf0e10cSrcweir         = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
91*cdf0e10cSrcweir     sal_uInt32 nRow = 0;
92*cdf0e10cSrcweir     sal_uInt32 nInfo = 0;
93*cdf0e10cSrcweir     sal_Size nConverted = 0;
94*cdf0e10cSrcweir     sal_Unicode * pDestBufPtr = pDestBuf;
95*cdf0e10cSrcweir     sal_Unicode * pDestBufEnd = pDestBuf + nDestChars;
96*cdf0e10cSrcweir 
97*cdf0e10cSrcweir     if (pContext)
98*cdf0e10cSrcweir     {
99*cdf0e10cSrcweir         eState = ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState;
100*cdf0e10cSrcweir         nRow = ((ImplIso2022JpToUnicodeContext *) pContext)->m_nRow;
101*cdf0e10cSrcweir     }
102*cdf0e10cSrcweir 
103*cdf0e10cSrcweir     for (; nConverted < nSrcBytes; ++nConverted)
104*cdf0e10cSrcweir     {
105*cdf0e10cSrcweir         sal_Bool bUndefined = sal_True;
106*cdf0e10cSrcweir         sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++;
107*cdf0e10cSrcweir         switch (eState)
108*cdf0e10cSrcweir         {
109*cdf0e10cSrcweir         case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII:
110*cdf0e10cSrcweir             if (nChar == 0x1B) /* ESC */
111*cdf0e10cSrcweir                 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
112*cdf0e10cSrcweir             else if (nChar < 0x80)
113*cdf0e10cSrcweir                 if (pDestBufPtr != pDestBufEnd)
114*cdf0e10cSrcweir                     *pDestBufPtr++ = (sal_Unicode) nChar;
115*cdf0e10cSrcweir                 else
116*cdf0e10cSrcweir                     goto no_output;
117*cdf0e10cSrcweir             else
118*cdf0e10cSrcweir             {
119*cdf0e10cSrcweir                 bUndefined = sal_False;
120*cdf0e10cSrcweir                 goto bad_input;
121*cdf0e10cSrcweir             }
122*cdf0e10cSrcweir             break;
123*cdf0e10cSrcweir 
124*cdf0e10cSrcweir         case IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN:
125*cdf0e10cSrcweir             if (nChar == 0x1B) /* ESC */
126*cdf0e10cSrcweir                 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
127*cdf0e10cSrcweir             else if (nChar < 0x80)
128*cdf0e10cSrcweir                 if (pDestBufPtr != pDestBufEnd)
129*cdf0e10cSrcweir                 {
130*cdf0e10cSrcweir                     switch (nChar)
131*cdf0e10cSrcweir                     {
132*cdf0e10cSrcweir                     case 0x5C: /* \ */
133*cdf0e10cSrcweir                         nChar = 0xA5; /* YEN SIGN */
134*cdf0e10cSrcweir                         break;
135*cdf0e10cSrcweir 
136*cdf0e10cSrcweir                     case 0x7E: /* ~ */
137*cdf0e10cSrcweir                         nChar = 0xAF; /* MACRON */
138*cdf0e10cSrcweir                         break;
139*cdf0e10cSrcweir                     }
140*cdf0e10cSrcweir                     *pDestBufPtr++ = (sal_Unicode) nChar;
141*cdf0e10cSrcweir                 }
142*cdf0e10cSrcweir                 else
143*cdf0e10cSrcweir                     goto no_output;
144*cdf0e10cSrcweir             else
145*cdf0e10cSrcweir             {
146*cdf0e10cSrcweir                 bUndefined = sal_False;
147*cdf0e10cSrcweir                 goto bad_input;
148*cdf0e10cSrcweir             }
149*cdf0e10cSrcweir             break;
150*cdf0e10cSrcweir 
151*cdf0e10cSrcweir         case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208:
152*cdf0e10cSrcweir             if (nChar == 0x1B) /* ESC */
153*cdf0e10cSrcweir                 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC;
154*cdf0e10cSrcweir             else if (nChar >= 0x21 && nChar <= 0x7E)
155*cdf0e10cSrcweir             {
156*cdf0e10cSrcweir                 nRow = nChar;
157*cdf0e10cSrcweir                 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2;
158*cdf0e10cSrcweir             }
159*cdf0e10cSrcweir             else
160*cdf0e10cSrcweir             {
161*cdf0e10cSrcweir                 bUndefined = sal_False;
162*cdf0e10cSrcweir                 goto bad_input;
163*cdf0e10cSrcweir             }
164*cdf0e10cSrcweir             break;
165*cdf0e10cSrcweir 
166*cdf0e10cSrcweir         case IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208_2:
167*cdf0e10cSrcweir             if (nChar >= 0x21 && nChar <= 0x7E)
168*cdf0e10cSrcweir             {
169*cdf0e10cSrcweir                 sal_uInt16 nUnicode = 0;
170*cdf0e10cSrcweir                 sal_uInt32 nFirst = pJisX0208Data[nRow].mnTrailStart;
171*cdf0e10cSrcweir                 if (nChar >= nFirst
172*cdf0e10cSrcweir                     && nChar <= pJisX0208Data[nRow].mnTrailEnd)
173*cdf0e10cSrcweir                     nUnicode = pJisX0208Data[nRow].
174*cdf0e10cSrcweir                                    mpToUniTrailTab[nChar - nFirst];
175*cdf0e10cSrcweir                 if (nUnicode != 0)
176*cdf0e10cSrcweir                     if (pDestBufPtr != pDestBufEnd)
177*cdf0e10cSrcweir                     {
178*cdf0e10cSrcweir                         *pDestBufPtr++ = (sal_Unicode) nUnicode;
179*cdf0e10cSrcweir                         eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208;
180*cdf0e10cSrcweir                     }
181*cdf0e10cSrcweir                     else
182*cdf0e10cSrcweir                         goto no_output;
183*cdf0e10cSrcweir                 else
184*cdf0e10cSrcweir                     goto bad_input;
185*cdf0e10cSrcweir             }
186*cdf0e10cSrcweir             else
187*cdf0e10cSrcweir             {
188*cdf0e10cSrcweir                 bUndefined = sal_False;
189*cdf0e10cSrcweir                 goto bad_input;
190*cdf0e10cSrcweir             }
191*cdf0e10cSrcweir             break;
192*cdf0e10cSrcweir 
193*cdf0e10cSrcweir         case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC:
194*cdf0e10cSrcweir             switch (nChar)
195*cdf0e10cSrcweir             {
196*cdf0e10cSrcweir             case 0x24: /* $ */
197*cdf0e10cSrcweir                 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR;
198*cdf0e10cSrcweir                 break;
199*cdf0e10cSrcweir 
200*cdf0e10cSrcweir             case 0x28: /* ( */
201*cdf0e10cSrcweir                 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN;
202*cdf0e10cSrcweir                 break;
203*cdf0e10cSrcweir 
204*cdf0e10cSrcweir             default:
205*cdf0e10cSrcweir                 bUndefined = sal_False;
206*cdf0e10cSrcweir                 goto bad_input;
207*cdf0e10cSrcweir             }
208*cdf0e10cSrcweir             break;
209*cdf0e10cSrcweir 
210*cdf0e10cSrcweir         case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_LPAREN:
211*cdf0e10cSrcweir             switch (nChar)
212*cdf0e10cSrcweir             {
213*cdf0e10cSrcweir             case 0x42: /* A */
214*cdf0e10cSrcweir                 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
215*cdf0e10cSrcweir                 break;
216*cdf0e10cSrcweir 
217*cdf0e10cSrcweir             case 0x4A: /* J */
218*cdf0e10cSrcweir                 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_JIS_ROMAN;
219*cdf0e10cSrcweir                 break;
220*cdf0e10cSrcweir 
221*cdf0e10cSrcweir             default:
222*cdf0e10cSrcweir                 bUndefined = sal_False;
223*cdf0e10cSrcweir                 goto bad_input;
224*cdf0e10cSrcweir             }
225*cdf0e10cSrcweir             break;
226*cdf0e10cSrcweir 
227*cdf0e10cSrcweir         case IMPL_ISO_2022_JP_TO_UNICODE_STATE_ESC_DOLLAR:
228*cdf0e10cSrcweir             switch (nChar)
229*cdf0e10cSrcweir             {
230*cdf0e10cSrcweir             case 0x40: /* @ */
231*cdf0e10cSrcweir             case 0x42: /* B */
232*cdf0e10cSrcweir                 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208;
233*cdf0e10cSrcweir                 break;
234*cdf0e10cSrcweir 
235*cdf0e10cSrcweir             default:
236*cdf0e10cSrcweir                 bUndefined = sal_False;
237*cdf0e10cSrcweir                 goto bad_input;
238*cdf0e10cSrcweir             }
239*cdf0e10cSrcweir             break;
240*cdf0e10cSrcweir         }
241*cdf0e10cSrcweir         continue;
242*cdf0e10cSrcweir 
243*cdf0e10cSrcweir     bad_input:
244*cdf0e10cSrcweir         switch (ImplHandleBadInputTextToUnicodeConversion(
245*cdf0e10cSrcweir                     bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
246*cdf0e10cSrcweir                     &nInfo))
247*cdf0e10cSrcweir         {
248*cdf0e10cSrcweir         case IMPL_BAD_INPUT_STOP:
249*cdf0e10cSrcweir             eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
250*cdf0e10cSrcweir             break;
251*cdf0e10cSrcweir 
252*cdf0e10cSrcweir         case IMPL_BAD_INPUT_CONTINUE:
253*cdf0e10cSrcweir             eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
254*cdf0e10cSrcweir             continue;
255*cdf0e10cSrcweir 
256*cdf0e10cSrcweir         case IMPL_BAD_INPUT_NO_OUTPUT:
257*cdf0e10cSrcweir             goto no_output;
258*cdf0e10cSrcweir         }
259*cdf0e10cSrcweir         break;
260*cdf0e10cSrcweir 
261*cdf0e10cSrcweir     no_output:
262*cdf0e10cSrcweir         --pSrcBuf;
263*cdf0e10cSrcweir         nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
264*cdf0e10cSrcweir         break;
265*cdf0e10cSrcweir     }
266*cdf0e10cSrcweir 
267*cdf0e10cSrcweir     if (eState > IMPL_ISO_2022_JP_TO_UNICODE_STATE_0208
268*cdf0e10cSrcweir         && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR
269*cdf0e10cSrcweir                          | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL))
270*cdf0e10cSrcweir                == 0)
271*cdf0e10cSrcweir     {
272*cdf0e10cSrcweir         if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0)
273*cdf0e10cSrcweir             nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
274*cdf0e10cSrcweir         else
275*cdf0e10cSrcweir             switch (ImplHandleBadInputTextToUnicodeConversion(
276*cdf0e10cSrcweir                         sal_False, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd,
277*cdf0e10cSrcweir                         &nInfo))
278*cdf0e10cSrcweir             {
279*cdf0e10cSrcweir             case IMPL_BAD_INPUT_STOP:
280*cdf0e10cSrcweir             case IMPL_BAD_INPUT_CONTINUE:
281*cdf0e10cSrcweir                 eState = IMPL_ISO_2022_JP_TO_UNICODE_STATE_ASCII;
282*cdf0e10cSrcweir                 break;
283*cdf0e10cSrcweir 
284*cdf0e10cSrcweir             case IMPL_BAD_INPUT_NO_OUTPUT:
285*cdf0e10cSrcweir                 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
286*cdf0e10cSrcweir                 break;
287*cdf0e10cSrcweir             }
288*cdf0e10cSrcweir     }
289*cdf0e10cSrcweir 
290*cdf0e10cSrcweir     if (pContext)
291*cdf0e10cSrcweir     {
292*cdf0e10cSrcweir         ((ImplIso2022JpToUnicodeContext *) pContext)->m_eState = eState;
293*cdf0e10cSrcweir         ((ImplIso2022JpToUnicodeContext *) pContext)->m_nRow = nRow;
294*cdf0e10cSrcweir     }
295*cdf0e10cSrcweir     if (pInfo)
296*cdf0e10cSrcweir         *pInfo = nInfo;
297*cdf0e10cSrcweir     if (pSrcCvtBytes)
298*cdf0e10cSrcweir         *pSrcCvtBytes = nConverted;
299*cdf0e10cSrcweir 
300*cdf0e10cSrcweir     return pDestBufPtr - pDestBuf;
301*cdf0e10cSrcweir }
302*cdf0e10cSrcweir 
303*cdf0e10cSrcweir void * ImplCreateUnicodeToIso2022JpContext(void)
304*cdf0e10cSrcweir {
305*cdf0e10cSrcweir     void * pContext
306*cdf0e10cSrcweir         = rtl_allocateMemory(sizeof (ImplUnicodeToIso2022JpContext));
307*cdf0e10cSrcweir     ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate = 0;
308*cdf0e10cSrcweir     ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208 = sal_False;
309*cdf0e10cSrcweir     return pContext;
310*cdf0e10cSrcweir }
311*cdf0e10cSrcweir 
312*cdf0e10cSrcweir void ImplResetUnicodeToIso2022JpContext(void * pContext)
313*cdf0e10cSrcweir {
314*cdf0e10cSrcweir     if (pContext)
315*cdf0e10cSrcweir     {
316*cdf0e10cSrcweir         ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate = 0;
317*cdf0e10cSrcweir         ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208 = sal_False;
318*cdf0e10cSrcweir     }
319*cdf0e10cSrcweir }
320*cdf0e10cSrcweir 
321*cdf0e10cSrcweir sal_Size ImplConvertUnicodeToIso2022Jp(ImplTextConverterData const * pData,
322*cdf0e10cSrcweir                                        void * pContext,
323*cdf0e10cSrcweir                                        sal_Unicode const * pSrcBuf,
324*cdf0e10cSrcweir                                        sal_Size nSrcChars,
325*cdf0e10cSrcweir                                        sal_Char * pDestBuf,
326*cdf0e10cSrcweir                                        sal_Size nDestBytes,
327*cdf0e10cSrcweir                                        sal_uInt32 nFlags,
328*cdf0e10cSrcweir                                        sal_uInt32 * pInfo,
329*cdf0e10cSrcweir                                        sal_Size * pSrcCvtChars)
330*cdf0e10cSrcweir {
331*cdf0e10cSrcweir     ImplUniToDBCSHighTab const * pJisX0208Data
332*cdf0e10cSrcweir         = ((ImplIso2022JpConverterData const *) pData)->
333*cdf0e10cSrcweir               m_pUnicodeToJisX0208Data;
334*cdf0e10cSrcweir     sal_Unicode nHighSurrogate = 0;
335*cdf0e10cSrcweir     sal_Bool b0208 = sal_False;
336*cdf0e10cSrcweir     sal_uInt32 nInfo = 0;
337*cdf0e10cSrcweir     sal_Size nConverted = 0;
338*cdf0e10cSrcweir     sal_Char * pDestBufPtr = pDestBuf;
339*cdf0e10cSrcweir     sal_Char * pDestBufEnd = pDestBuf + nDestBytes;
340*cdf0e10cSrcweir     sal_Bool bWritten;
341*cdf0e10cSrcweir 
342*cdf0e10cSrcweir     if (pContext)
343*cdf0e10cSrcweir     {
344*cdf0e10cSrcweir         nHighSurrogate
345*cdf0e10cSrcweir             = ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate;
346*cdf0e10cSrcweir         b0208 = ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208;
347*cdf0e10cSrcweir     }
348*cdf0e10cSrcweir 
349*cdf0e10cSrcweir     for (; nConverted < nSrcChars; ++nConverted)
350*cdf0e10cSrcweir     {
351*cdf0e10cSrcweir         sal_Bool bUndefined = sal_True;
352*cdf0e10cSrcweir         sal_uInt32 nChar = *pSrcBuf++;
353*cdf0e10cSrcweir         if (nHighSurrogate == 0)
354*cdf0e10cSrcweir         {
355*cdf0e10cSrcweir             if (ImplIsHighSurrogate(nChar))
356*cdf0e10cSrcweir             {
357*cdf0e10cSrcweir                 nHighSurrogate = (sal_Unicode) nChar;
358*cdf0e10cSrcweir                 continue;
359*cdf0e10cSrcweir             }
360*cdf0e10cSrcweir         }
361*cdf0e10cSrcweir         else if (ImplIsLowSurrogate(nChar))
362*cdf0e10cSrcweir             nChar = ImplCombineSurrogates(nHighSurrogate, nChar);
363*cdf0e10cSrcweir         else
364*cdf0e10cSrcweir         {
365*cdf0e10cSrcweir             bUndefined = sal_False;
366*cdf0e10cSrcweir             goto bad_input;
367*cdf0e10cSrcweir         }
368*cdf0e10cSrcweir 
369*cdf0e10cSrcweir         if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar))
370*cdf0e10cSrcweir         {
371*cdf0e10cSrcweir             bUndefined = sal_False;
372*cdf0e10cSrcweir             goto bad_input;
373*cdf0e10cSrcweir         }
374*cdf0e10cSrcweir 
375*cdf0e10cSrcweir         if (nChar == 0x0A || nChar == 0x0D) /* LF, CR */
376*cdf0e10cSrcweir         {
377*cdf0e10cSrcweir             if (b0208)
378*cdf0e10cSrcweir             {
379*cdf0e10cSrcweir                 if (pDestBufEnd - pDestBufPtr >= 3)
380*cdf0e10cSrcweir                 {
381*cdf0e10cSrcweir                     *pDestBufPtr++ = 0x1B; /* ESC */
382*cdf0e10cSrcweir                     *pDestBufPtr++ = 0x28; /* ( */
383*cdf0e10cSrcweir                     *pDestBufPtr++ = 0x42; /* B */
384*cdf0e10cSrcweir                     b0208 = sal_False;
385*cdf0e10cSrcweir                 }
386*cdf0e10cSrcweir                 else
387*cdf0e10cSrcweir                     goto no_output;
388*cdf0e10cSrcweir             }
389*cdf0e10cSrcweir             if (pDestBufPtr != pDestBufEnd)
390*cdf0e10cSrcweir                 *pDestBufPtr++ = (sal_Char) nChar;
391*cdf0e10cSrcweir             else
392*cdf0e10cSrcweir                 goto no_output;
393*cdf0e10cSrcweir         }
394*cdf0e10cSrcweir         else if (nChar == 0x1B)
395*cdf0e10cSrcweir             goto bad_input;
396*cdf0e10cSrcweir         else if (nChar < 0x80)
397*cdf0e10cSrcweir         {
398*cdf0e10cSrcweir             if (b0208)
399*cdf0e10cSrcweir             {
400*cdf0e10cSrcweir                 if (pDestBufEnd - pDestBufPtr >= 3)
401*cdf0e10cSrcweir                 {
402*cdf0e10cSrcweir                     *pDestBufPtr++ = 0x1B; /* ESC */
403*cdf0e10cSrcweir                     *pDestBufPtr++ = 0x28; /* ( */
404*cdf0e10cSrcweir                     *pDestBufPtr++ = 0x42; /* B */
405*cdf0e10cSrcweir                     b0208 = sal_False;
406*cdf0e10cSrcweir                 }
407*cdf0e10cSrcweir                 else
408*cdf0e10cSrcweir                     goto no_output;
409*cdf0e10cSrcweir             }
410*cdf0e10cSrcweir             if (pDestBufPtr != pDestBufEnd)
411*cdf0e10cSrcweir                 *pDestBufPtr++ = (sal_Char) nChar;
412*cdf0e10cSrcweir             else
413*cdf0e10cSrcweir                 goto no_output;
414*cdf0e10cSrcweir         }
415*cdf0e10cSrcweir         else
416*cdf0e10cSrcweir         {
417*cdf0e10cSrcweir             sal_uInt16 nBytes = 0;
418*cdf0e10cSrcweir             sal_uInt32 nIndex1 = nChar >> 8;
419*cdf0e10cSrcweir             if (nIndex1 < 0x100)
420*cdf0e10cSrcweir             {
421*cdf0e10cSrcweir                 sal_uInt32 nIndex2 = nChar & 0xFF;
422*cdf0e10cSrcweir                 sal_uInt32 nFirst = pJisX0208Data[nIndex1].mnLowStart;
423*cdf0e10cSrcweir                 if (nIndex2 >= nFirst
424*cdf0e10cSrcweir                     && nIndex2 <= pJisX0208Data[nIndex1].mnLowEnd)
425*cdf0e10cSrcweir                 {
426*cdf0e10cSrcweir                     nBytes = pJisX0208Data[nIndex1].
427*cdf0e10cSrcweir                                  mpToUniTrailTab[nIndex2 - nFirst];
428*cdf0e10cSrcweir                     if (nBytes == 0)
429*cdf0e10cSrcweir                         /* For some reason, the tables in tcvtjp4.tab do not
430*cdf0e10cSrcweir                            include these two conversions: */
431*cdf0e10cSrcweir                         switch (nChar)
432*cdf0e10cSrcweir                         {
433*cdf0e10cSrcweir                         case 0xA5: /* YEN SIGN */
434*cdf0e10cSrcweir                             nBytes = 0x216F;
435*cdf0e10cSrcweir                             break;
436*cdf0e10cSrcweir 
437*cdf0e10cSrcweir                         case 0xAF: /* MACRON */
438*cdf0e10cSrcweir                             nBytes = 0x2131;
439*cdf0e10cSrcweir                             break;
440*cdf0e10cSrcweir                         }
441*cdf0e10cSrcweir                 }
442*cdf0e10cSrcweir             }
443*cdf0e10cSrcweir             if (nBytes != 0)
444*cdf0e10cSrcweir             {
445*cdf0e10cSrcweir                 if (!b0208)
446*cdf0e10cSrcweir                 {
447*cdf0e10cSrcweir                     if (pDestBufEnd - pDestBufPtr >= 3)
448*cdf0e10cSrcweir                     {
449*cdf0e10cSrcweir                         *pDestBufPtr++ = 0x1B; /* ESC */
450*cdf0e10cSrcweir                         *pDestBufPtr++ = 0x24; /* $ */
451*cdf0e10cSrcweir                         *pDestBufPtr++ = 0x42; /* B */
452*cdf0e10cSrcweir                         b0208 = sal_True;
453*cdf0e10cSrcweir                     }
454*cdf0e10cSrcweir                     else
455*cdf0e10cSrcweir                         goto no_output;
456*cdf0e10cSrcweir                 }
457*cdf0e10cSrcweir                 if (pDestBufEnd - pDestBufPtr >= 2)
458*cdf0e10cSrcweir                 {
459*cdf0e10cSrcweir                     *pDestBufPtr++ = (sal_Char) (nBytes >> 8);
460*cdf0e10cSrcweir                     *pDestBufPtr++ = (sal_Char) (nBytes & 0xFF);
461*cdf0e10cSrcweir                 }
462*cdf0e10cSrcweir                 else
463*cdf0e10cSrcweir                     goto no_output;
464*cdf0e10cSrcweir             }
465*cdf0e10cSrcweir             else
466*cdf0e10cSrcweir                 goto bad_input;
467*cdf0e10cSrcweir         }
468*cdf0e10cSrcweir         nHighSurrogate = 0;
469*cdf0e10cSrcweir         continue;
470*cdf0e10cSrcweir 
471*cdf0e10cSrcweir     bad_input:
472*cdf0e10cSrcweir         switch (ImplHandleBadInputUnicodeToTextConversion(
473*cdf0e10cSrcweir                     bUndefined,
474*cdf0e10cSrcweir                     nChar,
475*cdf0e10cSrcweir                     nFlags,
476*cdf0e10cSrcweir                     &pDestBufPtr,
477*cdf0e10cSrcweir                     pDestBufEnd,
478*cdf0e10cSrcweir                     &nInfo,
479*cdf0e10cSrcweir                     "\x1B(B",
480*cdf0e10cSrcweir                     b0208 ? 3 : 0,
481*cdf0e10cSrcweir                     &bWritten))
482*cdf0e10cSrcweir         {
483*cdf0e10cSrcweir         case IMPL_BAD_INPUT_STOP:
484*cdf0e10cSrcweir             nHighSurrogate = 0;
485*cdf0e10cSrcweir             break;
486*cdf0e10cSrcweir 
487*cdf0e10cSrcweir         case IMPL_BAD_INPUT_CONTINUE:
488*cdf0e10cSrcweir             if (bWritten)
489*cdf0e10cSrcweir                 b0208 = sal_False;
490*cdf0e10cSrcweir             nHighSurrogate = 0;
491*cdf0e10cSrcweir             continue;
492*cdf0e10cSrcweir 
493*cdf0e10cSrcweir         case IMPL_BAD_INPUT_NO_OUTPUT:
494*cdf0e10cSrcweir             goto no_output;
495*cdf0e10cSrcweir         }
496*cdf0e10cSrcweir         break;
497*cdf0e10cSrcweir 
498*cdf0e10cSrcweir     no_output:
499*cdf0e10cSrcweir         --pSrcBuf;
500*cdf0e10cSrcweir         nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
501*cdf0e10cSrcweir         break;
502*cdf0e10cSrcweir     }
503*cdf0e10cSrcweir 
504*cdf0e10cSrcweir     if ((nInfo & (RTL_UNICODETOTEXT_INFO_ERROR
505*cdf0e10cSrcweir                       | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
506*cdf0e10cSrcweir             == 0)
507*cdf0e10cSrcweir     {
508*cdf0e10cSrcweir         sal_Bool bFlush = sal_True;
509*cdf0e10cSrcweir         if (nHighSurrogate != 0)
510*cdf0e10cSrcweir         {
511*cdf0e10cSrcweir             if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
512*cdf0e10cSrcweir                 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
513*cdf0e10cSrcweir             else
514*cdf0e10cSrcweir                 switch (ImplHandleBadInputUnicodeToTextConversion(
515*cdf0e10cSrcweir                             sal_False,
516*cdf0e10cSrcweir                             0,
517*cdf0e10cSrcweir                             nFlags,
518*cdf0e10cSrcweir                             &pDestBufPtr,
519*cdf0e10cSrcweir                             pDestBufEnd,
520*cdf0e10cSrcweir                             &nInfo,
521*cdf0e10cSrcweir                             "\x1B(B",
522*cdf0e10cSrcweir                             b0208 ? 3 : 0,
523*cdf0e10cSrcweir                             &bWritten))
524*cdf0e10cSrcweir                 {
525*cdf0e10cSrcweir                 case IMPL_BAD_INPUT_STOP:
526*cdf0e10cSrcweir                     nHighSurrogate = 0;
527*cdf0e10cSrcweir                     bFlush = sal_False;
528*cdf0e10cSrcweir                     break;
529*cdf0e10cSrcweir 
530*cdf0e10cSrcweir                 case IMPL_BAD_INPUT_CONTINUE:
531*cdf0e10cSrcweir                     if (bWritten)
532*cdf0e10cSrcweir                         b0208 = sal_False;
533*cdf0e10cSrcweir                     nHighSurrogate = 0;
534*cdf0e10cSrcweir                     break;
535*cdf0e10cSrcweir 
536*cdf0e10cSrcweir                 case IMPL_BAD_INPUT_NO_OUTPUT:
537*cdf0e10cSrcweir                     nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
538*cdf0e10cSrcweir                     break;
539*cdf0e10cSrcweir                 }
540*cdf0e10cSrcweir         }
541*cdf0e10cSrcweir         if (bFlush
542*cdf0e10cSrcweir             && b0208
543*cdf0e10cSrcweir             && (nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0)
544*cdf0e10cSrcweir         {
545*cdf0e10cSrcweir             if (pDestBufEnd - pDestBufPtr >= 3)
546*cdf0e10cSrcweir             {
547*cdf0e10cSrcweir                 *pDestBufPtr++ = 0x1B; /* ESC */
548*cdf0e10cSrcweir                 *pDestBufPtr++ = 0x28; /* ( */
549*cdf0e10cSrcweir                 *pDestBufPtr++ = 0x42; /* B */
550*cdf0e10cSrcweir                 b0208 = sal_False;
551*cdf0e10cSrcweir             }
552*cdf0e10cSrcweir             else
553*cdf0e10cSrcweir                 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
554*cdf0e10cSrcweir         }
555*cdf0e10cSrcweir     }
556*cdf0e10cSrcweir 
557*cdf0e10cSrcweir     if (pContext)
558*cdf0e10cSrcweir     {
559*cdf0e10cSrcweir         ((ImplUnicodeToIso2022JpContext *) pContext)->m_nHighSurrogate
560*cdf0e10cSrcweir             = nHighSurrogate;
561*cdf0e10cSrcweir         ((ImplUnicodeToIso2022JpContext *) pContext)->m_b0208 = b0208;
562*cdf0e10cSrcweir     }
563*cdf0e10cSrcweir     if (pInfo)
564*cdf0e10cSrcweir         *pInfo = nInfo;
565*cdf0e10cSrcweir     if (pSrcCvtChars)
566*cdf0e10cSrcweir         *pSrcCvtChars = nConverted;
567*cdf0e10cSrcweir 
568*cdf0e10cSrcweir     return pDestBufPtr - pDestBuf;
569*cdf0e10cSrcweir }
570