1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #include "converteuctw.h" 29 #include "context.h" 30 #include "converter.h" 31 #include "tenchelp.h" 32 #include "unichars.h" 33 #include "rtl/alloc.h" 34 #include "rtl/textcvt.h" 35 #include "sal/types.h" 36 37 typedef enum 38 { 39 IMPL_EUC_TW_TO_UNICODE_STATE_0, 40 IMPL_EUC_TW_TO_UNICODE_STATE_1, 41 IMPL_EUC_TW_TO_UNICODE_STATE_2_1, 42 IMPL_EUC_TW_TO_UNICODE_STATE_2_2, 43 IMPL_EUC_TW_TO_UNICODE_STATE_2_3 44 } ImplEucTwToUnicodeState; 45 46 typedef struct 47 { 48 ImplEucTwToUnicodeState m_eState; 49 sal_Int32 m_nPlane; /* 0--15 */ 50 sal_Int32 m_nRow; /* 0--93 */ 51 } ImplEucTwToUnicodeContext; 52 53 void * ImplCreateEucTwToUnicodeContext(void) 54 { 55 void * pContext = rtl_allocateMemory(sizeof (ImplEucTwToUnicodeContext)); 56 ((ImplEucTwToUnicodeContext *) pContext)->m_eState 57 = IMPL_EUC_TW_TO_UNICODE_STATE_0; 58 return pContext; 59 } 60 61 void ImplResetEucTwToUnicodeContext(void * pContext) 62 { 63 if (pContext) 64 ((ImplEucTwToUnicodeContext *) pContext)->m_eState 65 = IMPL_EUC_TW_TO_UNICODE_STATE_0; 66 } 67 68 sal_Size ImplConvertEucTwToUnicode(ImplTextConverterData const * pData, 69 void * pContext, 70 sal_Char const * pSrcBuf, 71 sal_Size nSrcBytes, 72 sal_Unicode * pDestBuf, 73 sal_Size nDestChars, 74 sal_uInt32 nFlags, 75 sal_uInt32 * pInfo, 76 sal_Size * pSrcCvtBytes) 77 { 78 sal_uInt16 const * pCns116431992Data 79 = ((ImplEucTwConverterData const *) pData)-> 80 m_pCns116431992ToUnicodeData; 81 sal_Int32 const * pCns116431992RowOffsets 82 = ((ImplEucTwConverterData const *) pData)-> 83 m_pCns116431992ToUnicodeRowOffsets; 84 sal_Int32 const * pCns116431992PlaneOffsets 85 = ((ImplEucTwConverterData const *) pData)-> 86 m_pCns116431992ToUnicodePlaneOffsets; 87 ImplEucTwToUnicodeState eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; 88 sal_Int32 nPlane = 0; 89 sal_Int32 nRow = 0; 90 sal_uInt32 nInfo = 0; 91 sal_Size nConverted = 0; 92 sal_Unicode * pDestBufPtr = pDestBuf; 93 sal_Unicode * pDestBufEnd = pDestBuf + nDestChars; 94 95 if (pContext) 96 { 97 eState = ((ImplEucTwToUnicodeContext *) pContext)->m_eState; 98 nPlane = ((ImplEucTwToUnicodeContext *) pContext)->m_nPlane; 99 nRow = ((ImplEucTwToUnicodeContext *) pContext)->m_nRow; 100 } 101 102 for (; nConverted < nSrcBytes; ++nConverted) 103 { 104 sal_Bool bUndefined = sal_True; 105 sal_uInt32 nChar = *(sal_uChar const *) pSrcBuf++; 106 switch (eState) 107 { 108 case IMPL_EUC_TW_TO_UNICODE_STATE_0: 109 if (nChar < 0x80) 110 if (pDestBufPtr != pDestBufEnd) 111 *pDestBufPtr++ = (sal_Unicode) nChar; 112 else 113 goto no_output; 114 else if (nChar >= 0xA1 && nChar <= 0xFE) 115 { 116 nRow = nChar - 0xA1; 117 eState = IMPL_EUC_TW_TO_UNICODE_STATE_1; 118 } 119 else if (nChar == 0x8E) 120 eState = IMPL_EUC_TW_TO_UNICODE_STATE_2_1; 121 else 122 { 123 bUndefined = sal_False; 124 goto bad_input; 125 } 126 break; 127 128 case IMPL_EUC_TW_TO_UNICODE_STATE_1: 129 if (nChar >= 0xA1 && nChar <= 0xFE) 130 { 131 nPlane = 0; 132 goto transform; 133 } 134 else 135 { 136 bUndefined = sal_False; 137 goto bad_input; 138 } 139 break; 140 141 case IMPL_EUC_TW_TO_UNICODE_STATE_2_1: 142 if (nChar >= 0xA1 && nChar <= 0xB0) 143 { 144 nPlane = nChar - 0xA1; 145 ++eState; 146 } 147 else 148 { 149 bUndefined = sal_False; 150 goto bad_input; 151 } 152 break; 153 154 case IMPL_EUC_TW_TO_UNICODE_STATE_2_2: 155 if (nChar >= 0xA1 && nChar <= 0xFE) 156 { 157 nRow = nChar - 0xA1; 158 ++eState; 159 } 160 else 161 { 162 bUndefined = sal_False; 163 goto bad_input; 164 } 165 break; 166 167 case IMPL_EUC_TW_TO_UNICODE_STATE_2_3: 168 if (nChar >= 0xA1 && nChar <= 0xFE) 169 goto transform; 170 else 171 { 172 bUndefined = sal_False; 173 goto bad_input; 174 } 175 break; 176 } 177 continue; 178 179 transform: 180 { 181 sal_Int32 nPlaneOffset = pCns116431992PlaneOffsets[nPlane]; 182 if (nPlaneOffset == -1) 183 goto bad_input; 184 else 185 { 186 sal_Int32 nOffset 187 = pCns116431992RowOffsets[nPlaneOffset + nRow]; 188 if (nOffset == -1) 189 goto bad_input; 190 else 191 { 192 sal_uInt32 nFirstLast = pCns116431992Data[nOffset++]; 193 sal_uInt32 nFirst = nFirstLast & 0xFF; 194 sal_uInt32 nLast = nFirstLast >> 8; 195 nChar -= 0xA0; 196 if (nChar >= nFirst && nChar <= nLast) 197 { 198 sal_uInt32 nUnicode 199 = pCns116431992Data[nOffset + (nChar - nFirst)]; 200 if (nUnicode == 0xFFFF) 201 goto bad_input; 202 else if (ImplIsHighSurrogate(nUnicode)) 203 if (pDestBufEnd - pDestBufPtr >= 2) 204 { 205 nOffset += nLast - nFirst + 1; 206 nFirst = pCns116431992Data[nOffset++]; 207 *pDestBufPtr++ = (sal_Unicode) nUnicode; 208 *pDestBufPtr++ 209 = (sal_Unicode) 210 pCns116431992Data[ 211 nOffset + (nChar - nFirst)]; 212 } 213 else 214 goto no_output; 215 else 216 if (pDestBufPtr != pDestBufEnd) 217 *pDestBufPtr++ = (sal_Unicode) nUnicode; 218 else 219 goto no_output; 220 } 221 else 222 goto bad_input; 223 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; 224 } 225 } 226 continue; 227 } 228 229 bad_input: 230 switch (ImplHandleBadInputTextToUnicodeConversion( 231 bUndefined, sal_True, 0, nFlags, &pDestBufPtr, pDestBufEnd, 232 &nInfo)) 233 { 234 case IMPL_BAD_INPUT_STOP: 235 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; 236 break; 237 238 case IMPL_BAD_INPUT_CONTINUE: 239 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; 240 continue; 241 242 case IMPL_BAD_INPUT_NO_OUTPUT: 243 goto no_output; 244 } 245 break; 246 247 no_output: 248 --pSrcBuf; 249 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 250 break; 251 } 252 253 if (eState != IMPL_EUC_TW_TO_UNICODE_STATE_0 254 && (nInfo & (RTL_TEXTTOUNICODE_INFO_ERROR 255 | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)) 256 == 0) 257 { 258 if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) 259 nInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL; 260 else 261 switch (ImplHandleBadInputTextToUnicodeConversion( 262 sal_False, sal_True, 0, nFlags, &pDestBufPtr, 263 pDestBufEnd, &nInfo)) 264 { 265 case IMPL_BAD_INPUT_STOP: 266 case IMPL_BAD_INPUT_CONTINUE: 267 eState = IMPL_EUC_TW_TO_UNICODE_STATE_0; 268 break; 269 270 case IMPL_BAD_INPUT_NO_OUTPUT: 271 nInfo |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 272 break; 273 } 274 } 275 276 if (pContext) 277 { 278 ((ImplEucTwToUnicodeContext *) pContext)->m_eState = eState; 279 ((ImplEucTwToUnicodeContext *) pContext)->m_nPlane = nPlane; 280 ((ImplEucTwToUnicodeContext *) pContext)->m_nRow = nRow; 281 } 282 if (pInfo) 283 *pInfo = nInfo; 284 if (pSrcCvtBytes) 285 *pSrcCvtBytes = nConverted; 286 287 return pDestBufPtr - pDestBuf; 288 } 289 290 sal_Size ImplConvertUnicodeToEucTw(ImplTextConverterData const * pData, 291 void * pContext, 292 sal_Unicode const * pSrcBuf, 293 sal_Size nSrcChars, 294 sal_Char * pDestBuf, 295 sal_Size nDestBytes, 296 sal_uInt32 nFlags, 297 sal_uInt32 * pInfo, 298 sal_Size * pSrcCvtChars) 299 { 300 sal_uInt8 const * pCns116431992Data 301 = ((ImplEucTwConverterData const *) pData)-> 302 m_pUnicodeToCns116431992Data; 303 sal_Int32 const * pCns116431992PageOffsets 304 = ((ImplEucTwConverterData const *) pData)-> 305 m_pUnicodeToCns116431992PageOffsets; 306 sal_Int32 const * pCns116431992PlaneOffsets 307 = ((ImplEucTwConverterData const *) pData)-> 308 m_pUnicodeToCns116431992PlaneOffsets; 309 sal_Unicode nHighSurrogate = 0; 310 sal_uInt32 nInfo = 0; 311 sal_Size nConverted = 0; 312 sal_Char * pDestBufPtr = pDestBuf; 313 sal_Char * pDestBufEnd = pDestBuf + nDestBytes; 314 315 if (pContext) 316 nHighSurrogate 317 = ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate; 318 319 for (; nConverted < nSrcChars; ++nConverted) 320 { 321 sal_Bool bUndefined = sal_True; 322 sal_uInt32 nChar = *pSrcBuf++; 323 if (nHighSurrogate == 0) 324 { 325 if (ImplIsHighSurrogate(nChar)) 326 { 327 nHighSurrogate = (sal_Unicode) nChar; 328 continue; 329 } 330 } 331 else if (ImplIsLowSurrogate(nChar)) 332 nChar = ImplCombineSurrogates(nHighSurrogate, nChar); 333 else 334 { 335 bUndefined = sal_False; 336 goto bad_input; 337 } 338 339 if (ImplIsLowSurrogate(nChar) || ImplIsNoncharacter(nChar)) 340 { 341 bUndefined = sal_False; 342 goto bad_input; 343 } 344 345 if (nChar < 0x80) 346 if (pDestBufPtr != pDestBufEnd) 347 *pDestBufPtr++ = (sal_Char) nChar; 348 else 349 goto no_output; 350 else 351 { 352 sal_Int32 nOffset = pCns116431992PlaneOffsets[nChar >> 16]; 353 sal_uInt32 nFirst; 354 sal_uInt32 nLast; 355 sal_uInt32 nPlane; 356 if (nOffset == -1) 357 goto bad_input; 358 nOffset 359 = pCns116431992PageOffsets[nOffset + ((nChar & 0xFF00) >> 8)]; 360 if (nOffset == -1) 361 goto bad_input; 362 nFirst = pCns116431992Data[nOffset++]; 363 nLast = pCns116431992Data[nOffset++]; 364 nChar &= 0xFF; 365 if (nChar < nFirst || nChar > nLast) 366 goto bad_input; 367 nOffset += 3 * (nChar - nFirst); 368 nPlane = pCns116431992Data[nOffset++]; 369 if (nPlane == 0) 370 goto bad_input; 371 if (pDestBufEnd - pDestBufPtr < (nPlane == 1 ? 2 : 4)) 372 goto no_output; 373 if (nPlane != 1) 374 { 375 *pDestBufPtr++ = (sal_Char) (unsigned char) 0x8E; 376 *pDestBufPtr++ = (sal_Char) (0xA0 + nPlane); 377 } 378 *pDestBufPtr++ = (sal_Char) (0xA0 + pCns116431992Data[nOffset++]); 379 *pDestBufPtr++ = (sal_Char) (0xA0 + pCns116431992Data[nOffset]); 380 } 381 nHighSurrogate = 0; 382 continue; 383 384 bad_input: 385 switch (ImplHandleBadInputUnicodeToTextConversion(bUndefined, 386 nChar, 387 nFlags, 388 &pDestBufPtr, 389 pDestBufEnd, 390 &nInfo, 391 NULL, 392 0, 393 NULL)) 394 { 395 case IMPL_BAD_INPUT_STOP: 396 nHighSurrogate = 0; 397 break; 398 399 case IMPL_BAD_INPUT_CONTINUE: 400 nHighSurrogate = 0; 401 continue; 402 403 case IMPL_BAD_INPUT_NO_OUTPUT: 404 goto no_output; 405 } 406 break; 407 408 no_output: 409 --pSrcBuf; 410 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 411 break; 412 } 413 414 if (nHighSurrogate != 0 415 && (nInfo & (RTL_UNICODETOTEXT_INFO_ERROR 416 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) 417 == 0) 418 { 419 if ((nFlags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) 420 nInfo |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; 421 else 422 switch (ImplHandleBadInputUnicodeToTextConversion(sal_False, 423 0, 424 nFlags, 425 &pDestBufPtr, 426 pDestBufEnd, 427 &nInfo, 428 NULL, 429 0, 430 NULL)) 431 { 432 case IMPL_BAD_INPUT_STOP: 433 case IMPL_BAD_INPUT_CONTINUE: 434 nHighSurrogate = 0; 435 break; 436 437 case IMPL_BAD_INPUT_NO_OUTPUT: 438 nInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 439 break; 440 } 441 } 442 443 if (pContext) 444 ((ImplUnicodeToTextContext *) pContext)->m_nHighSurrogate 445 = nHighSurrogate; 446 if (pInfo) 447 *pInfo = nInfo; 448 if (pSrcCvtChars) 449 *pSrcCvtChars = nConverted; 450 451 return pDestBufPtr - pDestBuf; 452 } 453