1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir #include "tenchelp.h" 29*cdf0e10cSrcweir #include "unichars.h" 30*cdf0e10cSrcweir #include "rtl/textcvt.h" 31*cdf0e10cSrcweir #include "sal/types.h" 32*cdf0e10cSrcweir 33*cdf0e10cSrcweir static sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, 34*cdf0e10cSrcweir sal_Char * pBuf, 35*cdf0e10cSrcweir sal_Size nMaxLen); 36*cdf0e10cSrcweir 37*cdf0e10cSrcweir static sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, 38*cdf0e10cSrcweir sal_Char * pBuf, 39*cdf0e10cSrcweir sal_Size nMaxLen); 40*cdf0e10cSrcweir 41*cdf0e10cSrcweir static int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags); 42*cdf0e10cSrcweir 43*cdf0e10cSrcweir sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags, 44*cdf0e10cSrcweir sal_Char * pBuf, 45*cdf0e10cSrcweir sal_Size nMaxLen) 46*cdf0e10cSrcweir { 47*cdf0e10cSrcweir if (nMaxLen == 0) 48*cdf0e10cSrcweir return sal_False; 49*cdf0e10cSrcweir switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) 50*cdf0e10cSrcweir { 51*cdf0e10cSrcweir case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0: 52*cdf0e10cSrcweir *pBuf = 0x00; 53*cdf0e10cSrcweir break; 54*cdf0e10cSrcweir 55*cdf0e10cSrcweir case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK: 56*cdf0e10cSrcweir default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */ 57*cdf0e10cSrcweir *pBuf = 0x3F; 58*cdf0e10cSrcweir break; 59*cdf0e10cSrcweir 60*cdf0e10cSrcweir case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE: 61*cdf0e10cSrcweir *pBuf = 0x5F; 62*cdf0e10cSrcweir break; 63*cdf0e10cSrcweir } 64*cdf0e10cSrcweir return sal_True; 65*cdf0e10cSrcweir } 66*cdf0e10cSrcweir 67*cdf0e10cSrcweir sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags, 68*cdf0e10cSrcweir sal_Char * pBuf, 69*cdf0e10cSrcweir sal_Size nMaxLen) 70*cdf0e10cSrcweir { 71*cdf0e10cSrcweir if (nMaxLen == 0) 72*cdf0e10cSrcweir return sal_False; 73*cdf0e10cSrcweir switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) 74*cdf0e10cSrcweir { 75*cdf0e10cSrcweir case RTL_UNICODETOTEXT_FLAGS_INVALID_0: 76*cdf0e10cSrcweir *pBuf = 0x00; 77*cdf0e10cSrcweir break; 78*cdf0e10cSrcweir 79*cdf0e10cSrcweir case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK: 80*cdf0e10cSrcweir default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */ 81*cdf0e10cSrcweir *pBuf = 0x3F; 82*cdf0e10cSrcweir break; 83*cdf0e10cSrcweir 84*cdf0e10cSrcweir case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE: 85*cdf0e10cSrcweir *pBuf = 0x5F; 86*cdf0e10cSrcweir break; 87*cdf0e10cSrcweir } 88*cdf0e10cSrcweir return sal_True; 89*cdf0e10cSrcweir } 90*cdf0e10cSrcweir 91*cdf0e10cSrcweir int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags ) 92*cdf0e10cSrcweir { 93*cdf0e10cSrcweir return 94*cdf0e10cSrcweir ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0 95*cdf0e10cSrcweir && ImplIsZeroWidth(c)) 96*cdf0e10cSrcweir || ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0 97*cdf0e10cSrcweir && ImplIsControlOrFormat(c)) 98*cdf0e10cSrcweir || ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0 99*cdf0e10cSrcweir && ImplIsPrivateUse(c)); 100*cdf0e10cSrcweir } 101*cdf0e10cSrcweir 102*cdf0e10cSrcweir /* ======================================================================= */ 103*cdf0e10cSrcweir 104*cdf0e10cSrcweir sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags) 105*cdf0e10cSrcweir { 106*cdf0e10cSrcweir return ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) 107*cdf0e10cSrcweir == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE) ? 108*cdf0e10cSrcweir RTL_TEXTCVT_BYTE_PRIVATE_START + cChar : 109*cdf0e10cSrcweir RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER; 110*cdf0e10cSrcweir } 111*cdf0e10cSrcweir 112*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */ 113*cdf0e10cSrcweir 114*cdf0e10cSrcweir sal_Bool 115*cdf0e10cSrcweir ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData, 116*cdf0e10cSrcweir sal_Unicode const ** ppSrcBuf, 117*cdf0e10cSrcweir sal_Unicode const * pEndSrcBuf, 118*cdf0e10cSrcweir sal_Char ** ppDestBuf, 119*cdf0e10cSrcweir sal_Char const * pEndDestBuf, 120*cdf0e10cSrcweir sal_uInt32 nFlags, 121*cdf0e10cSrcweir sal_uInt32 * pInfo) 122*cdf0e10cSrcweir { 123*cdf0e10cSrcweir sal_Unicode c = **ppSrcBuf; 124*cdf0e10cSrcweir 125*cdf0e10cSrcweir (void) pData; /* unused */ 126*cdf0e10cSrcweir 127*cdf0e10cSrcweir /* Should the private character map to one byte */ 128*cdf0e10cSrcweir if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) ) 129*cdf0e10cSrcweir { 130*cdf0e10cSrcweir if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 ) 131*cdf0e10cSrcweir { 132*cdf0e10cSrcweir **ppDestBuf = (sal_Char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START); 133*cdf0e10cSrcweir (*ppDestBuf)++; 134*cdf0e10cSrcweir (*ppSrcBuf)++; 135*cdf0e10cSrcweir return sal_True; 136*cdf0e10cSrcweir } 137*cdf0e10cSrcweir } 138*cdf0e10cSrcweir 139*cdf0e10cSrcweir /* Should this character ignored (Private, Non Spacing, Control) */ 140*cdf0e10cSrcweir if ( ImplIsUnicodeIgnoreChar( c, nFlags ) ) 141*cdf0e10cSrcweir { 142*cdf0e10cSrcweir (*ppSrcBuf)++; 143*cdf0e10cSrcweir return sal_True; 144*cdf0e10cSrcweir } 145*cdf0e10cSrcweir 146*cdf0e10cSrcweir /* Surrogates Characters should result in */ 147*cdf0e10cSrcweir /* one replacement character */ 148*cdf0e10cSrcweir if (ImplIsHighSurrogate(c)) 149*cdf0e10cSrcweir { 150*cdf0e10cSrcweir if ( *ppSrcBuf == pEndSrcBuf ) 151*cdf0e10cSrcweir { 152*cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; 153*cdf0e10cSrcweir return sal_False; 154*cdf0e10cSrcweir } 155*cdf0e10cSrcweir 156*cdf0e10cSrcweir c = *((*ppSrcBuf)+1); 157*cdf0e10cSrcweir if (ImplIsLowSurrogate(c)) 158*cdf0e10cSrcweir (*ppSrcBuf)++; 159*cdf0e10cSrcweir else 160*cdf0e10cSrcweir { 161*cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID; 162*cdf0e10cSrcweir if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR ) 163*cdf0e10cSrcweir { 164*cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; 165*cdf0e10cSrcweir return sal_False; 166*cdf0e10cSrcweir } 167*cdf0e10cSrcweir else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE ) 168*cdf0e10cSrcweir { 169*cdf0e10cSrcweir (*ppSrcBuf)++; 170*cdf0e10cSrcweir return sal_True; 171*cdf0e10cSrcweir } 172*cdf0e10cSrcweir else if (ImplGetInvalidAsciiMultiByte(nFlags, 173*cdf0e10cSrcweir *ppDestBuf, 174*cdf0e10cSrcweir pEndDestBuf - *ppDestBuf)) 175*cdf0e10cSrcweir { 176*cdf0e10cSrcweir ++*ppSrcBuf; 177*cdf0e10cSrcweir ++*ppDestBuf; 178*cdf0e10cSrcweir return sal_True; 179*cdf0e10cSrcweir } 180*cdf0e10cSrcweir else 181*cdf0e10cSrcweir { 182*cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR 183*cdf0e10cSrcweir | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 184*cdf0e10cSrcweir return sal_False; 185*cdf0e10cSrcweir } 186*cdf0e10cSrcweir } 187*cdf0e10cSrcweir } 188*cdf0e10cSrcweir 189*cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED; 190*cdf0e10cSrcweir if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR ) 191*cdf0e10cSrcweir { 192*cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR; 193*cdf0e10cSrcweir return sal_False; 194*cdf0e10cSrcweir } 195*cdf0e10cSrcweir else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE ) 196*cdf0e10cSrcweir (*ppSrcBuf)++; 197*cdf0e10cSrcweir else if (ImplGetUndefinedAsciiMultiByte(nFlags, 198*cdf0e10cSrcweir *ppDestBuf, 199*cdf0e10cSrcweir pEndDestBuf - *ppDestBuf)) 200*cdf0e10cSrcweir { 201*cdf0e10cSrcweir ++*ppSrcBuf; 202*cdf0e10cSrcweir ++*ppDestBuf; 203*cdf0e10cSrcweir } 204*cdf0e10cSrcweir else 205*cdf0e10cSrcweir { 206*cdf0e10cSrcweir *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR 207*cdf0e10cSrcweir | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 208*cdf0e10cSrcweir return sal_False; 209*cdf0e10cSrcweir } 210*cdf0e10cSrcweir 211*cdf0e10cSrcweir return sal_True; 212*cdf0e10cSrcweir } 213*cdf0e10cSrcweir 214