1*cdf0e10cSrcweir /************************************************************************* 2*cdf0e10cSrcweir * 3*cdf0e10cSrcweir * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4*cdf0e10cSrcweir * 5*cdf0e10cSrcweir * Copyright 2000, 2010 Oracle and/or its affiliates. 6*cdf0e10cSrcweir * 7*cdf0e10cSrcweir * OpenOffice.org - a multi-platform office productivity suite 8*cdf0e10cSrcweir * 9*cdf0e10cSrcweir * This file is part of OpenOffice.org. 10*cdf0e10cSrcweir * 11*cdf0e10cSrcweir * OpenOffice.org is free software: you can redistribute it and/or modify 12*cdf0e10cSrcweir * it under the terms of the GNU Lesser General Public License version 3 13*cdf0e10cSrcweir * only, as published by the Free Software Foundation. 14*cdf0e10cSrcweir * 15*cdf0e10cSrcweir * OpenOffice.org is distributed in the hope that it will be useful, 16*cdf0e10cSrcweir * but WITHOUT ANY WARRANTY; without even the implied warranty of 17*cdf0e10cSrcweir * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18*cdf0e10cSrcweir * GNU Lesser General Public License version 3 for more details 19*cdf0e10cSrcweir * (a copy is included in the LICENSE file that accompanied this code). 20*cdf0e10cSrcweir * 21*cdf0e10cSrcweir * You should have received a copy of the GNU Lesser General Public License 22*cdf0e10cSrcweir * version 3 along with OpenOffice.org. If not, see 23*cdf0e10cSrcweir * <http://www.openoffice.org/license.html> 24*cdf0e10cSrcweir * for a copy of the LGPLv3 License. 25*cdf0e10cSrcweir * 26*cdf0e10cSrcweir ************************************************************************/ 27*cdf0e10cSrcweir 28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove 29*cdf0e10cSrcweir #include "precompiled_sal.hxx" 30*cdf0e10cSrcweir 31*cdf0e10cSrcweir #include "context.h" 32*cdf0e10cSrcweir #include "converter.h" 33*cdf0e10cSrcweir #include "convertsinglebytetobmpunicode.hxx" 34*cdf0e10cSrcweir #include "unichars.h" 35*cdf0e10cSrcweir 36*cdf0e10cSrcweir #include "osl/diagnose.h" 37*cdf0e10cSrcweir #include "rtl/textcvt.h" 38*cdf0e10cSrcweir #include "sal/types.h" 39*cdf0e10cSrcweir 40*cdf0e10cSrcweir #include <cstddef> 41*cdf0e10cSrcweir 42*cdf0e10cSrcweir sal_Size rtl_textenc_convertSingleByteToBmpUnicode( 43*cdf0e10cSrcweir ImplTextConverterData const * data, void *, sal_Char const * srcBuf, 44*cdf0e10cSrcweir sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars, 45*cdf0e10cSrcweir sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes) 46*cdf0e10cSrcweir { 47*cdf0e10cSrcweir sal_Unicode const * map = static_cast< 48*cdf0e10cSrcweir rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( 49*cdf0e10cSrcweir data)->byteToUnicode; 50*cdf0e10cSrcweir sal_uInt32 infoFlags = 0; 51*cdf0e10cSrcweir sal_Size converted = 0; 52*cdf0e10cSrcweir sal_Unicode * destBufPtr = destBuf; 53*cdf0e10cSrcweir sal_Unicode * destBufEnd = destBuf + destChars; 54*cdf0e10cSrcweir for (; converted < srcBytes; ++converted) { 55*cdf0e10cSrcweir bool undefined = true; 56*cdf0e10cSrcweir sal_Char b = *srcBuf++; 57*cdf0e10cSrcweir sal_Unicode c = map[static_cast< sal_uInt8 >(b)]; 58*cdf0e10cSrcweir if (c == 0xFFFF) { 59*cdf0e10cSrcweir goto bad_input; 60*cdf0e10cSrcweir } 61*cdf0e10cSrcweir if (destBufEnd - destBufPtr < 1) { 62*cdf0e10cSrcweir goto no_output; 63*cdf0e10cSrcweir } 64*cdf0e10cSrcweir *destBufPtr++ = c; 65*cdf0e10cSrcweir continue; 66*cdf0e10cSrcweir bad_input: 67*cdf0e10cSrcweir switch (ImplHandleBadInputTextToUnicodeConversion( 68*cdf0e10cSrcweir undefined, false, b, flags, &destBufPtr, destBufEnd, 69*cdf0e10cSrcweir &infoFlags)) 70*cdf0e10cSrcweir { 71*cdf0e10cSrcweir case IMPL_BAD_INPUT_STOP: 72*cdf0e10cSrcweir break; 73*cdf0e10cSrcweir 74*cdf0e10cSrcweir case IMPL_BAD_INPUT_CONTINUE: 75*cdf0e10cSrcweir continue; 76*cdf0e10cSrcweir 77*cdf0e10cSrcweir case IMPL_BAD_INPUT_NO_OUTPUT: 78*cdf0e10cSrcweir goto no_output; 79*cdf0e10cSrcweir } 80*cdf0e10cSrcweir break; 81*cdf0e10cSrcweir no_output: 82*cdf0e10cSrcweir --srcBuf; 83*cdf0e10cSrcweir infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL; 84*cdf0e10cSrcweir break; 85*cdf0e10cSrcweir } 86*cdf0e10cSrcweir if (info != 0) { 87*cdf0e10cSrcweir *info = infoFlags; 88*cdf0e10cSrcweir } 89*cdf0e10cSrcweir if (srcCvtBytes != 0) { 90*cdf0e10cSrcweir *srcCvtBytes = converted; 91*cdf0e10cSrcweir } 92*cdf0e10cSrcweir return destBufPtr - destBuf; 93*cdf0e10cSrcweir } 94*cdf0e10cSrcweir 95*cdf0e10cSrcweir sal_Size rtl_textenc_convertBmpUnicodeToSingleByte( 96*cdf0e10cSrcweir ImplTextConverterData const * data, void * context, 97*cdf0e10cSrcweir sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf, 98*cdf0e10cSrcweir sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info, 99*cdf0e10cSrcweir sal_Size * srcCvtChars) 100*cdf0e10cSrcweir { 101*cdf0e10cSrcweir std::size_t entries = static_cast< 102*cdf0e10cSrcweir rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( 103*cdf0e10cSrcweir data)->unicodeToByteEntries; 104*cdf0e10cSrcweir rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast< 105*cdf0e10cSrcweir rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( 106*cdf0e10cSrcweir data)->unicodeToByte; 107*cdf0e10cSrcweir sal_Unicode highSurrogate = 0; 108*cdf0e10cSrcweir sal_uInt32 infoFlags = 0; 109*cdf0e10cSrcweir sal_Size converted = 0; 110*cdf0e10cSrcweir sal_Char * destBufPtr = destBuf; 111*cdf0e10cSrcweir sal_Char * destBufEnd = destBuf + destBytes; 112*cdf0e10cSrcweir if (context != 0) { 113*cdf0e10cSrcweir highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)-> 114*cdf0e10cSrcweir m_nHighSurrogate; 115*cdf0e10cSrcweir } 116*cdf0e10cSrcweir for (; converted < srcChars; ++converted) { 117*cdf0e10cSrcweir bool undefined = true; 118*cdf0e10cSrcweir sal_uInt32 c = *srcBuf++; 119*cdf0e10cSrcweir if (highSurrogate == 0) { 120*cdf0e10cSrcweir if (ImplIsHighSurrogate(c)) { 121*cdf0e10cSrcweir highSurrogate = static_cast< sal_Unicode >(c); 122*cdf0e10cSrcweir continue; 123*cdf0e10cSrcweir } 124*cdf0e10cSrcweir } else if (ImplIsLowSurrogate(c)) { 125*cdf0e10cSrcweir c = ImplCombineSurrogates(highSurrogate, c); 126*cdf0e10cSrcweir } else { 127*cdf0e10cSrcweir undefined = false; 128*cdf0e10cSrcweir goto bad_input; 129*cdf0e10cSrcweir } 130*cdf0e10cSrcweir if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c)) { 131*cdf0e10cSrcweir undefined = false; 132*cdf0e10cSrcweir goto bad_input; 133*cdf0e10cSrcweir } 134*cdf0e10cSrcweir // Linearly searching through the ranges if probably fastest, assuming 135*cdf0e10cSrcweir // that most converted characters belong to the ASCII subset: 136*cdf0e10cSrcweir for (std::size_t i = 0; i < entries; ++i) { 137*cdf0e10cSrcweir if (c < ranges[i].unicode) { 138*cdf0e10cSrcweir break; 139*cdf0e10cSrcweir } else if (c <= sal::static_int_cast< sal_uInt32 >( 140*cdf0e10cSrcweir ranges[i].unicode + ranges[i].range)) 141*cdf0e10cSrcweir { 142*cdf0e10cSrcweir if (destBufEnd - destBufPtr < 1) { 143*cdf0e10cSrcweir goto no_output; 144*cdf0e10cSrcweir } 145*cdf0e10cSrcweir *destBufPtr++ = static_cast< sal_Char >( 146*cdf0e10cSrcweir ranges[i].byte + (c - ranges[i].unicode)); 147*cdf0e10cSrcweir goto done; 148*cdf0e10cSrcweir } 149*cdf0e10cSrcweir } 150*cdf0e10cSrcweir goto bad_input; 151*cdf0e10cSrcweir done: 152*cdf0e10cSrcweir highSurrogate = 0; 153*cdf0e10cSrcweir continue; 154*cdf0e10cSrcweir bad_input: 155*cdf0e10cSrcweir switch (ImplHandleBadInputUnicodeToTextConversion( 156*cdf0e10cSrcweir undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, 0, 157*cdf0e10cSrcweir 0, 0)) 158*cdf0e10cSrcweir { 159*cdf0e10cSrcweir case IMPL_BAD_INPUT_STOP: 160*cdf0e10cSrcweir highSurrogate = 0; 161*cdf0e10cSrcweir break; 162*cdf0e10cSrcweir 163*cdf0e10cSrcweir case IMPL_BAD_INPUT_CONTINUE: 164*cdf0e10cSrcweir highSurrogate = 0; 165*cdf0e10cSrcweir continue; 166*cdf0e10cSrcweir 167*cdf0e10cSrcweir case IMPL_BAD_INPUT_NO_OUTPUT: 168*cdf0e10cSrcweir goto no_output; 169*cdf0e10cSrcweir } 170*cdf0e10cSrcweir break; 171*cdf0e10cSrcweir no_output: 172*cdf0e10cSrcweir --srcBuf; 173*cdf0e10cSrcweir infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 174*cdf0e10cSrcweir break; 175*cdf0e10cSrcweir } 176*cdf0e10cSrcweir if (highSurrogate != 0 177*cdf0e10cSrcweir && ((infoFlags 178*cdf0e10cSrcweir & (RTL_UNICODETOTEXT_INFO_ERROR 179*cdf0e10cSrcweir | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) 180*cdf0e10cSrcweir == 0)) 181*cdf0e10cSrcweir { 182*cdf0e10cSrcweir if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) { 183*cdf0e10cSrcweir infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; 184*cdf0e10cSrcweir } else { 185*cdf0e10cSrcweir switch (ImplHandleBadInputUnicodeToTextConversion( 186*cdf0e10cSrcweir false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, 0, 187*cdf0e10cSrcweir 0, 0)) 188*cdf0e10cSrcweir { 189*cdf0e10cSrcweir case IMPL_BAD_INPUT_STOP: 190*cdf0e10cSrcweir case IMPL_BAD_INPUT_CONTINUE: 191*cdf0e10cSrcweir highSurrogate = 0; 192*cdf0e10cSrcweir break; 193*cdf0e10cSrcweir 194*cdf0e10cSrcweir case IMPL_BAD_INPUT_NO_OUTPUT: 195*cdf0e10cSrcweir infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; 196*cdf0e10cSrcweir break; 197*cdf0e10cSrcweir } 198*cdf0e10cSrcweir } 199*cdf0e10cSrcweir } 200*cdf0e10cSrcweir if (context != 0) { 201*cdf0e10cSrcweir static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate 202*cdf0e10cSrcweir = highSurrogate; 203*cdf0e10cSrcweir } 204*cdf0e10cSrcweir if (info != 0) { 205*cdf0e10cSrcweir *info = infoFlags; 206*cdf0e10cSrcweir } 207*cdf0e10cSrcweir if (srcCvtChars != 0) { 208*cdf0e10cSrcweir *srcCvtChars = converted; 209*cdf0e10cSrcweir } 210*cdf0e10cSrcweir return destBufPtr - destBuf; 211*cdf0e10cSrcweir } 212