1*b1cdbd2cSJim Jagielski /**************************************************************
2*b1cdbd2cSJim Jagielski *
3*b1cdbd2cSJim Jagielski * Licensed to the Apache Software Foundation (ASF) under one
4*b1cdbd2cSJim Jagielski * or more contributor license agreements. See the NOTICE file
5*b1cdbd2cSJim Jagielski * distributed with this work for additional information
6*b1cdbd2cSJim Jagielski * regarding copyright ownership. The ASF licenses this file
7*b1cdbd2cSJim Jagielski * to you under the Apache License, Version 2.0 (the
8*b1cdbd2cSJim Jagielski * "License"); you may not use this file except in compliance
9*b1cdbd2cSJim Jagielski * with the License. You may obtain a copy of the License at
10*b1cdbd2cSJim Jagielski *
11*b1cdbd2cSJim Jagielski * http://www.apache.org/licenses/LICENSE-2.0
12*b1cdbd2cSJim Jagielski *
13*b1cdbd2cSJim Jagielski * Unless required by applicable law or agreed to in writing,
14*b1cdbd2cSJim Jagielski * software distributed under the License is distributed on an
15*b1cdbd2cSJim Jagielski * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*b1cdbd2cSJim Jagielski * KIND, either express or implied. See the License for the
17*b1cdbd2cSJim Jagielski * specific language governing permissions and limitations
18*b1cdbd2cSJim Jagielski * under the License.
19*b1cdbd2cSJim Jagielski *
20*b1cdbd2cSJim Jagielski *************************************************************/
21*b1cdbd2cSJim Jagielski
22*b1cdbd2cSJim Jagielski
23*b1cdbd2cSJim Jagielski
24*b1cdbd2cSJim Jagielski // MARKER(update_precomp.py): autogen include statement, do not remove
25*b1cdbd2cSJim Jagielski #include "precompiled_sal.hxx"
26*b1cdbd2cSJim Jagielski
27*b1cdbd2cSJim Jagielski #include "context.h"
28*b1cdbd2cSJim Jagielski #include "converter.h"
29*b1cdbd2cSJim Jagielski #include "convertsinglebytetobmpunicode.hxx"
30*b1cdbd2cSJim Jagielski #include "unichars.h"
31*b1cdbd2cSJim Jagielski
32*b1cdbd2cSJim Jagielski #include "osl/diagnose.h"
33*b1cdbd2cSJim Jagielski #include "rtl/textcvt.h"
34*b1cdbd2cSJim Jagielski #include "sal/types.h"
35*b1cdbd2cSJim Jagielski
36*b1cdbd2cSJim Jagielski #include <cstddef>
37*b1cdbd2cSJim Jagielski
rtl_textenc_convertSingleByteToBmpUnicode(ImplTextConverterData const * data,void *,sal_Char const * srcBuf,sal_Size srcBytes,sal_Unicode * destBuf,sal_Size destChars,sal_uInt32 flags,sal_uInt32 * info,sal_Size * srcCvtBytes)38*b1cdbd2cSJim Jagielski sal_Size rtl_textenc_convertSingleByteToBmpUnicode(
39*b1cdbd2cSJim Jagielski ImplTextConverterData const * data, void *, sal_Char const * srcBuf,
40*b1cdbd2cSJim Jagielski sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars,
41*b1cdbd2cSJim Jagielski sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes)
42*b1cdbd2cSJim Jagielski {
43*b1cdbd2cSJim Jagielski sal_Unicode const * map = static_cast<
44*b1cdbd2cSJim Jagielski rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
45*b1cdbd2cSJim Jagielski data)->byteToUnicode;
46*b1cdbd2cSJim Jagielski sal_uInt32 infoFlags = 0;
47*b1cdbd2cSJim Jagielski sal_Size converted = 0;
48*b1cdbd2cSJim Jagielski sal_Unicode * destBufPtr = destBuf;
49*b1cdbd2cSJim Jagielski sal_Unicode * destBufEnd = destBuf + destChars;
50*b1cdbd2cSJim Jagielski for (; converted < srcBytes; ++converted) {
51*b1cdbd2cSJim Jagielski bool undefined = true;
52*b1cdbd2cSJim Jagielski sal_Char b = *srcBuf++;
53*b1cdbd2cSJim Jagielski sal_Unicode c = map[static_cast< sal_uInt8 >(b)];
54*b1cdbd2cSJim Jagielski if (c == 0xFFFF) {
55*b1cdbd2cSJim Jagielski goto bad_input;
56*b1cdbd2cSJim Jagielski }
57*b1cdbd2cSJim Jagielski if (destBufEnd - destBufPtr < 1) {
58*b1cdbd2cSJim Jagielski goto no_output;
59*b1cdbd2cSJim Jagielski }
60*b1cdbd2cSJim Jagielski *destBufPtr++ = c;
61*b1cdbd2cSJim Jagielski continue;
62*b1cdbd2cSJim Jagielski bad_input:
63*b1cdbd2cSJim Jagielski switch (ImplHandleBadInputTextToUnicodeConversion(
64*b1cdbd2cSJim Jagielski undefined, false, b, flags, &destBufPtr, destBufEnd,
65*b1cdbd2cSJim Jagielski &infoFlags))
66*b1cdbd2cSJim Jagielski {
67*b1cdbd2cSJim Jagielski case IMPL_BAD_INPUT_STOP:
68*b1cdbd2cSJim Jagielski break;
69*b1cdbd2cSJim Jagielski
70*b1cdbd2cSJim Jagielski case IMPL_BAD_INPUT_CONTINUE:
71*b1cdbd2cSJim Jagielski continue;
72*b1cdbd2cSJim Jagielski
73*b1cdbd2cSJim Jagielski case IMPL_BAD_INPUT_NO_OUTPUT:
74*b1cdbd2cSJim Jagielski goto no_output;
75*b1cdbd2cSJim Jagielski }
76*b1cdbd2cSJim Jagielski break;
77*b1cdbd2cSJim Jagielski no_output:
78*b1cdbd2cSJim Jagielski --srcBuf;
79*b1cdbd2cSJim Jagielski infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
80*b1cdbd2cSJim Jagielski break;
81*b1cdbd2cSJim Jagielski }
82*b1cdbd2cSJim Jagielski if (info != 0) {
83*b1cdbd2cSJim Jagielski *info = infoFlags;
84*b1cdbd2cSJim Jagielski }
85*b1cdbd2cSJim Jagielski if (srcCvtBytes != 0) {
86*b1cdbd2cSJim Jagielski *srcCvtBytes = converted;
87*b1cdbd2cSJim Jagielski }
88*b1cdbd2cSJim Jagielski return destBufPtr - destBuf;
89*b1cdbd2cSJim Jagielski }
90*b1cdbd2cSJim Jagielski
rtl_textenc_convertBmpUnicodeToSingleByte(ImplTextConverterData const * data,void * context,sal_Unicode const * srcBuf,sal_Size srcChars,sal_Char * destBuf,sal_Size destBytes,sal_uInt32 flags,sal_uInt32 * info,sal_Size * srcCvtChars)91*b1cdbd2cSJim Jagielski sal_Size rtl_textenc_convertBmpUnicodeToSingleByte(
92*b1cdbd2cSJim Jagielski ImplTextConverterData const * data, void * context,
93*b1cdbd2cSJim Jagielski sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf,
94*b1cdbd2cSJim Jagielski sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info,
95*b1cdbd2cSJim Jagielski sal_Size * srcCvtChars)
96*b1cdbd2cSJim Jagielski {
97*b1cdbd2cSJim Jagielski std::size_t entries = static_cast<
98*b1cdbd2cSJim Jagielski rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
99*b1cdbd2cSJim Jagielski data)->unicodeToByteEntries;
100*b1cdbd2cSJim Jagielski rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast<
101*b1cdbd2cSJim Jagielski rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
102*b1cdbd2cSJim Jagielski data)->unicodeToByte;
103*b1cdbd2cSJim Jagielski sal_Unicode highSurrogate = 0;
104*b1cdbd2cSJim Jagielski sal_uInt32 infoFlags = 0;
105*b1cdbd2cSJim Jagielski sal_Size converted = 0;
106*b1cdbd2cSJim Jagielski sal_Char * destBufPtr = destBuf;
107*b1cdbd2cSJim Jagielski sal_Char * destBufEnd = destBuf + destBytes;
108*b1cdbd2cSJim Jagielski if (context != 0) {
109*b1cdbd2cSJim Jagielski highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)->
110*b1cdbd2cSJim Jagielski m_nHighSurrogate;
111*b1cdbd2cSJim Jagielski }
112*b1cdbd2cSJim Jagielski for (; converted < srcChars; ++converted) {
113*b1cdbd2cSJim Jagielski bool undefined = true;
114*b1cdbd2cSJim Jagielski sal_uInt32 c = *srcBuf++;
115*b1cdbd2cSJim Jagielski if (highSurrogate == 0) {
116*b1cdbd2cSJim Jagielski if (ImplIsHighSurrogate(c)) {
117*b1cdbd2cSJim Jagielski highSurrogate = static_cast< sal_Unicode >(c);
118*b1cdbd2cSJim Jagielski continue;
119*b1cdbd2cSJim Jagielski }
120*b1cdbd2cSJim Jagielski } else if (ImplIsLowSurrogate(c)) {
121*b1cdbd2cSJim Jagielski c = ImplCombineSurrogates(highSurrogate, c);
122*b1cdbd2cSJim Jagielski } else {
123*b1cdbd2cSJim Jagielski undefined = false;
124*b1cdbd2cSJim Jagielski goto bad_input;
125*b1cdbd2cSJim Jagielski }
126*b1cdbd2cSJim Jagielski if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c)) {
127*b1cdbd2cSJim Jagielski undefined = false;
128*b1cdbd2cSJim Jagielski goto bad_input;
129*b1cdbd2cSJim Jagielski }
130*b1cdbd2cSJim Jagielski // Linearly searching through the ranges if probably fastest, assuming
131*b1cdbd2cSJim Jagielski // that most converted characters belong to the ASCII subset:
132*b1cdbd2cSJim Jagielski for (std::size_t i = 0; i < entries; ++i) {
133*b1cdbd2cSJim Jagielski if (c < ranges[i].unicode) {
134*b1cdbd2cSJim Jagielski break;
135*b1cdbd2cSJim Jagielski } else if (c <= sal::static_int_cast< sal_uInt32 >(
136*b1cdbd2cSJim Jagielski ranges[i].unicode + ranges[i].range))
137*b1cdbd2cSJim Jagielski {
138*b1cdbd2cSJim Jagielski if (destBufEnd - destBufPtr < 1) {
139*b1cdbd2cSJim Jagielski goto no_output;
140*b1cdbd2cSJim Jagielski }
141*b1cdbd2cSJim Jagielski *destBufPtr++ = static_cast< sal_Char >(
142*b1cdbd2cSJim Jagielski ranges[i].byte + (c - ranges[i].unicode));
143*b1cdbd2cSJim Jagielski goto done;
144*b1cdbd2cSJim Jagielski }
145*b1cdbd2cSJim Jagielski }
146*b1cdbd2cSJim Jagielski goto bad_input;
147*b1cdbd2cSJim Jagielski done:
148*b1cdbd2cSJim Jagielski highSurrogate = 0;
149*b1cdbd2cSJim Jagielski continue;
150*b1cdbd2cSJim Jagielski bad_input:
151*b1cdbd2cSJim Jagielski switch (ImplHandleBadInputUnicodeToTextConversion(
152*b1cdbd2cSJim Jagielski undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
153*b1cdbd2cSJim Jagielski 0, 0))
154*b1cdbd2cSJim Jagielski {
155*b1cdbd2cSJim Jagielski case IMPL_BAD_INPUT_STOP:
156*b1cdbd2cSJim Jagielski highSurrogate = 0;
157*b1cdbd2cSJim Jagielski break;
158*b1cdbd2cSJim Jagielski
159*b1cdbd2cSJim Jagielski case IMPL_BAD_INPUT_CONTINUE:
160*b1cdbd2cSJim Jagielski highSurrogate = 0;
161*b1cdbd2cSJim Jagielski continue;
162*b1cdbd2cSJim Jagielski
163*b1cdbd2cSJim Jagielski case IMPL_BAD_INPUT_NO_OUTPUT:
164*b1cdbd2cSJim Jagielski goto no_output;
165*b1cdbd2cSJim Jagielski }
166*b1cdbd2cSJim Jagielski break;
167*b1cdbd2cSJim Jagielski no_output:
168*b1cdbd2cSJim Jagielski --srcBuf;
169*b1cdbd2cSJim Jagielski infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
170*b1cdbd2cSJim Jagielski break;
171*b1cdbd2cSJim Jagielski }
172*b1cdbd2cSJim Jagielski if (highSurrogate != 0
173*b1cdbd2cSJim Jagielski && ((infoFlags
174*b1cdbd2cSJim Jagielski & (RTL_UNICODETOTEXT_INFO_ERROR
175*b1cdbd2cSJim Jagielski | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
176*b1cdbd2cSJim Jagielski == 0))
177*b1cdbd2cSJim Jagielski {
178*b1cdbd2cSJim Jagielski if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) {
179*b1cdbd2cSJim Jagielski infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
180*b1cdbd2cSJim Jagielski } else {
181*b1cdbd2cSJim Jagielski switch (ImplHandleBadInputUnicodeToTextConversion(
182*b1cdbd2cSJim Jagielski false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
183*b1cdbd2cSJim Jagielski 0, 0))
184*b1cdbd2cSJim Jagielski {
185*b1cdbd2cSJim Jagielski case IMPL_BAD_INPUT_STOP:
186*b1cdbd2cSJim Jagielski case IMPL_BAD_INPUT_CONTINUE:
187*b1cdbd2cSJim Jagielski highSurrogate = 0;
188*b1cdbd2cSJim Jagielski break;
189*b1cdbd2cSJim Jagielski
190*b1cdbd2cSJim Jagielski case IMPL_BAD_INPUT_NO_OUTPUT:
191*b1cdbd2cSJim Jagielski infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
192*b1cdbd2cSJim Jagielski break;
193*b1cdbd2cSJim Jagielski }
194*b1cdbd2cSJim Jagielski }
195*b1cdbd2cSJim Jagielski }
196*b1cdbd2cSJim Jagielski if (context != 0) {
197*b1cdbd2cSJim Jagielski static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate
198*b1cdbd2cSJim Jagielski = highSurrogate;
199*b1cdbd2cSJim Jagielski }
200*b1cdbd2cSJim Jagielski if (info != 0) {
201*b1cdbd2cSJim Jagielski *info = infoFlags;
202*b1cdbd2cSJim Jagielski }
203*b1cdbd2cSJim Jagielski if (srcCvtChars != 0) {
204*b1cdbd2cSJim Jagielski *srcCvtChars = converted;
205*b1cdbd2cSJim Jagielski }
206*b1cdbd2cSJim Jagielski return destBufPtr - destBuf;
207*b1cdbd2cSJim Jagielski }
208