1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_sal.hxx"
26
27 #include "context.h"
28 #include "converter.h"
29 #include "convertsinglebytetobmpunicode.hxx"
30 #include "unichars.h"
31
32 #include "osl/diagnose.h"
33 #include "rtl/textcvt.h"
34 #include "sal/types.h"
35
36 #include <cstddef>
37
rtl_textenc_convertSingleByteToBmpUnicode(ImplTextConverterData const * data,void *,sal_Char const * srcBuf,sal_Size srcBytes,sal_Unicode * destBuf,sal_Size destChars,sal_uInt32 flags,sal_uInt32 * info,sal_Size * srcCvtBytes)38 sal_Size rtl_textenc_convertSingleByteToBmpUnicode(
39 ImplTextConverterData const * data, void *, sal_Char const * srcBuf,
40 sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars,
41 sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes)
42 {
43 sal_Unicode const * map = static_cast<
44 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
45 data)->byteToUnicode;
46 sal_uInt32 infoFlags = 0;
47 sal_Size converted = 0;
48 sal_Unicode * destBufPtr = destBuf;
49 sal_Unicode * destBufEnd = destBuf + destChars;
50 for (; converted < srcBytes; ++converted) {
51 bool undefined = true;
52 sal_Char b = *srcBuf++;
53 sal_Unicode c = map[static_cast< sal_uInt8 >(b)];
54 if (c == 0xFFFF) {
55 goto bad_input;
56 }
57 if (destBufEnd - destBufPtr < 1) {
58 goto no_output;
59 }
60 *destBufPtr++ = c;
61 continue;
62 bad_input:
63 switch (ImplHandleBadInputTextToUnicodeConversion(
64 undefined, false, b, flags, &destBufPtr, destBufEnd,
65 &infoFlags))
66 {
67 case IMPL_BAD_INPUT_STOP:
68 break;
69
70 case IMPL_BAD_INPUT_CONTINUE:
71 continue;
72
73 case IMPL_BAD_INPUT_NO_OUTPUT:
74 goto no_output;
75 }
76 break;
77 no_output:
78 --srcBuf;
79 infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
80 break;
81 }
82 if (info != 0) {
83 *info = infoFlags;
84 }
85 if (srcCvtBytes != 0) {
86 *srcCvtBytes = converted;
87 }
88 return destBufPtr - destBuf;
89 }
90
rtl_textenc_convertBmpUnicodeToSingleByte(ImplTextConverterData const * data,void * context,sal_Unicode const * srcBuf,sal_Size srcChars,sal_Char * destBuf,sal_Size destBytes,sal_uInt32 flags,sal_uInt32 * info,sal_Size * srcCvtChars)91 sal_Size rtl_textenc_convertBmpUnicodeToSingleByte(
92 ImplTextConverterData const * data, void * context,
93 sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf,
94 sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info,
95 sal_Size * srcCvtChars)
96 {
97 std::size_t entries = static_cast<
98 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
99 data)->unicodeToByteEntries;
100 rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast<
101 rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
102 data)->unicodeToByte;
103 sal_Unicode highSurrogate = 0;
104 sal_uInt32 infoFlags = 0;
105 sal_Size converted = 0;
106 sal_Char * destBufPtr = destBuf;
107 sal_Char * destBufEnd = destBuf + destBytes;
108 if (context != 0) {
109 highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)->
110 m_nHighSurrogate;
111 }
112 for (; converted < srcChars; ++converted) {
113 bool undefined = true;
114 sal_uInt32 c = *srcBuf++;
115 if (highSurrogate == 0) {
116 if (ImplIsHighSurrogate(c)) {
117 highSurrogate = static_cast< sal_Unicode >(c);
118 continue;
119 }
120 } else if (ImplIsLowSurrogate(c)) {
121 c = ImplCombineSurrogates(highSurrogate, c);
122 } else {
123 undefined = false;
124 goto bad_input;
125 }
126 if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c)) {
127 undefined = false;
128 goto bad_input;
129 }
130 // Linearly searching through the ranges if probably fastest, assuming
131 // that most converted characters belong to the ASCII subset:
132 for (std::size_t i = 0; i < entries; ++i) {
133 if (c < ranges[i].unicode) {
134 break;
135 } else if (c <= sal::static_int_cast< sal_uInt32 >(
136 ranges[i].unicode + ranges[i].range))
137 {
138 if (destBufEnd - destBufPtr < 1) {
139 goto no_output;
140 }
141 *destBufPtr++ = static_cast< sal_Char >(
142 ranges[i].byte + (c - ranges[i].unicode));
143 goto done;
144 }
145 }
146 goto bad_input;
147 done:
148 highSurrogate = 0;
149 continue;
150 bad_input:
151 switch (ImplHandleBadInputUnicodeToTextConversion(
152 undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
153 0, 0))
154 {
155 case IMPL_BAD_INPUT_STOP:
156 highSurrogate = 0;
157 break;
158
159 case IMPL_BAD_INPUT_CONTINUE:
160 highSurrogate = 0;
161 continue;
162
163 case IMPL_BAD_INPUT_NO_OUTPUT:
164 goto no_output;
165 }
166 break;
167 no_output:
168 --srcBuf;
169 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
170 break;
171 }
172 if (highSurrogate != 0
173 && ((infoFlags
174 & (RTL_UNICODETOTEXT_INFO_ERROR
175 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
176 == 0))
177 {
178 if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) {
179 infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
180 } else {
181 switch (ImplHandleBadInputUnicodeToTextConversion(
182 false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
183 0, 0))
184 {
185 case IMPL_BAD_INPUT_STOP:
186 case IMPL_BAD_INPUT_CONTINUE:
187 highSurrogate = 0;
188 break;
189
190 case IMPL_BAD_INPUT_NO_OUTPUT:
191 infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
192 break;
193 }
194 }
195 }
196 if (context != 0) {
197 static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate
198 = highSurrogate;
199 }
200 if (info != 0) {
201 *info = infoFlags;
202 }
203 if (srcCvtChars != 0) {
204 *srcCvtChars = converted;
205 }
206 return destBufPtr - destBuf;
207 }
208