xref: /aoo41x/main/vcl/unx/generic/gdi/salcvt.cxx (revision cdf0e10c)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_vcl.hxx"
30 #include "salcvt.hxx"
31 
32 
33 SalConverterCache::SalConverterCache()
34 {
35 }
36 
37 SalConverterCache*
38 SalConverterCache::GetInstance ()
39 {
40 	static SalConverterCache* pCvt = NULL;
41 	if (pCvt == NULL)
42 		pCvt = new SalConverterCache;
43 
44 	return pCvt;
45 }
46 
47 SalConverterCache::~SalConverterCache()
48 {
49 }
50 
51 // ---> FIXME
52 #include <stdio.h>
53 // <---
54 
55 rtl_UnicodeToTextConverter
56 SalConverterCache::GetU2TConverter( rtl_TextEncoding nEncoding )
57 {
58     if( rtl_isOctetTextEncoding( nEncoding ) )
59 	{
60         ConverterT& rConverter( m_aConverters[ nEncoding ] );
61 		if ( rConverter.mpU2T == NULL )
62 		{
63 			rConverter.mpU2T =
64                 rtl_createUnicodeToTextConverter( nEncoding );
65 // ---> FIXME
66 if ( rConverter.mpU2T == NULL )
67 	fprintf( stderr, "failed to create Unicode -> %i converter\n", nEncoding);
68 // <---
69 		}
70 		return rConverter.mpU2T;
71 	}
72 	return NULL;
73 }
74 
75 rtl_TextToUnicodeConverter
76 SalConverterCache::GetT2UConverter( rtl_TextEncoding nEncoding )
77 {
78     if( rtl_isOctetTextEncoding( nEncoding ) )
79 	{
80         ConverterT& rConverter( m_aConverters[ nEncoding ] );
81 		if ( rConverter.mpT2U == NULL )
82 		{
83 			rConverter.mpT2U =
84                 rtl_createTextToUnicodeConverter( nEncoding );
85 // ---> FIXME
86 if ( rConverter.mpT2U == NULL )
87 	fprintf( stderr, "failed to create %i -> Unicode converter\n", nEncoding );
88 // <---
89 		}
90 		return rConverter.mpT2U;
91 	}
92 	return NULL;
93 }
94 
95 Bool
96 SalConverterCache::IsSingleByteEncoding( rtl_TextEncoding nEncoding )
97 {
98     if( rtl_isOctetTextEncoding( nEncoding ) )
99 	{
100         ConverterT& rConverter( m_aConverters[ nEncoding ] );
101 		if ( ! rConverter.mbValid )
102 		{
103 			rConverter.mbValid = True;
104 
105 			rtl_TextEncodingInfo aTextEncInfo;
106 			aTextEncInfo.StructSize = sizeof( aTextEncInfo );
107 			rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo );
108 
109 			if (   aTextEncInfo.MinimumCharSize == aTextEncInfo.MaximumCharSize
110 				&& aTextEncInfo.MinimumCharSize == 1)
111 				rConverter.mbSingleByteEncoding = True;
112 			else
113 				rConverter.mbSingleByteEncoding = False;
114 		}
115 
116 		return rConverter.mbSingleByteEncoding;
117 	}
118 	return False;
119 }
120 
121 // check whether the character set nEncoding contains the unicode
122 // code point nChar. This list has been compiled from the according
123 // ttmap files in /usr/openwin/lib/X11/fonts/TrueType/ttmap/
124 Bool
125 SalConverterCache::EncodingHasChar( rtl_TextEncoding nEncoding,
126 		sal_Unicode nChar )
127 {
128 	Bool bMatch = False;
129 
130 	switch ( nEncoding )
131 	{
132 		case RTL_TEXTENCODING_DONTKNOW:
133 			bMatch = False;
134 			break;
135 
136 		case RTL_TEXTENCODING_MS_1252:
137 		case RTL_TEXTENCODING_ISO_8859_1:
138 		case RTL_TEXTENCODING_ISO_8859_15:
139         // handle iso8859-15 and iso8859-1 the same (and both with euro)
140         // handle them also like ms1252
141         // this is due to the fact that so many X fonts say they are iso8859-1
142         // but have the other glyphs anyway because they are really ms1252
143 			bMatch = 	( /*nChar >= 0x0000 &&*/ nChar <= 0x00ff )
144                 ||	( nChar == 0x20ac )
145                 ||	( nChar == 0x201a )
146                 ||	( nChar == 0x0192 )
147                 ||	( nChar == 0x201e )
148                 ||	( nChar == 0x2026 )
149                 ||	( nChar == 0x2020 )
150                 ||	( nChar == 0x2021 )
151                 ||	( nChar == 0x02c6 )
152                 ||	( nChar == 0x2030 )
153                 ||	( nChar == 0x0160 )
154                 ||	( nChar == 0x2039 )
155                 ||	( nChar == 0x0152 )
156                 ||	( nChar == 0x017d )
157                 ||	( nChar == 0x2018 )
158                 ||	( nChar == 0x2019 )
159                 ||	( nChar == 0x201c )
160                 ||	( nChar == 0x201d )
161                 ||	( nChar == 0x2022 )
162                 ||	( nChar == 0x2013 )
163                 ||	( nChar == 0x2014 )
164                 ||	( nChar == 0x02dc )
165                 ||	( nChar == 0x2122 )
166                 ||	( nChar == 0x0161 )
167                 ||	( nChar == 0x203a )
168                 ||	( nChar == 0x0153 )
169                 ||	( nChar == 0x017e )
170                 ||	( nChar == 0x0178 )
171                 ;
172 			break;
173 
174 		case RTL_TEXTENCODING_ISO_8859_2:
175 			bMatch = 	( nChar >= 0x0020 && nChar <= 0x007e )
176 					||	( nChar >= 0x00a0 && nChar <= 0x017e )
177 					||	( nChar >= 0x02c7 && nChar <= 0x02dd );
178 			break;
179 
180 		case RTL_TEXTENCODING_ISO_8859_4:
181 			bMatch = 	( nChar >= 0x0020 && nChar <= 0x007e )
182 					||	( nChar >= 0x00a0 && nChar <= 0x017e )
183 					||	( nChar >= 0x02c7 && nChar <= 0x02db );
184 			break;
185 
186 		case RTL_TEXTENCODING_ISO_8859_5:
187 			bMatch = 	( nChar >= 0x0020 && nChar <= 0x007e )
188 					||	( nChar >= 0x00a0 && nChar <= 0x00ad )
189 					||	( nChar >= 0x0401 && nChar <= 0x045f )
190 					||	( nChar == 0x2116 );
191 			break;
192 
193 		case RTL_TEXTENCODING_ISO_8859_6:
194 			bMatch = 	( nChar >= 0x0020 && nChar <= 0x007e )
195 					||	( nChar >= 0x0600 && nChar <= 0x06ff )
196 					|| 	( nChar >= 0xfb50 && nChar <= 0xfffe );
197 			break;
198 
199 		case RTL_TEXTENCODING_ISO_8859_7:
200 			bMatch = 	( nChar >= 0x0020 && nChar <= 0x007e )
201 					||	( nChar >= 0x00a0 && nChar <= 0x00bd )
202 					||	( nChar == 0x02bd )
203 					||	( nChar >= 0x0384 && nChar <= 0x03ce )
204 					||	( nChar >= 0x2014 && nChar <= 0x2019 );
205 			break;
206 
207 		case RTL_TEXTENCODING_ISO_8859_8:
208 			bMatch = 	( nChar >= 0x0020 && nChar <= 0x007e )
209 					||	( nChar >= 0x00a0 && nChar <= 0x00f7 )
210 					||	( nChar >= 0x05d0 && nChar <= 0x05ea )
211 					||	( nChar == 0x2017 );
212 			break;
213 
214 		case RTL_TEXTENCODING_ISO_8859_9:
215 			bMatch = 	( nChar >= 0x0020 && nChar <= 0x007e )
216 					||	( nChar >= 0x00a0 && nChar <= 0x015f );
217  			break;
218 
219 		case RTL_TEXTENCODING_ISO_8859_13:
220 			bMatch =   	( nChar >= 0x0020 && nChar <= 0x007e )
221 					||	( nChar >= 0x00a0 && nChar <= 0x017e )
222 					||  ( nChar >= 0x2019 && nChar <= 0x201e );
223 			break;
224 
225         /* real case for RTL_TEXTENCODING_ISO_8859_15
226 		case RTL_TEXTENCODING_ISO_8859_15:
227 			bMatch =  	( nChar >= 0x0020 && nChar <= 0x007e )
228 					||	( nChar >= 0x00a0 && nChar <= 0x00ff )
229 					||	( nChar >= 0x0152 && nChar <= 0x017e )
230 					||  ( nChar == 0x20ac );
231 			break;
232         */
233 
234 		case RTL_TEXTENCODING_JIS_X_0201:
235 			bMatch = 	( nChar >= 0x0020 && nChar <= 0x007e )
236 					||	( nChar >= 0xff61 && nChar <= 0xff9f );
237 			break;
238 
239 		case RTL_TEXTENCODING_MS_1251:
240 			bMatch = 	( nChar >= 0x0020 && nChar <= 0x007e )
241 					||	( nChar >= 0x00a0 && nChar <= 0x00bb )
242 					||	( nChar >= 0x0401 && nChar <= 0x045f )
243 					||	( nChar >= 0x0490 && nChar <= 0x0491 )
244 					||	( nChar >= 0x2013 && nChar <= 0x203a )
245 					||	( nChar >= 0x2116 && nChar <= 0x2122 );
246 			break;
247 
248 		case RTL_TEXTENCODING_KOI8_R:
249 			bMatch = 	( nChar >= 0x0020 && nChar <= 0x007e )
250 					||	( nChar >= 0x00a0 && nChar <= 0x00b7 )
251 					||	( nChar == 0x00f7 )
252 					||	( nChar >= 0x0401 && nChar <= 0x0451 )
253 					||	( nChar >= 0x2219 && nChar <= 0x221a )
254 					||	( nChar >= 0x2248 && nChar <= 0x2265 )
255 					||	( nChar >= 0x2320 && nChar <= 0x2321 )
256 					||	( nChar >= 0x2500 && nChar <= 0x25a0 );
257 			break;
258 
259 		case RTL_TEXTENCODING_UNICODE:
260 			bMatch = True;
261 			break;
262 
263 		case RTL_TEXTENCODING_EUC_KR:
264 		case RTL_TEXTENCODING_BIG5:
265 		case RTL_TEXTENCODING_GBK:
266 		case RTL_TEXTENCODING_GB_2312:
267 		case RTL_TEXTENCODING_MS_1361:
268 		case RTL_TEXTENCODING_JIS_X_0208:
269 
270 			// XXX Big5 and Korean EUC contain Ascii chars, but Solaris
271 			// *-big5-1 and *-ksc5601.1992-3 fonts dont, in general CJK fonts
272 			// are monospaced, so dont trust them for latin chars
273 			if (nChar <= 0xFF)
274 			{
275 				bMatch = False;
276 				break;
277 			}
278 
279 		default:
280 			// XXX really convert the unicode char into the encoding
281 			// and check for conversion errors, this is expensive !
282 			rtl_UnicodeToTextConverter aConverter;
283 			rtl_UnicodeToTextContext   aContext;
284 
285 			aConverter = GetU2TConverter(nEncoding);
286 			aContext   = rtl_createUnicodeToTextContext( aConverter );
287 
288 			// ---> FIXME
289 			if ( aConverter == NULL )
290 				return False;
291 			// <---
292 
293 			sal_Char   pConversionBuffer[ 32 ];
294 			sal_uInt32 nConversionInfo;
295 			sal_Size   nConvertedChars;
296 			sal_Size   nSize;
297 
298 			nSize = rtl_convertUnicodeToText( aConverter, aContext,
299 					&nChar, 1, pConversionBuffer, sizeof(pConversionBuffer),
300 					  RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
301 					| RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR,
302 					&nConversionInfo, &nConvertedChars );
303 
304 			rtl_destroyUnicodeToTextContext( aConverter, aContext );
305 
306 			bMatch =    (nConvertedChars == 1)
307 					 && (nSize == 1 || nSize == 2) // XXX Fix me this is a hack
308 					 && ((nConversionInfo & RTL_UNICODETOTEXT_INFO_ERROR) == 0);
309 			break;
310 	}
311 
312 	return bMatch;
313 }
314 
315 // wrapper for rtl_convertUnicodeToText that handles the usual cases for
316 // textconversion in drawtext and gettextwidth routines
317 sal_Size
318 SalConverterCache::ConvertStringUTF16( const sal_Unicode *pText, int nTextLen,
319 		sal_Char *pBuffer, sal_Size nBufferSize, rtl_TextEncoding nEncoding )
320 {
321 	rtl_UnicodeToTextConverter aConverter = GetU2TConverter(nEncoding);
322 
323 	const sal_uInt32 nCvtFlags =
324 			  RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
325 			| RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK
326 			| RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK ;
327 	sal_uInt32  nCvtInfo;
328 	sal_Size    nCvtChars;
329 
330 	rtl_UnicodeToTextContext aContext =
331 		 	rtl_createUnicodeToTextContext( aConverter );
332 
333 	sal_Size nSize = rtl_convertUnicodeToText( aConverter, aContext,
334 				pText, nTextLen, pBuffer, nBufferSize,
335 				nCvtFlags, &nCvtInfo, &nCvtChars );
336 
337 	rtl_destroyUnicodeToTextContext( aConverter, aContext );
338 
339 	return nSize;
340 }
341 
342