1*c82f2877SAndrew Rist /**************************************************************
2cdf0e10cSrcweir *
3*c82f2877SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
4*c82f2877SAndrew Rist * or more contributor license agreements. See the NOTICE file
5*c82f2877SAndrew Rist * distributed with this work for additional information
6*c82f2877SAndrew Rist * regarding copyright ownership. The ASF licenses this file
7*c82f2877SAndrew Rist * to you under the Apache License, Version 2.0 (the
8*c82f2877SAndrew Rist * "License"); you may not use this file except in compliance
9*c82f2877SAndrew Rist * with the License. You may obtain a copy of the License at
10*c82f2877SAndrew Rist *
11*c82f2877SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0
12*c82f2877SAndrew Rist *
13*c82f2877SAndrew Rist * Unless required by applicable law or agreed to in writing,
14*c82f2877SAndrew Rist * software distributed under the License is distributed on an
15*c82f2877SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*c82f2877SAndrew Rist * KIND, either express or implied. See the License for the
17*c82f2877SAndrew Rist * specific language governing permissions and limitations
18*c82f2877SAndrew Rist * under the License.
19*c82f2877SAndrew Rist *
20*c82f2877SAndrew Rist *************************************************************/
21*c82f2877SAndrew Rist
22*c82f2877SAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_vcl.hxx"
26cdf0e10cSrcweir #include "salcvt.hxx"
27cdf0e10cSrcweir
28cdf0e10cSrcweir
SalConverterCache()29cdf0e10cSrcweir SalConverterCache::SalConverterCache()
30cdf0e10cSrcweir {
31cdf0e10cSrcweir }
32cdf0e10cSrcweir
33cdf0e10cSrcweir SalConverterCache*
GetInstance()34cdf0e10cSrcweir SalConverterCache::GetInstance ()
35cdf0e10cSrcweir {
36cdf0e10cSrcweir static SalConverterCache* pCvt = NULL;
37cdf0e10cSrcweir if (pCvt == NULL)
38cdf0e10cSrcweir pCvt = new SalConverterCache;
39cdf0e10cSrcweir
40cdf0e10cSrcweir return pCvt;
41cdf0e10cSrcweir }
42cdf0e10cSrcweir
~SalConverterCache()43cdf0e10cSrcweir SalConverterCache::~SalConverterCache()
44cdf0e10cSrcweir {
45cdf0e10cSrcweir }
46cdf0e10cSrcweir
47cdf0e10cSrcweir // ---> FIXME
48cdf0e10cSrcweir #include <stdio.h>
49cdf0e10cSrcweir // <---
50cdf0e10cSrcweir
51cdf0e10cSrcweir rtl_UnicodeToTextConverter
GetU2TConverter(rtl_TextEncoding nEncoding)52cdf0e10cSrcweir SalConverterCache::GetU2TConverter( rtl_TextEncoding nEncoding )
53cdf0e10cSrcweir {
54cdf0e10cSrcweir if( rtl_isOctetTextEncoding( nEncoding ) )
55cdf0e10cSrcweir {
56cdf0e10cSrcweir ConverterT& rConverter( m_aConverters[ nEncoding ] );
57cdf0e10cSrcweir if ( rConverter.mpU2T == NULL )
58cdf0e10cSrcweir {
59cdf0e10cSrcweir rConverter.mpU2T =
60cdf0e10cSrcweir rtl_createUnicodeToTextConverter( nEncoding );
61cdf0e10cSrcweir // ---> FIXME
62cdf0e10cSrcweir if ( rConverter.mpU2T == NULL )
63cdf0e10cSrcweir fprintf( stderr, "failed to create Unicode -> %i converter\n", nEncoding);
64cdf0e10cSrcweir // <---
65cdf0e10cSrcweir }
66cdf0e10cSrcweir return rConverter.mpU2T;
67cdf0e10cSrcweir }
68cdf0e10cSrcweir return NULL;
69cdf0e10cSrcweir }
70cdf0e10cSrcweir
71cdf0e10cSrcweir rtl_TextToUnicodeConverter
GetT2UConverter(rtl_TextEncoding nEncoding)72cdf0e10cSrcweir SalConverterCache::GetT2UConverter( rtl_TextEncoding nEncoding )
73cdf0e10cSrcweir {
74cdf0e10cSrcweir if( rtl_isOctetTextEncoding( nEncoding ) )
75cdf0e10cSrcweir {
76cdf0e10cSrcweir ConverterT& rConverter( m_aConverters[ nEncoding ] );
77cdf0e10cSrcweir if ( rConverter.mpT2U == NULL )
78cdf0e10cSrcweir {
79cdf0e10cSrcweir rConverter.mpT2U =
80cdf0e10cSrcweir rtl_createTextToUnicodeConverter( nEncoding );
81cdf0e10cSrcweir // ---> FIXME
82cdf0e10cSrcweir if ( rConverter.mpT2U == NULL )
83cdf0e10cSrcweir fprintf( stderr, "failed to create %i -> Unicode converter\n", nEncoding );
84cdf0e10cSrcweir // <---
85cdf0e10cSrcweir }
86cdf0e10cSrcweir return rConverter.mpT2U;
87cdf0e10cSrcweir }
88cdf0e10cSrcweir return NULL;
89cdf0e10cSrcweir }
90cdf0e10cSrcweir
91cdf0e10cSrcweir Bool
IsSingleByteEncoding(rtl_TextEncoding nEncoding)92cdf0e10cSrcweir SalConverterCache::IsSingleByteEncoding( rtl_TextEncoding nEncoding )
93cdf0e10cSrcweir {
94cdf0e10cSrcweir if( rtl_isOctetTextEncoding( nEncoding ) )
95cdf0e10cSrcweir {
96cdf0e10cSrcweir ConverterT& rConverter( m_aConverters[ nEncoding ] );
97cdf0e10cSrcweir if ( ! rConverter.mbValid )
98cdf0e10cSrcweir {
99cdf0e10cSrcweir rConverter.mbValid = True;
100cdf0e10cSrcweir
101cdf0e10cSrcweir rtl_TextEncodingInfo aTextEncInfo;
102cdf0e10cSrcweir aTextEncInfo.StructSize = sizeof( aTextEncInfo );
103cdf0e10cSrcweir rtl_getTextEncodingInfo( nEncoding, &aTextEncInfo );
104cdf0e10cSrcweir
105cdf0e10cSrcweir if ( aTextEncInfo.MinimumCharSize == aTextEncInfo.MaximumCharSize
106cdf0e10cSrcweir && aTextEncInfo.MinimumCharSize == 1)
107cdf0e10cSrcweir rConverter.mbSingleByteEncoding = True;
108cdf0e10cSrcweir else
109cdf0e10cSrcweir rConverter.mbSingleByteEncoding = False;
110cdf0e10cSrcweir }
111cdf0e10cSrcweir
112cdf0e10cSrcweir return rConverter.mbSingleByteEncoding;
113cdf0e10cSrcweir }
114cdf0e10cSrcweir return False;
115cdf0e10cSrcweir }
116cdf0e10cSrcweir
117cdf0e10cSrcweir // check whether the character set nEncoding contains the unicode
118cdf0e10cSrcweir // code point nChar. This list has been compiled from the according
119cdf0e10cSrcweir // ttmap files in /usr/openwin/lib/X11/fonts/TrueType/ttmap/
120cdf0e10cSrcweir Bool
EncodingHasChar(rtl_TextEncoding nEncoding,sal_Unicode nChar)121cdf0e10cSrcweir SalConverterCache::EncodingHasChar( rtl_TextEncoding nEncoding,
122cdf0e10cSrcweir sal_Unicode nChar )
123cdf0e10cSrcweir {
124cdf0e10cSrcweir Bool bMatch = False;
125cdf0e10cSrcweir
126cdf0e10cSrcweir switch ( nEncoding )
127cdf0e10cSrcweir {
128cdf0e10cSrcweir case RTL_TEXTENCODING_DONTKNOW:
129cdf0e10cSrcweir bMatch = False;
130cdf0e10cSrcweir break;
131cdf0e10cSrcweir
132cdf0e10cSrcweir case RTL_TEXTENCODING_MS_1252:
133cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_1:
134cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_15:
135cdf0e10cSrcweir // handle iso8859-15 and iso8859-1 the same (and both with euro)
136cdf0e10cSrcweir // handle them also like ms1252
137cdf0e10cSrcweir // this is due to the fact that so many X fonts say they are iso8859-1
138cdf0e10cSrcweir // but have the other glyphs anyway because they are really ms1252
139cdf0e10cSrcweir bMatch = ( /*nChar >= 0x0000 &&*/ nChar <= 0x00ff )
140cdf0e10cSrcweir || ( nChar == 0x20ac )
141cdf0e10cSrcweir || ( nChar == 0x201a )
142cdf0e10cSrcweir || ( nChar == 0x0192 )
143cdf0e10cSrcweir || ( nChar == 0x201e )
144cdf0e10cSrcweir || ( nChar == 0x2026 )
145cdf0e10cSrcweir || ( nChar == 0x2020 )
146cdf0e10cSrcweir || ( nChar == 0x2021 )
147cdf0e10cSrcweir || ( nChar == 0x02c6 )
148cdf0e10cSrcweir || ( nChar == 0x2030 )
149cdf0e10cSrcweir || ( nChar == 0x0160 )
150cdf0e10cSrcweir || ( nChar == 0x2039 )
151cdf0e10cSrcweir || ( nChar == 0x0152 )
152cdf0e10cSrcweir || ( nChar == 0x017d )
153cdf0e10cSrcweir || ( nChar == 0x2018 )
154cdf0e10cSrcweir || ( nChar == 0x2019 )
155cdf0e10cSrcweir || ( nChar == 0x201c )
156cdf0e10cSrcweir || ( nChar == 0x201d )
157cdf0e10cSrcweir || ( nChar == 0x2022 )
158cdf0e10cSrcweir || ( nChar == 0x2013 )
159cdf0e10cSrcweir || ( nChar == 0x2014 )
160cdf0e10cSrcweir || ( nChar == 0x02dc )
161cdf0e10cSrcweir || ( nChar == 0x2122 )
162cdf0e10cSrcweir || ( nChar == 0x0161 )
163cdf0e10cSrcweir || ( nChar == 0x203a )
164cdf0e10cSrcweir || ( nChar == 0x0153 )
165cdf0e10cSrcweir || ( nChar == 0x017e )
166cdf0e10cSrcweir || ( nChar == 0x0178 )
167cdf0e10cSrcweir ;
168cdf0e10cSrcweir break;
169cdf0e10cSrcweir
170cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_2:
171cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
172cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x017e )
173cdf0e10cSrcweir || ( nChar >= 0x02c7 && nChar <= 0x02dd );
174cdf0e10cSrcweir break;
175cdf0e10cSrcweir
176cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_4:
177cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
178cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x017e )
179cdf0e10cSrcweir || ( nChar >= 0x02c7 && nChar <= 0x02db );
180cdf0e10cSrcweir break;
181cdf0e10cSrcweir
182cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_5:
183cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
184cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x00ad )
185cdf0e10cSrcweir || ( nChar >= 0x0401 && nChar <= 0x045f )
186cdf0e10cSrcweir || ( nChar == 0x2116 );
187cdf0e10cSrcweir break;
188cdf0e10cSrcweir
189cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_6:
190cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
191cdf0e10cSrcweir || ( nChar >= 0x0600 && nChar <= 0x06ff )
192cdf0e10cSrcweir || ( nChar >= 0xfb50 && nChar <= 0xfffe );
193cdf0e10cSrcweir break;
194cdf0e10cSrcweir
195cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_7:
196cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
197cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x00bd )
198cdf0e10cSrcweir || ( nChar == 0x02bd )
199cdf0e10cSrcweir || ( nChar >= 0x0384 && nChar <= 0x03ce )
200cdf0e10cSrcweir || ( nChar >= 0x2014 && nChar <= 0x2019 );
201cdf0e10cSrcweir break;
202cdf0e10cSrcweir
203cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_8:
204cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
205cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x00f7 )
206cdf0e10cSrcweir || ( nChar >= 0x05d0 && nChar <= 0x05ea )
207cdf0e10cSrcweir || ( nChar == 0x2017 );
208cdf0e10cSrcweir break;
209cdf0e10cSrcweir
210cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_9:
211cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
212cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x015f );
213cdf0e10cSrcweir break;
214cdf0e10cSrcweir
215cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_13:
216cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
217cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x017e )
218cdf0e10cSrcweir || ( nChar >= 0x2019 && nChar <= 0x201e );
219cdf0e10cSrcweir break;
220cdf0e10cSrcweir
221cdf0e10cSrcweir /* real case for RTL_TEXTENCODING_ISO_8859_15
222cdf0e10cSrcweir case RTL_TEXTENCODING_ISO_8859_15:
223cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
224cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x00ff )
225cdf0e10cSrcweir || ( nChar >= 0x0152 && nChar <= 0x017e )
226cdf0e10cSrcweir || ( nChar == 0x20ac );
227cdf0e10cSrcweir break;
228cdf0e10cSrcweir */
229cdf0e10cSrcweir
230cdf0e10cSrcweir case RTL_TEXTENCODING_JIS_X_0201:
231cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
232cdf0e10cSrcweir || ( nChar >= 0xff61 && nChar <= 0xff9f );
233cdf0e10cSrcweir break;
234cdf0e10cSrcweir
235cdf0e10cSrcweir case RTL_TEXTENCODING_MS_1251:
236cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
237cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x00bb )
238cdf0e10cSrcweir || ( nChar >= 0x0401 && nChar <= 0x045f )
239cdf0e10cSrcweir || ( nChar >= 0x0490 && nChar <= 0x0491 )
240cdf0e10cSrcweir || ( nChar >= 0x2013 && nChar <= 0x203a )
241cdf0e10cSrcweir || ( nChar >= 0x2116 && nChar <= 0x2122 );
242cdf0e10cSrcweir break;
243cdf0e10cSrcweir
244cdf0e10cSrcweir case RTL_TEXTENCODING_KOI8_R:
245cdf0e10cSrcweir bMatch = ( nChar >= 0x0020 && nChar <= 0x007e )
246cdf0e10cSrcweir || ( nChar >= 0x00a0 && nChar <= 0x00b7 )
247cdf0e10cSrcweir || ( nChar == 0x00f7 )
248cdf0e10cSrcweir || ( nChar >= 0x0401 && nChar <= 0x0451 )
249cdf0e10cSrcweir || ( nChar >= 0x2219 && nChar <= 0x221a )
250cdf0e10cSrcweir || ( nChar >= 0x2248 && nChar <= 0x2265 )
251cdf0e10cSrcweir || ( nChar >= 0x2320 && nChar <= 0x2321 )
252cdf0e10cSrcweir || ( nChar >= 0x2500 && nChar <= 0x25a0 );
253cdf0e10cSrcweir break;
254cdf0e10cSrcweir
255cdf0e10cSrcweir case RTL_TEXTENCODING_UNICODE:
256cdf0e10cSrcweir bMatch = True;
257cdf0e10cSrcweir break;
258cdf0e10cSrcweir
259cdf0e10cSrcweir case RTL_TEXTENCODING_EUC_KR:
260cdf0e10cSrcweir case RTL_TEXTENCODING_BIG5:
261cdf0e10cSrcweir case RTL_TEXTENCODING_GBK:
262cdf0e10cSrcweir case RTL_TEXTENCODING_GB_2312:
263cdf0e10cSrcweir case RTL_TEXTENCODING_MS_1361:
264cdf0e10cSrcweir case RTL_TEXTENCODING_JIS_X_0208:
265cdf0e10cSrcweir
266cdf0e10cSrcweir // XXX Big5 and Korean EUC contain Ascii chars, but Solaris
267cdf0e10cSrcweir // *-big5-1 and *-ksc5601.1992-3 fonts dont, in general CJK fonts
268cdf0e10cSrcweir // are monospaced, so dont trust them for latin chars
269cdf0e10cSrcweir if (nChar <= 0xFF)
270cdf0e10cSrcweir {
271cdf0e10cSrcweir bMatch = False;
272cdf0e10cSrcweir break;
273cdf0e10cSrcweir }
274cdf0e10cSrcweir
275cdf0e10cSrcweir default:
276cdf0e10cSrcweir // XXX really convert the unicode char into the encoding
277cdf0e10cSrcweir // and check for conversion errors, this is expensive !
278cdf0e10cSrcweir rtl_UnicodeToTextConverter aConverter;
279cdf0e10cSrcweir rtl_UnicodeToTextContext aContext;
280cdf0e10cSrcweir
281cdf0e10cSrcweir aConverter = GetU2TConverter(nEncoding);
282cdf0e10cSrcweir aContext = rtl_createUnicodeToTextContext( aConverter );
283cdf0e10cSrcweir
284cdf0e10cSrcweir // ---> FIXME
285cdf0e10cSrcweir if ( aConverter == NULL )
286cdf0e10cSrcweir return False;
287cdf0e10cSrcweir // <---
288cdf0e10cSrcweir
289cdf0e10cSrcweir sal_Char pConversionBuffer[ 32 ];
290cdf0e10cSrcweir sal_uInt32 nConversionInfo;
291cdf0e10cSrcweir sal_Size nConvertedChars;
292cdf0e10cSrcweir sal_Size nSize;
293cdf0e10cSrcweir
294cdf0e10cSrcweir nSize = rtl_convertUnicodeToText( aConverter, aContext,
295cdf0e10cSrcweir &nChar, 1, pConversionBuffer, sizeof(pConversionBuffer),
296cdf0e10cSrcweir RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
297cdf0e10cSrcweir | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR,
298cdf0e10cSrcweir &nConversionInfo, &nConvertedChars );
299cdf0e10cSrcweir
300cdf0e10cSrcweir rtl_destroyUnicodeToTextContext( aConverter, aContext );
301cdf0e10cSrcweir
302cdf0e10cSrcweir bMatch = (nConvertedChars == 1)
303cdf0e10cSrcweir && (nSize == 1 || nSize == 2) // XXX Fix me this is a hack
304cdf0e10cSrcweir && ((nConversionInfo & RTL_UNICODETOTEXT_INFO_ERROR) == 0);
305cdf0e10cSrcweir break;
306cdf0e10cSrcweir }
307cdf0e10cSrcweir
308cdf0e10cSrcweir return bMatch;
309cdf0e10cSrcweir }
310cdf0e10cSrcweir
311cdf0e10cSrcweir // wrapper for rtl_convertUnicodeToText that handles the usual cases for
312cdf0e10cSrcweir // textconversion in drawtext and gettextwidth routines
313cdf0e10cSrcweir sal_Size
ConvertStringUTF16(const sal_Unicode * pText,int nTextLen,sal_Char * pBuffer,sal_Size nBufferSize,rtl_TextEncoding nEncoding)314cdf0e10cSrcweir SalConverterCache::ConvertStringUTF16( const sal_Unicode *pText, int nTextLen,
315cdf0e10cSrcweir sal_Char *pBuffer, sal_Size nBufferSize, rtl_TextEncoding nEncoding )
316cdf0e10cSrcweir {
317cdf0e10cSrcweir rtl_UnicodeToTextConverter aConverter = GetU2TConverter(nEncoding);
318cdf0e10cSrcweir
319cdf0e10cSrcweir const sal_uInt32 nCvtFlags =
320cdf0e10cSrcweir RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
321cdf0e10cSrcweir | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK
322cdf0e10cSrcweir | RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK ;
323cdf0e10cSrcweir sal_uInt32 nCvtInfo;
324cdf0e10cSrcweir sal_Size nCvtChars;
325cdf0e10cSrcweir
326cdf0e10cSrcweir rtl_UnicodeToTextContext aContext =
327cdf0e10cSrcweir rtl_createUnicodeToTextContext( aConverter );
328cdf0e10cSrcweir
329cdf0e10cSrcweir sal_Size nSize = rtl_convertUnicodeToText( aConverter, aContext,
330cdf0e10cSrcweir pText, nTextLen, pBuffer, nBufferSize,
331cdf0e10cSrcweir nCvtFlags, &nCvtInfo, &nCvtChars );
332cdf0e10cSrcweir
333cdf0e10cSrcweir rtl_destroyUnicodeToTextContext( aConverter, aContext );
334cdf0e10cSrcweir
335cdf0e10cSrcweir return nSize;
336cdf0e10cSrcweir }
337cdf0e10cSrcweir
338