xref: /aoo42x/main/sal/textenc/tenchelp.c (revision cdf0e10c)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir #include "tenchelp.h"
29*cdf0e10cSrcweir #include "unichars.h"
30*cdf0e10cSrcweir #include "rtl/textcvt.h"
31*cdf0e10cSrcweir #include "sal/types.h"
32*cdf0e10cSrcweir 
33*cdf0e10cSrcweir static sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
34*cdf0e10cSrcweir                                                sal_Char * pBuf,
35*cdf0e10cSrcweir                                                sal_Size nMaxLen);
36*cdf0e10cSrcweir 
37*cdf0e10cSrcweir static sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
38*cdf0e10cSrcweir                                              sal_Char * pBuf,
39*cdf0e10cSrcweir                                              sal_Size nMaxLen);
40*cdf0e10cSrcweir 
41*cdf0e10cSrcweir static int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags);
42*cdf0e10cSrcweir 
43*cdf0e10cSrcweir sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
44*cdf0e10cSrcweir                                         sal_Char * pBuf,
45*cdf0e10cSrcweir                                         sal_Size nMaxLen)
46*cdf0e10cSrcweir {
47*cdf0e10cSrcweir     if (nMaxLen == 0)
48*cdf0e10cSrcweir         return sal_False;
49*cdf0e10cSrcweir     switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
50*cdf0e10cSrcweir     {
51*cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0:
52*cdf0e10cSrcweir         *pBuf = 0x00;
53*cdf0e10cSrcweir         break;
54*cdf0e10cSrcweir 
55*cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK:
56*cdf0e10cSrcweir     default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */
57*cdf0e10cSrcweir         *pBuf = 0x3F;
58*cdf0e10cSrcweir         break;
59*cdf0e10cSrcweir 
60*cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE:
61*cdf0e10cSrcweir         *pBuf = 0x5F;
62*cdf0e10cSrcweir         break;
63*cdf0e10cSrcweir     }
64*cdf0e10cSrcweir     return sal_True;
65*cdf0e10cSrcweir }
66*cdf0e10cSrcweir 
67*cdf0e10cSrcweir sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
68*cdf0e10cSrcweir                                       sal_Char * pBuf,
69*cdf0e10cSrcweir                                       sal_Size nMaxLen)
70*cdf0e10cSrcweir {
71*cdf0e10cSrcweir     if (nMaxLen == 0)
72*cdf0e10cSrcweir         return sal_False;
73*cdf0e10cSrcweir     switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
74*cdf0e10cSrcweir     {
75*cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_INVALID_0:
76*cdf0e10cSrcweir         *pBuf = 0x00;
77*cdf0e10cSrcweir         break;
78*cdf0e10cSrcweir 
79*cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK:
80*cdf0e10cSrcweir     default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */
81*cdf0e10cSrcweir         *pBuf = 0x3F;
82*cdf0e10cSrcweir         break;
83*cdf0e10cSrcweir 
84*cdf0e10cSrcweir     case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE:
85*cdf0e10cSrcweir         *pBuf = 0x5F;
86*cdf0e10cSrcweir         break;
87*cdf0e10cSrcweir     }
88*cdf0e10cSrcweir     return sal_True;
89*cdf0e10cSrcweir }
90*cdf0e10cSrcweir 
91*cdf0e10cSrcweir int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags )
92*cdf0e10cSrcweir {
93*cdf0e10cSrcweir     return
94*cdf0e10cSrcweir         ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0
95*cdf0e10cSrcweir          && ImplIsZeroWidth(c))
96*cdf0e10cSrcweir         || ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0
97*cdf0e10cSrcweir             && ImplIsControlOrFormat(c))
98*cdf0e10cSrcweir         || ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0
99*cdf0e10cSrcweir             && ImplIsPrivateUse(c));
100*cdf0e10cSrcweir }
101*cdf0e10cSrcweir 
102*cdf0e10cSrcweir /* ======================================================================= */
103*cdf0e10cSrcweir 
104*cdf0e10cSrcweir sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags)
105*cdf0e10cSrcweir {
106*cdf0e10cSrcweir     return ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK)
107*cdf0e10cSrcweir                    == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE) ?
108*cdf0e10cSrcweir                RTL_TEXTCVT_BYTE_PRIVATE_START + cChar :
109*cdf0e10cSrcweir                RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
110*cdf0e10cSrcweir }
111*cdf0e10cSrcweir 
112*cdf0e10cSrcweir /* ----------------------------------------------------------------------- */
113*cdf0e10cSrcweir 
114*cdf0e10cSrcweir sal_Bool
115*cdf0e10cSrcweir ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData,
116*cdf0e10cSrcweir                                      sal_Unicode const ** ppSrcBuf,
117*cdf0e10cSrcweir                                      sal_Unicode const * pEndSrcBuf,
118*cdf0e10cSrcweir                                      sal_Char ** ppDestBuf,
119*cdf0e10cSrcweir                                      sal_Char const * pEndDestBuf,
120*cdf0e10cSrcweir                                      sal_uInt32 nFlags,
121*cdf0e10cSrcweir                                      sal_uInt32 * pInfo)
122*cdf0e10cSrcweir {
123*cdf0e10cSrcweir     sal_Unicode c = **ppSrcBuf;
124*cdf0e10cSrcweir 
125*cdf0e10cSrcweir     (void) pData; /* unused */
126*cdf0e10cSrcweir 
127*cdf0e10cSrcweir     /* Should the private character map to one byte */
128*cdf0e10cSrcweir     if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) )
129*cdf0e10cSrcweir     {
130*cdf0e10cSrcweir         if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
131*cdf0e10cSrcweir         {
132*cdf0e10cSrcweir             **ppDestBuf = (sal_Char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START);
133*cdf0e10cSrcweir             (*ppDestBuf)++;
134*cdf0e10cSrcweir             (*ppSrcBuf)++;
135*cdf0e10cSrcweir             return sal_True;
136*cdf0e10cSrcweir         }
137*cdf0e10cSrcweir     }
138*cdf0e10cSrcweir 
139*cdf0e10cSrcweir     /* Should this character ignored (Private, Non Spacing, Control) */
140*cdf0e10cSrcweir     if ( ImplIsUnicodeIgnoreChar( c, nFlags ) )
141*cdf0e10cSrcweir     {
142*cdf0e10cSrcweir         (*ppSrcBuf)++;
143*cdf0e10cSrcweir         return sal_True;
144*cdf0e10cSrcweir     }
145*cdf0e10cSrcweir 
146*cdf0e10cSrcweir     /* Surrogates Characters should result in */
147*cdf0e10cSrcweir     /* one replacement character */
148*cdf0e10cSrcweir     if (ImplIsHighSurrogate(c))
149*cdf0e10cSrcweir     {
150*cdf0e10cSrcweir         if ( *ppSrcBuf == pEndSrcBuf )
151*cdf0e10cSrcweir         {
152*cdf0e10cSrcweir             *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
153*cdf0e10cSrcweir             return sal_False;
154*cdf0e10cSrcweir         }
155*cdf0e10cSrcweir 
156*cdf0e10cSrcweir         c = *((*ppSrcBuf)+1);
157*cdf0e10cSrcweir         if (ImplIsLowSurrogate(c))
158*cdf0e10cSrcweir             (*ppSrcBuf)++;
159*cdf0e10cSrcweir         else
160*cdf0e10cSrcweir         {
161*cdf0e10cSrcweir             *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID;
162*cdf0e10cSrcweir             if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR )
163*cdf0e10cSrcweir             {
164*cdf0e10cSrcweir                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
165*cdf0e10cSrcweir                 return sal_False;
166*cdf0e10cSrcweir             }
167*cdf0e10cSrcweir             else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE )
168*cdf0e10cSrcweir             {
169*cdf0e10cSrcweir                 (*ppSrcBuf)++;
170*cdf0e10cSrcweir                 return sal_True;
171*cdf0e10cSrcweir             }
172*cdf0e10cSrcweir             else if (ImplGetInvalidAsciiMultiByte(nFlags,
173*cdf0e10cSrcweir                                                   *ppDestBuf,
174*cdf0e10cSrcweir                                                   pEndDestBuf - *ppDestBuf))
175*cdf0e10cSrcweir             {
176*cdf0e10cSrcweir                 ++*ppSrcBuf;
177*cdf0e10cSrcweir                 ++*ppDestBuf;
178*cdf0e10cSrcweir                 return sal_True;
179*cdf0e10cSrcweir             }
180*cdf0e10cSrcweir             else
181*cdf0e10cSrcweir             {
182*cdf0e10cSrcweir                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
183*cdf0e10cSrcweir                               | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
184*cdf0e10cSrcweir                 return sal_False;
185*cdf0e10cSrcweir             }
186*cdf0e10cSrcweir         }
187*cdf0e10cSrcweir     }
188*cdf0e10cSrcweir 
189*cdf0e10cSrcweir     *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED;
190*cdf0e10cSrcweir     if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR )
191*cdf0e10cSrcweir     {
192*cdf0e10cSrcweir         *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
193*cdf0e10cSrcweir         return sal_False;
194*cdf0e10cSrcweir     }
195*cdf0e10cSrcweir     else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE )
196*cdf0e10cSrcweir         (*ppSrcBuf)++;
197*cdf0e10cSrcweir     else if (ImplGetUndefinedAsciiMultiByte(nFlags,
198*cdf0e10cSrcweir                                             *ppDestBuf,
199*cdf0e10cSrcweir                                             pEndDestBuf - *ppDestBuf))
200*cdf0e10cSrcweir     {
201*cdf0e10cSrcweir         ++*ppSrcBuf;
202*cdf0e10cSrcweir         ++*ppDestBuf;
203*cdf0e10cSrcweir     }
204*cdf0e10cSrcweir     else
205*cdf0e10cSrcweir     {
206*cdf0e10cSrcweir         *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
207*cdf0e10cSrcweir                       | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
208*cdf0e10cSrcweir         return sal_False;
209*cdf0e10cSrcweir     }
210*cdf0e10cSrcweir 
211*cdf0e10cSrcweir     return sal_True;
212*cdf0e10cSrcweir }
213*cdf0e10cSrcweir 
214