1/*************************************************************************
2 *
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
6 *
7 * OpenOffice.org - a multi-platform office productivity suite
8 *
9 * This file is part of OpenOffice.org.
10 *
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
14 *
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
20 *
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org.  If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
25 *
26 ************************************************************************/
27
28#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
29#define __com_sun_star_i18n_XCharacterClassification_idl__
30
31#include <com/sun/star/i18n/ParseResult.idl>
32
33#ifndef __com_sun_star_lang_Locale_idl__
34#include <com/sun/star/lang/Locale.idl>
35#endif
36#ifndef __com_sun_star_uno_XInterface_idl__
37#include <com/sun/star/uno/XInterface.idl>
38#endif
39
40//============================================================================
41
42module com { module sun { module star { module i18n {
43
44//============================================================================
45
46/*
47
48Possible tokens to be parsed with  parse...Token():
49
50UPASCALPHA=[A-Z]
51LOASCALPHA=[a-z]
52ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
53ASCDIGIT=[0-9]
54ASC_UNDERSCORE='_'
55ASC_SPACE=' '
56ASC_HT='\0x9'
57ASC_VT='\0xb'
58ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
59ASC_DBL_QUOTE=\"
60ASC_QUOTE=\'
61UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
62
63ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
64ALNUM=ALPHA|DIGIT
65CHAR=anycharacter
66WS=isWhiteSpace()
67SIGN='+'|'-'
68DECSEP=<locale dependent decimal separator>
69GRPSEP=<locale dependent thousand separator>
70EXPONENT=(E|e)[SIGN]1*ASCDIGIT
71
72IDENTIFIER=ALPHA *ALNUM
73UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
74ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
75ANY_NAME=1*(ALNUM|DEFCHARS)
76SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
77DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
78ASC_NUMBER=[SIGN]*(1*ASCDIGIT  *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
79NUMBER=[SIGN]*(1*DIGIT  *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
80
81*/
82
83//============================================================================
84
85/**
86    Character classification (upper, lower, digit, letter, number, ...)
87    and generic Unicode enabled parser.
88 */
89
90published interface XCharacterClassification : com::sun::star::uno::XInterface
91{
92    //------------------------------------------------------------------------
93    /** Convert lower case alpha to upper case alpha, starting at
94        position <em>nPos</em> for <em>nCount</em> code points.

        @param  aText
            Text containing the characters to be converted.

        @param  nPos
            Position in <em>aText</em> where the conversion starts.

        @param  nCount
            Number of code points to convert.

        @param  aLocale
            The locale to apply to the conversion.

        @returns
            The result of the conversion.
95     */
96    string   toUpper( [in] string aText, [in] long nPos, [in] long nCount,
97                      [in] com::sun::star::lang::Locale aLocale );
98
99    //------------------------------------------------------------------------
100    /** Convert upper case alpha to lower case alpha, starting at
101        position <em>nPos</em> for <em>nCount</em> code points.

        @param  aText
            Text containing the characters to be converted.

        @param  nPos
            Position in <em>aText</em> where the conversion starts.

        @param  nCount
            Number of code points to convert.

        @param  aLocale
            The locale to apply to the conversion.

        @returns
            The result of the conversion.
102     */
103    string   toLower( [in] string aText, [in] long nPos, [in] long nCount,
104                      [in] com::sun::star::lang::Locale aLocale );
105
106    //------------------------------------------------------------------------
107    /** Convert to title case, starting at
108        position <em>nPos</em> for <em>nCount</em> code points.

        @param  aText
            Text containing the characters to be converted.

        @param  nPos
            Position in <em>aText</em> where the conversion starts.

        @param  nCount
            Number of code points to convert.

        @param  aLocale
            The locale to apply to the conversion.

        @returns
            The result of the conversion.
109     */
110    string   toTitle( [in] string aText, [in] long nPos, [in] long nCount,
111                      [in] com::sun::star::lang::Locale aLocale );
112
113    //------------------------------------------------------------------------
114    /// Get <type>UnicodeType</type> of the character at position <em>nPos</em> of <em>aText</em>.
115    short    getType( [in] string aText, [in] long nPos );
116
117    //------------------------------------------------------------------------
118    /** Get <type>DirectionProperty</type> of the character at position
119        <em>nPos</em> of <em>aText</em>.
120     */
121    short    getCharacterDirection( [in] string aText, [in] long nPos );
122
123    //------------------------------------------------------------------------
124    /// Get <type>UnicodeScript</type> of the character at position <em>nPos</em> of <em>aText</em>.
125    short    getScript( [in] string aText, [in] long nPos );
126
127    //------------------------------------------------------------------------
128    /// Get <type>KCharacterType</type> of the character at position <em>nPos</em> of <em>aText</em>.
129    long getCharacterType( [in] string aText, [in] long nPos,
130                           [in] com::sun::star::lang::Locale aLocale );
131
132    //------------------------------------------------------------------------
133    /** Get accumulated <type>KCharacterType</type>s of string starting
134        at position <em>nPos</em> of length <em>nCount</em> code points.
135

        @param  aText
            Text whose characters are to be classified.

        @param  nPos
            Position in <em>aText</em> where the examined range starts.

        @param  nCount
            Length of the examined range in code points.

        @param  aLocale
            The locale to apply to the classification.

136        @returns
137            A number with appropriate flags set to indicate what type of
138            characters the string contains, each flag value being one of
139            the <type>KCharacterType</type> values.
140    */
141    long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
142                        [in] com::sun::star::lang::Locale aLocale );
143
144
145    //------------------------------------------------------------------------
146    /**
147        Parse a string for a token starting at position <em>nPos</em>.
148
149        <p> A name or identifier must match the
150        <type>KParseTokens</type> criteria passed in
151        <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
152        additionally contain characters of
153        <em>aUserDefinedCharactersStart</em> and/or
154        <em>aUserDefinedCharactersCont</em>. </p>
155
156
157        @returns
158            A filled <type>ParseResult</type> structure. If no
159            unambiguous token could be parsed,
160            <member>ParseResult::TokenType</member> will be set to
161            <b>0</b> (zero), other fields will contain the values parsed
162            so far.
163
164            <p> If a token may represent either a numeric value or a
165            name according to the passed Start/Cont-Flags/Chars, both
166            <const>KParseType::ASC_NUMBER</const> (or
167            <const>KParseType::UNI_NUMBER</const>) and
168            <const>KParseType::IDENTNAME</const> are set in
169            <member>ParseResult::TokenType</member>. </p>
170
171        @param  aText
172            Text to be parsed.
173
174        @param  nPos
175            Position where parsing starts.
176
177        @param  aLocale
178            The locale, for example, for decimal and group separator or
179            character type determination.
180
181        @param  nStartCharFlags
182            A set of <type>KParseTokens</type> constants determining the
183            allowed characters a name or identifier may start with.
184
185        @param  aUserDefinedCharactersStart
186            A set of additionally allowed characters a name or
187            identifier may start with.
188
189        @param  nContCharFlags
190            A set of <type>KParseTokens</type> constants determining the
191            allowed characters a name or identifier may continue with.
192
193        @param  aUserDefinedCharactersCont
194            A set of additionally allowed characters a name or
195            identifier may continue with.
196
197        @example:C++
198        <listing>
199            using namespace ::com::sun::star::i18n;
200            // First character of an identifier may be any alphabetic or underscore.
201            sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
202            // Continuing characters may be any alphanumeric or underscore or dot.
203            sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
204            // No further characters assumed to be contained in an identifier
205            String aEmptyString;
206            // Parse any token.
207            ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
208                nStartFlags, aEmptyString, nContFlags, aEmptyString );
209            // Get parsed token.
210            if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
211                fValue = rRes.Value;
212            if ( rRes.TokenType & KParseType::IDENTNAME )
213                aName = aText.Copy( nPos, rRes.EndPos - nPos );
214            else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
215                aName = rRes.DequotedNameOrString;
216            else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
217                aString = rRes.DequotedNameOrString;
218            else if ( rRes.TokenType & KParseType::BOOLEAN )
219                aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
220            else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
221                aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
222        </listing>
223     */
224
225    ParseResult parseAnyToken(
226                            [in] string aText,
227                            [in] long nPos,
228                            [in] com::sun::star::lang::Locale aLocale,
229                            [in] long nStartCharFlags,
230                            [in] string aUserDefinedCharactersStart,
231                            [in] long nContCharFlags,
232                            [in] string aUserDefinedCharactersCont
233                            );
234
235    //------------------------------------------------------------------------
236    /**
237        Parse a string for a token of type <em>nTokenType</em> starting
238        at position <em>nPos</em>.
239
240        <p> Other parameters are the same as in
241        <member>parseAnyToken</member>. If the actual token does not
242        match the passed <em>nTokenType</em> a
243        <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
244        is returned. </p>
245

        @returns
            A <type>ParseResult</type> structure whose
            <member>ParseResult::TokenType</member> is <b>0</b> (zero)
            if the actual token does not match <em>nTokenType</em>.

246        @param  nTokenType
247            One or more of the <type>KParseType</type> constants.
248
249        @example:C++
250        <listing>
251            // Determine if a given name is a valid name (not quoted) and contains
252            // only allowed characters.
253            using namespace ::com::sun::star::i18n;
254            // First character of an identifier may be any alphanumeric or underscore.
255            sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
256            // No further characters assumed to be contained in an identifier start.
257            String aEmptyString;
258            // Continuing characters may be any alphanumeric or underscore.
259            sal_Int32 nContFlags = nStartFlags;
260            // Additionally, continuing characters may contain a blank.
261            String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
262            // Parse predefined (must be an IDENTNAME) token.
263            ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
264                nStartFlags, aEmptyString, nContFlags, aContChars );
265            // Test that it is an identifier name and that the token spans
266            // the whole string, i.e. nothing else follows it.
267            bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
268        </listing>
269     */
270
271    ParseResult parsePredefinedToken(
272                            [in] long nTokenType,
273                            [in] string aText,
274                            [in] long nPos,
275                            [in] com::sun::star::lang::Locale aLocale,
276                            [in] long nStartCharFlags,
277                            [in] string aUserDefinedCharactersStart,
278                            [in] long nContCharFlags,
279                            [in] string aUserDefinedCharactersCont
280                            );
281};
282
283//=============================================================================
284}; }; }; };
285
286#endif
287