1*b1cdbd2cSJim Jagielski/**************************************************************
2*b1cdbd2cSJim Jagielski *
3*b1cdbd2cSJim Jagielski * Licensed to the Apache Software Foundation (ASF) under one
4*b1cdbd2cSJim Jagielski * or more contributor license agreements.  See the NOTICE file
5*b1cdbd2cSJim Jagielski * distributed with this work for additional information
6*b1cdbd2cSJim Jagielski * regarding copyright ownership.  The ASF licenses this file
7*b1cdbd2cSJim Jagielski * to you under the Apache License, Version 2.0 (the
8*b1cdbd2cSJim Jagielski * "License"); you may not use this file except in compliance
9*b1cdbd2cSJim Jagielski * with the License.  You may obtain a copy of the License at
10*b1cdbd2cSJim Jagielski *
11*b1cdbd2cSJim Jagielski *   http://www.apache.org/licenses/LICENSE-2.0
12*b1cdbd2cSJim Jagielski *
13*b1cdbd2cSJim Jagielski * Unless required by applicable law or agreed to in writing,
14*b1cdbd2cSJim Jagielski * software distributed under the License is distributed on an
15*b1cdbd2cSJim Jagielski * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*b1cdbd2cSJim Jagielski * KIND, either express or implied.  See the License for the
17*b1cdbd2cSJim Jagielski * specific language governing permissions and limitations
18*b1cdbd2cSJim Jagielski * under the License.
19*b1cdbd2cSJim Jagielski *
20*b1cdbd2cSJim Jagielski *************************************************************/
21*b1cdbd2cSJim Jagielski
22*b1cdbd2cSJim Jagielski
23*b1cdbd2cSJim Jagielski
24*b1cdbd2cSJim Jagielski#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
25*b1cdbd2cSJim Jagielski#define __com_sun_star_i18n_XCharacterClassification_idl__
26*b1cdbd2cSJim Jagielski
27*b1cdbd2cSJim Jagielski#include <com/sun/star/i18n/ParseResult.idl>
28*b1cdbd2cSJim Jagielski
29*b1cdbd2cSJim Jagielski#ifndef __com_sun_star_lang_Locale_idl__
30*b1cdbd2cSJim Jagielski#include <com/sun/star/lang/Locale.idl>
31*b1cdbd2cSJim Jagielski#endif
32*b1cdbd2cSJim Jagielski#ifndef __com_sun_star_uno_XInterface_idl__
33*b1cdbd2cSJim Jagielski#include <com/sun/star/uno/XInterface.idl>
34*b1cdbd2cSJim Jagielski#endif
35*b1cdbd2cSJim Jagielski
36*b1cdbd2cSJim Jagielski//============================================================================
37*b1cdbd2cSJim Jagielski
38*b1cdbd2cSJim Jagielskimodule com { module sun { module star { module i18n {
39*b1cdbd2cSJim Jagielski
40*b1cdbd2cSJim Jagielski//============================================================================
41*b1cdbd2cSJim Jagielski
42*b1cdbd2cSJim Jagielski/*
43*b1cdbd2cSJim Jagielski
44*b1cdbd2cSJim JagielskiPossible tokens to be parsed with  parse...Token():
45*b1cdbd2cSJim Jagielski
46*b1cdbd2cSJim JagielskiUPASCALPHA=[A-Z]
47*b1cdbd2cSJim JagielskiLOASCALPHA=[a-z]
48*b1cdbd2cSJim JagielskiASCALPHA=1*(UPASCALPHA|LOASCALPHA)
49*b1cdbd2cSJim JagielskiASCDIGIT=[0-9]
50*b1cdbd2cSJim JagielskiASC_UNDERSCORE='_'
51*b1cdbd2cSJim JagielskiASC_SPACE=' '
52*b1cdbd2cSJim JagielskiASC_HT='\x09'
53*b1cdbd2cSJim JagielskiASC_VT='\x0b'
54*b1cdbd2cSJim JagielskiASC_WS=ASC_SPACE|ASC_HT|ASC_VT
55*b1cdbd2cSJim JagielskiASC_DBL_QUOTE=\"
56*b1cdbd2cSJim JagielskiASC_QUOTE=\'
57*b1cdbd2cSJim JagielskiUPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
58*b1cdbd2cSJim Jagielski
59*b1cdbd2cSJim JagielskiALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
60*b1cdbd2cSJim JagielskiALNUM=ALPHA|DIGIT
61*b1cdbd2cSJim JagielskiCHAR=anycharacter
62*b1cdbd2cSJim JagielskiWS=isWhiteSpace()
63*b1cdbd2cSJim JagielskiSIGN='+'|'-'
64*b1cdbd2cSJim JagielskiDECSEP=<locale dependent decimal separator>
65*b1cdbd2cSJim JagielskiGRPSEP=<locale dependent thousand separator>
66*b1cdbd2cSJim JagielskiEXPONENT=(E|e)[SIGN]1*ASCDIGIT
67*b1cdbd2cSJim Jagielski
68*b1cdbd2cSJim JagielskiIDENTIFIER=ALPHA *ALNUM
69*b1cdbd2cSJim JagielskiUIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
70*b1cdbd2cSJim JagielskiALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
71*b1cdbd2cSJim JagielskiANY_NAME=1*(ALNUM|DEFCHARS)
72*b1cdbd2cSJim JagielskiSINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
73*b1cdbd2cSJim JagielskiDOUBLE_QUOTE_STRING=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
74*b1cdbd2cSJim JagielskiASC_NUMBER=[SIGN]*(1*ASCDIGIT  *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
75*b1cdbd2cSJim JagielskiNUMBER=[SIGN]*(1*DIGIT  *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
76*b1cdbd2cSJim Jagielski
77*b1cdbd2cSJim Jagielski*/
78*b1cdbd2cSJim Jagielski
79*b1cdbd2cSJim Jagielski//============================================================================
80*b1cdbd2cSJim Jagielski
81*b1cdbd2cSJim Jagielski/**
82*b1cdbd2cSJim Jagielski    Character classification (upper, lower, digit, letter, number, ...)
83*b1cdbd2cSJim Jagielski    and generic Unicode enabled parser.
84*b1cdbd2cSJim Jagielski */
85*b1cdbd2cSJim Jagielski
86*b1cdbd2cSJim Jagielskipublished interface XCharacterClassification : com::sun::star::uno::XInterface
87*b1cdbd2cSJim Jagielski{
88*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
89*b1cdbd2cSJim Jagielski    /** Convert lower case alpha to upper case alpha, starting at
90*b1cdbd2cSJim Jagielski        position <em>nPos</em> for <em>nCount</em> code points.
91*b1cdbd2cSJim Jagielski     */
92*b1cdbd2cSJim Jagielski    string   toUpper( [in] string aText, [in] long nPos, [in] long nCount,
93*b1cdbd2cSJim Jagielski                      [in] com::sun::star::lang::Locale aLocale );
94*b1cdbd2cSJim Jagielski
95*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
96*b1cdbd2cSJim Jagielski    /** Convert upper case alpha to lower case alpha, starting at
97*b1cdbd2cSJim Jagielski        position <em>nPos</em> for <em>nCount</em> code points.
98*b1cdbd2cSJim Jagielski     */
99*b1cdbd2cSJim Jagielski    string   toLower( [in] string aText, [in] long nPos, [in] long nCount,
100*b1cdbd2cSJim Jagielski                      [in] com::sun::star::lang::Locale aLocale );
101*b1cdbd2cSJim Jagielski
102*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
103*b1cdbd2cSJim Jagielski    /** Convert to title case, starting at
104*b1cdbd2cSJim Jagielski        position <em>nPos</em> for <em>nCount</em> code points.
105*b1cdbd2cSJim Jagielski     */
106*b1cdbd2cSJim Jagielski    string   toTitle( [in] string aText, [in] long nPos, [in] long nCount,
107*b1cdbd2cSJim Jagielski                      [in] com::sun::star::lang::Locale aLocale );
108*b1cdbd2cSJim Jagielski
109*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
110*b1cdbd2cSJim Jagielski    /// Get <type>UnicodeType</type> of character at position <em>nPos</em>.
111*b1cdbd2cSJim Jagielski    short    getType( [in] string aText, [in] long nPos );
112*b1cdbd2cSJim Jagielski
113*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
114*b1cdbd2cSJim Jagielski    /** Get <type>DirectionProperty</type> of character at position
115*b1cdbd2cSJim Jagielski        <em>nPos</em>.
116*b1cdbd2cSJim Jagielski     */
117*b1cdbd2cSJim Jagielski    short    getCharacterDirection( [in] string aText, [in] long nPos );
118*b1cdbd2cSJim Jagielski
119*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
120*b1cdbd2cSJim Jagielski    /// Get <type>UnicodeScript</type> of character at position <em>nPos</em>.
121*b1cdbd2cSJim Jagielski    short    getScript( [in] string aText, [in] long nPos );
122*b1cdbd2cSJim Jagielski
123*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
124*b1cdbd2cSJim Jagielski    /// Get <type>KCharacterType</type> of character at position <em>nPos</em>.
125*b1cdbd2cSJim Jagielski    long getCharacterType( [in] string aText, [in] long nPos,
126*b1cdbd2cSJim Jagielski                           [in] com::sun::star::lang::Locale aLocale );
127*b1cdbd2cSJim Jagielski
128*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
129*b1cdbd2cSJim Jagielski    /** Get accumulated <type>KCharacterType</type>s of string starting
130*b1cdbd2cSJim Jagielski        at position <em>nPos</em> of length <em>nCount</em> code points.
131*b1cdbd2cSJim Jagielski
132*b1cdbd2cSJim Jagielski        @returns
133*b1cdbd2cSJim Jagielski            A number with appropriate flags set to indicate what type of
134*b1cdbd2cSJim Jagielski            characters the string contains, each flag value being one of
135*b1cdbd2cSJim Jagielski            KCharacterType values.
136*b1cdbd2cSJim Jagielski    */
137*b1cdbd2cSJim Jagielski    long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
138*b1cdbd2cSJim Jagielski                        [in] com::sun::star::lang::Locale aLocale );
139*b1cdbd2cSJim Jagielski
140*b1cdbd2cSJim Jagielski
141*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
142*b1cdbd2cSJim Jagielski    /**
143*b1cdbd2cSJim Jagielski        Parse a string for a token starting at position <em>nPos</em>.
144*b1cdbd2cSJim Jagielski
145*b1cdbd2cSJim Jagielski        <p> A name or identifier must match the
146*b1cdbd2cSJim Jagielski        <type>KParseTokens</type> criteria passed in
147*b1cdbd2cSJim Jagielski        <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
148*b1cdbd2cSJim Jagielski        additionally contain characters of
149*b1cdbd2cSJim Jagielski        <em>aUserDefinedCharactersStart</em> and/or
150*b1cdbd2cSJim Jagielski        <em>aUserDefinedCharactersCont</em>. </p>
151*b1cdbd2cSJim Jagielski
152*b1cdbd2cSJim Jagielski
153*b1cdbd2cSJim Jagielski        @returns
154*b1cdbd2cSJim Jagielski            A filled <type>ParseResult</type> structure. If no
155*b1cdbd2cSJim Jagielski            unambiguous token could be parsed,
156*b1cdbd2cSJim Jagielski            <member>ParseResult::TokenType</member> will be set to
157*b1cdbd2cSJim Jagielski            <b>0</b> (zero), other fields will contain the values parsed
158*b1cdbd2cSJim Jagielski            so far.
159*b1cdbd2cSJim Jagielski
160*b1cdbd2cSJim Jagielski            <p> If a token may represent either a numeric value or a
161*b1cdbd2cSJim Jagielski            name according to the passed Start/Cont-Flags/Chars, both
162*b1cdbd2cSJim Jagielski            <const>KParseType::ASC_NUMBER</const> (or
163*b1cdbd2cSJim Jagielski            <const>KParseType::UNI_NUMBER</const>) and
164*b1cdbd2cSJim Jagielski            <const>KParseType::IDENTNAME</const> are set in
165*b1cdbd2cSJim Jagielski            <member>ParseResult::TokenType</member>. </p>
166*b1cdbd2cSJim Jagielski
167*b1cdbd2cSJim Jagielski        @param  aText
168*b1cdbd2cSJim Jagielski            Text to be parsed.
169*b1cdbd2cSJim Jagielski
170*b1cdbd2cSJim Jagielski        @param  nPos
171*b1cdbd2cSJim Jagielski            Position where parsing starts.
172*b1cdbd2cSJim Jagielski
173*b1cdbd2cSJim Jagielski        @param  aLocale
174*b1cdbd2cSJim Jagielski            The locale, for example, for decimal and group separator or
175*b1cdbd2cSJim Jagielski            character type determination.
176*b1cdbd2cSJim Jagielski
177*b1cdbd2cSJim Jagielski        @param  nStartCharFlags
178*b1cdbd2cSJim Jagielski            A set of <type>KParseTokens</type> constants determining the
179*b1cdbd2cSJim Jagielski            allowed characters a name or identifier may start with.
180*b1cdbd2cSJim Jagielski
181*b1cdbd2cSJim Jagielski        @param  aUserDefinedCharactersStart
182*b1cdbd2cSJim Jagielski            A set of additionally allowed characters a name or
183*b1cdbd2cSJim Jagielski            identifier may start with.
184*b1cdbd2cSJim Jagielski
185*b1cdbd2cSJim Jagielski        @param  nContCharFlags
186*b1cdbd2cSJim Jagielski            A set of <type>KParseTokens</type> constants determining the
187*b1cdbd2cSJim Jagielski            allowed characters a name or identifier may continue with.
188*b1cdbd2cSJim Jagielski
189*b1cdbd2cSJim Jagielski        @param  aUserDefinedCharactersCont
190*b1cdbd2cSJim Jagielski            A set of additionally allowed characters a name or
191*b1cdbd2cSJim Jagielski            identifier may continue with.
192*b1cdbd2cSJim Jagielski
193*b1cdbd2cSJim Jagielski        @example:C++
194*b1cdbd2cSJim Jagielski        <listing>
195*b1cdbd2cSJim Jagielski            using namespace ::com::sun::star::i18n;
196*b1cdbd2cSJim Jagielski            // First character of an identifier may be any alphabetic or underscore.
197*b1cdbd2cSJim Jagielski            sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
198*b1cdbd2cSJim Jagielski            // Continuing characters may be any alphanumeric or underscore or dot.
199*b1cdbd2cSJim Jagielski            sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
200*b1cdbd2cSJim Jagielski            // No further characters assumed to be contained in an identifier.
201*b1cdbd2cSJim Jagielski            String aEmptyString;
202*b1cdbd2cSJim Jagielski            // Parse any token.
203*b1cdbd2cSJim Jagielski            ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
204*b1cdbd2cSJim Jagielski                nStartFlags, aEmptyString, nContFlags, aEmptyString );
205*b1cdbd2cSJim Jagielski            // Get parsed token.
206*b1cdbd2cSJim Jagielski            if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
207*b1cdbd2cSJim Jagielski                fValue = rRes.Value;
208*b1cdbd2cSJim Jagielski            if ( rRes.TokenType & KParseType::IDENTNAME )
209*b1cdbd2cSJim Jagielski                aName = aText.Copy( nPos, rRes.EndPos - nPos );
210*b1cdbd2cSJim Jagielski            else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
211*b1cdbd2cSJim Jagielski                aName = rRes.DequotedNameOrString;
212*b1cdbd2cSJim Jagielski            else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
213*b1cdbd2cSJim Jagielski                aString = rRes.DequotedNameOrString;
214*b1cdbd2cSJim Jagielski            else if ( rRes.TokenType & KParseType::BOOLEAN )
215*b1cdbd2cSJim Jagielski                aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
216*b1cdbd2cSJim Jagielski            else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
217*b1cdbd2cSJim Jagielski                aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
218*b1cdbd2cSJim Jagielski        </listing>
219*b1cdbd2cSJim Jagielski     */
220*b1cdbd2cSJim Jagielski
221*b1cdbd2cSJim Jagielski    ParseResult parseAnyToken(
222*b1cdbd2cSJim Jagielski                            [in] string aText,
223*b1cdbd2cSJim Jagielski                            [in] long nPos,
224*b1cdbd2cSJim Jagielski                            [in] com::sun::star::lang::Locale aLocale,
225*b1cdbd2cSJim Jagielski                            [in] long nStartCharFlags,
226*b1cdbd2cSJim Jagielski                            [in] string aUserDefinedCharactersStart,
227*b1cdbd2cSJim Jagielski                            [in] long nContCharFlags,
228*b1cdbd2cSJim Jagielski                            [in] string aUserDefinedCharactersCont
229*b1cdbd2cSJim Jagielski                            );
230*b1cdbd2cSJim Jagielski
231*b1cdbd2cSJim Jagielski    //------------------------------------------------------------------------
232*b1cdbd2cSJim Jagielski    /**
233*b1cdbd2cSJim Jagielski        Parse a string for a token of type <em>nTokenType</em> starting
234*b1cdbd2cSJim Jagielski        at position <em>nPos</em>.
235*b1cdbd2cSJim Jagielski
236*b1cdbd2cSJim Jagielski        <p> Other parameters are the same as in
237*b1cdbd2cSJim Jagielski        <member>parseAnyToken</member>. If the actual token does not
238*b1cdbd2cSJim Jagielski        match the passed <em>nTokenType</em> a
239*b1cdbd2cSJim Jagielski        <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
240*b1cdbd2cSJim Jagielski        is returned. </p>
241*b1cdbd2cSJim Jagielski
242*b1cdbd2cSJim Jagielski        @param  nTokenType
243*b1cdbd2cSJim Jagielski            One or more of the <type>KParseType</type> constants.
244*b1cdbd2cSJim Jagielski
245*b1cdbd2cSJim Jagielski        @example:C++
246*b1cdbd2cSJim Jagielski        <listing>
247*b1cdbd2cSJim Jagielski            // Determine if a given name is a valid name (not quoted) and contains
248*b1cdbd2cSJim Jagielski            // only allowed characters.
249*b1cdbd2cSJim Jagielski            using namespace ::com::sun::star::i18n;
250*b1cdbd2cSJim Jagielski            // First character of an identifier may be any alphanumeric or underscore.
251*b1cdbd2cSJim Jagielski            sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
252*b1cdbd2cSJim Jagielski            // No further characters assumed to be contained in an identifier start.
253*b1cdbd2cSJim Jagielski            String aEmptyString;
254*b1cdbd2cSJim Jagielski            // Continuing characters may be any alphanumeric or underscore.
255*b1cdbd2cSJim Jagielski            sal_Int32 nContFlags = nStartFlags;
256*b1cdbd2cSJim Jagielski            // Additionally, continuing characters may contain a blank.
257*b1cdbd2cSJim Jagielski            String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
258*b1cdbd2cSJim Jagielski            // Parse predefined (must be an IDENTNAME) token.
259*b1cdbd2cSJim Jagielski            ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
260*b1cdbd2cSJim Jagielski                nStartFlags, aEmptyString, nContFlags, aContChars );
261*b1cdbd2cSJim Jagielski            // Test if it is an identifier name and if it only is one
262*b1cdbd2cSJim Jagielski            // and nothing else follows it.
263*b1cdbd2cSJim Jagielski            bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
264*b1cdbd2cSJim Jagielski        </listing>
265*b1cdbd2cSJim Jagielski     */
266*b1cdbd2cSJim Jagielski
267*b1cdbd2cSJim Jagielski    ParseResult parsePredefinedToken(
268*b1cdbd2cSJim Jagielski                            [in] long nTokenType,
269*b1cdbd2cSJim Jagielski                            [in] string aText,
270*b1cdbd2cSJim Jagielski                            [in] long nPos,
271*b1cdbd2cSJim Jagielski                            [in] com::sun::star::lang::Locale aLocale,
272*b1cdbd2cSJim Jagielski                            [in] long nStartCharFlags,
273*b1cdbd2cSJim Jagielski                            [in] string aUserDefinedCharactersStart,
274*b1cdbd2cSJim Jagielski                            [in] long nContCharFlags,
275*b1cdbd2cSJim Jagielski                            [in] string aUserDefinedCharactersCont
276*b1cdbd2cSJim Jagielski                            );
277*b1cdbd2cSJim Jagielski};
278*b1cdbd2cSJim Jagielski
279*b1cdbd2cSJim Jagielski//=============================================================================
280*b1cdbd2cSJim Jagielski}; }; }; };
281*b1cdbd2cSJim Jagielski
282*b1cdbd2cSJim Jagielski#endif
283