1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 #ifndef __com_sun_star_i18n_XCharacterClassification_idl__
25 #define __com_sun_star_i18n_XCharacterClassification_idl__
26 
27 #include <com/sun/star/i18n/ParseResult.idl>
28 
29 #ifndef __com_sun_star_lang_Locale_idl__
30 #include <com/sun/star/lang/Locale.idl>
31 #endif
32 #ifndef __com_sun_star_uno_XInterface_idl__
33 #include <com/sun/star/uno/XInterface.idl>
34 #endif
35 
36 //============================================================================
37 
38 module com { module sun { module star { module i18n {
39 
40 //============================================================================
41 
42 /*
43 
44 Possible tokens to be parsed with parse...Token():
45 
46 UPASCALPHA=[A-Z]
47 LOASCALPHA=[a-z]
48 ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
49 ASCDIGIT=[0-9]
50 ASC_UNDERSCORE='_'
51 ASC_SPACE=' '
52 ASC_HT='\0x9'
53 ASC_VT='\0xb'
54 ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
55 ASC_DBL_QUOTE=\"
56 ASC_QUOTE=\'
57 UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
58 
59 ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
60 ALNUM=ALPHA|DIGIT
61 CHAR=anycharacter
62 WS=isWhiteSpace()
63 SIGN='+'|'-'
64 DECSEP=<locale dependent decimal separator>
65 GRPSEP=<locale dependent thousand separator>
66 EXPONENT=(E|e)[SIGN]1*ASCDIGIT
67 
68 IDENTIFIER=ALPHA *ALNUM
69 UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
70 ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
71 ANY_NAME=1*(ALNUM|DEFCHARS)
72 SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
73 DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
74 ASC_NUMBER=[SIGN]*(1*ASCDIGIT *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
75 NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
76 
77 */
78 
79 //============================================================================
80 
81 /**
82     Character classification (upper, lower, digit, letter, number, ...)
83     and generic Unicode enabled parser.
84  */
85 
86 published interface XCharacterClassification : com::sun::star::uno::XInterface
87 {
88     //------------------------------------------------------------------------
89     /** Convert lower case alpha to upper case alpha, starting at
90         position <em>nPos</em> for <em>nCount</em> code points.
91      */
92     string   toUpper( [in] string aText, [in] long nPos, [in] long nCount,
93                       [in] com::sun::star::lang::Locale aLocale );
94 
95     //------------------------------------------------------------------------
96     /** Convert upper case alpha to lower case alpha, starting at
97         position <em>nPos</em> for <em>nCount</em> code points.
98      */
99     string   toLower( [in] string aText, [in] long nPos, [in] long nCount,
100                       [in] com::sun::star::lang::Locale aLocale );
101 
102     //------------------------------------------------------------------------
103     /** Convert to title case, starting at
104         position <em>nPos</em> for <em>nCount</em> code points.
105      */
106     string   toTitle( [in] string aText, [in] long nPos, [in] long nCount,
107                       [in] com::sun::star::lang::Locale aLocale );
108 
109     //------------------------------------------------------------------------
110     /// Get <type>UnicodeType</type> of character at position <em>nPos</em>.
111     short    getType( [in] string aText, [in] long nPos );
112 
113     //------------------------------------------------------------------------
114     /** Get <type>DirectionProperty</type> of character at position
115         <em>nPos</em>.
116      */
117     short    getCharacterDirection( [in] string aText, [in] long nPos );
118 
119     //------------------------------------------------------------------------
120     /// Get <type>UnicodeScript</type> of character at position <em>nPos</em>.
121     short    getScript( [in] string aText, [in] long nPos );
122 
123     //------------------------------------------------------------------------
124     /// Get <type>KCharacterType</type> of character at position <em>nPos</em>.
125     long getCharacterType( [in] string aText, [in] long nPos,
126                            [in] com::sun::star::lang::Locale aLocale );
127 
128     //------------------------------------------------------------------------
129     /** Get accumulated <type>KCharacterType</type>s of string starting
130         at position <em>nPos</em> of length <em>nCount</em> code points.
131 
132         @returns
133             A number with appropriate flags set to indicate what type of
134             characters the string contains, each flag value being one of
135             KCharacterType values.
136     */
137     long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
138                         [in] com::sun::star::lang::Locale aLocale );
139 
140 
141     //------------------------------------------------------------------------
142     /**
143         Parse a string for a token starting at position <em>nPos</em>.
144 
145         <p> A name or identifier must match the
146         <type>KParseTokens</type> criteria passed in
147         <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
148         additionally contain characters of
149         <em>aUserDefinedCharactersStart</em> and/or
150         <em>aUserDefinedCharactersCont</em>. </p>
151 
152 
153         @returns
154             A filled <type>ParseResult</type> structure. If no
155             unambiguous token could be parsed,
156             <member>ParseResult::TokenType</member> will be set to
157             <b>0</b> (zero), other fields will contain the values parsed
158             so far.
159 
160             <p> If a token may represent either a numeric value or a
161             name according to the passed Start/Cont-Flags/Chars, both
162             <const>KParseType::ASC_NUMBER</const> (or
163             <const>KParseType::UNI_NUMBER</const>) and
164             <const>KParseType::IDENTNAME</const> are set in
165             <member>ParseResult::TokenType</member>. </p>
166 
167         @param  aText
168             Text to be parsed.
169 
170         @param  nPos
171             Position where parsing starts.
172 
173         @param  aLocale
174             The locale, for example, for decimal and group separator or
175             character type determination.
176 
177         @param  nStartCharFlags
178             A set of <type>KParseTokens</type> constants determining the
179             allowed characters a name or identifier may start with.
180 
181         @param  aUserDefinedCharactersStart
182             A set of additionally allowed characters a name or
183             identifier may start with.
184 
185         @param  nContCharFlags
186             A set of <type>KParseTokens</type> constants determining the
187             allowed characters a name or identifier may continue with.
188 
189         @param  aUserDefinedCharactersCont
190             A set of additionally allowed characters a name or
191             identifier may continue with.
192 
193         @example:C++
194         <listing>
195             using namespace ::com::sun::star::i18n;
196             // First character of an identifier may be any alphabetic or underscore.
197             sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
198             // Continuing characters may be any alphanumeric or underscore or dot.
199             sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
200             // No further characters assumed to be contained in an identifier
201             String aEmptyString;
202             // Parse any token.
203             ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
204                 nStartFlags, aEmptyString, nContFlags, aEmptyString );
205             // Get parsed token.
206             if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
207                 fValue = rRes.Value;
208             if ( rRes.TokenType & KParseType::IDENTNAME )
209                 aName = aText.Copy( nPos, rRes.EndPos - nPos );
210             else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
211                 aName = rRes.DequotedNameOrString;
212             else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
213                 aString = rRes.DequotedNameOrString;
214             else if ( rRes.TokenType & KParseType::BOOLEAN )
215                 aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
216             else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
217                 aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
218         </listing>
219      */
220 
221     ParseResult parseAnyToken(
222                             [in] string aText,
223                             [in] long nPos,
224                             [in] com::sun::star::lang::Locale aLocale,
225                             [in] long nStartCharFlags,
226                             [in] string aUserDefinedCharactersStart,
227                             [in] long nContCharFlags,
228                             [in] string aUserDefinedCharactersCont
229                             );
230 
231     //------------------------------------------------------------------------
232     /**
233         Parse a string for a token of type <em>nTokenType</em> starting
234         at position <em>nPos</em>.
235 
236         <p> Other parameters are the same as in
237         <member>parseAnyToken</member>. If the actual token does not
238         match the passed <em>nTokenType</em> a
239         <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
240         is returned. </p>
241 
242         @param  nTokenType
243             One or more of the <type>KParseType</type> constants.
244 
245         @example:C++
246         <listing>
247             // Determine if a given name is a valid name (not quoted) and contains
248             // only allowed characters.
249             using namespace ::com::sun::star::i18n;
250             // First character of an identifier may be any alphanumeric or underscore.
251             sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
252             // No further characters assumed to be contained in an identifier start.
253             String aEmptyString;
254             // Continuing characters may be any alphanumeric or underscore.
255             sal_Int32 nContFlags = nStartFlags;
256             // Additionally, continuing characters may contain a blank.
257             String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
258             // Parse predefined (must be an IDENTNAME) token.
259             ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
260                 nStartFlags, aEmptyString, nContFlags, aContChars );
261             // Test if it is an identifier name and if it is the only
262             // token, i.e. nothing else follows it.
263             bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
264         </listing>
265      */
266 
267     ParseResult parsePredefinedToken(
268                             [in] long nTokenType,
269                             [in] string aText,
270                             [in] long nPos,
271                             [in] com::sun::star::lang::Locale aLocale,
272                             [in] long nStartCharFlags,
273                             [in] string aUserDefinedCharactersStart,
274                             [in] long nContCharFlags,
275                             [in] string aUserDefinedCharactersCont
276                             );
277 };
278 
279 //=============================================================================
280 
281 }; }; }; };
282 
283 #endif
284