/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/



#ifndef __com_sun_star_i18n_XCharacterClassification_idl__
#define __com_sun_star_i18n_XCharacterClassification_idl__

#include <com/sun/star/i18n/ParseResult.idl>

#ifndef __com_sun_star_lang_Locale_idl__
#include <com/sun/star/lang/Locale.idl>
#endif
#ifndef __com_sun_star_uno_XInterface_idl__
#include <com/sun/star/uno/XInterface.idl>
#endif

//============================================================================

module com { module sun { module star { module i18n {

//============================================================================

/*

Possible tokens to be parsed with parse...Token():

UPASCALPHA=[A-Z]
LOASCALPHA=[a-z]
ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
ASCDIGIT=[0-9]
ASC_UNDERSCORE='_'
ASC_SPACE=' '
ASC_HT='\x09'
ASC_VT='\x0b'
ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
ASC_DBL_QUOTE=\"
ASC_QUOTE=\'
UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)

ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
ALNUM=ALPHA|DIGIT
CHAR=anycharacter
WS=isWhiteSpace()
SIGN='+'|'-'
DECSEP=<locale dependent decimal separator>
GRPSEP=<locale dependent thousand separator>
EXPONENT=(E|e)[SIGN]1*ASCDIGIT

NOTE(review): DEFCHARS is not defined in this list; presumably it stands for
the user defined characters passed as aUserDefinedCharactersStart and
aUserDefinedCharactersCont -- TODO confirm.

IDENTIFIER=ALPHA *ALNUM
UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
ANY_NAME=1*(ALNUM|DEFCHARS)
SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
DOUBLE_QUOTE_STRING=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
ASC_NUMBER=[SIGN]*(1*ASCDIGIT *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]

*/

//============================================================================

/**
    Character classification (upper, lower, digit, letter, number, ...)
    and generic Unicode enabled parser.
 */

published interface XCharacterClassification : com::sun::star::uno::XInterface
{
    //------------------------------------------------------------------------
    /** Convert lower case alpha to upper case alpha, starting at
        position <em>nPos</em> for <em>nCount</em> code points.
     */
    string  toUpper( [in] string aText, [in] long nPos, [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Convert upper case alpha to lower case alpha, starting at
        position <em>nPos</em> for <em>nCount</em> code points.
     */
    string  toLower( [in] string aText, [in] long nPos, [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Convert to title case, starting at
        position <em>nPos</em> for <em>nCount</em> code points.
     */
    string  toTitle( [in] string aText, [in] long nPos, [in] long nCount,
                     [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /// Get <type>UnicodeType</type> of character at position <em>nPos</em>.
    short   getType( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /** Get <type>DirectionProperty</type> of character at position
        <em>nPos</em>.
     */
    short   getCharacterDirection( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /// Get <type>UnicodeScript</type> of character at position <em>nPos</em>.
    short   getScript( [in] string aText, [in] long nPos );

    //------------------------------------------------------------------------
    /// Get <type>KCharacterType</type> of character at position <em>nPos</em>.
    long    getCharacterType( [in] string aText, [in] long nPos,
                              [in] com::sun::star::lang::Locale aLocale );

    //------------------------------------------------------------------------
    /** Get accumulated <type>KCharacterType</type>s of string starting
        at position <em>nPos</em> of length <em>nCount</em> code points.

        @returns
            A number with appropriate flags set to indicate what type of
            characters the string contains, each flag value being one of
            KCharacterType values.
     */
    long    getStringType( [in] string aText, [in] long nPos, [in] long nCount,
                           [in] com::sun::star::lang::Locale aLocale );


    //------------------------------------------------------------------------
    /**
        Parse a string for a token starting at position <em>nPos</em>.

        <p> A name or identifier must match the
        <type>KParseTokens</type> criteria passed in
        <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
        additionally contain characters of
        <em>aUserDefinedCharactersStart</em> and/or
        <em>aUserDefinedCharactersCont</em>. </p>


        @returns
            A filled <type>ParseResult</type> structure. If no
            unambiguous token could be parsed,
            <member>ParseResult::TokenType</member> will be set to
            <b>0</b> (zero), other fields will contain the values parsed
            so far.

            <p> If a token may represent either a numeric value or a
            name according to the passed Start/Cont-Flags/Chars, both
            <const>KParseType::ASC_NUMBER</const> (or
            <const>KParseType::UNI_NUMBER</const>) and
            <const>KParseType::IDENTNAME</const> are set in
            <member>ParseResult::TokenType</member>. </p>

        @param aText
            Text to be parsed.

        @param nPos
            Position where parsing starts.

        @param aLocale
            The locale, for example, for decimal and group separator or
            character type determination.

        @param nStartCharFlags
            A set of <type>KParseTokens</type> constants determining the
            allowed characters a name or identifier may start with.

        @param aUserDefinedCharactersStart
            A set of additionally allowed characters a name or
            identifier may start with.

        @param nContCharFlags
            A set of <type>KParseTokens</type> constants determining the
            allowed characters a name or identifier may continue with.

        @param aUserDefinedCharactersCont
            A set of additionally allowed characters a name or
            identifier may continue with.

        @example:C++
        <listing>
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphabetic or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
        // Continuing characters may be any alphanumeric or underscore or dot.
        sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
        // No further characters assumed to be contained in an identifier
        String aEmptyString;
        // Parse any token.
        ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
            nStartFlags, aEmptyString, nContFlags, aEmptyString );
        // Get parsed token.
        if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
            fValue = rRes.Value;
        if ( rRes.TokenType & KParseType::IDENTNAME )
            aName = aText.Copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
            aName = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
            aString = rRes.DequotedNameOrString;
        else if ( rRes.TokenType & KParseType::BOOLEAN )
            aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
        else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
            aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
        </listing>
     */

    ParseResult parseAnyToken(
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );

    //------------------------------------------------------------------------
    /**
        Parse a string for a token of type <em>nTokenType</em> starting
        at position <em>nPos</em>.

        <p> Other parameters are the same as in
        <member>parseAnyToken</member>. If the actual token does not
        match the passed <em>nTokenType</em> a
        <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
        is returned. </p>

        @param nTokenType
            One or more of the <type>KParseType</type> constants.

        @example:C++
        <listing>
        // Determine if a given name is a valid name (not quoted) and contains
        // only allowed characters.
        using namespace ::com::sun::star::i18n;
        // First character of an identifier may be any alphanumeric or underscore.
        sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
        // No further characters assumed to be contained in an identifier start.
        String aEmptyString;
        // Continuing characters may be any alphanumeric or underscore.
        sal_Int32 nContFlags = nStartFlags;
        // Additionally, continuing characters may contain a blank.
        String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
        // Parse predefined (must be an IDENTNAME) token.
        ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
            nStartFlags, aEmptyString, nContFlags, aContChars );
        // Test if it is an identifier name and if it is the only token,
        // i.e. nothing else follows it.
        bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
        </listing>
     */

    ParseResult parsePredefinedToken(
                            [in] long nTokenType,
                            [in] string aText,
                            [in] long nPos,
                            [in] com::sun::star::lang::Locale aLocale,
                            [in] long nStartCharFlags,
                            [in] string aUserDefinedCharactersStart,
                            [in] long nContCharFlags,
                            [in] string aUserDefinedCharactersCont
                            );
};

//=============================================================================
}; }; }; };

#endif