1 /************************************************************** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, 14 * software distributed under the License is distributed on an 15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 * KIND, either express or implied. See the License for the 17 * specific language governing permissions and limitations 18 * under the License. 19 * 20 *************************************************************/ 21 22 23 24 // MARKER(update_precomp.py): autogen include statement, do not remove 25 #include "precompiled_svtools.hxx" 26 27 #include <svtools/syntaxhighlight.hxx> 28 29 #include <unotools/charclass.hxx> 30 #include <tools/debug.hxx> 31 32 33 // ########################################################################## 34 // ATTENTION: all these words needs to be in small caps 35 // ########################################################################## 36 static const char* strListBasicKeyWords[] = { 37 "access", 38 "alias", 39 "and", 40 "any", 41 "append", 42 "as", 43 "base", 44 "binary", 45 "boolean", 46 "byref", 47 "byte", 48 "byval", 49 "call", 50 "case", 51 "cdecl", 52 "classmodule", 53 "close", 54 "compare", 55 "compatible", 56 "const", 57 "currency", 58 "date", 59 "declare", 60 "defbool", 61 "defcur", 62 "defdate", 63 "defdbl", 64 "deferr", 65 "defint", 66 "deflng", 67 "defobj", 68 "defsng", 69 "defstr", 70 "defvar", 71 "dim", 72 "do", 73 "double", 74 "each", 75 "else", 76 "elseif", 77 "end", 78 "end enum", 79 "end function", 80 "end if", 81 "end select", 82 "end sub", 83 "end type", 84 "endif", 85 "enum", 86 "eqv", 87 "erase", 88 "error", 89 "exit", 90 "explicit", 91 "for", 92 "function", 93 "get", 94 "global", 95 "gosub", 96 "goto", 97 "if", 98 "imp", 99 "implements", 100 "in", 101 "input", 102 "integer", 103 "is", 104 "let", 105 "lib", 106 "like", 107 "line", 108 "line input", 109 "local", 110 "lock", 111 "long", 112 "loop", 113 "lprint", 114 "lset", 115 "mod", 116 "name", 117 "new", 118 "next", 119 "not", 120 "object", 121 "on", 122 "open", 123 "option", 124 "optional", 125 "or", 126 "output", 127 "preserve", 128 "print", 129 "private", 130 "property", 131 "public", 132 "random", 133 "read", 134 "redim", 135 "rem", 136 "resume", 137 "return", 138 "rset", 139 "select", 140 "set", 141 "shared", 142 "single", 143 "static", 144 "step", 145 "stop", 146 "string", 147 "sub", 148 "system", 149 "text", 150 "then", 151 "to", 152 "type", 153 "typeof", 154 "until", 155 "variant", 156 "wend", 157 "while", 158 "with", 159 "write", 160 "xor" 161 }; 162 163 164 static const char* strListSqlKeyWords[] = { 165 "all", 166 "and", 167 "any", 168 "as", 169 "asc", 170 "avg", 171 "between", 172 "by", 173 "cast", 174 "corresponding", 175 "count", 176 "create", 177 "cross", 178 "delete", 179 "desc", 180 "distinct", 181 "drop", 182 "escape", 183 "except", 184 "exists", 185 "false", 186 "from", 187 "full", 188 "global", 189 "group", 190 "having", 191 "in", 192 "inner", 193 "insert", 194 "intersect", 195 "into", 196 "is", 197 "join", 198 "left", 199 "like", 200 "local", 201 "match", 202 "max", 203 "min", 204 "natural", 205 "not", 206 "null", 207 "on", 208 "or", 209 "order", 210 "outer", 211 "right", 212 "select", 213 "set", 214 "some", 215 "sum", 216 "table", 217 "temporary", 218 "true", 219 "union", 220 "unique", 221 "unknown", 222 "update", 223 "using", 224 "values", 225 "where" 226 }; 227 228 229 extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 ) 230 { 231 return strcmp( (char *)arg1, *(char **)arg2 ); 232 } 233 234 235 class LetterTable 236 { 237 bool IsLetterTab[256]; 238 239 public: 240 LetterTable( void ); 241 242 inline bool isLetter( sal_Unicode c ) 243 { 244 bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c ); 245 return bRet; 246 } 247 bool isLetterUnicode( sal_Unicode c ); 248 }; 249 250 class BasicSimpleCharClass 251 { 252 static LetterTable aLetterTable; 253 254 public: 255 static sal_Bool isAlpha( sal_Unicode c, bool bCompatible ) 256 { 257 sal_Bool bRet = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') 258 || (bCompatible && aLetterTable.isLetter( c )); 259 return bRet; 260 } 261 262 static sal_Bool isDigit( sal_Unicode c ) 263 { 264 sal_Bool bRet = (c >= '0' && c <= '9'); 265 return bRet; 266 } 267 268 static sal_Bool isAlphaNumeric( sal_Unicode c, bool bCompatible ) 269 { 270 sal_Bool bRet = isDigit( c ) || isAlpha( c, bCompatible ); 271 return bRet; 272 } 273 }; 274 275 LetterTable BasicSimpleCharClass::aLetterTable; 276 277 LetterTable::LetterTable( void ) 278 { 279 for( int i = 0 ; i < 256 ; ++i ) 280 IsLetterTab[i] = false; 281 282 IsLetterTab[0xC0] = true; // ?, CAPITAL LETTER A WITH GRAVE ACCENT 283 IsLetterTab[0xC1] = true; // ?, CAPITAL LETTER A WITH ACUTE ACCENT 284 IsLetterTab[0xC2] = true; // ?, CAPITAL LETTER A WITH CIRCUMFLEX ACCENT 285 IsLetterTab[0xC3] = true; // ?, CAPITAL LETTER A WITH TILDE 286 IsLetterTab[0xC4] = true; // ?, CAPITAL LETTER A WITH DIAERESIS 287 IsLetterTab[0xC5] = true; // ?, CAPITAL LETTER A WITH RING ABOVE 288 IsLetterTab[0xC6] = true; // ?, CAPITAL LIGATURE AE 289 IsLetterTab[0xC7] = true; // ?, CAPITAL LETTER C WITH CEDILLA 290 IsLetterTab[0xC8] = true; // ?, CAPITAL LETTER E WITH GRAVE ACCENT 291 IsLetterTab[0xC9] = true; // ?, CAPITAL LETTER E WITH ACUTE ACCENT 292 IsLetterTab[0xCA] = true; // ?, CAPITAL LETTER E WITH CIRCUMFLEX ACCENT 293 IsLetterTab[0xCB] = true; // ?, CAPITAL LETTER E WITH DIAERESIS 294 IsLetterTab[0xCC] = true; // ?, CAPITAL LETTER I WITH GRAVE ACCENT 295 IsLetterTab[0xCD] = true; // ?, CAPITAL LETTER I WITH ACUTE ACCENT 296 IsLetterTab[0xCE] = true; // ?, CAPITAL LETTER I WITH CIRCUMFLEX ACCENT 297 IsLetterTab[0xCF] = true; // ?, CAPITAL LETTER I WITH DIAERESIS 298 IsLetterTab[0xD0] = true; // ?, CAPITAL LETTER ETH 299 IsLetterTab[0xD1] = true; // ?, CAPITAL LETTER N WITH TILDE 300 IsLetterTab[0xD2] = true; // ?, CAPITAL LETTER O WITH GRAVE ACCENT 301 IsLetterTab[0xD3] = true; // ?, CAPITAL LETTER O WITH ACUTE ACCENT 302 IsLetterTab[0xD4] = true; // ?, CAPITAL LETTER O WITH CIRCUMFLEX ACCENT 303 IsLetterTab[0xD5] = true; // ?, CAPITAL LETTER O WITH TILDE 304 IsLetterTab[0xD6] = true; // ?, CAPITAL LETTER O WITH DIAERESIS 305 IsLetterTab[0xD8] = true; // ?, CAPITAL LETTER O WITH STROKE 306 IsLetterTab[0xD9] = true; // ?, CAPITAL LETTER U WITH GRAVE ACCENT 307 IsLetterTab[0xDA] = true; // ?, CAPITAL LETTER U WITH ACUTE ACCENT 308 IsLetterTab[0xDB] = true; // ?, CAPITAL LETTER U WITH CIRCUMFLEX ACCENT 309 IsLetterTab[0xDC] = true; // ?, CAPITAL LETTER U WITH DIAERESIS 310 IsLetterTab[0xDD] = true; // ?, CAPITAL LETTER Y WITH ACUTE ACCENT 311 IsLetterTab[0xDE] = true; // ?, CAPITAL LETTER THORN 312 IsLetterTab[0xDF] = true; // ?, SMALL LETTER SHARP S 313 IsLetterTab[0xE0] = true; // ?, SMALL LETTER A WITH GRAVE ACCENT 314 IsLetterTab[0xE1] = true; // ?, SMALL LETTER A WITH ACUTE ACCENT 315 IsLetterTab[0xE2] = true; // ?, SMALL LETTER A WITH CIRCUMFLEX ACCENT 316 IsLetterTab[0xE3] = true; // ?, SMALL LETTER A WITH TILDE 317 IsLetterTab[0xE4] = true; // ?, SMALL LETTER A WITH DIAERESIS 318 IsLetterTab[0xE5] = true; // ?, SMALL LETTER A WITH RING ABOVE 319 IsLetterTab[0xE6] = true; // ?, SMALL LIGATURE AE 320 IsLetterTab[0xE7] = true; // ?, SMALL LETTER C WITH CEDILLA 321 IsLetterTab[0xE8] = true; // ?, SMALL LETTER E WITH GRAVE ACCENT 322 IsLetterTab[0xE9] = true; // ?, SMALL LETTER E WITH ACUTE ACCENT 323 IsLetterTab[0xEA] = true; // ?, SMALL LETTER E WITH CIRCUMFLEX ACCENT 324 IsLetterTab[0xEB] = true; // ?, SMALL LETTER E WITH DIAERESIS 325 IsLetterTab[0xEC] = true; // ?, SMALL LETTER I WITH GRAVE ACCENT 326 IsLetterTab[0xED] = true; // ?, SMALL LETTER I WITH ACUTE ACCENT 327 IsLetterTab[0xEE] = true; // ?, SMALL LETTER I WITH CIRCUMFLEX ACCENT 328 IsLetterTab[0xEF] = true; // ?, SMALL LETTER I WITH DIAERESIS 329 IsLetterTab[0xF0] = true; // ?, SMALL LETTER ETH 330 IsLetterTab[0xF1] = true; // ?, SMALL LETTER N WITH TILDE 331 IsLetterTab[0xF2] = true; // ?, SMALL LETTER O WITH GRAVE ACCENT 332 IsLetterTab[0xF3] = true; // ?, SMALL LETTER O WITH ACUTE ACCENT 333 IsLetterTab[0xF4] = true; // ?, SMALL LETTER O WITH CIRCUMFLEX ACCENT 334 IsLetterTab[0xF5] = true; // ?, SMALL LETTER O WITH TILDE 335 IsLetterTab[0xF6] = true; // ?, SMALL LETTER O WITH DIAERESIS 336 IsLetterTab[0xF8] = true; // ?, SMALL LETTER O WITH OBLIQUE BAR 337 IsLetterTab[0xF9] = true; // ?, SMALL LETTER U WITH GRAVE ACCENT 338 IsLetterTab[0xFA] = true; // ?, SMALL LETTER U WITH ACUTE ACCENT 339 IsLetterTab[0xFB] = true; // ?, SMALL LETTER U WITH CIRCUMFLEX ACCENT 340 IsLetterTab[0xFC] = true; // ?, SMALL LETTER U WITH DIAERESIS 341 IsLetterTab[0xFD] = true; // ?, SMALL LETTER Y WITH ACUTE ACCENT 342 IsLetterTab[0xFE] = true; // ?, SMALL LETTER THORN 343 IsLetterTab[0xFF] = true; // � , SMALL LETTER Y WITH DIAERESIS 344 } 345 346 bool LetterTable::isLetterUnicode( sal_Unicode c ) 347 { 348 static CharClass* pCharClass = NULL; 349 if( pCharClass == NULL ) 350 pCharClass = new CharClass( Application::GetSettings().GetLocale() ); 351 String aStr( c ); 352 bool bRet = pCharClass->isLetter( aStr, 0 ); 353 return bRet; 354 } 355 356 // Hilfsfunktion: Zeichen-Flag Testen 357 sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags ) 358 { 359 bool bRet = false; 360 if( c != 0 && c <= 255 ) 361 { 362 bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 ); 363 } 364 else if( c > 255 ) 365 { 366 bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0 367 ? BasicSimpleCharClass::isAlpha( c, true ) : false; 368 } 369 return bRet; 370 } 371 372 void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount ) 373 { 374 ppListKeyWords = ppKeyWords; 375 nKeyWordCount = nCount; 376 } 377 378 // Neues Token holen 379 sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType, 380 /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos ) 381 { 382 reType = TT_UNKNOWN; 383 384 // Position merken 385 rpStartPos = mpActualPos; 386 387 // Zeichen untersuchen 388 sal_Unicode c = peekChar(); 389 if( c == CHAR_EOF ) 390 return sal_False; 391 392 // Zeichen lesen 393 getChar(); 394 395 //*** Alle Moeglichkeiten durchgehen *** 396 // Space? 397 if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) ) 398 { 399 while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True ) 400 getChar(); 401 402 reType = TT_WHITESPACE; 403 } 404 405 // Identifier? 406 else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) ) 407 { 408 sal_Bool bIdentifierChar; 409 do 410 { 411 // Naechstes Zeichen holen 412 c = peekChar(); 413 bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER ); 414 if( bIdentifierChar ) 415 getChar(); 416 } 417 while( bIdentifierChar ); 418 419 reType = TT_IDENTIFIER; 420 421 // Schluesselwort-Tabelle 422 if (ppListKeyWords != NULL) 423 { 424 int nCount = mpActualPos - rpStartPos; 425 426 // No keyword if string contains char > 255 427 bool bCanBeKeyword = true; 428 for( int i = 0 ; i < nCount ; i++ ) 429 { 430 if( rpStartPos[i] > 255 ) 431 { 432 bCanBeKeyword = false; 433 break; 434 } 435 } 436 437 if( bCanBeKeyword ) 438 { 439 String aKWString(rpStartPos, sal::static_int_cast< xub_StrLen >(nCount) ); 440 ByteString aByteStr( aKWString, RTL_TEXTENCODING_ASCII_US ); 441 aByteStr.ToLowerAscii(); 442 if ( bsearch( aByteStr.GetBuffer(), ppListKeyWords, nKeyWordCount, sizeof( char* ), 443 compare_strings ) ) 444 { 445 reType = TT_KEYWORDS; 446 447 if ( aByteStr.Equals( "rem" ) ) 448 { 449 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 450 sal_Unicode cPeek = peekChar(); 451 while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False ) 452 { 453 c = getChar(); 454 cPeek = peekChar(); 455 } 456 457 reType = TT_COMMENT; 458 } 459 } 460 } 461 } 462 } 463 464 // Operator? 465 // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there 466 else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) ) 467 { 468 // paramters for SQL view 469 if ( (c==':') || (c=='?')) 470 { 471 if (c!='?') 472 { 473 sal_Bool bIdentifierChar; 474 do 475 { 476 // Naechstes Zeichen holen 477 c = peekChar(); 478 bIdentifierChar = BasicSimpleCharClass::isAlpha( c, true ); 479 if( bIdentifierChar ) 480 getChar(); 481 } 482 while( bIdentifierChar ); 483 } 484 reType = TT_PARAMETER; 485 } 486 else if ((c=='-')) 487 { 488 sal_Unicode cPeekNext = peekChar(); 489 if (cPeekNext=='-') 490 { 491 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 492 while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False ) 493 { 494 getChar(); 495 cPeekNext = peekChar(); 496 } 497 reType = TT_COMMENT; 498 } 499 } 500 else if (c=='/') 501 { 502 sal_Unicode cPeekNext = peekChar(); 503 if (cPeekNext=='/') 504 { 505 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 506 while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False ) 507 { 508 getChar(); 509 cPeekNext = peekChar(); 510 } 511 reType = TT_COMMENT; 512 } 513 } 514 else 515 { 516 // Kommentar ? 517 if ( c == '\'' ) 518 { 519 c = getChar(); // '/' entfernen 520 521 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen 522 sal_Unicode cPeek = c; 523 while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False ) 524 { 525 getChar(); 526 cPeek = peekChar(); 527 } 528 529 reType = TT_COMMENT; 530 } 531 532 // Echter Operator, kann hier einfach behandelt werden, 533 // da nicht der wirkliche Operator, wie z.B. += interessiert, 534 // sondern nur die Tatsache, dass es sich um einen handelt. 535 if( reType != TT_COMMENT ) 536 { 537 reType = TT_OPERATOR; 538 } 539 540 } 541 } 542 543 // Objekt-Trenner? Muss vor Number abgehandelt werden 544 else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) ) 545 { 546 reType = TT_OPERATOR; 547 } 548 549 // Zahl? 550 else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True ) 551 { 552 reType = TT_NUMBER; 553 554 // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert 555 int nRadix = 10; 556 557 // Ist es eine Hex- oder Oct-Zahl? 558 if( c == '&' ) 559 { 560 // Octal? 561 if( peekChar() == 'o' || peekChar() == 'O' ) 562 { 563 // o entfernen 564 getChar(); 565 nRadix = 8; // Octal-Basis 566 567 // Alle Ziffern einlesen 568 while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) ) 569 c = getChar(); 570 } 571 // Hex? 572 else if( peekChar() == 'h' || peekChar() == 'H' ) 573 { 574 // x entfernen 575 getChar(); 576 nRadix = 16; // Hex-Basis 577 578 // Alle Ziffern einlesen und puffern 579 while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) ) 580 c = getChar(); 581 } 582 else 583 { 584 reType = TT_OPERATOR; 585 } 586 } 587 588 // Wenn nicht Oct oder Hex als double ansehen 589 if( reType == TT_NUMBER && nRadix == 10 ) 590 { 591 // Flag, ob das letzte Zeichen ein Exponent war 592 sal_Bool bAfterExpChar = sal_False; 593 594 // Alle Ziffern einlesen 595 while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) || 596 (bAfterExpChar && peekChar() == '+' ) || 597 (bAfterExpChar && peekChar() == '-' ) ) 598 // Nach Exponent auch +/- OK 599 { 600 c = getChar(); // Zeichen lesen 601 bAfterExpChar = ( c == 'e' || c == 'E' ); 602 } 603 } 604 605 // reType = TT_NUMBER; 606 } 607 608 // String? 609 else if( testCharFlags( c, CHAR_START_STRING ) == sal_True ) 610 { 611 // Merken, welches Zeichen den String eroeffnet hat 612 sal_Unicode cEndString = c; 613 if( c == '[' ) 614 cEndString = ']'; 615 616 // Alle Ziffern einlesen und puffern 617 while( peekChar() != cEndString ) 618 { 619 // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht 620 if( peekChar() == CHAR_EOF ) 621 { 622 // ERROR: unterminated string literal 623 reType = TT_ERROR; 624 break; 625 } 626 c = getChar(); 627 if( testCharFlags( c, CHAR_EOL ) == sal_True ) 628 { 629 // ERROR: unterminated string literal 630 reType = TT_ERROR; 631 break; 632 } 633 } 634 635 // Zeichen lesen 636 if( reType != TT_ERROR ) 637 { 638 getChar(); 639 if( cEndString == ']' ) 640 reType = TT_IDENTIFIER; 641 else 642 reType = TT_STRING; 643 } 644 } 645 646 // Zeilenende? 647 else if( testCharFlags( c, CHAR_EOL ) == sal_True ) 648 { 649 // Falls ein weiteres anderes EOL-Char folgt, weg damit 650 sal_Unicode cNext = peekChar(); 651 if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True ) 652 getChar(); 653 654 // Positions-Daten auf Zeilen-Beginn setzen 655 nCol = 0; 656 nLine++; 657 658 reType = TT_EOL; 659 } 660 661 // Alles andere bleibt TT_UNKNOWN 662 663 664 // End-Position eintragen 665 rpEndPos = mpActualPos; 666 return sal_True; 667 } 668 669 String SimpleTokenizer_Impl::getTokStr 670 ( /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos ) 671 { 672 return String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) ); 673 } 674 675 #ifdef DBG_UTIL 676 // TEST: Token ausgeben 677 String SimpleTokenizer_Impl::getFullTokenStr( /*out*/TokenTypes eType, 678 /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos ) 679 { 680 String aOut; 681 switch( eType ) 682 { 683 case TT_UNKNOWN: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") ); break; 684 case TT_IDENTIFIER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") ); break; 685 case TT_WHITESPACE: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") ); break; 686 case TT_NUMBER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") ); break; 687 case TT_STRING: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") ); break; 688 case TT_EOL: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") ); break; 689 case TT_COMMENT: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") ); break; 690 case TT_ERROR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") ); break; 691 case TT_OPERATOR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") ); break; 692 case TT_KEYWORDS: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") ); break; 693 case TT_PARAMETER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") ); break; 694 } 695 if( eType != TT_EOL ) 696 { 697 aOut += String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) ); 698 } 699 aOut += String( RTL_CONSTASCII_USTRINGPARAM("\n") ); 700 return aOut; 701 } 702 #endif 703 704 SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang) 705 { 706 memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) ); 707 708 // Zeichen-Tabelle fuellen 709 sal_uInt16 i; 710 711 // Zulaessige Zeichen fuer Identifier 712 sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ); 713 for( i = 'a' ; i <= 'z' ; i++ ) 714 aCharTypeTab[i] |= nHelpMask; 715 for( i = 'A' ; i <= 'Z' ; i++ ) 716 aCharTypeTab[i] |= nHelpMask; 717 // '_' extra eintragen 718 aCharTypeTab[(int)'_'] |= nHelpMask; 719 // AB 23.6.97: '$' ist auch erlaubt 720 aCharTypeTab[(int)'$'] |= nHelpMask; 721 722 // Ziffern (Identifier und Number ist moeglich) 723 nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER | 724 CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER ); 725 for( i = '0' ; i <= '9' ; i++ ) 726 aCharTypeTab[i] |= nHelpMask; 727 728 // e und E sowie . von Hand ergaenzen 729 aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER; 730 aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER; 731 aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER ); 732 aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER; 733 734 // Hex-Ziffern 735 for( i = 'a' ; i <= 'f' ; i++ ) 736 aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; 737 for( i = 'A' ; i <= 'F' ; i++ ) 738 aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER; 739 740 // Oct-Ziffern 741 for( i = '0' ; i <= '7' ; i++ ) 742 aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER; 743 744 // String-Beginn/End-Zeichen 745 aCharTypeTab[(int)'\''] |= CHAR_START_STRING; 746 aCharTypeTab[(int)'\"'] |= CHAR_START_STRING; 747 aCharTypeTab[(int)'['] |= CHAR_START_STRING; 748 aCharTypeTab[(int)'`'] |= CHAR_START_STRING; 749 750 // Operator-Zeichen 751 aCharTypeTab[(int)'!'] |= CHAR_OPERATOR; 752 aCharTypeTab[(int)'%'] |= CHAR_OPERATOR; 753 // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140 754 aCharTypeTab[(int)'('] |= CHAR_OPERATOR; 755 aCharTypeTab[(int)')'] |= CHAR_OPERATOR; 756 aCharTypeTab[(int)'*'] |= CHAR_OPERATOR; 757 aCharTypeTab[(int)'+'] |= CHAR_OPERATOR; 758 aCharTypeTab[(int)','] |= CHAR_OPERATOR; 759 aCharTypeTab[(int)'-'] |= CHAR_OPERATOR; 760 aCharTypeTab[(int)'/'] |= CHAR_OPERATOR; 761 aCharTypeTab[(int)':'] |= CHAR_OPERATOR; 762 aCharTypeTab[(int)'<'] |= CHAR_OPERATOR; 763 aCharTypeTab[(int)'='] |= CHAR_OPERATOR; 764 aCharTypeTab[(int)'>'] |= CHAR_OPERATOR; 765 aCharTypeTab[(int)'?'] |= CHAR_OPERATOR; 766 aCharTypeTab[(int)'^'] |= CHAR_OPERATOR; 767 aCharTypeTab[(int)'|'] |= CHAR_OPERATOR; 768 aCharTypeTab[(int)'~'] |= CHAR_OPERATOR; 769 aCharTypeTab[(int)'{'] |= CHAR_OPERATOR; 770 aCharTypeTab[(int)'}'] |= CHAR_OPERATOR; 771 // aCharTypeTab[(int)'['] |= CHAR_OPERATOR; Removed because of #i17826 772 aCharTypeTab[(int)']'] |= CHAR_OPERATOR; 773 aCharTypeTab[(int)';'] |= CHAR_OPERATOR; 774 775 // Space 776 aCharTypeTab[(int)' ' ] |= CHAR_SPACE; 777 aCharTypeTab[(int)'\t'] |= CHAR_SPACE; 778 779 // Zeilen-Ende-Zeichen 780 aCharTypeTab[(int)'\r'] |= CHAR_EOL; 781 aCharTypeTab[(int)'\n'] |= CHAR_EOL; 782 783 ppListKeyWords = NULL; 784 } 785 786 SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void ) 787 { 788 } 789 790 SimpleTokenizer_Impl* getSimpleTokenizer( void ) 791 { 792 static SimpleTokenizer_Impl* pSimpleTokenizer = NULL; 793 if( !pSimpleTokenizer ) 794 pSimpleTokenizer = new SimpleTokenizer_Impl(); 795 return pSimpleTokenizer; 796 } 797 798 // Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul 799 sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* aSource ) 800 { 801 // Position auf den Anfang des Source-Strings setzen 802 mpStringBegin = mpActualPos = aSource->GetBuffer(); 803 804 // Zeile und Spalte initialisieren 805 nLine = nParseLine; 806 nCol = 0L; 807 808 // Variablen fuer die Out-Parameter 809 TokenTypes eType; 810 const sal_Unicode* pStartPos; 811 const sal_Unicode* pEndPos; 812 813 // Schleife ueber alle Tokens 814 sal_uInt16 nTokenCount = 0; 815 while( getNextToken( eType, pStartPos, pEndPos ) ) 816 nTokenCount++; 817 818 return nTokenCount; 819 } 820 821 void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const String& rLine, 822 /*out*/HighlightPortions& portions ) 823 { 824 // Position auf den Anfang des Source-Strings setzen 825 mpStringBegin = mpActualPos = rLine.GetBuffer(); 826 827 // Zeile und Spalte initialisieren 828 nLine = nParseLine; 829 nCol = 0L; 830 831 // Variablen fuer die Out-Parameter 832 TokenTypes eType; 833 const sal_Unicode* pStartPos; 834 const sal_Unicode* pEndPos; 835 836 // Schleife ueber alle Tokens 837 while( getNextToken( eType, pStartPos, pEndPos ) ) 838 { 839 HighlightPortion portion; 840 841 portion.nBegin = (sal_uInt16)(pStartPos - mpStringBegin); 842 portion.nEnd = (sal_uInt16)(pEndPos - mpStringBegin); 843 portion.tokenType = eType; 844 845 portions.push_back(portion); 846 } 847 } 848 849 850 ////////////////////////////////////////////////////////////////////////// 851 // Implementierung des SyntaxHighlighter 852 853 SyntaxHighlighter::SyntaxHighlighter() 854 { 855 m_pSimpleTokenizer = 0; 856 m_pKeyWords = NULL; 857 m_nKeyWordCount = 0; 858 } 859 860 SyntaxHighlighter::~SyntaxHighlighter() 861 { 862 delete m_pSimpleTokenizer; 863 delete m_pKeyWords; 864 } 865 866 void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ ) 867 { 868 eLanguage = eLanguage_; 869 delete m_pSimpleTokenizer; 870 m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage); 871 872 switch (eLanguage) 873 { 874 case HIGHLIGHT_BASIC: 875 m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords, 876 sizeof( strListBasicKeyWords ) / sizeof( char* )); 877 break; 878 case HIGHLIGHT_SQL: 879 m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords, 880 sizeof( strListSqlKeyWords ) / sizeof( char* )); 881 break; 882 default: 883 m_pSimpleTokenizer->setKeyWords( NULL, 0 ); 884 } 885 } 886 887 const Range SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference, 888 const String* pChangedLines, sal_uInt32 nArrayLength) 889 { 890 (void)nLineCountDifference; 891 892 for( sal_uInt32 i=0 ; i < nArrayLength ; i++ ) 893 m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]); 894 895 return Range( nLine, nLine + nArrayLength-1 ); 896 } 897 898 void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const String& rLine, 899 /*out*/HighlightPortions& portions ) 900 { 901 m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions ); 902 } 903