15900e8ecSAndrew Rist /**************************************************************
2cdf0e10cSrcweir *
35900e8ecSAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one
45900e8ecSAndrew Rist * or more contributor license agreements. See the NOTICE file
55900e8ecSAndrew Rist * distributed with this work for additional information
65900e8ecSAndrew Rist * regarding copyright ownership. The ASF licenses this file
75900e8ecSAndrew Rist * to you under the Apache License, Version 2.0 (the
85900e8ecSAndrew Rist * "License"); you may not use this file except in compliance
95900e8ecSAndrew Rist * with the License. You may obtain a copy of the License at
105900e8ecSAndrew Rist *
115900e8ecSAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0
125900e8ecSAndrew Rist *
135900e8ecSAndrew Rist * Unless required by applicable law or agreed to in writing,
145900e8ecSAndrew Rist * software distributed under the License is distributed on an
155900e8ecSAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
165900e8ecSAndrew Rist * KIND, either express or implied. See the License for the
175900e8ecSAndrew Rist * specific language governing permissions and limitations
185900e8ecSAndrew Rist * under the License.
195900e8ecSAndrew Rist *
205900e8ecSAndrew Rist *************************************************************/
215900e8ecSAndrew Rist
225900e8ecSAndrew Rist
23cdf0e10cSrcweir
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_svtools.hxx"
26cdf0e10cSrcweir
27cdf0e10cSrcweir #include <svtools/syntaxhighlight.hxx>
28cdf0e10cSrcweir
29cdf0e10cSrcweir #include <unotools/charclass.hxx>
30cdf0e10cSrcweir #include <tools/debug.hxx>
31cdf0e10cSrcweir
32cdf0e10cSrcweir
33cdf0e10cSrcweir // ##########################################################################
34cdf0e10cSrcweir // ATTENTION: all these words need to be lowercase
// and the list has to stay in ascending strcmp() order.
35cdf0e10cSrcweir // ##########################################################################
// BASIC keyword table.
// getNextToken() lower-cases an identifier with ToLowerAscii() and then looks
// it up in the installed keyword list with bsearch()/compare_strings()
// (i.e. strcmp()), so an entry that is not lowercase or is out of order
// will not be found reliably.
// NOTE(review): presumably installed through setKeyWords() for BASIC
// highlighting -- the call site is not visible in this part of the file.
// NOTE(review): multi-word entries such as "end enum" or "line input" look
// unreachable through getNextToken(), which only looks up single
// identifiers -- confirm before relying on them.
36cdf0e10cSrcweir static const char* strListBasicKeyWords[] = {
37cdf0e10cSrcweir "access",
38cdf0e10cSrcweir "alias",
39cdf0e10cSrcweir "and",
40cdf0e10cSrcweir "any",
41cdf0e10cSrcweir "append",
42cdf0e10cSrcweir "as",
43cdf0e10cSrcweir "base",
44cdf0e10cSrcweir "binary",
45cdf0e10cSrcweir "boolean",
46cdf0e10cSrcweir "byref",
47cdf0e10cSrcweir "byte",
48cdf0e10cSrcweir "byval",
49cdf0e10cSrcweir "call",
50cdf0e10cSrcweir "case",
51cdf0e10cSrcweir "cdecl",
52cdf0e10cSrcweir "classmodule",
53cdf0e10cSrcweir "close",
54cdf0e10cSrcweir "compare",
55cdf0e10cSrcweir "compatible",
56cdf0e10cSrcweir "const",
57cdf0e10cSrcweir "currency",
58cdf0e10cSrcweir "date",
59cdf0e10cSrcweir "declare",
60cdf0e10cSrcweir "defbool",
61cdf0e10cSrcweir "defcur",
62cdf0e10cSrcweir "defdate",
63cdf0e10cSrcweir "defdbl",
64cdf0e10cSrcweir "deferr",
65cdf0e10cSrcweir "defint",
66cdf0e10cSrcweir "deflng",
67cdf0e10cSrcweir "defobj",
68cdf0e10cSrcweir "defsng",
69cdf0e10cSrcweir "defstr",
70cdf0e10cSrcweir "defvar",
71cdf0e10cSrcweir "dim",
72cdf0e10cSrcweir "do",
73cdf0e10cSrcweir "double",
74cdf0e10cSrcweir "each",
75cdf0e10cSrcweir "else",
76cdf0e10cSrcweir "elseif",
77cdf0e10cSrcweir "end",
78cdf0e10cSrcweir "end enum",
79cdf0e10cSrcweir "end function",
80cdf0e10cSrcweir "end if",
81cdf0e10cSrcweir "end select",
82cdf0e10cSrcweir "end sub",
83cdf0e10cSrcweir "end type",
84cdf0e10cSrcweir "endif",
85cdf0e10cSrcweir "enum",
86cdf0e10cSrcweir "eqv",
87cdf0e10cSrcweir "erase",
88cdf0e10cSrcweir "error",
89cdf0e10cSrcweir "exit",
90cdf0e10cSrcweir "explicit",
91cdf0e10cSrcweir "for",
92cdf0e10cSrcweir "function",
93cdf0e10cSrcweir "get",
94cdf0e10cSrcweir "global",
95cdf0e10cSrcweir "gosub",
96cdf0e10cSrcweir "goto",
97cdf0e10cSrcweir "if",
98cdf0e10cSrcweir "imp",
99cdf0e10cSrcweir "implements",
100cdf0e10cSrcweir "in",
101cdf0e10cSrcweir "input",
102cdf0e10cSrcweir "integer",
103cdf0e10cSrcweir "is",
104cdf0e10cSrcweir "let",
105cdf0e10cSrcweir "lib",
106cdf0e10cSrcweir "like",
107cdf0e10cSrcweir "line",
108cdf0e10cSrcweir "line input",
109cdf0e10cSrcweir "local",
110cdf0e10cSrcweir "lock",
111cdf0e10cSrcweir "long",
112cdf0e10cSrcweir "loop",
113cdf0e10cSrcweir "lprint",
114cdf0e10cSrcweir "lset",
115cdf0e10cSrcweir "mod",
116cdf0e10cSrcweir "name",
117cdf0e10cSrcweir "new",
118cdf0e10cSrcweir "next",
119cdf0e10cSrcweir "not",
120cdf0e10cSrcweir "object",
121cdf0e10cSrcweir "on",
122cdf0e10cSrcweir "open",
123cdf0e10cSrcweir "option",
124cdf0e10cSrcweir "optional",
125cdf0e10cSrcweir "or",
126cdf0e10cSrcweir "output",
127cdf0e10cSrcweir "preserve",
128cdf0e10cSrcweir "print",
129cdf0e10cSrcweir "private",
130cdf0e10cSrcweir "property",
131cdf0e10cSrcweir "public",
132cdf0e10cSrcweir "random",
133cdf0e10cSrcweir "read",
134cdf0e10cSrcweir "redim",
135cdf0e10cSrcweir "rem",
136cdf0e10cSrcweir "resume",
137cdf0e10cSrcweir "return",
138cdf0e10cSrcweir "rset",
139cdf0e10cSrcweir "select",
140cdf0e10cSrcweir "set",
141cdf0e10cSrcweir "shared",
142cdf0e10cSrcweir "single",
143cdf0e10cSrcweir "static",
144cdf0e10cSrcweir "step",
145cdf0e10cSrcweir "stop",
146cdf0e10cSrcweir "string",
147cdf0e10cSrcweir "sub",
148cdf0e10cSrcweir "system",
149cdf0e10cSrcweir "text",
150cdf0e10cSrcweir "then",
151cdf0e10cSrcweir "to",
152cdf0e10cSrcweir "type",
153cdf0e10cSrcweir "typeof",
154cdf0e10cSrcweir "until",
155cdf0e10cSrcweir "variant",
156cdf0e10cSrcweir "wend",
157cdf0e10cSrcweir "while",
158cdf0e10cSrcweir "with",
159cdf0e10cSrcweir "write",
160cdf0e10cSrcweir "xor"
161cdf0e10cSrcweir };
162cdf0e10cSrcweir
163cdf0e10cSrcweir
// SQL keyword table.
// getNextToken() lower-cases an identifier with ToLowerAscii() and then looks
// it up in the installed keyword list with bsearch()/compare_strings()
// (i.e. strcmp()), so every entry must be lowercase and the list must stay
// in ascending strcmp() order.
// NOTE(review): presumably installed through setKeyWords() for SQL
// highlighting -- the call site is not visible in this part of the file.
164cdf0e10cSrcweir static const char* strListSqlKeyWords[] = {
165cdf0e10cSrcweir "all",
166cdf0e10cSrcweir "and",
167cdf0e10cSrcweir "any",
168cdf0e10cSrcweir "as",
169cdf0e10cSrcweir "asc",
170cdf0e10cSrcweir "avg",
171cdf0e10cSrcweir "between",
172cdf0e10cSrcweir "by",
173cdf0e10cSrcweir "cast",
174cdf0e10cSrcweir "corresponding",
175cdf0e10cSrcweir "count",
176cdf0e10cSrcweir "create",
177cdf0e10cSrcweir "cross",
178cdf0e10cSrcweir "delete",
179cdf0e10cSrcweir "desc",
180cdf0e10cSrcweir "distinct",
181cdf0e10cSrcweir "drop",
182cdf0e10cSrcweir "escape",
183cdf0e10cSrcweir "except",
184cdf0e10cSrcweir "exists",
185cdf0e10cSrcweir "false",
186cdf0e10cSrcweir "from",
187cdf0e10cSrcweir "full",
188cdf0e10cSrcweir "global",
189cdf0e10cSrcweir "group",
190cdf0e10cSrcweir "having",
191cdf0e10cSrcweir "in",
192cdf0e10cSrcweir "inner",
193cdf0e10cSrcweir "insert",
194cdf0e10cSrcweir "intersect",
195cdf0e10cSrcweir "into",
196cdf0e10cSrcweir "is",
197cdf0e10cSrcweir "join",
198cdf0e10cSrcweir "left",
199cdf0e10cSrcweir "like",
200cdf0e10cSrcweir "local",
201cdf0e10cSrcweir "match",
202cdf0e10cSrcweir "max",
203cdf0e10cSrcweir "min",
204cdf0e10cSrcweir "natural",
205cdf0e10cSrcweir "not",
206cdf0e10cSrcweir "null",
207cdf0e10cSrcweir "on",
208cdf0e10cSrcweir "or",
209cdf0e10cSrcweir "order",
210cdf0e10cSrcweir "outer",
211cdf0e10cSrcweir "right",
212cdf0e10cSrcweir "select",
213cdf0e10cSrcweir "set",
214cdf0e10cSrcweir "some",
215cdf0e10cSrcweir "sum",
216cdf0e10cSrcweir "table",
217cdf0e10cSrcweir "temporary",
218cdf0e10cSrcweir "true",
219cdf0e10cSrcweir "union",
220cdf0e10cSrcweir "unique",
221cdf0e10cSrcweir "unknown",
222cdf0e10cSrcweir "update",
223cdf0e10cSrcweir "using",
224cdf0e10cSrcweir "values",
225cdf0e10cSrcweir "where"
226cdf0e10cSrcweir };
227cdf0e10cSrcweir
228cdf0e10cSrcweir
// bsearch() comparison callback for the keyword tables.
// arg1: the search key, a NUL-terminated C string.
// arg2: address of the table element being probed; the element itself is a
//       pointer to a NUL-terminated C string.
// Returns the strcmp() ordering of the key relative to that element.
extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 )
{
    const char* pKey = static_cast< const char* >( arg1 );
    const char* const* ppEntry = static_cast< const char* const* >( arg2 );
    return strcmp( pKey, *ppEntry );
}
233cdf0e10cSrcweir
234cdf0e10cSrcweir
235cdf0e10cSrcweir class LetterTable
236cdf0e10cSrcweir {
237cdf0e10cSrcweir bool IsLetterTab[256];
238cdf0e10cSrcweir
239cdf0e10cSrcweir public:
240cdf0e10cSrcweir LetterTable( void );
241cdf0e10cSrcweir
isLetter(sal_Unicode c)242cdf0e10cSrcweir inline bool isLetter( sal_Unicode c )
243cdf0e10cSrcweir {
244cdf0e10cSrcweir bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c );
245cdf0e10cSrcweir return bRet;
246cdf0e10cSrcweir }
247cdf0e10cSrcweir bool isLetterUnicode( sal_Unicode c );
248cdf0e10cSrcweir };
249cdf0e10cSrcweir
250cdf0e10cSrcweir class BasicSimpleCharClass
251cdf0e10cSrcweir {
252cdf0e10cSrcweir static LetterTable aLetterTable;
253cdf0e10cSrcweir
254cdf0e10cSrcweir public:
isAlpha(sal_Unicode c,bool bCompatible)255cdf0e10cSrcweir static sal_Bool isAlpha( sal_Unicode c, bool bCompatible )
256cdf0e10cSrcweir {
257cdf0e10cSrcweir sal_Bool bRet = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
258cdf0e10cSrcweir || (bCompatible && aLetterTable.isLetter( c ));
259cdf0e10cSrcweir return bRet;
260cdf0e10cSrcweir }
261cdf0e10cSrcweir
isDigit(sal_Unicode c)262cdf0e10cSrcweir static sal_Bool isDigit( sal_Unicode c )
263cdf0e10cSrcweir {
264cdf0e10cSrcweir sal_Bool bRet = (c >= '0' && c <= '9');
265cdf0e10cSrcweir return bRet;
266cdf0e10cSrcweir }
267cdf0e10cSrcweir
isAlphaNumeric(sal_Unicode c,bool bCompatible)268cdf0e10cSrcweir static sal_Bool isAlphaNumeric( sal_Unicode c, bool bCompatible )
269cdf0e10cSrcweir {
270cdf0e10cSrcweir sal_Bool bRet = isDigit( c ) || isAlpha( c, bCompatible );
271cdf0e10cSrcweir return bRet;
272cdf0e10cSrcweir }
273cdf0e10cSrcweir };
274cdf0e10cSrcweir
275cdf0e10cSrcweir LetterTable BasicSimpleCharClass::aLetterTable;
276cdf0e10cSrcweir
LetterTable(void)277cdf0e10cSrcweir LetterTable::LetterTable( void )
278cdf0e10cSrcweir {
279cdf0e10cSrcweir for( int i = 0 ; i < 256 ; ++i )
280cdf0e10cSrcweir IsLetterTab[i] = false;
281cdf0e10cSrcweir
282cdf0e10cSrcweir IsLetterTab[0xC0] = true; // ?, CAPITAL LETTER A WITH GRAVE ACCENT
283cdf0e10cSrcweir IsLetterTab[0xC1] = true; // ?, CAPITAL LETTER A WITH ACUTE ACCENT
284cdf0e10cSrcweir IsLetterTab[0xC2] = true; // ?, CAPITAL LETTER A WITH CIRCUMFLEX ACCENT
285cdf0e10cSrcweir IsLetterTab[0xC3] = true; // ?, CAPITAL LETTER A WITH TILDE
286cdf0e10cSrcweir IsLetterTab[0xC4] = true; // ?, CAPITAL LETTER A WITH DIAERESIS
287cdf0e10cSrcweir IsLetterTab[0xC5] = true; // ?, CAPITAL LETTER A WITH RING ABOVE
288cdf0e10cSrcweir IsLetterTab[0xC6] = true; // ?, CAPITAL LIGATURE AE
289cdf0e10cSrcweir IsLetterTab[0xC7] = true; // ?, CAPITAL LETTER C WITH CEDILLA
290cdf0e10cSrcweir IsLetterTab[0xC8] = true; // ?, CAPITAL LETTER E WITH GRAVE ACCENT
291cdf0e10cSrcweir IsLetterTab[0xC9] = true; // ?, CAPITAL LETTER E WITH ACUTE ACCENT
292cdf0e10cSrcweir IsLetterTab[0xCA] = true; // ?, CAPITAL LETTER E WITH CIRCUMFLEX ACCENT
293cdf0e10cSrcweir IsLetterTab[0xCB] = true; // ?, CAPITAL LETTER E WITH DIAERESIS
294cdf0e10cSrcweir IsLetterTab[0xCC] = true; // ?, CAPITAL LETTER I WITH GRAVE ACCENT
295cdf0e10cSrcweir IsLetterTab[0xCD] = true; // ?, CAPITAL LETTER I WITH ACUTE ACCENT
296cdf0e10cSrcweir IsLetterTab[0xCE] = true; // ?, CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
297cdf0e10cSrcweir IsLetterTab[0xCF] = true; // ?, CAPITAL LETTER I WITH DIAERESIS
298cdf0e10cSrcweir IsLetterTab[0xD0] = true; // ?, CAPITAL LETTER ETH
299cdf0e10cSrcweir IsLetterTab[0xD1] = true; // ?, CAPITAL LETTER N WITH TILDE
300cdf0e10cSrcweir IsLetterTab[0xD2] = true; // ?, CAPITAL LETTER O WITH GRAVE ACCENT
301cdf0e10cSrcweir IsLetterTab[0xD3] = true; // ?, CAPITAL LETTER O WITH ACUTE ACCENT
302cdf0e10cSrcweir IsLetterTab[0xD4] = true; // ?, CAPITAL LETTER O WITH CIRCUMFLEX ACCENT
303cdf0e10cSrcweir IsLetterTab[0xD5] = true; // ?, CAPITAL LETTER O WITH TILDE
304cdf0e10cSrcweir IsLetterTab[0xD6] = true; // ?, CAPITAL LETTER O WITH DIAERESIS
305cdf0e10cSrcweir IsLetterTab[0xD8] = true; // ?, CAPITAL LETTER O WITH STROKE
306cdf0e10cSrcweir IsLetterTab[0xD9] = true; // ?, CAPITAL LETTER U WITH GRAVE ACCENT
307cdf0e10cSrcweir IsLetterTab[0xDA] = true; // ?, CAPITAL LETTER U WITH ACUTE ACCENT
308cdf0e10cSrcweir IsLetterTab[0xDB] = true; // ?, CAPITAL LETTER U WITH CIRCUMFLEX ACCENT
309cdf0e10cSrcweir IsLetterTab[0xDC] = true; // ?, CAPITAL LETTER U WITH DIAERESIS
310cdf0e10cSrcweir IsLetterTab[0xDD] = true; // ?, CAPITAL LETTER Y WITH ACUTE ACCENT
311cdf0e10cSrcweir IsLetterTab[0xDE] = true; // ?, CAPITAL LETTER THORN
312cdf0e10cSrcweir IsLetterTab[0xDF] = true; // ?, SMALL LETTER SHARP S
313cdf0e10cSrcweir IsLetterTab[0xE0] = true; // ?, SMALL LETTER A WITH GRAVE ACCENT
314cdf0e10cSrcweir IsLetterTab[0xE1] = true; // ?, SMALL LETTER A WITH ACUTE ACCENT
315cdf0e10cSrcweir IsLetterTab[0xE2] = true; // ?, SMALL LETTER A WITH CIRCUMFLEX ACCENT
316cdf0e10cSrcweir IsLetterTab[0xE3] = true; // ?, SMALL LETTER A WITH TILDE
317cdf0e10cSrcweir IsLetterTab[0xE4] = true; // ?, SMALL LETTER A WITH DIAERESIS
318cdf0e10cSrcweir IsLetterTab[0xE5] = true; // ?, SMALL LETTER A WITH RING ABOVE
319cdf0e10cSrcweir IsLetterTab[0xE6] = true; // ?, SMALL LIGATURE AE
320cdf0e10cSrcweir IsLetterTab[0xE7] = true; // ?, SMALL LETTER C WITH CEDILLA
321cdf0e10cSrcweir IsLetterTab[0xE8] = true; // ?, SMALL LETTER E WITH GRAVE ACCENT
322cdf0e10cSrcweir IsLetterTab[0xE9] = true; // ?, SMALL LETTER E WITH ACUTE ACCENT
323cdf0e10cSrcweir IsLetterTab[0xEA] = true; // ?, SMALL LETTER E WITH CIRCUMFLEX ACCENT
324cdf0e10cSrcweir IsLetterTab[0xEB] = true; // ?, SMALL LETTER E WITH DIAERESIS
325cdf0e10cSrcweir IsLetterTab[0xEC] = true; // ?, SMALL LETTER I WITH GRAVE ACCENT
326cdf0e10cSrcweir IsLetterTab[0xED] = true; // ?, SMALL LETTER I WITH ACUTE ACCENT
327cdf0e10cSrcweir IsLetterTab[0xEE] = true; // ?, SMALL LETTER I WITH CIRCUMFLEX ACCENT
328cdf0e10cSrcweir IsLetterTab[0xEF] = true; // ?, SMALL LETTER I WITH DIAERESIS
329cdf0e10cSrcweir IsLetterTab[0xF0] = true; // ?, SMALL LETTER ETH
330cdf0e10cSrcweir IsLetterTab[0xF1] = true; // ?, SMALL LETTER N WITH TILDE
331cdf0e10cSrcweir IsLetterTab[0xF2] = true; // ?, SMALL LETTER O WITH GRAVE ACCENT
332cdf0e10cSrcweir IsLetterTab[0xF3] = true; // ?, SMALL LETTER O WITH ACUTE ACCENT
333cdf0e10cSrcweir IsLetterTab[0xF4] = true; // ?, SMALL LETTER O WITH CIRCUMFLEX ACCENT
334cdf0e10cSrcweir IsLetterTab[0xF5] = true; // ?, SMALL LETTER O WITH TILDE
335cdf0e10cSrcweir IsLetterTab[0xF6] = true; // ?, SMALL LETTER O WITH DIAERESIS
336cdf0e10cSrcweir IsLetterTab[0xF8] = true; // ?, SMALL LETTER O WITH OBLIQUE BAR
337cdf0e10cSrcweir IsLetterTab[0xF9] = true; // ?, SMALL LETTER U WITH GRAVE ACCENT
338cdf0e10cSrcweir IsLetterTab[0xFA] = true; // ?, SMALL LETTER U WITH ACUTE ACCENT
339cdf0e10cSrcweir IsLetterTab[0xFB] = true; // ?, SMALL LETTER U WITH CIRCUMFLEX ACCENT
340cdf0e10cSrcweir IsLetterTab[0xFC] = true; // ?, SMALL LETTER U WITH DIAERESIS
341cdf0e10cSrcweir IsLetterTab[0xFD] = true; // ?, SMALL LETTER Y WITH ACUTE ACCENT
342cdf0e10cSrcweir IsLetterTab[0xFE] = true; // ?, SMALL LETTER THORN
343cdf0e10cSrcweir IsLetterTab[0xFF] = true; // � , SMALL LETTER Y WITH DIAERESIS
344cdf0e10cSrcweir }
345cdf0e10cSrcweir
isLetterUnicode(sal_Unicode c)346cdf0e10cSrcweir bool LetterTable::isLetterUnicode( sal_Unicode c )
347cdf0e10cSrcweir {
348cdf0e10cSrcweir static CharClass* pCharClass = NULL;
349cdf0e10cSrcweir if( pCharClass == NULL )
350cdf0e10cSrcweir pCharClass = new CharClass( Application::GetSettings().GetLocale() );
351cdf0e10cSrcweir String aStr( c );
352cdf0e10cSrcweir bool bRet = pCharClass->isLetter( aStr, 0 );
353cdf0e10cSrcweir return bRet;
354cdf0e10cSrcweir }
355cdf0e10cSrcweir
356cdf0e10cSrcweir // Hilfsfunktion: Zeichen-Flag Testen
testCharFlags(sal_Unicode c,sal_uInt16 nTestFlags)357cdf0e10cSrcweir sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags )
358cdf0e10cSrcweir {
359cdf0e10cSrcweir bool bRet = false;
360cdf0e10cSrcweir if( c != 0 && c <= 255 )
361cdf0e10cSrcweir {
362cdf0e10cSrcweir bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 );
363cdf0e10cSrcweir }
364cdf0e10cSrcweir else if( c > 255 )
365cdf0e10cSrcweir {
366cdf0e10cSrcweir bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0
367cdf0e10cSrcweir ? BasicSimpleCharClass::isAlpha( c, true ) : false;
368cdf0e10cSrcweir }
369cdf0e10cSrcweir return bRet;
370cdf0e10cSrcweir }
371cdf0e10cSrcweir
setKeyWords(const char ** ppKeyWords,sal_uInt16 nCount)372cdf0e10cSrcweir void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount )
373cdf0e10cSrcweir {
374cdf0e10cSrcweir ppListKeyWords = ppKeyWords;
375cdf0e10cSrcweir nKeyWordCount = nCount;
376cdf0e10cSrcweir }
377cdf0e10cSrcweir
378cdf0e10cSrcweir // Neues Token holen
getNextToken(TokenTypes & reType,const sal_Unicode * & rpStartPos,const sal_Unicode * & rpEndPos)379cdf0e10cSrcweir sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
380cdf0e10cSrcweir /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
381cdf0e10cSrcweir {
382cdf0e10cSrcweir reType = TT_UNKNOWN;
383cdf0e10cSrcweir
384cdf0e10cSrcweir // Position merken
385cdf0e10cSrcweir rpStartPos = mpActualPos;
386cdf0e10cSrcweir
387cdf0e10cSrcweir // Zeichen untersuchen
388cdf0e10cSrcweir sal_Unicode c = peekChar();
389cdf0e10cSrcweir if( c == CHAR_EOF )
390cdf0e10cSrcweir return sal_False;
391cdf0e10cSrcweir
392cdf0e10cSrcweir // Zeichen lesen
393cdf0e10cSrcweir getChar();
394cdf0e10cSrcweir
395cdf0e10cSrcweir //*** Alle Moeglichkeiten durchgehen ***
396cdf0e10cSrcweir // Space?
397cdf0e10cSrcweir if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) )
398cdf0e10cSrcweir {
399cdf0e10cSrcweir while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True )
400cdf0e10cSrcweir getChar();
401cdf0e10cSrcweir
402cdf0e10cSrcweir reType = TT_WHITESPACE;
403cdf0e10cSrcweir }
404cdf0e10cSrcweir
405cdf0e10cSrcweir // Identifier?
406cdf0e10cSrcweir else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) )
407cdf0e10cSrcweir {
408cdf0e10cSrcweir sal_Bool bIdentifierChar;
409cdf0e10cSrcweir do
410cdf0e10cSrcweir {
411cdf0e10cSrcweir // Naechstes Zeichen holen
412cdf0e10cSrcweir c = peekChar();
413cdf0e10cSrcweir bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER );
414cdf0e10cSrcweir if( bIdentifierChar )
415cdf0e10cSrcweir getChar();
416cdf0e10cSrcweir }
417cdf0e10cSrcweir while( bIdentifierChar );
418cdf0e10cSrcweir
419cdf0e10cSrcweir reType = TT_IDENTIFIER;
420cdf0e10cSrcweir
421cdf0e10cSrcweir // Schluesselwort-Tabelle
422cdf0e10cSrcweir if (ppListKeyWords != NULL)
423cdf0e10cSrcweir {
424cdf0e10cSrcweir int nCount = mpActualPos - rpStartPos;
425cdf0e10cSrcweir
426cdf0e10cSrcweir // No keyword if string contains char > 255
427cdf0e10cSrcweir bool bCanBeKeyword = true;
428cdf0e10cSrcweir for( int i = 0 ; i < nCount ; i++ )
429cdf0e10cSrcweir {
430cdf0e10cSrcweir if( rpStartPos[i] > 255 )
431cdf0e10cSrcweir {
432cdf0e10cSrcweir bCanBeKeyword = false;
433cdf0e10cSrcweir break;
434cdf0e10cSrcweir }
435cdf0e10cSrcweir }
436cdf0e10cSrcweir
437cdf0e10cSrcweir if( bCanBeKeyword )
438cdf0e10cSrcweir {
439cdf0e10cSrcweir String aKWString(rpStartPos, sal::static_int_cast< xub_StrLen >(nCount) );
440cdf0e10cSrcweir ByteString aByteStr( aKWString, RTL_TEXTENCODING_ASCII_US );
441cdf0e10cSrcweir aByteStr.ToLowerAscii();
442cdf0e10cSrcweir if ( bsearch( aByteStr.GetBuffer(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
443cdf0e10cSrcweir compare_strings ) )
444cdf0e10cSrcweir {
445cdf0e10cSrcweir reType = TT_KEYWORDS;
446cdf0e10cSrcweir
447cdf0e10cSrcweir if ( aByteStr.Equals( "rem" ) )
448cdf0e10cSrcweir {
449cdf0e10cSrcweir // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
450cdf0e10cSrcweir sal_Unicode cPeek = peekChar();
451cdf0e10cSrcweir while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
452cdf0e10cSrcweir {
453cdf0e10cSrcweir c = getChar();
454cdf0e10cSrcweir cPeek = peekChar();
455cdf0e10cSrcweir }
456cdf0e10cSrcweir
457cdf0e10cSrcweir reType = TT_COMMENT;
458cdf0e10cSrcweir }
459cdf0e10cSrcweir }
460cdf0e10cSrcweir }
461cdf0e10cSrcweir }
462cdf0e10cSrcweir }
463cdf0e10cSrcweir
464cdf0e10cSrcweir // Operator?
465cdf0e10cSrcweir // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
466cdf0e10cSrcweir else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) )
467cdf0e10cSrcweir {
468cdf0e10cSrcweir // paramters for SQL view
469cdf0e10cSrcweir if ( (c==':') || (c=='?'))
470cdf0e10cSrcweir {
471cdf0e10cSrcweir if (c!='?')
472cdf0e10cSrcweir {
473cdf0e10cSrcweir sal_Bool bIdentifierChar;
474cdf0e10cSrcweir do
475cdf0e10cSrcweir {
476cdf0e10cSrcweir // Naechstes Zeichen holen
477cdf0e10cSrcweir c = peekChar();
478cdf0e10cSrcweir bIdentifierChar = BasicSimpleCharClass::isAlpha( c, true );
479cdf0e10cSrcweir if( bIdentifierChar )
480cdf0e10cSrcweir getChar();
481cdf0e10cSrcweir }
482cdf0e10cSrcweir while( bIdentifierChar );
483cdf0e10cSrcweir }
484cdf0e10cSrcweir reType = TT_PARAMETER;
485cdf0e10cSrcweir }
486*a9ab3c7bSHerbert Dürr else if( c=='-' )
487cdf0e10cSrcweir {
488cdf0e10cSrcweir sal_Unicode cPeekNext = peekChar();
489cdf0e10cSrcweir if (cPeekNext=='-')
490cdf0e10cSrcweir {
491cdf0e10cSrcweir // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
492cdf0e10cSrcweir while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
493cdf0e10cSrcweir {
494cdf0e10cSrcweir getChar();
495cdf0e10cSrcweir cPeekNext = peekChar();
496cdf0e10cSrcweir }
497cdf0e10cSrcweir reType = TT_COMMENT;
498cdf0e10cSrcweir }
499cdf0e10cSrcweir }
500cdf0e10cSrcweir else if (c=='/')
501cdf0e10cSrcweir {
502cdf0e10cSrcweir sal_Unicode cPeekNext = peekChar();
503cdf0e10cSrcweir if (cPeekNext=='/')
504cdf0e10cSrcweir {
505cdf0e10cSrcweir // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
506cdf0e10cSrcweir while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
507cdf0e10cSrcweir {
508cdf0e10cSrcweir getChar();
509cdf0e10cSrcweir cPeekNext = peekChar();
510cdf0e10cSrcweir }
511cdf0e10cSrcweir reType = TT_COMMENT;
512cdf0e10cSrcweir }
513cdf0e10cSrcweir }
514cdf0e10cSrcweir else
515cdf0e10cSrcweir {
516cdf0e10cSrcweir // Kommentar ?
517cdf0e10cSrcweir if ( c == '\'' )
518cdf0e10cSrcweir {
519cdf0e10cSrcweir c = getChar(); // '/' entfernen
520cdf0e10cSrcweir
521cdf0e10cSrcweir // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
522cdf0e10cSrcweir sal_Unicode cPeek = c;
523cdf0e10cSrcweir while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
524cdf0e10cSrcweir {
525cdf0e10cSrcweir getChar();
526cdf0e10cSrcweir cPeek = peekChar();
527cdf0e10cSrcweir }
528cdf0e10cSrcweir
529cdf0e10cSrcweir reType = TT_COMMENT;
530cdf0e10cSrcweir }
531cdf0e10cSrcweir
532cdf0e10cSrcweir // Echter Operator, kann hier einfach behandelt werden,
533cdf0e10cSrcweir // da nicht der wirkliche Operator, wie z.B. += interessiert,
534cdf0e10cSrcweir // sondern nur die Tatsache, dass es sich um einen handelt.
535cdf0e10cSrcweir if( reType != TT_COMMENT )
536cdf0e10cSrcweir {
537cdf0e10cSrcweir reType = TT_OPERATOR;
538cdf0e10cSrcweir }
539cdf0e10cSrcweir
540cdf0e10cSrcweir }
541cdf0e10cSrcweir }
542cdf0e10cSrcweir
543cdf0e10cSrcweir // Objekt-Trenner? Muss vor Number abgehandelt werden
544cdf0e10cSrcweir else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) )
545cdf0e10cSrcweir {
546cdf0e10cSrcweir reType = TT_OPERATOR;
547cdf0e10cSrcweir }
548cdf0e10cSrcweir
549cdf0e10cSrcweir // Zahl?
550cdf0e10cSrcweir else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True )
551cdf0e10cSrcweir {
552cdf0e10cSrcweir reType = TT_NUMBER;
553cdf0e10cSrcweir
554cdf0e10cSrcweir // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert
555cdf0e10cSrcweir int nRadix = 10;
556cdf0e10cSrcweir
557cdf0e10cSrcweir // Ist es eine Hex- oder Oct-Zahl?
558cdf0e10cSrcweir if( c == '&' )
559cdf0e10cSrcweir {
560cdf0e10cSrcweir // Octal?
561cdf0e10cSrcweir if( peekChar() == 'o' || peekChar() == 'O' )
562cdf0e10cSrcweir {
563cdf0e10cSrcweir // o entfernen
564cdf0e10cSrcweir getChar();
565cdf0e10cSrcweir nRadix = 8; // Octal-Basis
566cdf0e10cSrcweir
567cdf0e10cSrcweir // Alle Ziffern einlesen
568cdf0e10cSrcweir while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
569cdf0e10cSrcweir c = getChar();
570cdf0e10cSrcweir }
571cdf0e10cSrcweir // Hex?
572cdf0e10cSrcweir else if( peekChar() == 'h' || peekChar() == 'H' )
573cdf0e10cSrcweir {
574cdf0e10cSrcweir // x entfernen
575cdf0e10cSrcweir getChar();
576cdf0e10cSrcweir nRadix = 16; // Hex-Basis
577cdf0e10cSrcweir
578cdf0e10cSrcweir // Alle Ziffern einlesen und puffern
579cdf0e10cSrcweir while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
580cdf0e10cSrcweir c = getChar();
581cdf0e10cSrcweir }
582cdf0e10cSrcweir else
583cdf0e10cSrcweir {
584cdf0e10cSrcweir reType = TT_OPERATOR;
585cdf0e10cSrcweir }
586cdf0e10cSrcweir }
587cdf0e10cSrcweir
588cdf0e10cSrcweir // Wenn nicht Oct oder Hex als double ansehen
589cdf0e10cSrcweir if( reType == TT_NUMBER && nRadix == 10 )
590cdf0e10cSrcweir {
591cdf0e10cSrcweir // Flag, ob das letzte Zeichen ein Exponent war
592cdf0e10cSrcweir sal_Bool bAfterExpChar = sal_False;
593cdf0e10cSrcweir
594cdf0e10cSrcweir // Alle Ziffern einlesen
595cdf0e10cSrcweir while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
596cdf0e10cSrcweir (bAfterExpChar && peekChar() == '+' ) ||
597cdf0e10cSrcweir (bAfterExpChar && peekChar() == '-' ) )
598cdf0e10cSrcweir // Nach Exponent auch +/- OK
599cdf0e10cSrcweir {
600cdf0e10cSrcweir c = getChar(); // Zeichen lesen
601cdf0e10cSrcweir bAfterExpChar = ( c == 'e' || c == 'E' );
602cdf0e10cSrcweir }
603cdf0e10cSrcweir }
604cdf0e10cSrcweir
605cdf0e10cSrcweir // reType = TT_NUMBER;
606cdf0e10cSrcweir }
607cdf0e10cSrcweir
608cdf0e10cSrcweir // String?
609cdf0e10cSrcweir else if( testCharFlags( c, CHAR_START_STRING ) == sal_True )
610cdf0e10cSrcweir {
611cdf0e10cSrcweir // Merken, welches Zeichen den String eroeffnet hat
612cdf0e10cSrcweir sal_Unicode cEndString = c;
613cdf0e10cSrcweir if( c == '[' )
614cdf0e10cSrcweir cEndString = ']';
615cdf0e10cSrcweir
616cdf0e10cSrcweir // Alle Ziffern einlesen und puffern
617cdf0e10cSrcweir while( peekChar() != cEndString )
618cdf0e10cSrcweir {
619cdf0e10cSrcweir // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht
620cdf0e10cSrcweir if( peekChar() == CHAR_EOF )
621cdf0e10cSrcweir {
622cdf0e10cSrcweir // ERROR: unterminated string literal
623cdf0e10cSrcweir reType = TT_ERROR;
624cdf0e10cSrcweir break;
625cdf0e10cSrcweir }
626cdf0e10cSrcweir c = getChar();
627cdf0e10cSrcweir if( testCharFlags( c, CHAR_EOL ) == sal_True )
628cdf0e10cSrcweir {
629cdf0e10cSrcweir // ERROR: unterminated string literal
630cdf0e10cSrcweir reType = TT_ERROR;
631cdf0e10cSrcweir break;
632cdf0e10cSrcweir }
633cdf0e10cSrcweir }
634cdf0e10cSrcweir
635cdf0e10cSrcweir // Zeichen lesen
636cdf0e10cSrcweir if( reType != TT_ERROR )
637cdf0e10cSrcweir {
638cdf0e10cSrcweir getChar();
639cdf0e10cSrcweir if( cEndString == ']' )
640cdf0e10cSrcweir reType = TT_IDENTIFIER;
641cdf0e10cSrcweir else
642cdf0e10cSrcweir reType = TT_STRING;
643cdf0e10cSrcweir }
644cdf0e10cSrcweir }
645cdf0e10cSrcweir
646cdf0e10cSrcweir // Zeilenende?
647cdf0e10cSrcweir else if( testCharFlags( c, CHAR_EOL ) == sal_True )
648cdf0e10cSrcweir {
649cdf0e10cSrcweir // Falls ein weiteres anderes EOL-Char folgt, weg damit
650cdf0e10cSrcweir sal_Unicode cNext = peekChar();
651cdf0e10cSrcweir if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True )
652cdf0e10cSrcweir getChar();
653cdf0e10cSrcweir
654cdf0e10cSrcweir // Positions-Daten auf Zeilen-Beginn setzen
655cdf0e10cSrcweir nCol = 0;
656cdf0e10cSrcweir nLine++;
657cdf0e10cSrcweir
658cdf0e10cSrcweir reType = TT_EOL;
659cdf0e10cSrcweir }
660cdf0e10cSrcweir
661cdf0e10cSrcweir // Alles andere bleibt TT_UNKNOWN
662cdf0e10cSrcweir
663cdf0e10cSrcweir
664cdf0e10cSrcweir // End-Position eintragen
665cdf0e10cSrcweir rpEndPos = mpActualPos;
666cdf0e10cSrcweir return sal_True;
667cdf0e10cSrcweir }
668cdf0e10cSrcweir
getTokStr(const sal_Unicode * pStartPos,const sal_Unicode * pEndPos)669cdf0e10cSrcweir String SimpleTokenizer_Impl::getTokStr
670cdf0e10cSrcweir ( /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
671cdf0e10cSrcweir {
672cdf0e10cSrcweir return String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) );
673cdf0e10cSrcweir }
674cdf0e10cSrcweir
675cdf0e10cSrcweir #ifdef DBG_UTIL
676cdf0e10cSrcweir // TEST: Token ausgeben
getFullTokenStr(TokenTypes eType,const sal_Unicode * pStartPos,const sal_Unicode * pEndPos)677cdf0e10cSrcweir String SimpleTokenizer_Impl::getFullTokenStr( /*out*/TokenTypes eType,
678cdf0e10cSrcweir /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
679cdf0e10cSrcweir {
680cdf0e10cSrcweir String aOut;
681cdf0e10cSrcweir switch( eType )
682cdf0e10cSrcweir {
683cdf0e10cSrcweir case TT_UNKNOWN: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") ); break;
684cdf0e10cSrcweir case TT_IDENTIFIER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") ); break;
685cdf0e10cSrcweir case TT_WHITESPACE: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") ); break;
686cdf0e10cSrcweir case TT_NUMBER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") ); break;
687cdf0e10cSrcweir case TT_STRING: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") ); break;
688cdf0e10cSrcweir case TT_EOL: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") ); break;
689cdf0e10cSrcweir case TT_COMMENT: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") ); break;
690cdf0e10cSrcweir case TT_ERROR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") ); break;
691cdf0e10cSrcweir case TT_OPERATOR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") ); break;
692cdf0e10cSrcweir case TT_KEYWORDS: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") ); break;
693cdf0e10cSrcweir case TT_PARAMETER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") ); break;
694cdf0e10cSrcweir }
695cdf0e10cSrcweir if( eType != TT_EOL )
696cdf0e10cSrcweir {
697cdf0e10cSrcweir aOut += String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) );
698cdf0e10cSrcweir }
699cdf0e10cSrcweir aOut += String( RTL_CONSTASCII_USTRINGPARAM("\n") );
700cdf0e10cSrcweir return aOut;
701cdf0e10cSrcweir }
702cdf0e10cSrcweir #endif
703cdf0e10cSrcweir
SimpleTokenizer_Impl(HighlighterLanguage aLang)704cdf0e10cSrcweir SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang)
705cdf0e10cSrcweir {
706cdf0e10cSrcweir memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );
707cdf0e10cSrcweir
708cdf0e10cSrcweir // Zeichen-Tabelle fuellen
709cdf0e10cSrcweir sal_uInt16 i;
710cdf0e10cSrcweir
711cdf0e10cSrcweir // Zulaessige Zeichen fuer Identifier
712cdf0e10cSrcweir sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
713cdf0e10cSrcweir for( i = 'a' ; i <= 'z' ; i++ )
714cdf0e10cSrcweir aCharTypeTab[i] |= nHelpMask;
715cdf0e10cSrcweir for( i = 'A' ; i <= 'Z' ; i++ )
716cdf0e10cSrcweir aCharTypeTab[i] |= nHelpMask;
717cdf0e10cSrcweir // '_' extra eintragen
718cdf0e10cSrcweir aCharTypeTab[(int)'_'] |= nHelpMask;
719cdf0e10cSrcweir // AB 23.6.97: '$' ist auch erlaubt
720cdf0e10cSrcweir aCharTypeTab[(int)'$'] |= nHelpMask;
721cdf0e10cSrcweir
722cdf0e10cSrcweir // Ziffern (Identifier und Number ist moeglich)
723cdf0e10cSrcweir nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
724cdf0e10cSrcweir CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
725cdf0e10cSrcweir for( i = '0' ; i <= '9' ; i++ )
726cdf0e10cSrcweir aCharTypeTab[i] |= nHelpMask;
727cdf0e10cSrcweir
728cdf0e10cSrcweir // e und E sowie . von Hand ergaenzen
729cdf0e10cSrcweir aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
730cdf0e10cSrcweir aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
731cdf0e10cSrcweir aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
732cdf0e10cSrcweir aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;
733cdf0e10cSrcweir
734cdf0e10cSrcweir // Hex-Ziffern
735cdf0e10cSrcweir for( i = 'a' ; i <= 'f' ; i++ )
736cdf0e10cSrcweir aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
737cdf0e10cSrcweir for( i = 'A' ; i <= 'F' ; i++ )
738cdf0e10cSrcweir aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
739cdf0e10cSrcweir
740cdf0e10cSrcweir // Oct-Ziffern
741cdf0e10cSrcweir for( i = '0' ; i <= '7' ; i++ )
742cdf0e10cSrcweir aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;
743cdf0e10cSrcweir
744cdf0e10cSrcweir // String-Beginn/End-Zeichen
745cdf0e10cSrcweir aCharTypeTab[(int)'\''] |= CHAR_START_STRING;
746cdf0e10cSrcweir aCharTypeTab[(int)'\"'] |= CHAR_START_STRING;
747cdf0e10cSrcweir aCharTypeTab[(int)'['] |= CHAR_START_STRING;
748cdf0e10cSrcweir aCharTypeTab[(int)'`'] |= CHAR_START_STRING;
749cdf0e10cSrcweir
750cdf0e10cSrcweir // Operator-Zeichen
751cdf0e10cSrcweir aCharTypeTab[(int)'!'] |= CHAR_OPERATOR;
752cdf0e10cSrcweir aCharTypeTab[(int)'%'] |= CHAR_OPERATOR;
753cdf0e10cSrcweir // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140
754cdf0e10cSrcweir aCharTypeTab[(int)'('] |= CHAR_OPERATOR;
755cdf0e10cSrcweir aCharTypeTab[(int)')'] |= CHAR_OPERATOR;
756cdf0e10cSrcweir aCharTypeTab[(int)'*'] |= CHAR_OPERATOR;
757cdf0e10cSrcweir aCharTypeTab[(int)'+'] |= CHAR_OPERATOR;
758cdf0e10cSrcweir aCharTypeTab[(int)','] |= CHAR_OPERATOR;
759cdf0e10cSrcweir aCharTypeTab[(int)'-'] |= CHAR_OPERATOR;
760cdf0e10cSrcweir aCharTypeTab[(int)'/'] |= CHAR_OPERATOR;
761cdf0e10cSrcweir aCharTypeTab[(int)':'] |= CHAR_OPERATOR;
762cdf0e10cSrcweir aCharTypeTab[(int)'<'] |= CHAR_OPERATOR;
763cdf0e10cSrcweir aCharTypeTab[(int)'='] |= CHAR_OPERATOR;
764cdf0e10cSrcweir aCharTypeTab[(int)'>'] |= CHAR_OPERATOR;
765cdf0e10cSrcweir aCharTypeTab[(int)'?'] |= CHAR_OPERATOR;
766cdf0e10cSrcweir aCharTypeTab[(int)'^'] |= CHAR_OPERATOR;
767cdf0e10cSrcweir aCharTypeTab[(int)'|'] |= CHAR_OPERATOR;
768cdf0e10cSrcweir aCharTypeTab[(int)'~'] |= CHAR_OPERATOR;
769cdf0e10cSrcweir aCharTypeTab[(int)'{'] |= CHAR_OPERATOR;
770cdf0e10cSrcweir aCharTypeTab[(int)'}'] |= CHAR_OPERATOR;
771cdf0e10cSrcweir // aCharTypeTab[(int)'['] |= CHAR_OPERATOR; Removed because of #i17826
772cdf0e10cSrcweir aCharTypeTab[(int)']'] |= CHAR_OPERATOR;
773cdf0e10cSrcweir aCharTypeTab[(int)';'] |= CHAR_OPERATOR;
774cdf0e10cSrcweir
775cdf0e10cSrcweir // Space
776cdf0e10cSrcweir aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
777cdf0e10cSrcweir aCharTypeTab[(int)'\t'] |= CHAR_SPACE;
778cdf0e10cSrcweir
779cdf0e10cSrcweir // Zeilen-Ende-Zeichen
780cdf0e10cSrcweir aCharTypeTab[(int)'\r'] |= CHAR_EOL;
781cdf0e10cSrcweir aCharTypeTab[(int)'\n'] |= CHAR_EOL;
782cdf0e10cSrcweir
783cdf0e10cSrcweir ppListKeyWords = NULL;
784cdf0e10cSrcweir }
785cdf0e10cSrcweir
~SimpleTokenizer_Impl(void)786cdf0e10cSrcweir SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void )
787cdf0e10cSrcweir {
788cdf0e10cSrcweir }
789cdf0e10cSrcweir
getSimpleTokenizer(void)790cdf0e10cSrcweir SimpleTokenizer_Impl* getSimpleTokenizer( void )
791cdf0e10cSrcweir {
792cdf0e10cSrcweir static SimpleTokenizer_Impl* pSimpleTokenizer = NULL;
793cdf0e10cSrcweir if( !pSimpleTokenizer )
794cdf0e10cSrcweir pSimpleTokenizer = new SimpleTokenizer_Impl();
795cdf0e10cSrcweir return pSimpleTokenizer;
796cdf0e10cSrcweir }
797cdf0e10cSrcweir
798cdf0e10cSrcweir // Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul
parseLine(sal_uInt32 nParseLine,const String * aSource)799cdf0e10cSrcweir sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* aSource )
800cdf0e10cSrcweir {
801cdf0e10cSrcweir // Position auf den Anfang des Source-Strings setzen
802cdf0e10cSrcweir mpStringBegin = mpActualPos = aSource->GetBuffer();
803cdf0e10cSrcweir
804cdf0e10cSrcweir // Zeile und Spalte initialisieren
805cdf0e10cSrcweir nLine = nParseLine;
806cdf0e10cSrcweir nCol = 0L;
807cdf0e10cSrcweir
808cdf0e10cSrcweir // Variablen fuer die Out-Parameter
809cdf0e10cSrcweir TokenTypes eType;
810cdf0e10cSrcweir const sal_Unicode* pStartPos;
811cdf0e10cSrcweir const sal_Unicode* pEndPos;
812cdf0e10cSrcweir
813cdf0e10cSrcweir // Schleife ueber alle Tokens
814cdf0e10cSrcweir sal_uInt16 nTokenCount = 0;
815cdf0e10cSrcweir while( getNextToken( eType, pStartPos, pEndPos ) )
816cdf0e10cSrcweir nTokenCount++;
817cdf0e10cSrcweir
818cdf0e10cSrcweir return nTokenCount;
819cdf0e10cSrcweir }
820cdf0e10cSrcweir
getHighlightPortions(sal_uInt32 nParseLine,const String & rLine,HighlightPortions & portions)821cdf0e10cSrcweir void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const String& rLine,
822cdf0e10cSrcweir /*out*/HighlightPortions& portions )
823cdf0e10cSrcweir {
824cdf0e10cSrcweir // Position auf den Anfang des Source-Strings setzen
825cdf0e10cSrcweir mpStringBegin = mpActualPos = rLine.GetBuffer();
826cdf0e10cSrcweir
827cdf0e10cSrcweir // Zeile und Spalte initialisieren
828cdf0e10cSrcweir nLine = nParseLine;
829cdf0e10cSrcweir nCol = 0L;
830cdf0e10cSrcweir
831cdf0e10cSrcweir // Variablen fuer die Out-Parameter
832cdf0e10cSrcweir TokenTypes eType;
833cdf0e10cSrcweir const sal_Unicode* pStartPos;
834cdf0e10cSrcweir const sal_Unicode* pEndPos;
835cdf0e10cSrcweir
836cdf0e10cSrcweir // Schleife ueber alle Tokens
837cdf0e10cSrcweir while( getNextToken( eType, pStartPos, pEndPos ) )
838cdf0e10cSrcweir {
839cdf0e10cSrcweir HighlightPortion portion;
840cdf0e10cSrcweir
841cdf0e10cSrcweir portion.nBegin = (sal_uInt16)(pStartPos - mpStringBegin);
842cdf0e10cSrcweir portion.nEnd = (sal_uInt16)(pEndPos - mpStringBegin);
843cdf0e10cSrcweir portion.tokenType = eType;
844cdf0e10cSrcweir
845cdf0e10cSrcweir portions.push_back(portion);
846cdf0e10cSrcweir }
847cdf0e10cSrcweir }
848cdf0e10cSrcweir
849cdf0e10cSrcweir
850cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////////
// Implementation of the SyntaxHighlighter
852cdf0e10cSrcweir
SyntaxHighlighter()853cdf0e10cSrcweir SyntaxHighlighter::SyntaxHighlighter()
854cdf0e10cSrcweir {
855cdf0e10cSrcweir m_pSimpleTokenizer = 0;
856cdf0e10cSrcweir m_pKeyWords = NULL;
857cdf0e10cSrcweir m_nKeyWordCount = 0;
858cdf0e10cSrcweir }
859cdf0e10cSrcweir
~SyntaxHighlighter()860cdf0e10cSrcweir SyntaxHighlighter::~SyntaxHighlighter()
861cdf0e10cSrcweir {
862cdf0e10cSrcweir delete m_pSimpleTokenizer;
863cdf0e10cSrcweir delete m_pKeyWords;
864cdf0e10cSrcweir }
865cdf0e10cSrcweir
initialize(HighlighterLanguage eLanguage_)866cdf0e10cSrcweir void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ )
867cdf0e10cSrcweir {
868cdf0e10cSrcweir eLanguage = eLanguage_;
869cdf0e10cSrcweir delete m_pSimpleTokenizer;
870cdf0e10cSrcweir m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage);
871cdf0e10cSrcweir
872cdf0e10cSrcweir switch (eLanguage)
873cdf0e10cSrcweir {
874cdf0e10cSrcweir case HIGHLIGHT_BASIC:
875cdf0e10cSrcweir m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords,
876cdf0e10cSrcweir sizeof( strListBasicKeyWords ) / sizeof( char* ));
877cdf0e10cSrcweir break;
878cdf0e10cSrcweir case HIGHLIGHT_SQL:
879cdf0e10cSrcweir m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords,
880cdf0e10cSrcweir sizeof( strListSqlKeyWords ) / sizeof( char* ));
881cdf0e10cSrcweir break;
882cdf0e10cSrcweir default:
883cdf0e10cSrcweir m_pSimpleTokenizer->setKeyWords( NULL, 0 );
884cdf0e10cSrcweir }
885cdf0e10cSrcweir }
886cdf0e10cSrcweir
notifyChange(sal_uInt32 nLine,sal_Int32 nLineCountDifference,const String * pChangedLines,sal_uInt32 nArrayLength)887cdf0e10cSrcweir const Range SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference,
888cdf0e10cSrcweir const String* pChangedLines, sal_uInt32 nArrayLength)
889cdf0e10cSrcweir {
890cdf0e10cSrcweir (void)nLineCountDifference;
891cdf0e10cSrcweir
892cdf0e10cSrcweir for( sal_uInt32 i=0 ; i < nArrayLength ; i++ )
893cdf0e10cSrcweir m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]);
894cdf0e10cSrcweir
895cdf0e10cSrcweir return Range( nLine, nLine + nArrayLength-1 );
896cdf0e10cSrcweir }
897cdf0e10cSrcweir
getHighlightPortions(sal_uInt32 nLine,const String & rLine,HighlightPortions & portions)898cdf0e10cSrcweir void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const String& rLine,
899cdf0e10cSrcweir /*out*/HighlightPortions& portions )
900cdf0e10cSrcweir {
901cdf0e10cSrcweir m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions );
902cdf0e10cSrcweir }
903