15900e8ecSAndrew Rist /**************************************************************
2cdf0e10cSrcweir  *
35900e8ecSAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
45900e8ecSAndrew Rist  * or more contributor license agreements.  See the NOTICE file
55900e8ecSAndrew Rist  * distributed with this work for additional information
65900e8ecSAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
75900e8ecSAndrew Rist  * to you under the Apache License, Version 2.0 (the
85900e8ecSAndrew Rist  * "License"); you may not use this file except in compliance
95900e8ecSAndrew Rist  * with the License.  You may obtain a copy of the License at
105900e8ecSAndrew Rist  *
115900e8ecSAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
125900e8ecSAndrew Rist  *
135900e8ecSAndrew Rist  * Unless required by applicable law or agreed to in writing,
145900e8ecSAndrew Rist  * software distributed under the License is distributed on an
155900e8ecSAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
165900e8ecSAndrew Rist  * KIND, either express or implied.  See the License for the
175900e8ecSAndrew Rist  * specific language governing permissions and limitations
185900e8ecSAndrew Rist  * under the License.
195900e8ecSAndrew Rist  *
205900e8ecSAndrew Rist  *************************************************************/
215900e8ecSAndrew Rist 
225900e8ecSAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_svtools.hxx"
26cdf0e10cSrcweir 
27cdf0e10cSrcweir #include <svtools/syntaxhighlight.hxx>
28cdf0e10cSrcweir 
29cdf0e10cSrcweir #include <unotools/charclass.hxx>
30cdf0e10cSrcweir #include <tools/debug.hxx>
31cdf0e10cSrcweir 
32cdf0e10cSrcweir 
// ##########################################################################
// ATTENTION: all these words need to be in lower case and must stay in
// ascending strcmp() order, because the keyword lookup runs bsearch() on
// the lower-cased token text
// ##########################################################################
// BASIC keyword table. Entries are lower case and in ascending strcmp()
// order; rows are grouped by first letter purely for readability.
static const char* strListBasicKeyWords[] = {
	// a
	"access", "alias", "and", "any", "append", "as",
	// b
	"base", "binary", "boolean", "byref", "byte", "byval",
	// c
	"call", "case", "cdecl", "classmodule", "close", "compare",
	"compatible", "const", "currency",
	// d
	"date", "declare", "defbool", "defcur", "defdate", "defdbl",
	"deferr", "defint", "deflng", "defobj", "defsng", "defstr",
	"defvar", "dim", "do", "double",
	// e
	"each", "else", "elseif", "end", "end enum", "end function",
	"end if", "end select", "end sub", "end type", "endif", "enum",
	"eqv", "erase", "error", "exit", "explicit",
	// f
	"for", "function",
	// g
	"get", "global", "gosub", "goto",
	// i
	"if", "imp", "implements", "in", "input", "integer", "is",
	// l
	"let", "lib", "like", "line", "line input", "local", "lock",
	"long", "loop", "lprint", "lset",
	// m
	"mod",
	// n
	"name", "new", "next", "not",
	// o
	"object", "on", "open", "option", "optional", "or", "output",
	// p
	"preserve", "print", "private", "property", "public",
	// r
	"random", "read", "redim", "rem", "resume", "return", "rset",
	// s
	"select", "set", "shared", "single", "static", "step", "stop",
	"string", "sub", "system",
	// t
	"text", "then", "to", "type", "typeof",
	// u
	"until",
	// v
	"variant",
	// w
	"wend", "while", "with", "write",
	// x
	"xor"
};
162cdf0e10cSrcweir 
163cdf0e10cSrcweir 
// SQL keyword table. Entries are lower case and in ascending strcmp()
// order; rows are grouped by first letter purely for readability.
static const char* strListSqlKeyWords[] = {
	// a
	"all", "and", "any", "as", "asc", "avg",
	// b
	"between", "by",
	// c
	"cast", "corresponding", "count", "create", "cross",
	// d
	"delete", "desc", "distinct", "drop",
	// e
	"escape", "except", "exists",
	// f
	"false", "from", "full",
	// g
	"global", "group",
	// h
	"having",
	// i
	"in", "inner", "insert", "intersect", "into", "is",
	// j
	"join",
	// l
	"left", "like", "local",
	// m
	"match", "max", "min",
	// n
	"natural", "not", "null",
	// o
	"on", "or", "order", "outer",
	// r
	"right",
	// s
	"select", "set", "some", "sum",
	// t
	"table", "temporary", "true",
	// u
	"union", "unique", "unknown", "update", "using",
	// v
	"values",
	// w
	"where"
};
227cdf0e10cSrcweir 
228cdf0e10cSrcweir 
compare_strings(const void * arg1,const void * arg2)229cdf0e10cSrcweir extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 )
230cdf0e10cSrcweir {
231cdf0e10cSrcweir 	return strcmp( (char *)arg1, *(char **)arg2 );
232cdf0e10cSrcweir }
233cdf0e10cSrcweir 
234cdf0e10cSrcweir 
235cdf0e10cSrcweir class LetterTable
236cdf0e10cSrcweir {
237cdf0e10cSrcweir 	bool		IsLetterTab[256];
238cdf0e10cSrcweir 
239cdf0e10cSrcweir public:
240cdf0e10cSrcweir 	LetterTable( void );
241cdf0e10cSrcweir 
isLetter(sal_Unicode c)242cdf0e10cSrcweir 	inline bool isLetter( sal_Unicode c )
243cdf0e10cSrcweir 	{
244cdf0e10cSrcweir 		bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c );
245cdf0e10cSrcweir 		return bRet;
246cdf0e10cSrcweir 	}
247cdf0e10cSrcweir 	bool isLetterUnicode( sal_Unicode c );
248cdf0e10cSrcweir };
249cdf0e10cSrcweir 
250cdf0e10cSrcweir class BasicSimpleCharClass
251cdf0e10cSrcweir {
252cdf0e10cSrcweir 	static LetterTable aLetterTable;
253cdf0e10cSrcweir 
254cdf0e10cSrcweir public:
isAlpha(sal_Unicode c,bool bCompatible)255cdf0e10cSrcweir 	static sal_Bool isAlpha( sal_Unicode c, bool bCompatible )
256cdf0e10cSrcweir 	{
257cdf0e10cSrcweir 		sal_Bool bRet = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
258cdf0e10cSrcweir 					|| (bCompatible && aLetterTable.isLetter( c ));
259cdf0e10cSrcweir 		return bRet;
260cdf0e10cSrcweir 	}
261cdf0e10cSrcweir 
isDigit(sal_Unicode c)262cdf0e10cSrcweir 	static sal_Bool isDigit( sal_Unicode c )
263cdf0e10cSrcweir 	{
264cdf0e10cSrcweir 		sal_Bool bRet = (c >= '0' && c <= '9');
265cdf0e10cSrcweir 		return bRet;
266cdf0e10cSrcweir 	}
267cdf0e10cSrcweir 
isAlphaNumeric(sal_Unicode c,bool bCompatible)268cdf0e10cSrcweir 	static sal_Bool isAlphaNumeric( sal_Unicode c, bool bCompatible )
269cdf0e10cSrcweir 	{
270cdf0e10cSrcweir 		sal_Bool bRet = isDigit( c ) || isAlpha( c, bCompatible );
271cdf0e10cSrcweir 		return bRet;
272cdf0e10cSrcweir 	}
273cdf0e10cSrcweir };
274cdf0e10cSrcweir 
275cdf0e10cSrcweir LetterTable BasicSimpleCharClass::aLetterTable;
276cdf0e10cSrcweir 
LetterTable(void)277cdf0e10cSrcweir LetterTable::LetterTable( void )
278cdf0e10cSrcweir {
279cdf0e10cSrcweir 	for( int i = 0 ; i < 256 ; ++i )
280cdf0e10cSrcweir 		IsLetterTab[i] = false;
281cdf0e10cSrcweir 
282cdf0e10cSrcweir 	IsLetterTab[0xC0] = true;	// ?, CAPITAL LETTER A WITH GRAVE ACCENT
283cdf0e10cSrcweir 	IsLetterTab[0xC1] = true;	// ?, CAPITAL LETTER A WITH ACUTE ACCENT
284cdf0e10cSrcweir 	IsLetterTab[0xC2] = true;	// ?, CAPITAL LETTER A WITH CIRCUMFLEX ACCENT
285cdf0e10cSrcweir 	IsLetterTab[0xC3] = true;	// ?, CAPITAL LETTER A WITH TILDE
286cdf0e10cSrcweir 	IsLetterTab[0xC4] = true;	// ?, CAPITAL LETTER A WITH DIAERESIS
287cdf0e10cSrcweir 	IsLetterTab[0xC5] = true;	// ?, CAPITAL LETTER A WITH RING ABOVE
288cdf0e10cSrcweir 	IsLetterTab[0xC6] = true;	// ?, CAPITAL LIGATURE AE
289cdf0e10cSrcweir 	IsLetterTab[0xC7] = true;	// ?, CAPITAL LETTER C WITH CEDILLA
290cdf0e10cSrcweir 	IsLetterTab[0xC8] = true;	// ?, CAPITAL LETTER E WITH GRAVE ACCENT
291cdf0e10cSrcweir 	IsLetterTab[0xC9] = true;	// ?, CAPITAL LETTER E WITH ACUTE ACCENT
292cdf0e10cSrcweir 	IsLetterTab[0xCA] = true;	// ?, CAPITAL LETTER E WITH CIRCUMFLEX ACCENT
293cdf0e10cSrcweir 	IsLetterTab[0xCB] = true;	// ?, CAPITAL LETTER E WITH DIAERESIS
294cdf0e10cSrcweir 	IsLetterTab[0xCC] = true;	// ?, CAPITAL LETTER I WITH GRAVE ACCENT
295cdf0e10cSrcweir 	IsLetterTab[0xCD] = true;	// ?, CAPITAL LETTER I WITH ACUTE ACCENT
296cdf0e10cSrcweir 	IsLetterTab[0xCE] = true;	// ?, CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
297cdf0e10cSrcweir 	IsLetterTab[0xCF] = true;	// ?, CAPITAL LETTER I WITH DIAERESIS
298cdf0e10cSrcweir 	IsLetterTab[0xD0] = true;	// ?, CAPITAL LETTER ETH
299cdf0e10cSrcweir 	IsLetterTab[0xD1] = true;	// ?, CAPITAL LETTER N WITH TILDE
300cdf0e10cSrcweir 	IsLetterTab[0xD2] = true;	// ?, CAPITAL LETTER O WITH GRAVE ACCENT
301cdf0e10cSrcweir 	IsLetterTab[0xD3] = true;	// ?, CAPITAL LETTER O WITH ACUTE ACCENT
302cdf0e10cSrcweir 	IsLetterTab[0xD4] = true;	// ?, CAPITAL LETTER O WITH CIRCUMFLEX ACCENT
303cdf0e10cSrcweir 	IsLetterTab[0xD5] = true;	// ?, CAPITAL LETTER O WITH TILDE
304cdf0e10cSrcweir 	IsLetterTab[0xD6] = true;	// ?, CAPITAL LETTER O WITH DIAERESIS
305cdf0e10cSrcweir 	IsLetterTab[0xD8] = true;	// ?, CAPITAL LETTER O WITH STROKE
306cdf0e10cSrcweir 	IsLetterTab[0xD9] = true;	// ?, CAPITAL LETTER U WITH GRAVE ACCENT
307cdf0e10cSrcweir 	IsLetterTab[0xDA] = true;	// ?, CAPITAL LETTER U WITH ACUTE ACCENT
308cdf0e10cSrcweir 	IsLetterTab[0xDB] = true;	// ?, CAPITAL LETTER U WITH CIRCUMFLEX ACCENT
309cdf0e10cSrcweir 	IsLetterTab[0xDC] = true;	// ?, CAPITAL LETTER U WITH DIAERESIS
310cdf0e10cSrcweir 	IsLetterTab[0xDD] = true;	// ?, CAPITAL LETTER Y WITH ACUTE ACCENT
311cdf0e10cSrcweir 	IsLetterTab[0xDE] = true;	// ?, CAPITAL LETTER THORN
312cdf0e10cSrcweir 	IsLetterTab[0xDF] = true;	// ?, SMALL LETTER SHARP S
313cdf0e10cSrcweir 	IsLetterTab[0xE0] = true;	// ?, SMALL LETTER A WITH GRAVE ACCENT
314cdf0e10cSrcweir 	IsLetterTab[0xE1] = true;	// ?, SMALL LETTER A WITH ACUTE ACCENT
315cdf0e10cSrcweir 	IsLetterTab[0xE2] = true;	// ?, SMALL LETTER A WITH CIRCUMFLEX ACCENT
316cdf0e10cSrcweir 	IsLetterTab[0xE3] = true;	// ?, SMALL LETTER A WITH TILDE
317cdf0e10cSrcweir 	IsLetterTab[0xE4] = true;	// ?, SMALL LETTER A WITH DIAERESIS
318cdf0e10cSrcweir 	IsLetterTab[0xE5] = true;	// ?, SMALL LETTER A WITH RING ABOVE
319cdf0e10cSrcweir 	IsLetterTab[0xE6] = true;	// ?, SMALL LIGATURE AE
320cdf0e10cSrcweir 	IsLetterTab[0xE7] = true;	// ?, SMALL LETTER C WITH CEDILLA
321cdf0e10cSrcweir 	IsLetterTab[0xE8] = true;	// ?, SMALL LETTER E WITH GRAVE ACCENT
322cdf0e10cSrcweir 	IsLetterTab[0xE9] = true;	// ?, SMALL LETTER E WITH ACUTE ACCENT
323cdf0e10cSrcweir 	IsLetterTab[0xEA] = true;	// ?, SMALL LETTER E WITH CIRCUMFLEX ACCENT
324cdf0e10cSrcweir 	IsLetterTab[0xEB] = true;	// ?, SMALL LETTER E WITH DIAERESIS
325cdf0e10cSrcweir 	IsLetterTab[0xEC] = true;	// ?, SMALL LETTER I WITH GRAVE ACCENT
326cdf0e10cSrcweir 	IsLetterTab[0xED] = true;	// ?, SMALL LETTER I WITH ACUTE ACCENT
327cdf0e10cSrcweir 	IsLetterTab[0xEE] = true;	// ?, SMALL LETTER I WITH CIRCUMFLEX ACCENT
328cdf0e10cSrcweir 	IsLetterTab[0xEF] = true;	// ?, SMALL LETTER I WITH DIAERESIS
329cdf0e10cSrcweir 	IsLetterTab[0xF0] = true;	// ?, SMALL LETTER ETH
330cdf0e10cSrcweir 	IsLetterTab[0xF1] = true;	// ?, SMALL LETTER N WITH TILDE
331cdf0e10cSrcweir 	IsLetterTab[0xF2] = true;	// ?, SMALL LETTER O WITH GRAVE ACCENT
332cdf0e10cSrcweir 	IsLetterTab[0xF3] = true;	// ?, SMALL LETTER O WITH ACUTE ACCENT
333cdf0e10cSrcweir 	IsLetterTab[0xF4] = true;	// ?, SMALL LETTER O WITH CIRCUMFLEX ACCENT
334cdf0e10cSrcweir 	IsLetterTab[0xF5] = true;	// ?, SMALL LETTER O WITH TILDE
335cdf0e10cSrcweir 	IsLetterTab[0xF6] = true;	// ?, SMALL LETTER O WITH DIAERESIS
336cdf0e10cSrcweir 	IsLetterTab[0xF8] = true;	// ?, SMALL LETTER O WITH OBLIQUE BAR
337cdf0e10cSrcweir 	IsLetterTab[0xF9] = true;	// ?, SMALL LETTER U WITH GRAVE ACCENT
338cdf0e10cSrcweir 	IsLetterTab[0xFA] = true;	// ?, SMALL LETTER U WITH ACUTE ACCENT
339cdf0e10cSrcweir 	IsLetterTab[0xFB] = true;	// ?, SMALL LETTER U WITH CIRCUMFLEX ACCENT
340cdf0e10cSrcweir 	IsLetterTab[0xFC] = true;	// ?, SMALL LETTER U WITH DIAERESIS
341cdf0e10cSrcweir 	IsLetterTab[0xFD] = true;	// ?, SMALL LETTER Y WITH ACUTE ACCENT
342cdf0e10cSrcweir 	IsLetterTab[0xFE] = true;	// ?, SMALL LETTER THORN
343cdf0e10cSrcweir 	IsLetterTab[0xFF] = true;	// � , SMALL LETTER Y WITH DIAERESIS
344cdf0e10cSrcweir }
345cdf0e10cSrcweir 
isLetterUnicode(sal_Unicode c)346cdf0e10cSrcweir bool LetterTable::isLetterUnicode( sal_Unicode c )
347cdf0e10cSrcweir {
348cdf0e10cSrcweir 	static CharClass* pCharClass = NULL;
349cdf0e10cSrcweir 	if( pCharClass == NULL )
350cdf0e10cSrcweir 		pCharClass = new CharClass( Application::GetSettings().GetLocale() );
351cdf0e10cSrcweir 	String aStr( c );
352cdf0e10cSrcweir 	bool bRet = pCharClass->isLetter( aStr, 0 );
353cdf0e10cSrcweir 	return bRet;
354cdf0e10cSrcweir }
355cdf0e10cSrcweir 
// Helper function: test a character against a set of character-type flags
testCharFlags(sal_Unicode c,sal_uInt16 nTestFlags)357cdf0e10cSrcweir sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags )
358cdf0e10cSrcweir {
359cdf0e10cSrcweir 	bool bRet = false;
360cdf0e10cSrcweir 	if( c != 0 && c <= 255 )
361cdf0e10cSrcweir 	{
362cdf0e10cSrcweir 		bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 );
363cdf0e10cSrcweir 	}
364cdf0e10cSrcweir 	else if( c > 255 )
365cdf0e10cSrcweir 	{
366cdf0e10cSrcweir 		bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0
367cdf0e10cSrcweir 			? BasicSimpleCharClass::isAlpha( c, true ) : false;
368cdf0e10cSrcweir 	}
369cdf0e10cSrcweir 	return bRet;
370cdf0e10cSrcweir }
371cdf0e10cSrcweir 
setKeyWords(const char ** ppKeyWords,sal_uInt16 nCount)372cdf0e10cSrcweir void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount )
373cdf0e10cSrcweir {
374cdf0e10cSrcweir 	ppListKeyWords = ppKeyWords;
375cdf0e10cSrcweir 	nKeyWordCount = nCount;
376cdf0e10cSrcweir }
377cdf0e10cSrcweir 
// Fetch the next token
getNextToken(TokenTypes & reType,const sal_Unicode * & rpStartPos,const sal_Unicode * & rpEndPos)379cdf0e10cSrcweir sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
380cdf0e10cSrcweir 	/*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
381cdf0e10cSrcweir {
382cdf0e10cSrcweir 	reType = TT_UNKNOWN;
383cdf0e10cSrcweir 
384cdf0e10cSrcweir 	// Position merken
385cdf0e10cSrcweir 	rpStartPos = mpActualPos;
386cdf0e10cSrcweir 
387cdf0e10cSrcweir 	// Zeichen untersuchen
388cdf0e10cSrcweir 	sal_Unicode c = peekChar();
389cdf0e10cSrcweir 	if( c == CHAR_EOF )
390cdf0e10cSrcweir 		return sal_False;
391cdf0e10cSrcweir 
392cdf0e10cSrcweir 	// Zeichen lesen
393cdf0e10cSrcweir 	getChar();
394cdf0e10cSrcweir 
395cdf0e10cSrcweir 	//*** Alle Moeglichkeiten durchgehen ***
396cdf0e10cSrcweir 	// Space?
397cdf0e10cSrcweir 	if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) )
398cdf0e10cSrcweir 	{
399cdf0e10cSrcweir 		while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True )
400cdf0e10cSrcweir 			getChar();
401cdf0e10cSrcweir 
402cdf0e10cSrcweir 		reType = TT_WHITESPACE;
403cdf0e10cSrcweir 	}
404cdf0e10cSrcweir 
405cdf0e10cSrcweir 	// Identifier?
406cdf0e10cSrcweir 	else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) )
407cdf0e10cSrcweir 	{
408cdf0e10cSrcweir 		sal_Bool bIdentifierChar;
409cdf0e10cSrcweir 		do
410cdf0e10cSrcweir 		{
411cdf0e10cSrcweir 			// Naechstes Zeichen holen
412cdf0e10cSrcweir 			c = peekChar();
413cdf0e10cSrcweir 			bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER );
414cdf0e10cSrcweir 			if( bIdentifierChar )
415cdf0e10cSrcweir 				getChar();
416cdf0e10cSrcweir 		}
417cdf0e10cSrcweir 		while( bIdentifierChar );
418cdf0e10cSrcweir 
419cdf0e10cSrcweir 		reType = TT_IDENTIFIER;
420cdf0e10cSrcweir 
421cdf0e10cSrcweir 		// Schluesselwort-Tabelle
422cdf0e10cSrcweir 		if (ppListKeyWords != NULL)
423cdf0e10cSrcweir 		{
424cdf0e10cSrcweir 			int nCount = mpActualPos - rpStartPos;
425cdf0e10cSrcweir 
426cdf0e10cSrcweir 			// No keyword if string contains char > 255
427cdf0e10cSrcweir 			bool bCanBeKeyword = true;
428cdf0e10cSrcweir 			for( int i = 0 ; i < nCount ; i++ )
429cdf0e10cSrcweir 			{
430cdf0e10cSrcweir 				if( rpStartPos[i] > 255 )
431cdf0e10cSrcweir 				{
432cdf0e10cSrcweir 					bCanBeKeyword = false;
433cdf0e10cSrcweir 					break;
434cdf0e10cSrcweir 				}
435cdf0e10cSrcweir 			}
436cdf0e10cSrcweir 
437cdf0e10cSrcweir 			if( bCanBeKeyword )
438cdf0e10cSrcweir 			{
439cdf0e10cSrcweir 				String aKWString(rpStartPos, sal::static_int_cast< xub_StrLen >(nCount) );
440cdf0e10cSrcweir 				ByteString aByteStr( aKWString, RTL_TEXTENCODING_ASCII_US );
441cdf0e10cSrcweir 				aByteStr.ToLowerAscii();
442cdf0e10cSrcweir 				if ( bsearch( aByteStr.GetBuffer(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
443cdf0e10cSrcweir 																		compare_strings ) )
444cdf0e10cSrcweir 				{
445cdf0e10cSrcweir 					reType = TT_KEYWORDS;
446cdf0e10cSrcweir 
447cdf0e10cSrcweir 					if ( aByteStr.Equals( "rem" ) )
448cdf0e10cSrcweir 					{
449cdf0e10cSrcweir 						// Alle Zeichen bis Zeilen-Ende oder EOF entfernen
450cdf0e10cSrcweir 						sal_Unicode cPeek = peekChar();
451cdf0e10cSrcweir 						while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
452cdf0e10cSrcweir 						{
453cdf0e10cSrcweir 							c = getChar();
454cdf0e10cSrcweir 							cPeek = peekChar();
455cdf0e10cSrcweir 						}
456cdf0e10cSrcweir 
457cdf0e10cSrcweir 						reType = TT_COMMENT;
458cdf0e10cSrcweir 					}
459cdf0e10cSrcweir 				}
460cdf0e10cSrcweir 			}
461cdf0e10cSrcweir 		}
462cdf0e10cSrcweir 	}
463cdf0e10cSrcweir 
464cdf0e10cSrcweir 	// Operator?
465cdf0e10cSrcweir 	// only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
466cdf0e10cSrcweir 	else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) )
467cdf0e10cSrcweir 	{
468cdf0e10cSrcweir 		// paramters for SQL view
469cdf0e10cSrcweir 		if ( (c==':') || (c=='?'))
470cdf0e10cSrcweir 		{
471cdf0e10cSrcweir 			if (c!='?')
472cdf0e10cSrcweir 			{
473cdf0e10cSrcweir 				sal_Bool bIdentifierChar;
474cdf0e10cSrcweir 				do
475cdf0e10cSrcweir 				{
476cdf0e10cSrcweir 					// Naechstes Zeichen holen
477cdf0e10cSrcweir 					c = peekChar();
478cdf0e10cSrcweir 					bIdentifierChar =  BasicSimpleCharClass::isAlpha( c, true );
479cdf0e10cSrcweir 					if( bIdentifierChar )
480cdf0e10cSrcweir 						getChar();
481cdf0e10cSrcweir 				}
482cdf0e10cSrcweir 				while( bIdentifierChar );
483cdf0e10cSrcweir 			}
484cdf0e10cSrcweir 			reType = TT_PARAMETER;
485cdf0e10cSrcweir 		}
486*a9ab3c7bSHerbert Dürr 		else if( c=='-' )
487cdf0e10cSrcweir 		{
488cdf0e10cSrcweir 			sal_Unicode cPeekNext = peekChar();
489cdf0e10cSrcweir 			if (cPeekNext=='-')
490cdf0e10cSrcweir 			{
491cdf0e10cSrcweir 				// Alle Zeichen bis Zeilen-Ende oder EOF entfernen
492cdf0e10cSrcweir 				while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
493cdf0e10cSrcweir 				{
494cdf0e10cSrcweir 					getChar();
495cdf0e10cSrcweir 					cPeekNext = peekChar();
496cdf0e10cSrcweir 				}
497cdf0e10cSrcweir 				reType = TT_COMMENT;
498cdf0e10cSrcweir 			}
499cdf0e10cSrcweir 		}
500cdf0e10cSrcweir        else if (c=='/')
501cdf0e10cSrcweir        {
502cdf0e10cSrcweir            sal_Unicode cPeekNext = peekChar();
503cdf0e10cSrcweir            if (cPeekNext=='/')
504cdf0e10cSrcweir            {
505cdf0e10cSrcweir                // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
506cdf0e10cSrcweir                while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
507cdf0e10cSrcweir                {
508cdf0e10cSrcweir                    getChar();
509cdf0e10cSrcweir                    cPeekNext = peekChar();
510cdf0e10cSrcweir                }
511cdf0e10cSrcweir                reType = TT_COMMENT;
512cdf0e10cSrcweir            }
513cdf0e10cSrcweir        }
514cdf0e10cSrcweir 		else
515cdf0e10cSrcweir 		{
516cdf0e10cSrcweir 			// Kommentar ?
517cdf0e10cSrcweir 			if ( c == '\'' )
518cdf0e10cSrcweir 			{
519cdf0e10cSrcweir 				c = getChar();	// '/' entfernen
520cdf0e10cSrcweir 
521cdf0e10cSrcweir 				// Alle Zeichen bis Zeilen-Ende oder EOF entfernen
522cdf0e10cSrcweir 				sal_Unicode cPeek = c;
523cdf0e10cSrcweir 				while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
524cdf0e10cSrcweir 				{
525cdf0e10cSrcweir 					getChar();
526cdf0e10cSrcweir 					cPeek = peekChar();
527cdf0e10cSrcweir 				}
528cdf0e10cSrcweir 
529cdf0e10cSrcweir 				reType = TT_COMMENT;
530cdf0e10cSrcweir 			}
531cdf0e10cSrcweir 
532cdf0e10cSrcweir 			// Echter Operator, kann hier einfach behandelt werden,
533cdf0e10cSrcweir 			// da nicht der wirkliche Operator, wie z.B. += interessiert,
534cdf0e10cSrcweir 			// sondern nur die Tatsache, dass es sich um einen handelt.
535cdf0e10cSrcweir 			if( reType != TT_COMMENT )
536cdf0e10cSrcweir 			{
537cdf0e10cSrcweir 				reType = TT_OPERATOR;
538cdf0e10cSrcweir 			}
539cdf0e10cSrcweir 
540cdf0e10cSrcweir 		}
541cdf0e10cSrcweir 	}
542cdf0e10cSrcweir 
543cdf0e10cSrcweir 	// Objekt-Trenner? Muss vor Number abgehandelt werden
544cdf0e10cSrcweir 	else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) )
545cdf0e10cSrcweir 	{
546cdf0e10cSrcweir 		reType = TT_OPERATOR;
547cdf0e10cSrcweir 	}
548cdf0e10cSrcweir 
549cdf0e10cSrcweir 	// Zahl?
550cdf0e10cSrcweir 	else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True )
551cdf0e10cSrcweir 	{
552cdf0e10cSrcweir 		reType = TT_NUMBER;
553cdf0e10cSrcweir 
554cdf0e10cSrcweir 		// Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert
555cdf0e10cSrcweir 		int nRadix = 10;
556cdf0e10cSrcweir 
557cdf0e10cSrcweir 		// Ist es eine Hex- oder Oct-Zahl?
558cdf0e10cSrcweir 		if( c == '&' )
559cdf0e10cSrcweir 		{
560cdf0e10cSrcweir 			// Octal?
561cdf0e10cSrcweir 			if( peekChar() == 'o' || peekChar() == 'O' )
562cdf0e10cSrcweir 			{
563cdf0e10cSrcweir 				// o entfernen
564cdf0e10cSrcweir 				getChar();
565cdf0e10cSrcweir 				nRadix = 8; 	// Octal-Basis
566cdf0e10cSrcweir 
567cdf0e10cSrcweir 				// Alle Ziffern einlesen
568cdf0e10cSrcweir 				while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
569cdf0e10cSrcweir 					c = getChar();
570cdf0e10cSrcweir 			}
571cdf0e10cSrcweir 			// Hex?
572cdf0e10cSrcweir 			else if( peekChar() == 'h' || peekChar() == 'H' )
573cdf0e10cSrcweir 			{
574cdf0e10cSrcweir 				// x entfernen
575cdf0e10cSrcweir 				getChar();
576cdf0e10cSrcweir 				nRadix = 16;	 // Hex-Basis
577cdf0e10cSrcweir 
578cdf0e10cSrcweir 				// Alle Ziffern einlesen und puffern
579cdf0e10cSrcweir 				while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
580cdf0e10cSrcweir 					c = getChar();
581cdf0e10cSrcweir 			}
582cdf0e10cSrcweir 			else
583cdf0e10cSrcweir 			{
584cdf0e10cSrcweir 				reType = TT_OPERATOR;
585cdf0e10cSrcweir 			}
586cdf0e10cSrcweir 		}
587cdf0e10cSrcweir 
588cdf0e10cSrcweir 		// Wenn nicht Oct oder Hex als double ansehen
589cdf0e10cSrcweir 		if( reType == TT_NUMBER && nRadix == 10 )
590cdf0e10cSrcweir 		{
591cdf0e10cSrcweir 			// Flag, ob das letzte Zeichen ein Exponent war
592cdf0e10cSrcweir 			sal_Bool bAfterExpChar = sal_False;
593cdf0e10cSrcweir 
594cdf0e10cSrcweir 			// Alle Ziffern einlesen
595cdf0e10cSrcweir 			while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
596cdf0e10cSrcweir 					(bAfterExpChar && peekChar() == '+' ) ||
597cdf0e10cSrcweir 					(bAfterExpChar && peekChar() == '-' ) )
598cdf0e10cSrcweir 					// Nach Exponent auch +/- OK
599cdf0e10cSrcweir 			{
600cdf0e10cSrcweir 				c = getChar();					// Zeichen lesen
601cdf0e10cSrcweir 				bAfterExpChar = ( c == 'e' || c == 'E' );
602cdf0e10cSrcweir 			}
603cdf0e10cSrcweir 		}
604cdf0e10cSrcweir 
605cdf0e10cSrcweir 		// reType = TT_NUMBER;
606cdf0e10cSrcweir 	}
607cdf0e10cSrcweir 
608cdf0e10cSrcweir 	// String?
609cdf0e10cSrcweir 	else if( testCharFlags( c, CHAR_START_STRING ) == sal_True )
610cdf0e10cSrcweir 	{
611cdf0e10cSrcweir 		// Merken, welches Zeichen den String eroeffnet hat
612cdf0e10cSrcweir 		sal_Unicode cEndString = c;
613cdf0e10cSrcweir 		if( c == '[' )
614cdf0e10cSrcweir 			cEndString = ']';
615cdf0e10cSrcweir 
616cdf0e10cSrcweir 		// Alle Ziffern einlesen und puffern
617cdf0e10cSrcweir 		while( peekChar() != cEndString )
618cdf0e10cSrcweir 		{
619cdf0e10cSrcweir 			// #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht
620cdf0e10cSrcweir 			if( peekChar() == CHAR_EOF )
621cdf0e10cSrcweir 			{
622cdf0e10cSrcweir 				// ERROR: unterminated string literal
623cdf0e10cSrcweir 				reType = TT_ERROR;
624cdf0e10cSrcweir 				break;
625cdf0e10cSrcweir 			}
626cdf0e10cSrcweir 			c = getChar();
627cdf0e10cSrcweir 			if( testCharFlags( c, CHAR_EOL ) == sal_True )
628cdf0e10cSrcweir 			{
629cdf0e10cSrcweir 				// ERROR: unterminated string literal
630cdf0e10cSrcweir 				reType = TT_ERROR;
631cdf0e10cSrcweir 				break;
632cdf0e10cSrcweir 			}
633cdf0e10cSrcweir 		}
634cdf0e10cSrcweir 
635cdf0e10cSrcweir 		//	Zeichen lesen
636cdf0e10cSrcweir 		if( reType != TT_ERROR )
637cdf0e10cSrcweir 		{
638cdf0e10cSrcweir 			getChar();
639cdf0e10cSrcweir 			if( cEndString == ']' )
640cdf0e10cSrcweir 				reType = TT_IDENTIFIER;
641cdf0e10cSrcweir 			else
642cdf0e10cSrcweir 				reType = TT_STRING;
643cdf0e10cSrcweir 		}
644cdf0e10cSrcweir 	}
645cdf0e10cSrcweir 
646cdf0e10cSrcweir 	// Zeilenende?
647cdf0e10cSrcweir 	else if( testCharFlags( c, CHAR_EOL ) == sal_True )
648cdf0e10cSrcweir 	{
649cdf0e10cSrcweir 		// Falls ein weiteres anderes EOL-Char folgt, weg damit
650cdf0e10cSrcweir 		sal_Unicode cNext = peekChar();
651cdf0e10cSrcweir 		if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True )
652cdf0e10cSrcweir 			getChar();
653cdf0e10cSrcweir 
654cdf0e10cSrcweir 		// Positions-Daten auf Zeilen-Beginn setzen
655cdf0e10cSrcweir 		nCol = 0;
656cdf0e10cSrcweir 		nLine++;
657cdf0e10cSrcweir 
658cdf0e10cSrcweir 		reType = TT_EOL;
659cdf0e10cSrcweir 	}
660cdf0e10cSrcweir 
661cdf0e10cSrcweir 	// Alles andere bleibt TT_UNKNOWN
662cdf0e10cSrcweir 
663cdf0e10cSrcweir 
664cdf0e10cSrcweir 	// End-Position eintragen
665cdf0e10cSrcweir 	rpEndPos = mpActualPos;
666cdf0e10cSrcweir 	return sal_True;
667cdf0e10cSrcweir }
668cdf0e10cSrcweir 
getTokStr(const sal_Unicode * pStartPos,const sal_Unicode * pEndPos)669cdf0e10cSrcweir String SimpleTokenizer_Impl::getTokStr
670cdf0e10cSrcweir 	( /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
671cdf0e10cSrcweir {
672cdf0e10cSrcweir 	return String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) );
673cdf0e10cSrcweir }
674cdf0e10cSrcweir 
675cdf0e10cSrcweir #ifdef DBG_UTIL
// TEST: dump a token as text
getFullTokenStr(TokenTypes eType,const sal_Unicode * pStartPos,const sal_Unicode * pEndPos)677cdf0e10cSrcweir String SimpleTokenizer_Impl::getFullTokenStr( /*out*/TokenTypes eType,
678cdf0e10cSrcweir 	/*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
679cdf0e10cSrcweir {
680cdf0e10cSrcweir 	String aOut;
681cdf0e10cSrcweir 	switch( eType )
682cdf0e10cSrcweir 	{
683cdf0e10cSrcweir 		case TT_UNKNOWN:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") ); break;
684cdf0e10cSrcweir 		case TT_IDENTIFIER:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") ); break;
685cdf0e10cSrcweir 		case TT_WHITESPACE:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") ); break;
686cdf0e10cSrcweir 		case TT_NUMBER:		aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") ); break;
687cdf0e10cSrcweir 		case TT_STRING:		aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") ); break;
688cdf0e10cSrcweir 		case TT_EOL:		aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") ); break;
689cdf0e10cSrcweir 		case TT_COMMENT:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") ); break;
690cdf0e10cSrcweir 		case TT_ERROR:		aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") ); break;
691cdf0e10cSrcweir 		case TT_OPERATOR:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") ); break;
692cdf0e10cSrcweir 		case TT_KEYWORDS:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") ); break;
693cdf0e10cSrcweir 		case TT_PARAMETER:	aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") ); break;
694cdf0e10cSrcweir 	}
695cdf0e10cSrcweir 	if( eType != TT_EOL )
696cdf0e10cSrcweir 	{
697cdf0e10cSrcweir 		aOut += String( pStartPos, (sal_uInt16)( pEndPos - pStartPos ) );
698cdf0e10cSrcweir 	}
699cdf0e10cSrcweir 	aOut += String( RTL_CONSTASCII_USTRINGPARAM("\n") );
700cdf0e10cSrcweir 	return aOut;
701cdf0e10cSrcweir }
702cdf0e10cSrcweir #endif
703cdf0e10cSrcweir 
SimpleTokenizer_Impl(HighlighterLanguage aLang)704cdf0e10cSrcweir SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang)
705cdf0e10cSrcweir {
706cdf0e10cSrcweir 	memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );
707cdf0e10cSrcweir 
708cdf0e10cSrcweir 	// Zeichen-Tabelle fuellen
709cdf0e10cSrcweir 	sal_uInt16 i;
710cdf0e10cSrcweir 
711cdf0e10cSrcweir 	// Zulaessige Zeichen fuer Identifier
712cdf0e10cSrcweir 	sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
713cdf0e10cSrcweir 	for( i = 'a' ; i <= 'z' ; i++ )
714cdf0e10cSrcweir 		aCharTypeTab[i] |= nHelpMask;
715cdf0e10cSrcweir 	for( i = 'A' ; i <= 'Z' ; i++ )
716cdf0e10cSrcweir 		aCharTypeTab[i] |= nHelpMask;
717cdf0e10cSrcweir 	// '_' extra eintragen
718cdf0e10cSrcweir 	aCharTypeTab[(int)'_'] |= nHelpMask;
719cdf0e10cSrcweir 	// AB 23.6.97: '$' ist auch erlaubt
720cdf0e10cSrcweir 	aCharTypeTab[(int)'$'] |= nHelpMask;
721cdf0e10cSrcweir 
722cdf0e10cSrcweir 	// Ziffern (Identifier und Number ist moeglich)
723cdf0e10cSrcweir 	nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
724cdf0e10cSrcweir 						 CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
725cdf0e10cSrcweir 	for( i = '0' ; i <= '9' ; i++ )
726cdf0e10cSrcweir 		aCharTypeTab[i] |= nHelpMask;
727cdf0e10cSrcweir 
728cdf0e10cSrcweir 	// e und E sowie . von Hand ergaenzen
729cdf0e10cSrcweir 	aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
730cdf0e10cSrcweir 	aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
731cdf0e10cSrcweir 	aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
732cdf0e10cSrcweir 	aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;
733cdf0e10cSrcweir 
734cdf0e10cSrcweir 	// Hex-Ziffern
735cdf0e10cSrcweir 	for( i = 'a' ; i <= 'f' ; i++ )
736cdf0e10cSrcweir 		aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
737cdf0e10cSrcweir 	for( i = 'A' ; i <= 'F' ; i++ )
738cdf0e10cSrcweir 		aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
739cdf0e10cSrcweir 
740cdf0e10cSrcweir 	// Oct-Ziffern
741cdf0e10cSrcweir 	for( i = '0' ; i <= '7' ; i++ )
742cdf0e10cSrcweir 		aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;
743cdf0e10cSrcweir 
744cdf0e10cSrcweir 	// String-Beginn/End-Zeichen
745cdf0e10cSrcweir 	aCharTypeTab[(int)'\''] |= CHAR_START_STRING;
746cdf0e10cSrcweir 	aCharTypeTab[(int)'\"'] |= CHAR_START_STRING;
747cdf0e10cSrcweir 	aCharTypeTab[(int)'[']  |= CHAR_START_STRING;
748cdf0e10cSrcweir 	aCharTypeTab[(int)'`']  |= CHAR_START_STRING;
749cdf0e10cSrcweir 
750cdf0e10cSrcweir 	// Operator-Zeichen
751cdf0e10cSrcweir 	aCharTypeTab[(int)'!'] |= CHAR_OPERATOR;
752cdf0e10cSrcweir 	aCharTypeTab[(int)'%'] |= CHAR_OPERATOR;
753cdf0e10cSrcweir 	// aCharTypeTab[(int)'&'] |= CHAR_OPERATOR;		Removed because of #i14140
754cdf0e10cSrcweir 	aCharTypeTab[(int)'('] |= CHAR_OPERATOR;
755cdf0e10cSrcweir 	aCharTypeTab[(int)')'] |= CHAR_OPERATOR;
756cdf0e10cSrcweir 	aCharTypeTab[(int)'*'] |= CHAR_OPERATOR;
757cdf0e10cSrcweir 	aCharTypeTab[(int)'+'] |= CHAR_OPERATOR;
758cdf0e10cSrcweir 	aCharTypeTab[(int)','] |= CHAR_OPERATOR;
759cdf0e10cSrcweir 	aCharTypeTab[(int)'-'] |= CHAR_OPERATOR;
760cdf0e10cSrcweir 	aCharTypeTab[(int)'/'] |= CHAR_OPERATOR;
761cdf0e10cSrcweir 	aCharTypeTab[(int)':'] |= CHAR_OPERATOR;
762cdf0e10cSrcweir 	aCharTypeTab[(int)'<'] |= CHAR_OPERATOR;
763cdf0e10cSrcweir 	aCharTypeTab[(int)'='] |= CHAR_OPERATOR;
764cdf0e10cSrcweir 	aCharTypeTab[(int)'>'] |= CHAR_OPERATOR;
765cdf0e10cSrcweir 	aCharTypeTab[(int)'?'] |= CHAR_OPERATOR;
766cdf0e10cSrcweir 	aCharTypeTab[(int)'^'] |= CHAR_OPERATOR;
767cdf0e10cSrcweir 	aCharTypeTab[(int)'|'] |= CHAR_OPERATOR;
768cdf0e10cSrcweir 	aCharTypeTab[(int)'~'] |= CHAR_OPERATOR;
769cdf0e10cSrcweir 	aCharTypeTab[(int)'{'] |= CHAR_OPERATOR;
770cdf0e10cSrcweir 	aCharTypeTab[(int)'}'] |= CHAR_OPERATOR;
771cdf0e10cSrcweir 	// aCharTypeTab[(int)'['] |= CHAR_OPERATOR;		Removed because of #i17826
772cdf0e10cSrcweir 	aCharTypeTab[(int)']'] |= CHAR_OPERATOR;
773cdf0e10cSrcweir 	aCharTypeTab[(int)';'] |= CHAR_OPERATOR;
774cdf0e10cSrcweir 
775cdf0e10cSrcweir 	// Space
776cdf0e10cSrcweir 	aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
777cdf0e10cSrcweir 	aCharTypeTab[(int)'\t'] |= CHAR_SPACE;
778cdf0e10cSrcweir 
779cdf0e10cSrcweir 	// Zeilen-Ende-Zeichen
780cdf0e10cSrcweir 	aCharTypeTab[(int)'\r'] |= CHAR_EOL;
781cdf0e10cSrcweir 	aCharTypeTab[(int)'\n'] |= CHAR_EOL;
782cdf0e10cSrcweir 
783cdf0e10cSrcweir 	ppListKeyWords = NULL;
784cdf0e10cSrcweir }
785cdf0e10cSrcweir 
~SimpleTokenizer_Impl(void)786cdf0e10cSrcweir SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void )
787cdf0e10cSrcweir {
788cdf0e10cSrcweir }
789cdf0e10cSrcweir 
getSimpleTokenizer(void)790cdf0e10cSrcweir SimpleTokenizer_Impl* getSimpleTokenizer( void )
791cdf0e10cSrcweir {
792cdf0e10cSrcweir 	static SimpleTokenizer_Impl* pSimpleTokenizer = NULL;
793cdf0e10cSrcweir 	if( !pSimpleTokenizer )
794cdf0e10cSrcweir 		pSimpleTokenizer = new SimpleTokenizer_Impl();
795cdf0e10cSrcweir 	return pSimpleTokenizer;
796cdf0e10cSrcweir }
797cdf0e10cSrcweir 
798cdf0e10cSrcweir // Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul
parseLine(sal_uInt32 nParseLine,const String * aSource)799cdf0e10cSrcweir sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* aSource )
800cdf0e10cSrcweir {
801cdf0e10cSrcweir 	// Position auf den Anfang des Source-Strings setzen
802cdf0e10cSrcweir 	mpStringBegin = mpActualPos = aSource->GetBuffer();
803cdf0e10cSrcweir 
804cdf0e10cSrcweir 	// Zeile und Spalte initialisieren
805cdf0e10cSrcweir 	nLine = nParseLine;
806cdf0e10cSrcweir 	nCol = 0L;
807cdf0e10cSrcweir 
808cdf0e10cSrcweir 	// Variablen fuer die Out-Parameter
809cdf0e10cSrcweir 	TokenTypes eType;
810cdf0e10cSrcweir 	const sal_Unicode* pStartPos;
811cdf0e10cSrcweir 	const sal_Unicode* pEndPos;
812cdf0e10cSrcweir 
813cdf0e10cSrcweir 	// Schleife ueber alle Tokens
814cdf0e10cSrcweir 	sal_uInt16 nTokenCount = 0;
815cdf0e10cSrcweir 	while( getNextToken( eType, pStartPos, pEndPos ) )
816cdf0e10cSrcweir 		nTokenCount++;
817cdf0e10cSrcweir 
818cdf0e10cSrcweir 	return nTokenCount;
819cdf0e10cSrcweir }
820cdf0e10cSrcweir 
getHighlightPortions(sal_uInt32 nParseLine,const String & rLine,HighlightPortions & portions)821cdf0e10cSrcweir void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const String& rLine,
822cdf0e10cSrcweir 													/*out*/HighlightPortions& portions  )
823cdf0e10cSrcweir {
824cdf0e10cSrcweir 	// Position auf den Anfang des Source-Strings setzen
825cdf0e10cSrcweir 	mpStringBegin = mpActualPos = rLine.GetBuffer();
826cdf0e10cSrcweir 
827cdf0e10cSrcweir 	// Zeile und Spalte initialisieren
828cdf0e10cSrcweir 	nLine = nParseLine;
829cdf0e10cSrcweir 	nCol = 0L;
830cdf0e10cSrcweir 
831cdf0e10cSrcweir 	// Variablen fuer die Out-Parameter
832cdf0e10cSrcweir 	TokenTypes eType;
833cdf0e10cSrcweir 	const sal_Unicode* pStartPos;
834cdf0e10cSrcweir 	const sal_Unicode* pEndPos;
835cdf0e10cSrcweir 
836cdf0e10cSrcweir 	// Schleife ueber alle Tokens
837cdf0e10cSrcweir 	while( getNextToken( eType, pStartPos, pEndPos ) )
838cdf0e10cSrcweir 	{
839cdf0e10cSrcweir 		HighlightPortion portion;
840cdf0e10cSrcweir 
841cdf0e10cSrcweir 		portion.nBegin = (sal_uInt16)(pStartPos - mpStringBegin);
842cdf0e10cSrcweir 		portion.nEnd = (sal_uInt16)(pEndPos - mpStringBegin);
843cdf0e10cSrcweir 		portion.tokenType = eType;
844cdf0e10cSrcweir 
845cdf0e10cSrcweir         portions.push_back(portion);
846cdf0e10cSrcweir 	}
847cdf0e10cSrcweir }
848cdf0e10cSrcweir 
849cdf0e10cSrcweir 
850cdf0e10cSrcweir //////////////////////////////////////////////////////////////////////////
851cdf0e10cSrcweir // Implementierung des SyntaxHighlighter
852cdf0e10cSrcweir 
SyntaxHighlighter()853cdf0e10cSrcweir SyntaxHighlighter::SyntaxHighlighter()
854cdf0e10cSrcweir {
855cdf0e10cSrcweir 	m_pSimpleTokenizer = 0;
856cdf0e10cSrcweir 	m_pKeyWords = NULL;
857cdf0e10cSrcweir 	m_nKeyWordCount = 0;
858cdf0e10cSrcweir }
859cdf0e10cSrcweir 
~SyntaxHighlighter()860cdf0e10cSrcweir SyntaxHighlighter::~SyntaxHighlighter()
861cdf0e10cSrcweir {
862cdf0e10cSrcweir 	delete m_pSimpleTokenizer;
863cdf0e10cSrcweir 	delete m_pKeyWords;
864cdf0e10cSrcweir }
865cdf0e10cSrcweir 
initialize(HighlighterLanguage eLanguage_)866cdf0e10cSrcweir void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ )
867cdf0e10cSrcweir {
868cdf0e10cSrcweir 	eLanguage = eLanguage_;
869cdf0e10cSrcweir 	delete m_pSimpleTokenizer;
870cdf0e10cSrcweir 	m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage);
871cdf0e10cSrcweir 
872cdf0e10cSrcweir 	switch (eLanguage)
873cdf0e10cSrcweir 	{
874cdf0e10cSrcweir 		case HIGHLIGHT_BASIC:
875cdf0e10cSrcweir 			m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords,
876cdf0e10cSrcweir 											sizeof( strListBasicKeyWords ) / sizeof( char* ));
877cdf0e10cSrcweir 			break;
878cdf0e10cSrcweir 		case HIGHLIGHT_SQL:
879cdf0e10cSrcweir 			m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords,
880cdf0e10cSrcweir 											sizeof( strListSqlKeyWords ) / sizeof( char* ));
881cdf0e10cSrcweir 			break;
882cdf0e10cSrcweir 		default:
883cdf0e10cSrcweir 			m_pSimpleTokenizer->setKeyWords( NULL, 0 );
884cdf0e10cSrcweir 	}
885cdf0e10cSrcweir }
886cdf0e10cSrcweir 
notifyChange(sal_uInt32 nLine,sal_Int32 nLineCountDifference,const String * pChangedLines,sal_uInt32 nArrayLength)887cdf0e10cSrcweir const Range SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference,
888cdf0e10cSrcweir 								const String* pChangedLines, sal_uInt32 nArrayLength)
889cdf0e10cSrcweir {
890cdf0e10cSrcweir     (void)nLineCountDifference;
891cdf0e10cSrcweir 
892cdf0e10cSrcweir 	for( sal_uInt32 i=0 ; i < nArrayLength ; i++ )
893cdf0e10cSrcweir 		m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]);
894cdf0e10cSrcweir 
895cdf0e10cSrcweir 	return Range( nLine, nLine + nArrayLength-1 );
896cdf0e10cSrcweir }
897cdf0e10cSrcweir 
getHighlightPortions(sal_uInt32 nLine,const String & rLine,HighlightPortions & portions)898cdf0e10cSrcweir void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const String& rLine,
899cdf0e10cSrcweir 											/*out*/HighlightPortions& portions )
900cdf0e10cSrcweir {
901cdf0e10cSrcweir 	m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions );
902cdf0e10cSrcweir }
903