xref: /aoo41x/main/soltools/cpp/_lex.c (revision 7ce20373)
1*7ce20373SAndrew Rist /**************************************************************
2*7ce20373SAndrew Rist  *
3*7ce20373SAndrew Rist  * Licensed to the Apache Software Foundation (ASF) under one
4*7ce20373SAndrew Rist  * or more contributor license agreements.  See the NOTICE file
5*7ce20373SAndrew Rist  * distributed with this work for additional information
6*7ce20373SAndrew Rist  * regarding copyright ownership.  The ASF licenses this file
7*7ce20373SAndrew Rist  * to you under the Apache License, Version 2.0 (the
8*7ce20373SAndrew Rist  * "License"); you may not use this file except in compliance
9*7ce20373SAndrew Rist  * with the License.  You may obtain a copy of the License at
10*7ce20373SAndrew Rist  *
11*7ce20373SAndrew Rist  *   http://www.apache.org/licenses/LICENSE-2.0
12*7ce20373SAndrew Rist  *
13*7ce20373SAndrew Rist  * Unless required by applicable law or agreed to in writing,
14*7ce20373SAndrew Rist  * software distributed under the License is distributed on an
15*7ce20373SAndrew Rist  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16*7ce20373SAndrew Rist  * KIND, either express or implied.  See the License for the
17*7ce20373SAndrew Rist  * specific language governing permissions and limitations
18*7ce20373SAndrew Rist  * under the License.
19*7ce20373SAndrew Rist  *
20*7ce20373SAndrew Rist  *************************************************************/
21*7ce20373SAndrew Rist 
22cdf0e10cSrcweir #include <stdio.h>
23cdf0e10cSrcweir #include <stdlib.h>
24cdf0e10cSrcweir #include <string.h>
25cdf0e10cSrcweir #if (defined(_WIN32) || defined(_MSDOS) || defined(__IBMC__))
26cdf0e10cSrcweir #include <io.h>
27cdf0e10cSrcweir #else
28cdf0e10cSrcweir #include <unistd.h>
29cdf0e10cSrcweir #endif
30cdf0e10cSrcweir #include "cpp.h"
/*
 * lexical FSM encoding
 *   when in state state, and one of the characters
 *   in ch arrives, enter nextstate.
 *   States >= S_SELF are either final, or at least require special action.
 *   In 'fsm' there is a line for each state X charset X nextstate.
 *   List chars that overwrite previous entries later (e.g. C_ALPH
 *   can be overridden by '_' by a later entry); C_XX is
 *   the universal set, and should always be first.
 *   States from S_SELF up are represented in the big table as negative
 *   values.
 *   S_SELF and S_SELFB encode the resulting token type in the upper bits.
 *   These actions differ in that S_SELF doesn't have a lookahead char,
 *   S_SELFB does.
 *
 *   The encoding is blown out into a big table for time-efficiency.
 *   Entries have
 *      nextstate: 6 bits; ?\ marker: 1 bit; tokentype: 9 bits.
 */
49cdf0e10cSrcweir 
/*
 * Table-entry encoding helpers.
 *
 *   MAXSTATE  second dimension of bigfsm; action codes (S_SELF and up)
 *             are numbered starting from this value.
 *   ACT       packs a token type above the low 7 bits that hold the
 *             action code.
 *   QBSBIT    marker bit tested by gettokens() on an un-complemented
 *             table entry; expandlex() arranges for it to be set for
 *             '?', '\\', '\n' and '\r'.
 *   GETACT    recovers the 9-bit token type packed by ACT.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. ACT(a | b, x) or GETACT(x + y)) expand as intended.
 */
#define	MAXSTATE		32
#define	ACT(tok,act)	(((tok)<<7)+(act))
#define	QBSBIT			0100
#define	GETACT(st)		(((st)>>7)&0x1ff)
54cdf0e10cSrcweir 
/*
 * Character-class codes.  They may appear in the ch[] list of an fsm
 * entry in place of a literal character; expandlex() expands C_XX
 * (every character), C_ALPH (letters and '_') and C_NUM (digits).
 */
enum
{
    C_WS   = 1,
    C_ALPH = 2,
    C_NUM  = 3,
    C_EOF  = 4,
    C_XX   = 5
};
61cdf0e10cSrcweir 
62cdf0e10cSrcweir enum state
63cdf0e10cSrcweir {
64cdf0e10cSrcweir     START = 0, NUM1, NUM2, NUM3, ID1, ST1, ST2, ST3, COM1, COM2, COM3, COM4,
65cdf0e10cSrcweir     CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1,
66cdf0e10cSrcweir     CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1,
67cdf0e10cSrcweir     S_SELF = MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR,
68cdf0e10cSrcweir     S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME
69cdf0e10cSrcweir };
70cdf0e10cSrcweir 
71cdf0e10cSrcweir int tottok;
72cdf0e10cSrcweir int tokkind[256];
73cdf0e10cSrcweir struct fsm
74cdf0e10cSrcweir {
75cdf0e10cSrcweir     int state;                          /* if in this state */
76cdf0e10cSrcweir     uchar ch[4];                        /* and see one of these characters */
77cdf0e10cSrcweir     int nextstate;                      /* enter this state if +ve */
78cdf0e10cSrcweir };
79cdf0e10cSrcweir 
80cdf0e10cSrcweir  /*const*/ struct fsm fsm[] = {
81cdf0e10cSrcweir     /* start state */
82cdf0e10cSrcweir 		 {START, {C_XX}, ACT(UNCLASS, S_SELF)},
83cdf0e10cSrcweir 		 {START, {' ', '\t', '\v'}, WS1},
84cdf0e10cSrcweir 		 {START, {C_NUM}, NUM1},
85cdf0e10cSrcweir 		 {START, {'.'}, NUM3},
86cdf0e10cSrcweir 		 {START, {C_ALPH}, ID1},
87cdf0e10cSrcweir 		 {START, {'L'}, ST1},
88cdf0e10cSrcweir 		 {START, {'"'}, ST2},
89cdf0e10cSrcweir 		 {START, {'\''}, CC1},
90cdf0e10cSrcweir 		 {START, {'/'}, COM1},
91cdf0e10cSrcweir 		 {START, {EOFC}, S_EOF},
92cdf0e10cSrcweir 		 {START, {'\n'}, S_NL},
93cdf0e10cSrcweir 		 {START, {'-'}, MINUS1},
94cdf0e10cSrcweir 		 {START, {'+'}, PLUS1},
95cdf0e10cSrcweir 		 {START, {'<'}, LT1},
96cdf0e10cSrcweir 		 {START, {'>'}, GT1},
97cdf0e10cSrcweir 		 {START, {'='}, ASG1},
98cdf0e10cSrcweir 		 {START, {'!'}, NOT1},
99cdf0e10cSrcweir 		 {START, {'&'}, AND1},
100cdf0e10cSrcweir 		 {START, {'|'}, OR1},
101cdf0e10cSrcweir 		 {START, {'#'}, SHARP1},
102cdf0e10cSrcweir 		 {START, {'%'}, PCT1},
103cdf0e10cSrcweir 		 {START, {'['}, ACT(SBRA, S_SELF)},
104cdf0e10cSrcweir 		 {START, {']'}, ACT(SKET, S_SELF)},
105cdf0e10cSrcweir 		 {START, {'('}, ACT(LP, S_SELF)},
106cdf0e10cSrcweir 		 {START, {')'}, ACT(RP, S_SELF)},
107cdf0e10cSrcweir 		 {START, {'*'}, STAR1},
108cdf0e10cSrcweir 		 {START, {','}, ACT(COMMA, S_SELF)},
109cdf0e10cSrcweir 		 {START, {'?'}, ACT(QUEST, S_SELF)},
110cdf0e10cSrcweir 		 {START, {':'}, ACT(COLON, S_SELF)},
111cdf0e10cSrcweir 		 {START, {';'}, ACT(SEMIC, S_SELF)},
112cdf0e10cSrcweir 		 {START, {'{'}, ACT(CBRA, S_SELF)},
113cdf0e10cSrcweir 		 {START, {'}'}, ACT(CKET, S_SELF)},
114cdf0e10cSrcweir 		 {START, {'~'}, ACT(TILDE, S_SELF)},
115cdf0e10cSrcweir 		 {START, {'^'}, CIRC1},
116cdf0e10cSrcweir 
117cdf0e10cSrcweir     /* saw a digit */
118cdf0e10cSrcweir 		 {NUM1, {C_XX}, ACT(NUMBER, S_SELFB)},
119cdf0e10cSrcweir 		 {NUM1, {C_NUM, C_ALPH, '.'}, NUM1},
120cdf0e10cSrcweir 		 {NUM1, {'E', 'e'}, NUM2},
121cdf0e10cSrcweir 		 {NUM1, {'_'}, ACT(NUMBER, S_SELFB)},
122cdf0e10cSrcweir 
123cdf0e10cSrcweir     /* saw possible start of exponent, digits-e */
124cdf0e10cSrcweir 		 {NUM2, {C_XX}, ACT(NUMBER, S_SELFB)},
125cdf0e10cSrcweir 		 {NUM2, {'+', '-'}, NUM1},
126cdf0e10cSrcweir 		 {NUM2, {C_NUM, C_ALPH}, NUM1},
127cdf0e10cSrcweir 		 {NUM2, {'_'}, ACT(NUMBER, S_SELFB)},
128cdf0e10cSrcweir 
129cdf0e10cSrcweir     /* saw a '.', which could be a number or an operator */
130cdf0e10cSrcweir 		 {NUM3, {C_XX}, ACT(DOT, S_SELFB)},
131cdf0e10cSrcweir 		 {NUM3, {'.'}, DOTS1},
132cdf0e10cSrcweir 		 {NUM3, {C_NUM}, NUM1},
133cdf0e10cSrcweir 
134cdf0e10cSrcweir 		 {DOTS1, {C_XX}, ACT(UNCLASS, S_SELFB)},
135cdf0e10cSrcweir 		 {DOTS1, {C_NUM}, NUM1},
136cdf0e10cSrcweir 		 {DOTS1, {'.'}, ACT(ELLIPS, S_SELF)},
137cdf0e10cSrcweir 
138cdf0e10cSrcweir     /* saw a letter or _ */
139cdf0e10cSrcweir 		 {ID1, {C_XX}, ACT(NAME, S_NAME)},
140cdf0e10cSrcweir 		 {ID1, {C_ALPH, C_NUM}, ID1},
141cdf0e10cSrcweir 
142cdf0e10cSrcweir     /* saw L (start of wide string?) */
143cdf0e10cSrcweir 		 {ST1, {C_XX}, ACT(NAME, S_NAME)},
144cdf0e10cSrcweir 		 {ST1, {C_ALPH, C_NUM}, ID1},
145cdf0e10cSrcweir 		 {ST1, {'"'}, ST2},
146cdf0e10cSrcweir 		 {ST1, {'\''}, CC1},
147cdf0e10cSrcweir 
148cdf0e10cSrcweir     /* saw " beginning string */
149cdf0e10cSrcweir 		 {ST2, {C_XX}, ST2},
150cdf0e10cSrcweir 		 {ST2, {'"'}, ACT(STRING, S_SELF)},
151cdf0e10cSrcweir 		 {ST2, {'\\'}, ST3},
152cdf0e10cSrcweir 		 {ST2, {'\n'}, S_STNL},
153cdf0e10cSrcweir 		 {ST2, {EOFC}, S_EOFSTR},
154cdf0e10cSrcweir 
155cdf0e10cSrcweir     /* saw \ in string */
156cdf0e10cSrcweir 		 {ST3, {C_XX}, ST2},
157cdf0e10cSrcweir 		 {ST3, {'\n'}, S_STNL},
158cdf0e10cSrcweir 		 {ST3, {EOFC}, S_EOFSTR},
159cdf0e10cSrcweir 
160cdf0e10cSrcweir     /* saw ' beginning character const */
161cdf0e10cSrcweir 		 {CC1, {C_XX}, CC1},
162cdf0e10cSrcweir 		 {CC1, {'\''}, ACT(CCON, S_SELF)},
163cdf0e10cSrcweir 		 {CC1, {'\\'}, CC2},
164cdf0e10cSrcweir 		 {CC1, {'\n'}, S_STNL},
165cdf0e10cSrcweir 		 {CC1, {EOFC}, S_EOFSTR},
166cdf0e10cSrcweir 
167cdf0e10cSrcweir     /* saw \ in ccon */
168cdf0e10cSrcweir 		 {CC2, {C_XX}, CC1},
169cdf0e10cSrcweir 		 {CC2, {'\n'}, S_STNL},
170cdf0e10cSrcweir 		 {CC2, {EOFC}, S_EOFSTR},
171cdf0e10cSrcweir 
172cdf0e10cSrcweir     /* saw /, perhaps start of comment */
173cdf0e10cSrcweir 		 {COM1, {C_XX}, ACT(SLASH, S_SELFB)},
174cdf0e10cSrcweir 		 {COM1, {'='}, ACT(ASSLASH, S_SELF)},
175cdf0e10cSrcweir 		 {COM1, {'*'}, COM2},
176cdf0e10cSrcweir 		 {COM1, {'/'}, COM4},
177cdf0e10cSrcweir 
178cdf0e10cSrcweir     /* saw / followed by *, start of comment */
179cdf0e10cSrcweir 		 {COM2, {C_XX}, COM2},
180cdf0e10cSrcweir 		 {COM2, {'\n'}, S_COMNL},
181cdf0e10cSrcweir 		 {COM2, {'*'}, COM3},
182cdf0e10cSrcweir 		 {COM2, {EOFC}, S_EOFCOM},
183cdf0e10cSrcweir 
184cdf0e10cSrcweir     /* saw the * possibly ending a comment */
185cdf0e10cSrcweir 		 {COM3, {C_XX}, COM2},
186cdf0e10cSrcweir 		 {COM3, {'\n'}, S_COMNL},
187cdf0e10cSrcweir 		 {COM3, {'*'}, COM3},
188cdf0e10cSrcweir 		 {COM3, {'/'}, S_COMMENT},
189cdf0e10cSrcweir 
190cdf0e10cSrcweir     /* // comment */
191cdf0e10cSrcweir 		 {COM4, {C_XX}, COM4},
192cdf0e10cSrcweir 		 {COM4, {'\n'}, S_NL},
193cdf0e10cSrcweir 		 {COM4, {EOFC}, S_EOFCOM},
194cdf0e10cSrcweir 
195cdf0e10cSrcweir     /* saw white space, eat it up */
196cdf0e10cSrcweir 		 {WS1, {C_XX}, S_WS},
197cdf0e10cSrcweir 		 {WS1, {'\t', '\v', ' '}, WS1},
198cdf0e10cSrcweir 
199cdf0e10cSrcweir     /* saw -, check --, -=, -> */
200cdf0e10cSrcweir 		 {MINUS1, {C_XX}, ACT(MINUS, S_SELFB)},
201cdf0e10cSrcweir 		 {MINUS1, {'-'}, ACT(MMINUS, S_SELF)},
202cdf0e10cSrcweir 		 {MINUS1, {'='}, ACT(ASMINUS, S_SELF)},
203cdf0e10cSrcweir 		 {MINUS1, {'>'}, ACT(ARROW, S_SELF)},
204cdf0e10cSrcweir 
205cdf0e10cSrcweir     /* saw +, check ++, += */
206cdf0e10cSrcweir 		 {PLUS1, {C_XX}, ACT(PLUS, S_SELFB)},
207cdf0e10cSrcweir 		 {PLUS1, {'+'}, ACT(PPLUS, S_SELF)},
208cdf0e10cSrcweir 		 {PLUS1, {'='}, ACT(ASPLUS, S_SELF)},
209cdf0e10cSrcweir 
210cdf0e10cSrcweir     /* saw <, check <<, <<=, <= */
211cdf0e10cSrcweir 		 {LT1, {C_XX}, ACT(LT, S_SELFB)},
212cdf0e10cSrcweir 		 {LT1, {'<'}, LT2},
213cdf0e10cSrcweir 		 {LT1, {'='}, ACT(LEQ, S_SELF)},
214cdf0e10cSrcweir 		 {LT2, {C_XX}, ACT(LSH, S_SELFB)},
215cdf0e10cSrcweir 		 {LT2, {'='}, ACT(ASLSH, S_SELF)},
216cdf0e10cSrcweir 
217cdf0e10cSrcweir     /* saw >, check >>, >>=, >= */
218cdf0e10cSrcweir 		 {GT1, {C_XX}, ACT(GT, S_SELFB)},
219cdf0e10cSrcweir 		 {GT1, {'>'}, GT2},
220cdf0e10cSrcweir 		 {GT1, {'='}, ACT(GEQ, S_SELF)},
221cdf0e10cSrcweir 		 {GT2, {C_XX}, ACT(RSH, S_SELFB)},
222cdf0e10cSrcweir 		 {GT2, {'='}, ACT(ASRSH, S_SELF)},
223cdf0e10cSrcweir 
224cdf0e10cSrcweir     /* = */
225cdf0e10cSrcweir 		 {ASG1, {C_XX}, ACT(ASGN, S_SELFB)},
226cdf0e10cSrcweir 		 {ASG1, {'='}, ACT(EQ, S_SELF)},
227cdf0e10cSrcweir 
228cdf0e10cSrcweir     /* ! */
229cdf0e10cSrcweir 		 {NOT1, {C_XX}, ACT(NOT, S_SELFB)},
230cdf0e10cSrcweir 		 {NOT1, {'='}, ACT(NEQ, S_SELF)},
231cdf0e10cSrcweir 
232cdf0e10cSrcweir     /* & */
233cdf0e10cSrcweir 		 {AND1, {C_XX}, ACT(AND, S_SELFB)},
234cdf0e10cSrcweir 		 {AND1, {'&'}, ACT(LAND, S_SELF)},
235cdf0e10cSrcweir 		 {AND1, {'='}, ACT(ASAND, S_SELF)},
236cdf0e10cSrcweir 
237cdf0e10cSrcweir     /* | */
238cdf0e10cSrcweir 		 {OR1, {C_XX}, ACT(OR, S_SELFB)},
239cdf0e10cSrcweir 		 {OR1, {'|'}, ACT(LOR, S_SELF)},
240cdf0e10cSrcweir 		 {OR1, {'='}, ACT(ASOR, S_SELF)},
241cdf0e10cSrcweir 
242cdf0e10cSrcweir     /* # */
243cdf0e10cSrcweir 		 {SHARP1, {C_XX}, ACT(SHARP, S_SELFB)},
244cdf0e10cSrcweir 		 {SHARP1, {'#'}, ACT(DSHARP, S_SELF)},
245cdf0e10cSrcweir 
246cdf0e10cSrcweir     /* % */
247cdf0e10cSrcweir 		 {PCT1, {C_XX}, ACT(PCT, S_SELFB)},
248cdf0e10cSrcweir 		 {PCT1, {'='}, ACT(ASPCT, S_SELF)},
249cdf0e10cSrcweir 
250cdf0e10cSrcweir     /* * */
251cdf0e10cSrcweir 		 {STAR1, {C_XX}, ACT(STAR, S_SELFB)},
252cdf0e10cSrcweir 		 {STAR1, {'='}, ACT(ASSTAR, S_SELF)},
253cdf0e10cSrcweir 
254cdf0e10cSrcweir     /* ^ */
255cdf0e10cSrcweir 		 {CIRC1, {C_XX}, ACT(CIRC, S_SELFB)},
256cdf0e10cSrcweir 		 {CIRC1, {'='}, ACT(ASCIRC, S_SELF)},
257cdf0e10cSrcweir 
258cdf0e10cSrcweir 		 {-1, "", 0}
259cdf0e10cSrcweir };
260cdf0e10cSrcweir 
261cdf0e10cSrcweir /* first index is char, second is state */
262cdf0e10cSrcweir /* increase #states to power of 2 to encourage use of shift */
263cdf0e10cSrcweir short bigfsm[256][MAXSTATE];
264cdf0e10cSrcweir 
265cdf0e10cSrcweir void
expandlex(void)266cdf0e10cSrcweir     expandlex(void)
267cdf0e10cSrcweir {
268cdf0e10cSrcweir      /* const */ struct fsm *fp;
269cdf0e10cSrcweir     int i, j, nstate;
270cdf0e10cSrcweir 
271cdf0e10cSrcweir     for (fp = fsm; fp->state >= 0; fp++)
272cdf0e10cSrcweir     {
273cdf0e10cSrcweir         for (i = 0; fp->ch[i]; i++)
274cdf0e10cSrcweir         {
275cdf0e10cSrcweir             nstate = fp->nextstate;
276cdf0e10cSrcweir             if (nstate >= S_SELF)
277cdf0e10cSrcweir                 nstate = ~nstate;
278cdf0e10cSrcweir             switch (fp->ch[i])
279cdf0e10cSrcweir             {
280cdf0e10cSrcweir 
281cdf0e10cSrcweir                 case C_XX:              /* random characters */
282cdf0e10cSrcweir                     for (j = 0; j < 256; j++)
283cdf0e10cSrcweir                         bigfsm[j][fp->state] = (short) nstate;
284cdf0e10cSrcweir                     continue;
285cdf0e10cSrcweir                 case C_ALPH:
286cdf0e10cSrcweir                     for (j = 0; j < 256; j++)
287cdf0e10cSrcweir #ifdef S390
288cdf0e10cSrcweir 						if( isalpha( j ) || (j == '_') )
289cdf0e10cSrcweir #else
290cdf0e10cSrcweir                         if (('a' <= j && j <= 'z') || ('A' <= j && j <= 'Z')
291cdf0e10cSrcweir                             || j == '_')
292cdf0e10cSrcweir #endif
293cdf0e10cSrcweir                             bigfsm[j][fp->state] = (short) nstate;
294cdf0e10cSrcweir                     continue;
295cdf0e10cSrcweir                 case C_NUM:
296cdf0e10cSrcweir                     for (j = '0'; j <= '9'; j++)
297cdf0e10cSrcweir                         bigfsm[j][fp->state] = (short) nstate;
298cdf0e10cSrcweir                     continue;
299cdf0e10cSrcweir                 default:
300cdf0e10cSrcweir                     bigfsm[fp->ch[i]][fp->state] = (short) nstate;
301cdf0e10cSrcweir             }
302cdf0e10cSrcweir         }
303cdf0e10cSrcweir     }
304cdf0e10cSrcweir 
305cdf0e10cSrcweir     /*
306cdf0e10cSrcweir      * install special cases for ? (trigraphs),  \ (splicing), runes, and
307cdf0e10cSrcweir      * EOB
308cdf0e10cSrcweir      */
309cdf0e10cSrcweir     for (i = 0; i < MAXSTATE; i++)
310cdf0e10cSrcweir     {
311cdf0e10cSrcweir         for (j = 0; j < 0xFF; j++)
312cdf0e10cSrcweir             if (j == '?' || j == '\\' || j == '\n' || j == '\r')
313cdf0e10cSrcweir             {
314cdf0e10cSrcweir                 if (bigfsm[j][i] > 0)
315cdf0e10cSrcweir                     bigfsm[j][i] = ~bigfsm[j][i];
316cdf0e10cSrcweir                 bigfsm[j][i] &= ~QBSBIT;
317cdf0e10cSrcweir             }
318cdf0e10cSrcweir         bigfsm[EOB][i] = ~S_EOB;
319cdf0e10cSrcweir         if (bigfsm[EOFC][i] >= 0)
320cdf0e10cSrcweir             bigfsm[EOFC][i] = ~S_EOF;
321cdf0e10cSrcweir     }
322cdf0e10cSrcweir }
323cdf0e10cSrcweir 
324cdf0e10cSrcweir void
fixlex(void)325cdf0e10cSrcweir     fixlex(void)
326cdf0e10cSrcweir {
327cdf0e10cSrcweir     /* do C++ comments? */
328cdf0e10cSrcweir     if ((Cplusplus == 0) || (Cflag != 0))
329cdf0e10cSrcweir         bigfsm['/'][COM1] = bigfsm['x'][COM1];
330cdf0e10cSrcweir }
331cdf0e10cSrcweir 
332cdf0e10cSrcweir /*
333cdf0e10cSrcweir  * fill in a row of tokens from input, terminated by NL or END
334cdf0e10cSrcweir  * First token is put at trp->lp.
335cdf0e10cSrcweir  * Reset is non-zero when the input buffer can be "rewound."
336cdf0e10cSrcweir  * The value is a flag indicating that possible macros have
337cdf0e10cSrcweir  * been seen in the row.
338cdf0e10cSrcweir  */
339cdf0e10cSrcweir int
gettokens(Tokenrow * trp,int reset)340cdf0e10cSrcweir     gettokens(Tokenrow * trp, int reset)
341cdf0e10cSrcweir {
342cdf0e10cSrcweir     register int c, state, oldstate;
343cdf0e10cSrcweir     register uchar *ip;
344cdf0e10cSrcweir     register Token *tp, *maxp;
345cdf0e10cSrcweir     int runelen;
346cdf0e10cSrcweir     Source *s = cursource;
347cdf0e10cSrcweir     int nmac = 0;
348cdf0e10cSrcweir 
349cdf0e10cSrcweir     tp = trp->lp;
350cdf0e10cSrcweir     ip = s->inp;
351cdf0e10cSrcweir     if (reset)
352cdf0e10cSrcweir     {
353cdf0e10cSrcweir         s->lineinc = 0;
354cdf0e10cSrcweir         if (ip >= s->inl)
355cdf0e10cSrcweir         {                               /* nothing in buffer */
356cdf0e10cSrcweir             s->inl = s->inb;
357cdf0e10cSrcweir             fillbuf(s);
358cdf0e10cSrcweir             ip = s->inp = s->inb;
359cdf0e10cSrcweir         }
360cdf0e10cSrcweir         else
361cdf0e10cSrcweir             if (ip >= s->inb + (3 * INS / 4))
362cdf0e10cSrcweir             {
363cdf0e10cSrcweir                 memmove(s->inb, ip, 4 + s->inl - ip);
364cdf0e10cSrcweir                 s->inl = s->inb + (s->inl - ip);
365cdf0e10cSrcweir                 ip = s->inp = s->inb;
366cdf0e10cSrcweir             }
367cdf0e10cSrcweir     }
368cdf0e10cSrcweir     maxp = &trp->bp[trp->max];
369cdf0e10cSrcweir     runelen = 1;
370cdf0e10cSrcweir     for (;;)
371cdf0e10cSrcweir     {
372cdf0e10cSrcweir continue2:
373cdf0e10cSrcweir         if (tp >= maxp)
374cdf0e10cSrcweir         {
375cdf0e10cSrcweir             trp->lp = tp;
376cdf0e10cSrcweir             tp = growtokenrow(trp);
377cdf0e10cSrcweir             maxp = &trp->bp[trp->max];
378cdf0e10cSrcweir         }
379cdf0e10cSrcweir         tp->type = UNCLASS;
380cdf0e10cSrcweir         tp->t = ip;
381cdf0e10cSrcweir         tp->wslen = 0;
382cdf0e10cSrcweir         tp->flag = 0;
383cdf0e10cSrcweir         state = START;
384cdf0e10cSrcweir         for (;;)
385cdf0e10cSrcweir         {
386cdf0e10cSrcweir             oldstate = state;
387cdf0e10cSrcweir 
388cdf0e10cSrcweir             c = *ip;
389cdf0e10cSrcweir 
390cdf0e10cSrcweir             if ((state = bigfsm[c][state]) >= 0)
391cdf0e10cSrcweir             {
392cdf0e10cSrcweir                 ip += runelen;
393cdf0e10cSrcweir                 runelen = 1;
394cdf0e10cSrcweir                 continue;
395cdf0e10cSrcweir             }
396cdf0e10cSrcweir             state = ~state;
397cdf0e10cSrcweir     reswitch:
398cdf0e10cSrcweir             switch (state & 0177)
399cdf0e10cSrcweir             {
400cdf0e10cSrcweir                 case S_SELF:
401cdf0e10cSrcweir                     ip += runelen;
402cdf0e10cSrcweir                     runelen = 1;
403cdf0e10cSrcweir                 case S_SELFB:
404cdf0e10cSrcweir                     tp->type = (unsigned char) GETACT(state);
405cdf0e10cSrcweir                     tp->len = ip - tp->t;
406cdf0e10cSrcweir                     tp++;
407cdf0e10cSrcweir                     goto continue2;
408cdf0e10cSrcweir 
409cdf0e10cSrcweir                 case S_NAME:            /* like S_SELFB but with nmac check */
410cdf0e10cSrcweir                     tp->type = NAME;
411cdf0e10cSrcweir                     tp->len = ip - tp->t;
412cdf0e10cSrcweir                     nmac |= quicklook(tp->t[0], tp->len > 1 ? tp->t[1] : 0);
413cdf0e10cSrcweir                     tp++;
414cdf0e10cSrcweir                     goto continue2;
415cdf0e10cSrcweir 
416cdf0e10cSrcweir                 case S_WS:
417cdf0e10cSrcweir                     tp->wslen = ip - tp->t;
418cdf0e10cSrcweir                     tp->t = ip;
419cdf0e10cSrcweir                     state = START;
420cdf0e10cSrcweir                     continue;
421cdf0e10cSrcweir 
422cdf0e10cSrcweir                 default:
423cdf0e10cSrcweir                     if ((state & QBSBIT) == 0)
424cdf0e10cSrcweir                     {
425cdf0e10cSrcweir                         ip += runelen;
426cdf0e10cSrcweir                         runelen = 1;
427cdf0e10cSrcweir                         continue;
428cdf0e10cSrcweir                     }
429cdf0e10cSrcweir                     state &= ~QBSBIT;
430cdf0e10cSrcweir                     s->inp = ip;
431cdf0e10cSrcweir 
432cdf0e10cSrcweir 					if (c == '\n')
433cdf0e10cSrcweir 					{
434cdf0e10cSrcweir 					    while (s->inp + 1 >= s->inl && fillbuf(s) != EOF);
435cdf0e10cSrcweir 
436cdf0e10cSrcweir 						if (s->inp[1] == '\r')
437cdf0e10cSrcweir 						{
438cdf0e10cSrcweir 							memmove(s->inp + 1, s->inp + 2, s->inl - s->inp + 2);
439cdf0e10cSrcweir 							s->inl -= 1;
440cdf0e10cSrcweir 						}
441cdf0e10cSrcweir 
442cdf0e10cSrcweir                         goto reswitch;
443cdf0e10cSrcweir 					}
444cdf0e10cSrcweir 
445cdf0e10cSrcweir 					if (c == '\r')
446cdf0e10cSrcweir 					{
447cdf0e10cSrcweir     				    while (s->inp + 1 >= s->inl && fillbuf(s) != EOF);
448cdf0e10cSrcweir 
449cdf0e10cSrcweir 						if (s->inp[1] == '\n')
450cdf0e10cSrcweir 						{
451cdf0e10cSrcweir 							memmove(s->inp, s->inp + 1, s->inl - s->inp + 1);
452cdf0e10cSrcweir 							s->inl -= 1;
453cdf0e10cSrcweir 						}
454cdf0e10cSrcweir 						else
455cdf0e10cSrcweir 							*s->inp = '\n';
456cdf0e10cSrcweir 
457cdf0e10cSrcweir 						state = oldstate;
458cdf0e10cSrcweir                         continue;
459cdf0e10cSrcweir 					}
460cdf0e10cSrcweir 
461cdf0e10cSrcweir                     if (c == '?')
462cdf0e10cSrcweir                     {                   /* check trigraph */
463cdf0e10cSrcweir                         if (trigraph(s))
464cdf0e10cSrcweir                         {
465cdf0e10cSrcweir                             state = oldstate;
466cdf0e10cSrcweir                             continue;
467cdf0e10cSrcweir                         }
468cdf0e10cSrcweir                         goto reswitch;
469cdf0e10cSrcweir                     }
470cdf0e10cSrcweir                     if (c == '\\')
471cdf0e10cSrcweir                     {                   /* line-folding */
472cdf0e10cSrcweir                         if (foldline(s))
473cdf0e10cSrcweir                         {
474cdf0e10cSrcweir                             s->lineinc++;
475cdf0e10cSrcweir                             state = oldstate;
476cdf0e10cSrcweir                             continue;
477cdf0e10cSrcweir                         }
478cdf0e10cSrcweir                         goto reswitch;
479cdf0e10cSrcweir                     }
480cdf0e10cSrcweir                     error(WARNING, "Lexical botch in cpp");
481cdf0e10cSrcweir                     ip += runelen;
482cdf0e10cSrcweir                     runelen = 1;
483cdf0e10cSrcweir                     continue;
484cdf0e10cSrcweir 
485cdf0e10cSrcweir                 case S_EOB:
486cdf0e10cSrcweir                     s->inp = ip;
487cdf0e10cSrcweir                     fillbuf(cursource);
488cdf0e10cSrcweir                     state = oldstate;
489cdf0e10cSrcweir                     continue;
490cdf0e10cSrcweir 
491cdf0e10cSrcweir                 case S_EOF:
492cdf0e10cSrcweir                     tp->type = END;
493cdf0e10cSrcweir                     tp->len = 0;
494cdf0e10cSrcweir                     s->inp = ip;
495cdf0e10cSrcweir                     if (tp != trp->bp && (tp - 1)->type != NL && cursource->fd != -1)
496cdf0e10cSrcweir                         error(WARNING, "No newline at end of file");
497cdf0e10cSrcweir                     trp->lp = tp + 1;
498cdf0e10cSrcweir                     return nmac;
499cdf0e10cSrcweir 
500cdf0e10cSrcweir                 case S_STNL:
501cdf0e10cSrcweir                     error(ERROR, "Unterminated string or char const");
502cdf0e10cSrcweir                 case S_NL:
503cdf0e10cSrcweir                     tp->t = ip;
504cdf0e10cSrcweir                     tp->type = NL;
505cdf0e10cSrcweir                     tp->len = 1;
506cdf0e10cSrcweir                     tp->wslen = 0;
507cdf0e10cSrcweir                     s->lineinc++;
508cdf0e10cSrcweir                     s->inp = ip + 1;
509cdf0e10cSrcweir                     trp->lp = tp + 1;
510cdf0e10cSrcweir                     return nmac;
511cdf0e10cSrcweir 
512cdf0e10cSrcweir                 case S_EOFSTR:
513cdf0e10cSrcweir                     error(FATAL, "EOF in string or char constant");
514cdf0e10cSrcweir                     break;
515cdf0e10cSrcweir 
516cdf0e10cSrcweir                 case S_COMNL:
517cdf0e10cSrcweir                     s->lineinc++;
518cdf0e10cSrcweir                     state = COM2;
519cdf0e10cSrcweir                     ip += runelen;
520cdf0e10cSrcweir                     runelen = 1;
521cdf0e10cSrcweir                     continue;
522cdf0e10cSrcweir 
523cdf0e10cSrcweir                 case S_EOFCOM:
524cdf0e10cSrcweir                     error(WARNING, "EOF inside comment");
525cdf0e10cSrcweir                     --ip;
526cdf0e10cSrcweir                 case S_COMMENT:
527cdf0e10cSrcweir 					if (!Cflag)
528cdf0e10cSrcweir 					{
529cdf0e10cSrcweir 						tp->t = ++ip;
530cdf0e10cSrcweir 						tp->t[-1] = ' ';
531cdf0e10cSrcweir 						tp->wslen = 1;
532cdf0e10cSrcweir 						state = START;
533cdf0e10cSrcweir 						continue;
534cdf0e10cSrcweir 					}
535cdf0e10cSrcweir 					else
536cdf0e10cSrcweir 					{
537cdf0e10cSrcweir 	                    runelen = 1;
538cdf0e10cSrcweir                         s->lineinc = 0;;
539cdf0e10cSrcweir                         tp->type = COMMENT;
540cdf0e10cSrcweir 						tp->flag |= XTWS;
541cdf0e10cSrcweir 					}
542cdf0e10cSrcweir             }
543cdf0e10cSrcweir             break;
544cdf0e10cSrcweir         }
545cdf0e10cSrcweir         ip += runelen;
546cdf0e10cSrcweir         runelen = 1;
547cdf0e10cSrcweir         tp->len = ip - tp->t;
548cdf0e10cSrcweir         tp++;
549cdf0e10cSrcweir     }
550cdf0e10cSrcweir }
551cdf0e10cSrcweir 
552cdf0e10cSrcweir /* have seen ?; handle the trigraph it starts (if any) else 0 */
553cdf0e10cSrcweir int
trigraph(Source * s)554cdf0e10cSrcweir     trigraph(Source * s)
555cdf0e10cSrcweir {
556cdf0e10cSrcweir     uchar c;
557cdf0e10cSrcweir 
558cdf0e10cSrcweir     while (s->inp + 2 >= s->inl && fillbuf(s) != EOF);
559cdf0e10cSrcweir 	;
560cdf0e10cSrcweir     if (s->inp[1] != '?')
561cdf0e10cSrcweir         return 0;
562cdf0e10cSrcweir     c = 0;
563cdf0e10cSrcweir     switch (s->inp[2])
564cdf0e10cSrcweir     {
565cdf0e10cSrcweir         case '=':
566cdf0e10cSrcweir             c = '#';
567cdf0e10cSrcweir             break;
568cdf0e10cSrcweir         case '(':
569cdf0e10cSrcweir             c = '[';
570cdf0e10cSrcweir             break;
571cdf0e10cSrcweir         case '/':
572cdf0e10cSrcweir             c = '\\';
573cdf0e10cSrcweir             break;
574cdf0e10cSrcweir         case ')':
575cdf0e10cSrcweir             c = ']';
576cdf0e10cSrcweir             break;
577cdf0e10cSrcweir         case '\'':
578cdf0e10cSrcweir             c = '^';
579cdf0e10cSrcweir             break;
580cdf0e10cSrcweir         case '<':
581cdf0e10cSrcweir             c = '{';
582cdf0e10cSrcweir             break;
583cdf0e10cSrcweir         case '!':
584cdf0e10cSrcweir             c = '|';
585cdf0e10cSrcweir             break;
586cdf0e10cSrcweir         case '>':
587cdf0e10cSrcweir             c = '}';
588cdf0e10cSrcweir             break;
589cdf0e10cSrcweir         case '-':
590cdf0e10cSrcweir             c = '~';
591cdf0e10cSrcweir             break;
592cdf0e10cSrcweir     }
593cdf0e10cSrcweir     if (c)
594cdf0e10cSrcweir     {
595cdf0e10cSrcweir         *s->inp = c;
596cdf0e10cSrcweir         memmove(s->inp + 1, s->inp + 3, s->inl - s->inp + 2);
597cdf0e10cSrcweir         s->inl -= 2;
598cdf0e10cSrcweir     }
599cdf0e10cSrcweir     return c;
600cdf0e10cSrcweir }
601cdf0e10cSrcweir 
602cdf0e10cSrcweir int
foldline(Source * s)603cdf0e10cSrcweir     foldline(Source * s)
604cdf0e10cSrcweir {
605cdf0e10cSrcweir     int n = 1;
606cdf0e10cSrcweir 
607cdf0e10cSrcweir 	/* skip pending wihite spaces */
608cdf0e10cSrcweir 	while ((s->inp[n] == ' ') || (s->inp[n] == '\t'))
609cdf0e10cSrcweir 	{
610cdf0e10cSrcweir 		n++;
611cdf0e10cSrcweir 	    if ((s->inp + n >= s->inl) && (fillbuf(s) == EOF))
612cdf0e10cSrcweir 			break;
613cdf0e10cSrcweir 	}
614cdf0e10cSrcweir 
615cdf0e10cSrcweir 	/* refill buffer */
616cdf0e10cSrcweir     while (s->inp + (n + 1) >= s->inl && fillbuf(s) != EOF);
617cdf0e10cSrcweir 
618cdf0e10cSrcweir     /* skip DOS line ends */
619cdf0e10cSrcweir     if (((s->inp[n] == '\r') && (s->inp[n+1] == '\n')) ||
620cdf0e10cSrcweir 		((s->inp[n] == '\n') && (s->inp[n+1] == '\r')))
621cdf0e10cSrcweir         n++;
622cdf0e10cSrcweir 
623cdf0e10cSrcweir     if ((s->inp[n] == '\n') || (s->inp[n] == '\r'))
624cdf0e10cSrcweir     {
625cdf0e10cSrcweir         memmove(s->inp, s->inp + n + 1, s->inl - s->inp + n + 2);
626cdf0e10cSrcweir         s->inl -= n + 1;
627cdf0e10cSrcweir         return 1;
628cdf0e10cSrcweir     }
629cdf0e10cSrcweir     return 0;
630cdf0e10cSrcweir }
631cdf0e10cSrcweir 
632cdf0e10cSrcweir int
fillbuf(Source * s)633cdf0e10cSrcweir     fillbuf(Source * s)
634cdf0e10cSrcweir {
635cdf0e10cSrcweir     int n;
636cdf0e10cSrcweir 
637cdf0e10cSrcweir     if (s->fd < 0 || (n = read(s->fd, (char *) s->inl, INS / 8)) <= 0)
638cdf0e10cSrcweir         n = 0;
639cdf0e10cSrcweir     s->inl += n;
640cdf0e10cSrcweir     s->inl[0] = s->inl[1] = s->inl[2] = s->inl[3] = EOB;
641cdf0e10cSrcweir     if (n == 0)
642cdf0e10cSrcweir     {
643cdf0e10cSrcweir         s->inl[0] = s->inl[1] = s->inl[2] = s->inl[3] = EOFC;
644cdf0e10cSrcweir         return EOF;
645cdf0e10cSrcweir     }
646cdf0e10cSrcweir     return 0;
647cdf0e10cSrcweir }
648cdf0e10cSrcweir 
649cdf0e10cSrcweir /*
650cdf0e10cSrcweir  * Push down to new source of characters.
651cdf0e10cSrcweir  * If fd>0 and str==NULL, then from a file `name';
652cdf0e10cSrcweir  * if fd==-1 and str, then from the string.
653cdf0e10cSrcweir  */
654cdf0e10cSrcweir Source *
setsource(char * name,int path,int fd,char * str,int wrap)655cdf0e10cSrcweir     setsource(char *name, int path, int fd, char *str, int wrap)
656cdf0e10cSrcweir {
657cdf0e10cSrcweir     Source *s = new(Source);
658cdf0e10cSrcweir     int len;
659cdf0e10cSrcweir 
660cdf0e10cSrcweir     s->line = 1;
661cdf0e10cSrcweir     s->lineinc = 0;
662cdf0e10cSrcweir     s->fd = fd;
663cdf0e10cSrcweir     s->filename = name;
664cdf0e10cSrcweir     s->next = cursource;
665cdf0e10cSrcweir     s->ifdepth = 0;
666cdf0e10cSrcweir     s->pathdepth = path;
667cdf0e10cSrcweir 	s->wrap = wrap;
668cdf0e10cSrcweir 
669cdf0e10cSrcweir     cursource = s;
670cdf0e10cSrcweir 
671cdf0e10cSrcweir 	if (s->wrap)
672cdf0e10cSrcweir 		genwrap(0);
673cdf0e10cSrcweir 
674cdf0e10cSrcweir     /* slop at right for EOB */
675cdf0e10cSrcweir     if (str)
676cdf0e10cSrcweir     {
677cdf0e10cSrcweir         len = strlen(str);
678cdf0e10cSrcweir         s->inb = domalloc(len + 4);
679cdf0e10cSrcweir         s->inp = s->inb;
680cdf0e10cSrcweir         strncpy((char *) s->inp, str, len);
681cdf0e10cSrcweir     }
682cdf0e10cSrcweir     else
683cdf0e10cSrcweir     {
684cdf0e10cSrcweir         s->inb = domalloc(INS + 4);
685cdf0e10cSrcweir         s->inp = s->inb;
686cdf0e10cSrcweir         len = 0;
687cdf0e10cSrcweir     }
688cdf0e10cSrcweir     s->inl = s->inp + len;
689cdf0e10cSrcweir     s->inl[0] = s->inl[1] = EOB;
690cdf0e10cSrcweir 
691cdf0e10cSrcweir     return s;
692cdf0e10cSrcweir }
693cdf0e10cSrcweir 
694cdf0e10cSrcweir void
unsetsource(void)695cdf0e10cSrcweir     unsetsource(void)
696cdf0e10cSrcweir {
697cdf0e10cSrcweir     Source *s = cursource;
698cdf0e10cSrcweir 
699cdf0e10cSrcweir 	if (s->wrap)
700cdf0e10cSrcweir 		genwrap(1);
701cdf0e10cSrcweir 
702cdf0e10cSrcweir     if (s->fd >= 0)
703cdf0e10cSrcweir     {
704cdf0e10cSrcweir         close(s->fd);
705cdf0e10cSrcweir         dofree(s->inb);
706cdf0e10cSrcweir     }
707cdf0e10cSrcweir     cursource = s->next;
708cdf0e10cSrcweir     dofree(s);
709cdf0e10cSrcweir }
710