xref: /aoo42x/main/soltools/cpp/_lex.c (revision cdf0e10c)
1*cdf0e10cSrcweir #include <stdio.h>
2*cdf0e10cSrcweir #include <stdlib.h>
3*cdf0e10cSrcweir #include <string.h>
4*cdf0e10cSrcweir #if (defined(_WIN32) || defined(_MSDOS) || defined(__IBMC__))
5*cdf0e10cSrcweir #include <io.h>
6*cdf0e10cSrcweir #else
7*cdf0e10cSrcweir #include <unistd.h>
8*cdf0e10cSrcweir #endif
9*cdf0e10cSrcweir #include "cpp.h"
/*
 * lexical FSM encoding
 *   when in state state, and one of the characters
 *   in ch arrives, enter nextstate.
 *   States >= S_SELF are either final, or at least require special action.
 *   In 'fsm' there is a line for each state X charset X nextstate.
 *   List chars that overwrite previous entries later (e.g. C_ALPH
 *   can be overridden by '_' by a later entry); C_XX is
 *   the universal set, and should always be first.
 *   States >= S_SELF are represented in the big table as negative values.
 *   S_SELF and S_SELFB encode the resulting token type in the upper bits.
 *   These actions differ in that S_SELF doesn't have a lookahead char,
 *   S_SELFB does.
 *
 *   The encoding is blown out into a big table for time-efficiency.
 *   Entries have
 *      nextstate: 6 bits; ?\ marker: 1 bit; tokentype: 9 bits.
 */
28*cdf0e10cSrcweir 
/* Number of ordinary FSM states; also the second dimension of bigfsm. */
#define	MAXSTATE		32
/*
 * Pack a token type above the low 7 bits that hold the action/state.
 * Both parameters are parenthesized so expression arguments bind correctly.
 */
#define	ACT(tok,act)	(((tok)<<7)+(act))
/* Marker bit (bit 6) for entries that need the ?, \, newline slow path. */
#define	QBSBIT			0100
/* Recover the token type packed by ACT(). */
#define	GETACT(st)		(((st)>>7)&0x1ff)

/*
 * character classes: pseudo-characters usable in fsm[].ch that stand for a
 * whole set of characters.  expandlex() expands C_XX (every character),
 * C_ALPH (letters and '_') and C_NUM (digits); C_WS and C_EOF are not
 * referenced in the visible part of this file.
 */
#define	C_WS	1
#define	C_ALPH	2
#define	C_NUM	3
#define	C_EOF	4
#define	C_XX	5
40*cdf0e10cSrcweir 
41*cdf0e10cSrcweir enum state
42*cdf0e10cSrcweir {
43*cdf0e10cSrcweir     START = 0, NUM1, NUM2, NUM3, ID1, ST1, ST2, ST3, COM1, COM2, COM3, COM4,
44*cdf0e10cSrcweir     CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1,
45*cdf0e10cSrcweir     CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1,
46*cdf0e10cSrcweir     S_SELF = MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR,
47*cdf0e10cSrcweir     S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME
48*cdf0e10cSrcweir };
49*cdf0e10cSrcweir 
50*cdf0e10cSrcweir int tottok;
51*cdf0e10cSrcweir int tokkind[256];
52*cdf0e10cSrcweir struct fsm
53*cdf0e10cSrcweir {
54*cdf0e10cSrcweir     int state;                          /* if in this state */
55*cdf0e10cSrcweir     uchar ch[4];                        /* and see one of these characters */
56*cdf0e10cSrcweir     int nextstate;                      /* enter this state if +ve */
57*cdf0e10cSrcweir };
58*cdf0e10cSrcweir 
59*cdf0e10cSrcweir  /*const*/ struct fsm fsm[] = {
60*cdf0e10cSrcweir     /* start state */
61*cdf0e10cSrcweir 		 {START, {C_XX}, ACT(UNCLASS, S_SELF)},
62*cdf0e10cSrcweir 		 {START, {' ', '\t', '\v'}, WS1},
63*cdf0e10cSrcweir 		 {START, {C_NUM}, NUM1},
64*cdf0e10cSrcweir 		 {START, {'.'}, NUM3},
65*cdf0e10cSrcweir 		 {START, {C_ALPH}, ID1},
66*cdf0e10cSrcweir 		 {START, {'L'}, ST1},
67*cdf0e10cSrcweir 		 {START, {'"'}, ST2},
68*cdf0e10cSrcweir 		 {START, {'\''}, CC1},
69*cdf0e10cSrcweir 		 {START, {'/'}, COM1},
70*cdf0e10cSrcweir 		 {START, {EOFC}, S_EOF},
71*cdf0e10cSrcweir 		 {START, {'\n'}, S_NL},
72*cdf0e10cSrcweir 		 {START, {'-'}, MINUS1},
73*cdf0e10cSrcweir 		 {START, {'+'}, PLUS1},
74*cdf0e10cSrcweir 		 {START, {'<'}, LT1},
75*cdf0e10cSrcweir 		 {START, {'>'}, GT1},
76*cdf0e10cSrcweir 		 {START, {'='}, ASG1},
77*cdf0e10cSrcweir 		 {START, {'!'}, NOT1},
78*cdf0e10cSrcweir 		 {START, {'&'}, AND1},
79*cdf0e10cSrcweir 		 {START, {'|'}, OR1},
80*cdf0e10cSrcweir 		 {START, {'#'}, SHARP1},
81*cdf0e10cSrcweir 		 {START, {'%'}, PCT1},
82*cdf0e10cSrcweir 		 {START, {'['}, ACT(SBRA, S_SELF)},
83*cdf0e10cSrcweir 		 {START, {']'}, ACT(SKET, S_SELF)},
84*cdf0e10cSrcweir 		 {START, {'('}, ACT(LP, S_SELF)},
85*cdf0e10cSrcweir 		 {START, {')'}, ACT(RP, S_SELF)},
86*cdf0e10cSrcweir 		 {START, {'*'}, STAR1},
87*cdf0e10cSrcweir 		 {START, {','}, ACT(COMMA, S_SELF)},
88*cdf0e10cSrcweir 		 {START, {'?'}, ACT(QUEST, S_SELF)},
89*cdf0e10cSrcweir 		 {START, {':'}, ACT(COLON, S_SELF)},
90*cdf0e10cSrcweir 		 {START, {';'}, ACT(SEMIC, S_SELF)},
91*cdf0e10cSrcweir 		 {START, {'{'}, ACT(CBRA, S_SELF)},
92*cdf0e10cSrcweir 		 {START, {'}'}, ACT(CKET, S_SELF)},
93*cdf0e10cSrcweir 		 {START, {'~'}, ACT(TILDE, S_SELF)},
94*cdf0e10cSrcweir 		 {START, {'^'}, CIRC1},
95*cdf0e10cSrcweir 
96*cdf0e10cSrcweir     /* saw a digit */
97*cdf0e10cSrcweir 		 {NUM1, {C_XX}, ACT(NUMBER, S_SELFB)},
98*cdf0e10cSrcweir 		 {NUM1, {C_NUM, C_ALPH, '.'}, NUM1},
99*cdf0e10cSrcweir 		 {NUM1, {'E', 'e'}, NUM2},
100*cdf0e10cSrcweir 		 {NUM1, {'_'}, ACT(NUMBER, S_SELFB)},
101*cdf0e10cSrcweir 
102*cdf0e10cSrcweir     /* saw possible start of exponent, digits-e */
103*cdf0e10cSrcweir 		 {NUM2, {C_XX}, ACT(NUMBER, S_SELFB)},
104*cdf0e10cSrcweir 		 {NUM2, {'+', '-'}, NUM1},
105*cdf0e10cSrcweir 		 {NUM2, {C_NUM, C_ALPH}, NUM1},
106*cdf0e10cSrcweir 		 {NUM2, {'_'}, ACT(NUMBER, S_SELFB)},
107*cdf0e10cSrcweir 
108*cdf0e10cSrcweir     /* saw a '.', which could be a number or an operator */
109*cdf0e10cSrcweir 		 {NUM3, {C_XX}, ACT(DOT, S_SELFB)},
110*cdf0e10cSrcweir 		 {NUM3, {'.'}, DOTS1},
111*cdf0e10cSrcweir 		 {NUM3, {C_NUM}, NUM1},
112*cdf0e10cSrcweir 
113*cdf0e10cSrcweir 		 {DOTS1, {C_XX}, ACT(UNCLASS, S_SELFB)},
114*cdf0e10cSrcweir 		 {DOTS1, {C_NUM}, NUM1},
115*cdf0e10cSrcweir 		 {DOTS1, {'.'}, ACT(ELLIPS, S_SELF)},
116*cdf0e10cSrcweir 
117*cdf0e10cSrcweir     /* saw a letter or _ */
118*cdf0e10cSrcweir 		 {ID1, {C_XX}, ACT(NAME, S_NAME)},
119*cdf0e10cSrcweir 		 {ID1, {C_ALPH, C_NUM}, ID1},
120*cdf0e10cSrcweir 
121*cdf0e10cSrcweir     /* saw L (start of wide string?) */
122*cdf0e10cSrcweir 		 {ST1, {C_XX}, ACT(NAME, S_NAME)},
123*cdf0e10cSrcweir 		 {ST1, {C_ALPH, C_NUM}, ID1},
124*cdf0e10cSrcweir 		 {ST1, {'"'}, ST2},
125*cdf0e10cSrcweir 		 {ST1, {'\''}, CC1},
126*cdf0e10cSrcweir 
127*cdf0e10cSrcweir     /* saw " beginning string */
128*cdf0e10cSrcweir 		 {ST2, {C_XX}, ST2},
129*cdf0e10cSrcweir 		 {ST2, {'"'}, ACT(STRING, S_SELF)},
130*cdf0e10cSrcweir 		 {ST2, {'\\'}, ST3},
131*cdf0e10cSrcweir 		 {ST2, {'\n'}, S_STNL},
132*cdf0e10cSrcweir 		 {ST2, {EOFC}, S_EOFSTR},
133*cdf0e10cSrcweir 
134*cdf0e10cSrcweir     /* saw \ in string */
135*cdf0e10cSrcweir 		 {ST3, {C_XX}, ST2},
136*cdf0e10cSrcweir 		 {ST3, {'\n'}, S_STNL},
137*cdf0e10cSrcweir 		 {ST3, {EOFC}, S_EOFSTR},
138*cdf0e10cSrcweir 
139*cdf0e10cSrcweir     /* saw ' beginning character const */
140*cdf0e10cSrcweir 		 {CC1, {C_XX}, CC1},
141*cdf0e10cSrcweir 		 {CC1, {'\''}, ACT(CCON, S_SELF)},
142*cdf0e10cSrcweir 		 {CC1, {'\\'}, CC2},
143*cdf0e10cSrcweir 		 {CC1, {'\n'}, S_STNL},
144*cdf0e10cSrcweir 		 {CC1, {EOFC}, S_EOFSTR},
145*cdf0e10cSrcweir 
146*cdf0e10cSrcweir     /* saw \ in ccon */
147*cdf0e10cSrcweir 		 {CC2, {C_XX}, CC1},
148*cdf0e10cSrcweir 		 {CC2, {'\n'}, S_STNL},
149*cdf0e10cSrcweir 		 {CC2, {EOFC}, S_EOFSTR},
150*cdf0e10cSrcweir 
151*cdf0e10cSrcweir     /* saw /, perhaps start of comment */
152*cdf0e10cSrcweir 		 {COM1, {C_XX}, ACT(SLASH, S_SELFB)},
153*cdf0e10cSrcweir 		 {COM1, {'='}, ACT(ASSLASH, S_SELF)},
154*cdf0e10cSrcweir 		 {COM1, {'*'}, COM2},
155*cdf0e10cSrcweir 		 {COM1, {'/'}, COM4},
156*cdf0e10cSrcweir 
157*cdf0e10cSrcweir     /* saw / followed by *, start of comment */
158*cdf0e10cSrcweir 		 {COM2, {C_XX}, COM2},
159*cdf0e10cSrcweir 		 {COM2, {'\n'}, S_COMNL},
160*cdf0e10cSrcweir 		 {COM2, {'*'}, COM3},
161*cdf0e10cSrcweir 		 {COM2, {EOFC}, S_EOFCOM},
162*cdf0e10cSrcweir 
163*cdf0e10cSrcweir     /* saw the * possibly ending a comment */
164*cdf0e10cSrcweir 		 {COM3, {C_XX}, COM2},
165*cdf0e10cSrcweir 		 {COM3, {'\n'}, S_COMNL},
166*cdf0e10cSrcweir 		 {COM3, {'*'}, COM3},
167*cdf0e10cSrcweir 		 {COM3, {'/'}, S_COMMENT},
168*cdf0e10cSrcweir 
169*cdf0e10cSrcweir     /* // comment */
170*cdf0e10cSrcweir 		 {COM4, {C_XX}, COM4},
171*cdf0e10cSrcweir 		 {COM4, {'\n'}, S_NL},
172*cdf0e10cSrcweir 		 {COM4, {EOFC}, S_EOFCOM},
173*cdf0e10cSrcweir 
174*cdf0e10cSrcweir     /* saw white space, eat it up */
175*cdf0e10cSrcweir 		 {WS1, {C_XX}, S_WS},
176*cdf0e10cSrcweir 		 {WS1, {'\t', '\v', ' '}, WS1},
177*cdf0e10cSrcweir 
178*cdf0e10cSrcweir     /* saw -, check --, -=, -> */
179*cdf0e10cSrcweir 		 {MINUS1, {C_XX}, ACT(MINUS, S_SELFB)},
180*cdf0e10cSrcweir 		 {MINUS1, {'-'}, ACT(MMINUS, S_SELF)},
181*cdf0e10cSrcweir 		 {MINUS1, {'='}, ACT(ASMINUS, S_SELF)},
182*cdf0e10cSrcweir 		 {MINUS1, {'>'}, ACT(ARROW, S_SELF)},
183*cdf0e10cSrcweir 
184*cdf0e10cSrcweir     /* saw +, check ++, += */
185*cdf0e10cSrcweir 		 {PLUS1, {C_XX}, ACT(PLUS, S_SELFB)},
186*cdf0e10cSrcweir 		 {PLUS1, {'+'}, ACT(PPLUS, S_SELF)},
187*cdf0e10cSrcweir 		 {PLUS1, {'='}, ACT(ASPLUS, S_SELF)},
188*cdf0e10cSrcweir 
189*cdf0e10cSrcweir     /* saw <, check <<, <<=, <= */
190*cdf0e10cSrcweir 		 {LT1, {C_XX}, ACT(LT, S_SELFB)},
191*cdf0e10cSrcweir 		 {LT1, {'<'}, LT2},
192*cdf0e10cSrcweir 		 {LT1, {'='}, ACT(LEQ, S_SELF)},
193*cdf0e10cSrcweir 		 {LT2, {C_XX}, ACT(LSH, S_SELFB)},
194*cdf0e10cSrcweir 		 {LT2, {'='}, ACT(ASLSH, S_SELF)},
195*cdf0e10cSrcweir 
196*cdf0e10cSrcweir     /* saw >, check >>, >>=, >= */
197*cdf0e10cSrcweir 		 {GT1, {C_XX}, ACT(GT, S_SELFB)},
198*cdf0e10cSrcweir 		 {GT1, {'>'}, GT2},
199*cdf0e10cSrcweir 		 {GT1, {'='}, ACT(GEQ, S_SELF)},
200*cdf0e10cSrcweir 		 {GT2, {C_XX}, ACT(RSH, S_SELFB)},
201*cdf0e10cSrcweir 		 {GT2, {'='}, ACT(ASRSH, S_SELF)},
202*cdf0e10cSrcweir 
203*cdf0e10cSrcweir     /* = */
204*cdf0e10cSrcweir 		 {ASG1, {C_XX}, ACT(ASGN, S_SELFB)},
205*cdf0e10cSrcweir 		 {ASG1, {'='}, ACT(EQ, S_SELF)},
206*cdf0e10cSrcweir 
207*cdf0e10cSrcweir     /* ! */
208*cdf0e10cSrcweir 		 {NOT1, {C_XX}, ACT(NOT, S_SELFB)},
209*cdf0e10cSrcweir 		 {NOT1, {'='}, ACT(NEQ, S_SELF)},
210*cdf0e10cSrcweir 
211*cdf0e10cSrcweir     /* & */
212*cdf0e10cSrcweir 		 {AND1, {C_XX}, ACT(AND, S_SELFB)},
213*cdf0e10cSrcweir 		 {AND1, {'&'}, ACT(LAND, S_SELF)},
214*cdf0e10cSrcweir 		 {AND1, {'='}, ACT(ASAND, S_SELF)},
215*cdf0e10cSrcweir 
216*cdf0e10cSrcweir     /* | */
217*cdf0e10cSrcweir 		 {OR1, {C_XX}, ACT(OR, S_SELFB)},
218*cdf0e10cSrcweir 		 {OR1, {'|'}, ACT(LOR, S_SELF)},
219*cdf0e10cSrcweir 		 {OR1, {'='}, ACT(ASOR, S_SELF)},
220*cdf0e10cSrcweir 
221*cdf0e10cSrcweir     /* # */
222*cdf0e10cSrcweir 		 {SHARP1, {C_XX}, ACT(SHARP, S_SELFB)},
223*cdf0e10cSrcweir 		 {SHARP1, {'#'}, ACT(DSHARP, S_SELF)},
224*cdf0e10cSrcweir 
225*cdf0e10cSrcweir     /* % */
226*cdf0e10cSrcweir 		 {PCT1, {C_XX}, ACT(PCT, S_SELFB)},
227*cdf0e10cSrcweir 		 {PCT1, {'='}, ACT(ASPCT, S_SELF)},
228*cdf0e10cSrcweir 
229*cdf0e10cSrcweir     /* * */
230*cdf0e10cSrcweir 		 {STAR1, {C_XX}, ACT(STAR, S_SELFB)},
231*cdf0e10cSrcweir 		 {STAR1, {'='}, ACT(ASSTAR, S_SELF)},
232*cdf0e10cSrcweir 
233*cdf0e10cSrcweir     /* ^ */
234*cdf0e10cSrcweir 		 {CIRC1, {C_XX}, ACT(CIRC, S_SELFB)},
235*cdf0e10cSrcweir 		 {CIRC1, {'='}, ACT(ASCIRC, S_SELF)},
236*cdf0e10cSrcweir 
237*cdf0e10cSrcweir 		 {-1, "", 0}
238*cdf0e10cSrcweir };
239*cdf0e10cSrcweir 
240*cdf0e10cSrcweir /* first index is char, second is state */
241*cdf0e10cSrcweir /* increase #states to power of 2 to encourage use of shift */
242*cdf0e10cSrcweir short bigfsm[256][MAXSTATE];
243*cdf0e10cSrcweir 
244*cdf0e10cSrcweir void
245*cdf0e10cSrcweir     expandlex(void)
246*cdf0e10cSrcweir {
247*cdf0e10cSrcweir      /* const */ struct fsm *fp;
248*cdf0e10cSrcweir     int i, j, nstate;
249*cdf0e10cSrcweir 
250*cdf0e10cSrcweir     for (fp = fsm; fp->state >= 0; fp++)
251*cdf0e10cSrcweir     {
252*cdf0e10cSrcweir         for (i = 0; fp->ch[i]; i++)
253*cdf0e10cSrcweir         {
254*cdf0e10cSrcweir             nstate = fp->nextstate;
255*cdf0e10cSrcweir             if (nstate >= S_SELF)
256*cdf0e10cSrcweir                 nstate = ~nstate;
257*cdf0e10cSrcweir             switch (fp->ch[i])
258*cdf0e10cSrcweir             {
259*cdf0e10cSrcweir 
260*cdf0e10cSrcweir                 case C_XX:              /* random characters */
261*cdf0e10cSrcweir                     for (j = 0; j < 256; j++)
262*cdf0e10cSrcweir                         bigfsm[j][fp->state] = (short) nstate;
263*cdf0e10cSrcweir                     continue;
264*cdf0e10cSrcweir                 case C_ALPH:
265*cdf0e10cSrcweir                     for (j = 0; j < 256; j++)
266*cdf0e10cSrcweir #ifdef S390
267*cdf0e10cSrcweir 						if( isalpha( j ) || (j == '_') )
268*cdf0e10cSrcweir #else
269*cdf0e10cSrcweir                         if (('a' <= j && j <= 'z') || ('A' <= j && j <= 'Z')
270*cdf0e10cSrcweir                             || j == '_')
271*cdf0e10cSrcweir #endif
272*cdf0e10cSrcweir                             bigfsm[j][fp->state] = (short) nstate;
273*cdf0e10cSrcweir                     continue;
274*cdf0e10cSrcweir                 case C_NUM:
275*cdf0e10cSrcweir                     for (j = '0'; j <= '9'; j++)
276*cdf0e10cSrcweir                         bigfsm[j][fp->state] = (short) nstate;
277*cdf0e10cSrcweir                     continue;
278*cdf0e10cSrcweir                 default:
279*cdf0e10cSrcweir                     bigfsm[fp->ch[i]][fp->state] = (short) nstate;
280*cdf0e10cSrcweir             }
281*cdf0e10cSrcweir         }
282*cdf0e10cSrcweir     }
283*cdf0e10cSrcweir 
284*cdf0e10cSrcweir     /*
285*cdf0e10cSrcweir      * install special cases for ? (trigraphs),  \ (splicing), runes, and
286*cdf0e10cSrcweir      * EOB
287*cdf0e10cSrcweir      */
288*cdf0e10cSrcweir     for (i = 0; i < MAXSTATE; i++)
289*cdf0e10cSrcweir     {
290*cdf0e10cSrcweir         for (j = 0; j < 0xFF; j++)
291*cdf0e10cSrcweir             if (j == '?' || j == '\\' || j == '\n' || j == '\r')
292*cdf0e10cSrcweir             {
293*cdf0e10cSrcweir                 if (bigfsm[j][i] > 0)
294*cdf0e10cSrcweir                     bigfsm[j][i] = ~bigfsm[j][i];
295*cdf0e10cSrcweir                 bigfsm[j][i] &= ~QBSBIT;
296*cdf0e10cSrcweir             }
297*cdf0e10cSrcweir         bigfsm[EOB][i] = ~S_EOB;
298*cdf0e10cSrcweir         if (bigfsm[EOFC][i] >= 0)
299*cdf0e10cSrcweir             bigfsm[EOFC][i] = ~S_EOF;
300*cdf0e10cSrcweir     }
301*cdf0e10cSrcweir }
302*cdf0e10cSrcweir 
303*cdf0e10cSrcweir void
304*cdf0e10cSrcweir     fixlex(void)
305*cdf0e10cSrcweir {
306*cdf0e10cSrcweir     /* do C++ comments? */
307*cdf0e10cSrcweir     if ((Cplusplus == 0) || (Cflag != 0))
308*cdf0e10cSrcweir         bigfsm['/'][COM1] = bigfsm['x'][COM1];
309*cdf0e10cSrcweir }
310*cdf0e10cSrcweir 
311*cdf0e10cSrcweir /*
312*cdf0e10cSrcweir  * fill in a row of tokens from input, terminated by NL or END
313*cdf0e10cSrcweir  * First token is put at trp->lp.
314*cdf0e10cSrcweir  * Reset is non-zero when the input buffer can be "rewound."
315*cdf0e10cSrcweir  * The value is a flag indicating that possible macros have
316*cdf0e10cSrcweir  * been seen in the row.
317*cdf0e10cSrcweir  */
318*cdf0e10cSrcweir int
319*cdf0e10cSrcweir     gettokens(Tokenrow * trp, int reset)
320*cdf0e10cSrcweir {
321*cdf0e10cSrcweir     register int c, state, oldstate;
322*cdf0e10cSrcweir     register uchar *ip;
323*cdf0e10cSrcweir     register Token *tp, *maxp;
324*cdf0e10cSrcweir     int runelen;
325*cdf0e10cSrcweir     Source *s = cursource;
326*cdf0e10cSrcweir     int nmac = 0;
327*cdf0e10cSrcweir 
328*cdf0e10cSrcweir     tp = trp->lp;
329*cdf0e10cSrcweir     ip = s->inp;
330*cdf0e10cSrcweir     if (reset)
331*cdf0e10cSrcweir     {
332*cdf0e10cSrcweir         s->lineinc = 0;
333*cdf0e10cSrcweir         if (ip >= s->inl)
334*cdf0e10cSrcweir         {                               /* nothing in buffer */
335*cdf0e10cSrcweir             s->inl = s->inb;
336*cdf0e10cSrcweir             fillbuf(s);
337*cdf0e10cSrcweir             ip = s->inp = s->inb;
338*cdf0e10cSrcweir         }
339*cdf0e10cSrcweir         else
340*cdf0e10cSrcweir             if (ip >= s->inb + (3 * INS / 4))
341*cdf0e10cSrcweir             {
342*cdf0e10cSrcweir                 memmove(s->inb, ip, 4 + s->inl - ip);
343*cdf0e10cSrcweir                 s->inl = s->inb + (s->inl - ip);
344*cdf0e10cSrcweir                 ip = s->inp = s->inb;
345*cdf0e10cSrcweir             }
346*cdf0e10cSrcweir     }
347*cdf0e10cSrcweir     maxp = &trp->bp[trp->max];
348*cdf0e10cSrcweir     runelen = 1;
349*cdf0e10cSrcweir     for (;;)
350*cdf0e10cSrcweir     {
351*cdf0e10cSrcweir continue2:
352*cdf0e10cSrcweir         if (tp >= maxp)
353*cdf0e10cSrcweir         {
354*cdf0e10cSrcweir             trp->lp = tp;
355*cdf0e10cSrcweir             tp = growtokenrow(trp);
356*cdf0e10cSrcweir             maxp = &trp->bp[trp->max];
357*cdf0e10cSrcweir         }
358*cdf0e10cSrcweir         tp->type = UNCLASS;
359*cdf0e10cSrcweir         tp->t = ip;
360*cdf0e10cSrcweir         tp->wslen = 0;
361*cdf0e10cSrcweir         tp->flag = 0;
362*cdf0e10cSrcweir         state = START;
363*cdf0e10cSrcweir         for (;;)
364*cdf0e10cSrcweir         {
365*cdf0e10cSrcweir             oldstate = state;
366*cdf0e10cSrcweir 
367*cdf0e10cSrcweir             c = *ip;
368*cdf0e10cSrcweir 
369*cdf0e10cSrcweir             if ((state = bigfsm[c][state]) >= 0)
370*cdf0e10cSrcweir             {
371*cdf0e10cSrcweir                 ip += runelen;
372*cdf0e10cSrcweir                 runelen = 1;
373*cdf0e10cSrcweir                 continue;
374*cdf0e10cSrcweir             }
375*cdf0e10cSrcweir             state = ~state;
376*cdf0e10cSrcweir     reswitch:
377*cdf0e10cSrcweir             switch (state & 0177)
378*cdf0e10cSrcweir             {
379*cdf0e10cSrcweir                 case S_SELF:
380*cdf0e10cSrcweir                     ip += runelen;
381*cdf0e10cSrcweir                     runelen = 1;
382*cdf0e10cSrcweir                 case S_SELFB:
383*cdf0e10cSrcweir                     tp->type = (unsigned char) GETACT(state);
384*cdf0e10cSrcweir                     tp->len = ip - tp->t;
385*cdf0e10cSrcweir                     tp++;
386*cdf0e10cSrcweir                     goto continue2;
387*cdf0e10cSrcweir 
388*cdf0e10cSrcweir                 case S_NAME:            /* like S_SELFB but with nmac check */
389*cdf0e10cSrcweir                     tp->type = NAME;
390*cdf0e10cSrcweir                     tp->len = ip - tp->t;
391*cdf0e10cSrcweir                     nmac |= quicklook(tp->t[0], tp->len > 1 ? tp->t[1] : 0);
392*cdf0e10cSrcweir                     tp++;
393*cdf0e10cSrcweir                     goto continue2;
394*cdf0e10cSrcweir 
395*cdf0e10cSrcweir                 case S_WS:
396*cdf0e10cSrcweir                     tp->wslen = ip - tp->t;
397*cdf0e10cSrcweir                     tp->t = ip;
398*cdf0e10cSrcweir                     state = START;
399*cdf0e10cSrcweir                     continue;
400*cdf0e10cSrcweir 
401*cdf0e10cSrcweir                 default:
402*cdf0e10cSrcweir                     if ((state & QBSBIT) == 0)
403*cdf0e10cSrcweir                     {
404*cdf0e10cSrcweir                         ip += runelen;
405*cdf0e10cSrcweir                         runelen = 1;
406*cdf0e10cSrcweir                         continue;
407*cdf0e10cSrcweir                     }
408*cdf0e10cSrcweir                     state &= ~QBSBIT;
409*cdf0e10cSrcweir                     s->inp = ip;
410*cdf0e10cSrcweir 
411*cdf0e10cSrcweir 					if (c == '\n')
412*cdf0e10cSrcweir 					{
413*cdf0e10cSrcweir 					    while (s->inp + 1 >= s->inl && fillbuf(s) != EOF);
414*cdf0e10cSrcweir 
415*cdf0e10cSrcweir 						if (s->inp[1] == '\r')
416*cdf0e10cSrcweir 						{
417*cdf0e10cSrcweir 							memmove(s->inp + 1, s->inp + 2, s->inl - s->inp + 2);
418*cdf0e10cSrcweir 							s->inl -= 1;
419*cdf0e10cSrcweir 						}
420*cdf0e10cSrcweir 
421*cdf0e10cSrcweir                         goto reswitch;
422*cdf0e10cSrcweir 					}
423*cdf0e10cSrcweir 
424*cdf0e10cSrcweir 					if (c == '\r')
425*cdf0e10cSrcweir 					{
426*cdf0e10cSrcweir     				    while (s->inp + 1 >= s->inl && fillbuf(s) != EOF);
427*cdf0e10cSrcweir 
428*cdf0e10cSrcweir 						if (s->inp[1] == '\n')
429*cdf0e10cSrcweir 						{
430*cdf0e10cSrcweir 							memmove(s->inp, s->inp + 1, s->inl - s->inp + 1);
431*cdf0e10cSrcweir 							s->inl -= 1;
432*cdf0e10cSrcweir 						}
433*cdf0e10cSrcweir 						else
434*cdf0e10cSrcweir 							*s->inp = '\n';
435*cdf0e10cSrcweir 
436*cdf0e10cSrcweir 						state = oldstate;
437*cdf0e10cSrcweir                         continue;
438*cdf0e10cSrcweir 					}
439*cdf0e10cSrcweir 
440*cdf0e10cSrcweir                     if (c == '?')
441*cdf0e10cSrcweir                     {                   /* check trigraph */
442*cdf0e10cSrcweir                         if (trigraph(s))
443*cdf0e10cSrcweir                         {
444*cdf0e10cSrcweir                             state = oldstate;
445*cdf0e10cSrcweir                             continue;
446*cdf0e10cSrcweir                         }
447*cdf0e10cSrcweir                         goto reswitch;
448*cdf0e10cSrcweir                     }
449*cdf0e10cSrcweir                     if (c == '\\')
450*cdf0e10cSrcweir                     {                   /* line-folding */
451*cdf0e10cSrcweir                         if (foldline(s))
452*cdf0e10cSrcweir                         {
453*cdf0e10cSrcweir                             s->lineinc++;
454*cdf0e10cSrcweir                             state = oldstate;
455*cdf0e10cSrcweir                             continue;
456*cdf0e10cSrcweir                         }
457*cdf0e10cSrcweir                         goto reswitch;
458*cdf0e10cSrcweir                     }
459*cdf0e10cSrcweir                     error(WARNING, "Lexical botch in cpp");
460*cdf0e10cSrcweir                     ip += runelen;
461*cdf0e10cSrcweir                     runelen = 1;
462*cdf0e10cSrcweir                     continue;
463*cdf0e10cSrcweir 
464*cdf0e10cSrcweir                 case S_EOB:
465*cdf0e10cSrcweir                     s->inp = ip;
466*cdf0e10cSrcweir                     fillbuf(cursource);
467*cdf0e10cSrcweir                     state = oldstate;
468*cdf0e10cSrcweir                     continue;
469*cdf0e10cSrcweir 
470*cdf0e10cSrcweir                 case S_EOF:
471*cdf0e10cSrcweir                     tp->type = END;
472*cdf0e10cSrcweir                     tp->len = 0;
473*cdf0e10cSrcweir                     s->inp = ip;
474*cdf0e10cSrcweir                     if (tp != trp->bp && (tp - 1)->type != NL && cursource->fd != -1)
475*cdf0e10cSrcweir                         error(WARNING, "No newline at end of file");
476*cdf0e10cSrcweir                     trp->lp = tp + 1;
477*cdf0e10cSrcweir                     return nmac;
478*cdf0e10cSrcweir 
479*cdf0e10cSrcweir                 case S_STNL:
480*cdf0e10cSrcweir                     error(ERROR, "Unterminated string or char const");
481*cdf0e10cSrcweir                 case S_NL:
482*cdf0e10cSrcweir                     tp->t = ip;
483*cdf0e10cSrcweir                     tp->type = NL;
484*cdf0e10cSrcweir                     tp->len = 1;
485*cdf0e10cSrcweir                     tp->wslen = 0;
486*cdf0e10cSrcweir                     s->lineinc++;
487*cdf0e10cSrcweir                     s->inp = ip + 1;
488*cdf0e10cSrcweir                     trp->lp = tp + 1;
489*cdf0e10cSrcweir                     return nmac;
490*cdf0e10cSrcweir 
491*cdf0e10cSrcweir                 case S_EOFSTR:
492*cdf0e10cSrcweir                     error(FATAL, "EOF in string or char constant");
493*cdf0e10cSrcweir                     break;
494*cdf0e10cSrcweir 
495*cdf0e10cSrcweir                 case S_COMNL:
496*cdf0e10cSrcweir                     s->lineinc++;
497*cdf0e10cSrcweir                     state = COM2;
498*cdf0e10cSrcweir                     ip += runelen;
499*cdf0e10cSrcweir                     runelen = 1;
500*cdf0e10cSrcweir                     continue;
501*cdf0e10cSrcweir 
502*cdf0e10cSrcweir                 case S_EOFCOM:
503*cdf0e10cSrcweir                     error(WARNING, "EOF inside comment");
504*cdf0e10cSrcweir                     --ip;
505*cdf0e10cSrcweir                 case S_COMMENT:
506*cdf0e10cSrcweir 					if (!Cflag)
507*cdf0e10cSrcweir 					{
508*cdf0e10cSrcweir 						tp->t = ++ip;
509*cdf0e10cSrcweir 						tp->t[-1] = ' ';
510*cdf0e10cSrcweir 						tp->wslen = 1;
511*cdf0e10cSrcweir 						state = START;
512*cdf0e10cSrcweir 						continue;
513*cdf0e10cSrcweir 					}
514*cdf0e10cSrcweir 					else
515*cdf0e10cSrcweir 					{
516*cdf0e10cSrcweir 	                    runelen = 1;
517*cdf0e10cSrcweir                         s->lineinc = 0;;
518*cdf0e10cSrcweir                         tp->type = COMMENT;
519*cdf0e10cSrcweir 						tp->flag |= XTWS;
520*cdf0e10cSrcweir 					}
521*cdf0e10cSrcweir             }
522*cdf0e10cSrcweir             break;
523*cdf0e10cSrcweir         }
524*cdf0e10cSrcweir         ip += runelen;
525*cdf0e10cSrcweir         runelen = 1;
526*cdf0e10cSrcweir         tp->len = ip - tp->t;
527*cdf0e10cSrcweir         tp++;
528*cdf0e10cSrcweir     }
529*cdf0e10cSrcweir }
530*cdf0e10cSrcweir 
531*cdf0e10cSrcweir /* have seen ?; handle the trigraph it starts (if any) else 0 */
532*cdf0e10cSrcweir int
533*cdf0e10cSrcweir     trigraph(Source * s)
534*cdf0e10cSrcweir {
535*cdf0e10cSrcweir     uchar c;
536*cdf0e10cSrcweir 
537*cdf0e10cSrcweir     while (s->inp + 2 >= s->inl && fillbuf(s) != EOF);
538*cdf0e10cSrcweir 	;
539*cdf0e10cSrcweir     if (s->inp[1] != '?')
540*cdf0e10cSrcweir         return 0;
541*cdf0e10cSrcweir     c = 0;
542*cdf0e10cSrcweir     switch (s->inp[2])
543*cdf0e10cSrcweir     {
544*cdf0e10cSrcweir         case '=':
545*cdf0e10cSrcweir             c = '#';
546*cdf0e10cSrcweir             break;
547*cdf0e10cSrcweir         case '(':
548*cdf0e10cSrcweir             c = '[';
549*cdf0e10cSrcweir             break;
550*cdf0e10cSrcweir         case '/':
551*cdf0e10cSrcweir             c = '\\';
552*cdf0e10cSrcweir             break;
553*cdf0e10cSrcweir         case ')':
554*cdf0e10cSrcweir             c = ']';
555*cdf0e10cSrcweir             break;
556*cdf0e10cSrcweir         case '\'':
557*cdf0e10cSrcweir             c = '^';
558*cdf0e10cSrcweir             break;
559*cdf0e10cSrcweir         case '<':
560*cdf0e10cSrcweir             c = '{';
561*cdf0e10cSrcweir             break;
562*cdf0e10cSrcweir         case '!':
563*cdf0e10cSrcweir             c = '|';
564*cdf0e10cSrcweir             break;
565*cdf0e10cSrcweir         case '>':
566*cdf0e10cSrcweir             c = '}';
567*cdf0e10cSrcweir             break;
568*cdf0e10cSrcweir         case '-':
569*cdf0e10cSrcweir             c = '~';
570*cdf0e10cSrcweir             break;
571*cdf0e10cSrcweir     }
572*cdf0e10cSrcweir     if (c)
573*cdf0e10cSrcweir     {
574*cdf0e10cSrcweir         *s->inp = c;
575*cdf0e10cSrcweir         memmove(s->inp + 1, s->inp + 3, s->inl - s->inp + 2);
576*cdf0e10cSrcweir         s->inl -= 2;
577*cdf0e10cSrcweir     }
578*cdf0e10cSrcweir     return c;
579*cdf0e10cSrcweir }
580*cdf0e10cSrcweir 
581*cdf0e10cSrcweir int
582*cdf0e10cSrcweir     foldline(Source * s)
583*cdf0e10cSrcweir {
584*cdf0e10cSrcweir     int n = 1;
585*cdf0e10cSrcweir 
586*cdf0e10cSrcweir 	/* skip pending wihite spaces */
587*cdf0e10cSrcweir 	while ((s->inp[n] == ' ') || (s->inp[n] == '\t'))
588*cdf0e10cSrcweir 	{
589*cdf0e10cSrcweir 		n++;
590*cdf0e10cSrcweir 	    if ((s->inp + n >= s->inl) && (fillbuf(s) == EOF))
591*cdf0e10cSrcweir 			break;
592*cdf0e10cSrcweir 	}
593*cdf0e10cSrcweir 
594*cdf0e10cSrcweir 	/* refill buffer */
595*cdf0e10cSrcweir     while (s->inp + (n + 1) >= s->inl && fillbuf(s) != EOF);
596*cdf0e10cSrcweir 
597*cdf0e10cSrcweir     /* skip DOS line ends */
598*cdf0e10cSrcweir     if (((s->inp[n] == '\r') && (s->inp[n+1] == '\n')) ||
599*cdf0e10cSrcweir 		((s->inp[n] == '\n') && (s->inp[n+1] == '\r')))
600*cdf0e10cSrcweir         n++;
601*cdf0e10cSrcweir 
602*cdf0e10cSrcweir     if ((s->inp[n] == '\n') || (s->inp[n] == '\r'))
603*cdf0e10cSrcweir     {
604*cdf0e10cSrcweir         memmove(s->inp, s->inp + n + 1, s->inl - s->inp + n + 2);
605*cdf0e10cSrcweir         s->inl -= n + 1;
606*cdf0e10cSrcweir         return 1;
607*cdf0e10cSrcweir     }
608*cdf0e10cSrcweir     return 0;
609*cdf0e10cSrcweir }
610*cdf0e10cSrcweir 
611*cdf0e10cSrcweir int
612*cdf0e10cSrcweir     fillbuf(Source * s)
613*cdf0e10cSrcweir {
614*cdf0e10cSrcweir     int n;
615*cdf0e10cSrcweir 
616*cdf0e10cSrcweir     if (s->fd < 0 || (n = read(s->fd, (char *) s->inl, INS / 8)) <= 0)
617*cdf0e10cSrcweir         n = 0;
618*cdf0e10cSrcweir     s->inl += n;
619*cdf0e10cSrcweir     s->inl[0] = s->inl[1] = s->inl[2] = s->inl[3] = EOB;
620*cdf0e10cSrcweir     if (n == 0)
621*cdf0e10cSrcweir     {
622*cdf0e10cSrcweir         s->inl[0] = s->inl[1] = s->inl[2] = s->inl[3] = EOFC;
623*cdf0e10cSrcweir         return EOF;
624*cdf0e10cSrcweir     }
625*cdf0e10cSrcweir     return 0;
626*cdf0e10cSrcweir }
627*cdf0e10cSrcweir 
628*cdf0e10cSrcweir /*
629*cdf0e10cSrcweir  * Push down to new source of characters.
630*cdf0e10cSrcweir  * If fd>0 and str==NULL, then from a file `name';
631*cdf0e10cSrcweir  * if fd==-1 and str, then from the string.
632*cdf0e10cSrcweir  */
633*cdf0e10cSrcweir Source *
634*cdf0e10cSrcweir     setsource(char *name, int path, int fd, char *str, int wrap)
635*cdf0e10cSrcweir {
636*cdf0e10cSrcweir     Source *s = new(Source);
637*cdf0e10cSrcweir     int len;
638*cdf0e10cSrcweir 
639*cdf0e10cSrcweir     s->line = 1;
640*cdf0e10cSrcweir     s->lineinc = 0;
641*cdf0e10cSrcweir     s->fd = fd;
642*cdf0e10cSrcweir     s->filename = name;
643*cdf0e10cSrcweir     s->next = cursource;
644*cdf0e10cSrcweir     s->ifdepth = 0;
645*cdf0e10cSrcweir     s->pathdepth = path;
646*cdf0e10cSrcweir 	s->wrap = wrap;
647*cdf0e10cSrcweir 
648*cdf0e10cSrcweir     cursource = s;
649*cdf0e10cSrcweir 
650*cdf0e10cSrcweir 	if (s->wrap)
651*cdf0e10cSrcweir 		genwrap(0);
652*cdf0e10cSrcweir 
653*cdf0e10cSrcweir     /* slop at right for EOB */
654*cdf0e10cSrcweir     if (str)
655*cdf0e10cSrcweir     {
656*cdf0e10cSrcweir         len = strlen(str);
657*cdf0e10cSrcweir         s->inb = domalloc(len + 4);
658*cdf0e10cSrcweir         s->inp = s->inb;
659*cdf0e10cSrcweir         strncpy((char *) s->inp, str, len);
660*cdf0e10cSrcweir     }
661*cdf0e10cSrcweir     else
662*cdf0e10cSrcweir     {
663*cdf0e10cSrcweir         s->inb = domalloc(INS + 4);
664*cdf0e10cSrcweir         s->inp = s->inb;
665*cdf0e10cSrcweir         len = 0;
666*cdf0e10cSrcweir     }
667*cdf0e10cSrcweir     s->inl = s->inp + len;
668*cdf0e10cSrcweir     s->inl[0] = s->inl[1] = EOB;
669*cdf0e10cSrcweir 
670*cdf0e10cSrcweir     return s;
671*cdf0e10cSrcweir }
672*cdf0e10cSrcweir 
673*cdf0e10cSrcweir void
674*cdf0e10cSrcweir     unsetsource(void)
675*cdf0e10cSrcweir {
676*cdf0e10cSrcweir     Source *s = cursource;
677*cdf0e10cSrcweir 
678*cdf0e10cSrcweir 	if (s->wrap)
679*cdf0e10cSrcweir 		genwrap(1);
680*cdf0e10cSrcweir 
681*cdf0e10cSrcweir     if (s->fd >= 0)
682*cdf0e10cSrcweir     {
683*cdf0e10cSrcweir         close(s->fd);
684*cdf0e10cSrcweir         dofree(s->inb);
685*cdf0e10cSrcweir     }
686*cdf0e10cSrcweir     cursource = s->next;
687*cdf0e10cSrcweir     dofree(s);
688*cdf0e10cSrcweir }
689