1*cdf0e10cSrcweir #include <stdio.h> 2*cdf0e10cSrcweir #include <stdlib.h> 3*cdf0e10cSrcweir #include <string.h> 4*cdf0e10cSrcweir #if (defined(_WIN32) || defined(_MSDOS) || defined(__IBMC__)) 5*cdf0e10cSrcweir #include <io.h> 6*cdf0e10cSrcweir #else 7*cdf0e10cSrcweir #include <unistd.h> 8*cdf0e10cSrcweir #endif 9*cdf0e10cSrcweir #include "cpp.h" 10*cdf0e10cSrcweir /* 11*cdf0e10cSrcweir * lexical FSM encoding 12*cdf0e10cSrcweir * when in state state, and one of the characters 13*cdf0e10cSrcweir * in ch arrives, enter nextstate. 14*cdf0e10cSrcweir * States >= S_SELF are either final, or at least require special action. 15*cdf0e10cSrcweir * In 'fsm' there is a line for each state X charset X nextstate. 16*cdf0e10cSrcweir * List chars that overwrite previous entries later (e.g. C_ALPH 17*cdf0e10cSrcweir * can be overridden by '_' by a later entry; and C_XX is the 18*cdf0e10cSrcweir * the universal set, and should always be first. 19*cdf0e10cSrcweir * States above S_SELF are represented in the big table as negative values. 20*cdf0e10cSrcweir * S_SELF and S_SELFB encode the resulting token type in the upper bits. 21*cdf0e10cSrcweir * These actions differ in that S_SELF doesn't have a lookahead char, 22*cdf0e10cSrcweir * S_SELFB does. 23*cdf0e10cSrcweir * 24*cdf0e10cSrcweir * The encoding is blown out into a big table for time-efficiency. 25*cdf0e10cSrcweir * Entries have 26*cdf0e10cSrcweir * nextstate: 6 bits; ?\ marker: 1 bit; tokentype: 9 bits. 27*cdf0e10cSrcweir */ 28*cdf0e10cSrcweir 29*cdf0e10cSrcweir #define MAXSTATE 32 30*cdf0e10cSrcweir #define ACT(tok,act) ((tok<<7)+act) 31*cdf0e10cSrcweir #define QBSBIT 0100 32*cdf0e10cSrcweir #define GETACT(st) ((st>>7)&0x1ff) 33*cdf0e10cSrcweir 34*cdf0e10cSrcweir /* character classes */ 35*cdf0e10cSrcweir #define C_WS 1 36*cdf0e10cSrcweir #define C_ALPH 2 37*cdf0e10cSrcweir #define C_NUM 3 38*cdf0e10cSrcweir #define C_EOF 4 39*cdf0e10cSrcweir #define C_XX 5 40*cdf0e10cSrcweir 41*cdf0e10cSrcweir enum state 42*cdf0e10cSrcweir { 43*cdf0e10cSrcweir START = 0, NUM1, NUM2, NUM3, ID1, ST1, ST2, ST3, COM1, COM2, COM3, COM4, 44*cdf0e10cSrcweir CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1, 45*cdf0e10cSrcweir CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1, 46*cdf0e10cSrcweir S_SELF = MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR, 47*cdf0e10cSrcweir S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME 48*cdf0e10cSrcweir }; 49*cdf0e10cSrcweir 50*cdf0e10cSrcweir int tottok; 51*cdf0e10cSrcweir int tokkind[256]; 52*cdf0e10cSrcweir struct fsm 53*cdf0e10cSrcweir { 54*cdf0e10cSrcweir int state; /* if in this state */ 55*cdf0e10cSrcweir uchar ch[4]; /* and see one of these characters */ 56*cdf0e10cSrcweir int nextstate; /* enter this state if +ve */ 57*cdf0e10cSrcweir }; 58*cdf0e10cSrcweir 59*cdf0e10cSrcweir /*const*/ struct fsm fsm[] = { 60*cdf0e10cSrcweir /* start state */ 61*cdf0e10cSrcweir {START, {C_XX}, ACT(UNCLASS, S_SELF)}, 62*cdf0e10cSrcweir {START, {' ', '\t', '\v'}, WS1}, 63*cdf0e10cSrcweir {START, {C_NUM}, NUM1}, 64*cdf0e10cSrcweir {START, {'.'}, NUM3}, 65*cdf0e10cSrcweir {START, {C_ALPH}, ID1}, 66*cdf0e10cSrcweir {START, {'L'}, ST1}, 67*cdf0e10cSrcweir {START, {'"'}, ST2}, 68*cdf0e10cSrcweir {START, {'\''}, CC1}, 69*cdf0e10cSrcweir {START, {'/'}, COM1}, 70*cdf0e10cSrcweir {START, {EOFC}, S_EOF}, 71*cdf0e10cSrcweir {START, {'\n'}, S_NL}, 72*cdf0e10cSrcweir {START, {'-'}, MINUS1}, 73*cdf0e10cSrcweir {START, {'+'}, PLUS1}, 74*cdf0e10cSrcweir {START, {'<'}, LT1}, 75*cdf0e10cSrcweir {START, {'>'}, GT1}, 76*cdf0e10cSrcweir {START, {'='}, ASG1}, 77*cdf0e10cSrcweir {START, {'!'}, NOT1}, 78*cdf0e10cSrcweir {START, {'&'}, AND1}, 79*cdf0e10cSrcweir {START, {'|'}, OR1}, 80*cdf0e10cSrcweir {START, {'#'}, SHARP1}, 81*cdf0e10cSrcweir {START, {'%'}, PCT1}, 82*cdf0e10cSrcweir {START, {'['}, ACT(SBRA, S_SELF)}, 83*cdf0e10cSrcweir {START, {']'}, ACT(SKET, S_SELF)}, 84*cdf0e10cSrcweir {START, {'('}, ACT(LP, S_SELF)}, 85*cdf0e10cSrcweir {START, {')'}, ACT(RP, S_SELF)}, 86*cdf0e10cSrcweir {START, {'*'}, STAR1}, 87*cdf0e10cSrcweir {START, {','}, ACT(COMMA, S_SELF)}, 88*cdf0e10cSrcweir {START, {'?'}, ACT(QUEST, S_SELF)}, 89*cdf0e10cSrcweir {START, {':'}, ACT(COLON, S_SELF)}, 90*cdf0e10cSrcweir {START, {';'}, ACT(SEMIC, S_SELF)}, 91*cdf0e10cSrcweir {START, {'{'}, ACT(CBRA, S_SELF)}, 92*cdf0e10cSrcweir {START, {'}'}, ACT(CKET, S_SELF)}, 93*cdf0e10cSrcweir {START, {'~'}, ACT(TILDE, S_SELF)}, 94*cdf0e10cSrcweir {START, {'^'}, CIRC1}, 95*cdf0e10cSrcweir 96*cdf0e10cSrcweir /* saw a digit */ 97*cdf0e10cSrcweir {NUM1, {C_XX}, ACT(NUMBER, S_SELFB)}, 98*cdf0e10cSrcweir {NUM1, {C_NUM, C_ALPH, '.'}, NUM1}, 99*cdf0e10cSrcweir {NUM1, {'E', 'e'}, NUM2}, 100*cdf0e10cSrcweir {NUM1, {'_'}, ACT(NUMBER, S_SELFB)}, 101*cdf0e10cSrcweir 102*cdf0e10cSrcweir /* saw possible start of exponent, digits-e */ 103*cdf0e10cSrcweir {NUM2, {C_XX}, ACT(NUMBER, S_SELFB)}, 104*cdf0e10cSrcweir {NUM2, {'+', '-'}, NUM1}, 105*cdf0e10cSrcweir {NUM2, {C_NUM, C_ALPH}, NUM1}, 106*cdf0e10cSrcweir {NUM2, {'_'}, ACT(NUMBER, S_SELFB)}, 107*cdf0e10cSrcweir 108*cdf0e10cSrcweir /* saw a '.', which could be a number or an operator */ 109*cdf0e10cSrcweir {NUM3, {C_XX}, ACT(DOT, S_SELFB)}, 110*cdf0e10cSrcweir {NUM3, {'.'}, DOTS1}, 111*cdf0e10cSrcweir {NUM3, {C_NUM}, NUM1}, 112*cdf0e10cSrcweir 113*cdf0e10cSrcweir {DOTS1, {C_XX}, ACT(UNCLASS, S_SELFB)}, 114*cdf0e10cSrcweir {DOTS1, {C_NUM}, NUM1}, 115*cdf0e10cSrcweir {DOTS1, {'.'}, ACT(ELLIPS, S_SELF)}, 116*cdf0e10cSrcweir 117*cdf0e10cSrcweir /* saw a letter or _ */ 118*cdf0e10cSrcweir {ID1, {C_XX}, ACT(NAME, S_NAME)}, 119*cdf0e10cSrcweir {ID1, {C_ALPH, C_NUM}, ID1}, 120*cdf0e10cSrcweir 121*cdf0e10cSrcweir /* saw L (start of wide string?) */ 122*cdf0e10cSrcweir {ST1, {C_XX}, ACT(NAME, S_NAME)}, 123*cdf0e10cSrcweir {ST1, {C_ALPH, C_NUM}, ID1}, 124*cdf0e10cSrcweir {ST1, {'"'}, ST2}, 125*cdf0e10cSrcweir {ST1, {'\''}, CC1}, 126*cdf0e10cSrcweir 127*cdf0e10cSrcweir /* saw " beginning string */ 128*cdf0e10cSrcweir {ST2, {C_XX}, ST2}, 129*cdf0e10cSrcweir {ST2, {'"'}, ACT(STRING, S_SELF)}, 130*cdf0e10cSrcweir {ST2, {'\\'}, ST3}, 131*cdf0e10cSrcweir {ST2, {'\n'}, S_STNL}, 132*cdf0e10cSrcweir {ST2, {EOFC}, S_EOFSTR}, 133*cdf0e10cSrcweir 134*cdf0e10cSrcweir /* saw \ in string */ 135*cdf0e10cSrcweir {ST3, {C_XX}, ST2}, 136*cdf0e10cSrcweir {ST3, {'\n'}, S_STNL}, 137*cdf0e10cSrcweir {ST3, {EOFC}, S_EOFSTR}, 138*cdf0e10cSrcweir 139*cdf0e10cSrcweir /* saw ' beginning character const */ 140*cdf0e10cSrcweir {CC1, {C_XX}, CC1}, 141*cdf0e10cSrcweir {CC1, {'\''}, ACT(CCON, S_SELF)}, 142*cdf0e10cSrcweir {CC1, {'\\'}, CC2}, 143*cdf0e10cSrcweir {CC1, {'\n'}, S_STNL}, 144*cdf0e10cSrcweir {CC1, {EOFC}, S_EOFSTR}, 145*cdf0e10cSrcweir 146*cdf0e10cSrcweir /* saw \ in ccon */ 147*cdf0e10cSrcweir {CC2, {C_XX}, CC1}, 148*cdf0e10cSrcweir {CC2, {'\n'}, S_STNL}, 149*cdf0e10cSrcweir {CC2, {EOFC}, S_EOFSTR}, 150*cdf0e10cSrcweir 151*cdf0e10cSrcweir /* saw /, perhaps start of comment */ 152*cdf0e10cSrcweir {COM1, {C_XX}, ACT(SLASH, S_SELFB)}, 153*cdf0e10cSrcweir {COM1, {'='}, ACT(ASSLASH, S_SELF)}, 154*cdf0e10cSrcweir {COM1, {'*'}, COM2}, 155*cdf0e10cSrcweir {COM1, {'/'}, COM4}, 156*cdf0e10cSrcweir 157*cdf0e10cSrcweir /* saw / followed by *, start of comment */ 158*cdf0e10cSrcweir {COM2, {C_XX}, COM2}, 159*cdf0e10cSrcweir {COM2, {'\n'}, S_COMNL}, 160*cdf0e10cSrcweir {COM2, {'*'}, COM3}, 161*cdf0e10cSrcweir {COM2, {EOFC}, S_EOFCOM}, 162*cdf0e10cSrcweir 163*cdf0e10cSrcweir /* saw the * possibly ending a comment */ 164*cdf0e10cSrcweir {COM3, {C_XX}, COM2}, 165*cdf0e10cSrcweir {COM3, {'\n'}, S_COMNL}, 166*cdf0e10cSrcweir {COM3, {'*'}, COM3}, 167*cdf0e10cSrcweir {COM3, {'/'}, S_COMMENT}, 168*cdf0e10cSrcweir 169*cdf0e10cSrcweir /* // comment */ 170*cdf0e10cSrcweir {COM4, {C_XX}, COM4}, 171*cdf0e10cSrcweir {COM4, {'\n'}, S_NL}, 172*cdf0e10cSrcweir {COM4, {EOFC}, S_EOFCOM}, 173*cdf0e10cSrcweir 174*cdf0e10cSrcweir /* saw white space, eat it up */ 175*cdf0e10cSrcweir {WS1, {C_XX}, S_WS}, 176*cdf0e10cSrcweir {WS1, {'\t', '\v', ' '}, WS1}, 177*cdf0e10cSrcweir 178*cdf0e10cSrcweir /* saw -, check --, -=, -> */ 179*cdf0e10cSrcweir {MINUS1, {C_XX}, ACT(MINUS, S_SELFB)}, 180*cdf0e10cSrcweir {MINUS1, {'-'}, ACT(MMINUS, S_SELF)}, 181*cdf0e10cSrcweir {MINUS1, {'='}, ACT(ASMINUS, S_SELF)}, 182*cdf0e10cSrcweir {MINUS1, {'>'}, ACT(ARROW, S_SELF)}, 183*cdf0e10cSrcweir 184*cdf0e10cSrcweir /* saw +, check ++, += */ 185*cdf0e10cSrcweir {PLUS1, {C_XX}, ACT(PLUS, S_SELFB)}, 186*cdf0e10cSrcweir {PLUS1, {'+'}, ACT(PPLUS, S_SELF)}, 187*cdf0e10cSrcweir {PLUS1, {'='}, ACT(ASPLUS, S_SELF)}, 188*cdf0e10cSrcweir 189*cdf0e10cSrcweir /* saw <, check <<, <<=, <= */ 190*cdf0e10cSrcweir {LT1, {C_XX}, ACT(LT, S_SELFB)}, 191*cdf0e10cSrcweir {LT1, {'<'}, LT2}, 192*cdf0e10cSrcweir {LT1, {'='}, ACT(LEQ, S_SELF)}, 193*cdf0e10cSrcweir {LT2, {C_XX}, ACT(LSH, S_SELFB)}, 194*cdf0e10cSrcweir {LT2, {'='}, ACT(ASLSH, S_SELF)}, 195*cdf0e10cSrcweir 196*cdf0e10cSrcweir /* saw >, check >>, >>=, >= */ 197*cdf0e10cSrcweir {GT1, {C_XX}, ACT(GT, S_SELFB)}, 198*cdf0e10cSrcweir {GT1, {'>'}, GT2}, 199*cdf0e10cSrcweir {GT1, {'='}, ACT(GEQ, S_SELF)}, 200*cdf0e10cSrcweir {GT2, {C_XX}, ACT(RSH, S_SELFB)}, 201*cdf0e10cSrcweir {GT2, {'='}, ACT(ASRSH, S_SELF)}, 202*cdf0e10cSrcweir 203*cdf0e10cSrcweir /* = */ 204*cdf0e10cSrcweir {ASG1, {C_XX}, ACT(ASGN, S_SELFB)}, 205*cdf0e10cSrcweir {ASG1, {'='}, ACT(EQ, S_SELF)}, 206*cdf0e10cSrcweir 207*cdf0e10cSrcweir /* ! */ 208*cdf0e10cSrcweir {NOT1, {C_XX}, ACT(NOT, S_SELFB)}, 209*cdf0e10cSrcweir {NOT1, {'='}, ACT(NEQ, S_SELF)}, 210*cdf0e10cSrcweir 211*cdf0e10cSrcweir /* & */ 212*cdf0e10cSrcweir {AND1, {C_XX}, ACT(AND, S_SELFB)}, 213*cdf0e10cSrcweir {AND1, {'&'}, ACT(LAND, S_SELF)}, 214*cdf0e10cSrcweir {AND1, {'='}, ACT(ASAND, S_SELF)}, 215*cdf0e10cSrcweir 216*cdf0e10cSrcweir /* | */ 217*cdf0e10cSrcweir {OR1, {C_XX}, ACT(OR, S_SELFB)}, 218*cdf0e10cSrcweir {OR1, {'|'}, ACT(LOR, S_SELF)}, 219*cdf0e10cSrcweir {OR1, {'='}, ACT(ASOR, S_SELF)}, 220*cdf0e10cSrcweir 221*cdf0e10cSrcweir /* # */ 222*cdf0e10cSrcweir {SHARP1, {C_XX}, ACT(SHARP, S_SELFB)}, 223*cdf0e10cSrcweir {SHARP1, {'#'}, ACT(DSHARP, S_SELF)}, 224*cdf0e10cSrcweir 225*cdf0e10cSrcweir /* % */ 226*cdf0e10cSrcweir {PCT1, {C_XX}, ACT(PCT, S_SELFB)}, 227*cdf0e10cSrcweir {PCT1, {'='}, ACT(ASPCT, S_SELF)}, 228*cdf0e10cSrcweir 229*cdf0e10cSrcweir /* * */ 230*cdf0e10cSrcweir {STAR1, {C_XX}, ACT(STAR, S_SELFB)}, 231*cdf0e10cSrcweir {STAR1, {'='}, ACT(ASSTAR, S_SELF)}, 232*cdf0e10cSrcweir 233*cdf0e10cSrcweir /* ^ */ 234*cdf0e10cSrcweir {CIRC1, {C_XX}, ACT(CIRC, S_SELFB)}, 235*cdf0e10cSrcweir {CIRC1, {'='}, ACT(ASCIRC, S_SELF)}, 236*cdf0e10cSrcweir 237*cdf0e10cSrcweir {-1, "", 0} 238*cdf0e10cSrcweir }; 239*cdf0e10cSrcweir 240*cdf0e10cSrcweir /* first index is char, second is state */ 241*cdf0e10cSrcweir /* increase #states to power of 2 to encourage use of shift */ 242*cdf0e10cSrcweir short bigfsm[256][MAXSTATE]; 243*cdf0e10cSrcweir 244*cdf0e10cSrcweir void 245*cdf0e10cSrcweir expandlex(void) 246*cdf0e10cSrcweir { 247*cdf0e10cSrcweir /* const */ struct fsm *fp; 248*cdf0e10cSrcweir int i, j, nstate; 249*cdf0e10cSrcweir 250*cdf0e10cSrcweir for (fp = fsm; fp->state >= 0; fp++) 251*cdf0e10cSrcweir { 252*cdf0e10cSrcweir for (i = 0; fp->ch[i]; i++) 253*cdf0e10cSrcweir { 254*cdf0e10cSrcweir nstate = fp->nextstate; 255*cdf0e10cSrcweir if (nstate >= S_SELF) 256*cdf0e10cSrcweir nstate = ~nstate; 257*cdf0e10cSrcweir switch (fp->ch[i]) 258*cdf0e10cSrcweir { 259*cdf0e10cSrcweir 260*cdf0e10cSrcweir case C_XX: /* random characters */ 261*cdf0e10cSrcweir for (j = 0; j < 256; j++) 262*cdf0e10cSrcweir bigfsm[j][fp->state] = (short) nstate; 263*cdf0e10cSrcweir continue; 264*cdf0e10cSrcweir case C_ALPH: 265*cdf0e10cSrcweir for (j = 0; j < 256; j++) 266*cdf0e10cSrcweir #ifdef S390 267*cdf0e10cSrcweir if( isalpha( j ) || (j == '_') ) 268*cdf0e10cSrcweir #else 269*cdf0e10cSrcweir if (('a' <= j && j <= 'z') || ('A' <= j && j <= 'Z') 270*cdf0e10cSrcweir || j == '_') 271*cdf0e10cSrcweir #endif 272*cdf0e10cSrcweir bigfsm[j][fp->state] = (short) nstate; 273*cdf0e10cSrcweir continue; 274*cdf0e10cSrcweir case C_NUM: 275*cdf0e10cSrcweir for (j = '0'; j <= '9'; j++) 276*cdf0e10cSrcweir bigfsm[j][fp->state] = (short) nstate; 277*cdf0e10cSrcweir continue; 278*cdf0e10cSrcweir default: 279*cdf0e10cSrcweir bigfsm[fp->ch[i]][fp->state] = (short) nstate; 280*cdf0e10cSrcweir } 281*cdf0e10cSrcweir } 282*cdf0e10cSrcweir } 283*cdf0e10cSrcweir 284*cdf0e10cSrcweir /* 285*cdf0e10cSrcweir * install special cases for ? (trigraphs), \ (splicing), runes, and 286*cdf0e10cSrcweir * EOB 287*cdf0e10cSrcweir */ 288*cdf0e10cSrcweir for (i = 0; i < MAXSTATE; i++) 289*cdf0e10cSrcweir { 290*cdf0e10cSrcweir for (j = 0; j < 0xFF; j++) 291*cdf0e10cSrcweir if (j == '?' || j == '\\' || j == '\n' || j == '\r') 292*cdf0e10cSrcweir { 293*cdf0e10cSrcweir if (bigfsm[j][i] > 0) 294*cdf0e10cSrcweir bigfsm[j][i] = ~bigfsm[j][i]; 295*cdf0e10cSrcweir bigfsm[j][i] &= ~QBSBIT; 296*cdf0e10cSrcweir } 297*cdf0e10cSrcweir bigfsm[EOB][i] = ~S_EOB; 298*cdf0e10cSrcweir if (bigfsm[EOFC][i] >= 0) 299*cdf0e10cSrcweir bigfsm[EOFC][i] = ~S_EOF; 300*cdf0e10cSrcweir } 301*cdf0e10cSrcweir } 302*cdf0e10cSrcweir 303*cdf0e10cSrcweir void 304*cdf0e10cSrcweir fixlex(void) 305*cdf0e10cSrcweir { 306*cdf0e10cSrcweir /* do C++ comments? */ 307*cdf0e10cSrcweir if ((Cplusplus == 0) || (Cflag != 0)) 308*cdf0e10cSrcweir bigfsm['/'][COM1] = bigfsm['x'][COM1]; 309*cdf0e10cSrcweir } 310*cdf0e10cSrcweir 311*cdf0e10cSrcweir /* 312*cdf0e10cSrcweir * fill in a row of tokens from input, terminated by NL or END 313*cdf0e10cSrcweir * First token is put at trp->lp. 314*cdf0e10cSrcweir * Reset is non-zero when the input buffer can be "rewound." 315*cdf0e10cSrcweir * The value is a flag indicating that possible macros have 316*cdf0e10cSrcweir * been seen in the row. 317*cdf0e10cSrcweir */ 318*cdf0e10cSrcweir int 319*cdf0e10cSrcweir gettokens(Tokenrow * trp, int reset) 320*cdf0e10cSrcweir { 321*cdf0e10cSrcweir register int c, state, oldstate; 322*cdf0e10cSrcweir register uchar *ip; 323*cdf0e10cSrcweir register Token *tp, *maxp; 324*cdf0e10cSrcweir int runelen; 325*cdf0e10cSrcweir Source *s = cursource; 326*cdf0e10cSrcweir int nmac = 0; 327*cdf0e10cSrcweir 328*cdf0e10cSrcweir tp = trp->lp; 329*cdf0e10cSrcweir ip = s->inp; 330*cdf0e10cSrcweir if (reset) 331*cdf0e10cSrcweir { 332*cdf0e10cSrcweir s->lineinc = 0; 333*cdf0e10cSrcweir if (ip >= s->inl) 334*cdf0e10cSrcweir { /* nothing in buffer */ 335*cdf0e10cSrcweir s->inl = s->inb; 336*cdf0e10cSrcweir fillbuf(s); 337*cdf0e10cSrcweir ip = s->inp = s->inb; 338*cdf0e10cSrcweir } 339*cdf0e10cSrcweir else 340*cdf0e10cSrcweir if (ip >= s->inb + (3 * INS / 4)) 341*cdf0e10cSrcweir { 342*cdf0e10cSrcweir memmove(s->inb, ip, 4 + s->inl - ip); 343*cdf0e10cSrcweir s->inl = s->inb + (s->inl - ip); 344*cdf0e10cSrcweir ip = s->inp = s->inb; 345*cdf0e10cSrcweir } 346*cdf0e10cSrcweir } 347*cdf0e10cSrcweir maxp = &trp->bp[trp->max]; 348*cdf0e10cSrcweir runelen = 1; 349*cdf0e10cSrcweir for (;;) 350*cdf0e10cSrcweir { 351*cdf0e10cSrcweir continue2: 352*cdf0e10cSrcweir if (tp >= maxp) 353*cdf0e10cSrcweir { 354*cdf0e10cSrcweir trp->lp = tp; 355*cdf0e10cSrcweir tp = growtokenrow(trp); 356*cdf0e10cSrcweir maxp = &trp->bp[trp->max]; 357*cdf0e10cSrcweir } 358*cdf0e10cSrcweir tp->type = UNCLASS; 359*cdf0e10cSrcweir tp->t = ip; 360*cdf0e10cSrcweir tp->wslen = 0; 361*cdf0e10cSrcweir tp->flag = 0; 362*cdf0e10cSrcweir state = START; 363*cdf0e10cSrcweir for (;;) 364*cdf0e10cSrcweir { 365*cdf0e10cSrcweir oldstate = state; 366*cdf0e10cSrcweir 367*cdf0e10cSrcweir c = *ip; 368*cdf0e10cSrcweir 369*cdf0e10cSrcweir if ((state = bigfsm[c][state]) >= 0) 370*cdf0e10cSrcweir { 371*cdf0e10cSrcweir ip += runelen; 372*cdf0e10cSrcweir runelen = 1; 373*cdf0e10cSrcweir continue; 374*cdf0e10cSrcweir } 375*cdf0e10cSrcweir state = ~state; 376*cdf0e10cSrcweir reswitch: 377*cdf0e10cSrcweir switch (state & 0177) 378*cdf0e10cSrcweir { 379*cdf0e10cSrcweir case S_SELF: 380*cdf0e10cSrcweir ip += runelen; 381*cdf0e10cSrcweir runelen = 1; 382*cdf0e10cSrcweir case S_SELFB: 383*cdf0e10cSrcweir tp->type = (unsigned char) GETACT(state); 384*cdf0e10cSrcweir tp->len = ip - tp->t; 385*cdf0e10cSrcweir tp++; 386*cdf0e10cSrcweir goto continue2; 387*cdf0e10cSrcweir 388*cdf0e10cSrcweir case S_NAME: /* like S_SELFB but with nmac check */ 389*cdf0e10cSrcweir tp->type = NAME; 390*cdf0e10cSrcweir tp->len = ip - tp->t; 391*cdf0e10cSrcweir nmac |= quicklook(tp->t[0], tp->len > 1 ? tp->t[1] : 0); 392*cdf0e10cSrcweir tp++; 393*cdf0e10cSrcweir goto continue2; 394*cdf0e10cSrcweir 395*cdf0e10cSrcweir case S_WS: 396*cdf0e10cSrcweir tp->wslen = ip - tp->t; 397*cdf0e10cSrcweir tp->t = ip; 398*cdf0e10cSrcweir state = START; 399*cdf0e10cSrcweir continue; 400*cdf0e10cSrcweir 401*cdf0e10cSrcweir default: 402*cdf0e10cSrcweir if ((state & QBSBIT) == 0) 403*cdf0e10cSrcweir { 404*cdf0e10cSrcweir ip += runelen; 405*cdf0e10cSrcweir runelen = 1; 406*cdf0e10cSrcweir continue; 407*cdf0e10cSrcweir } 408*cdf0e10cSrcweir state &= ~QBSBIT; 409*cdf0e10cSrcweir s->inp = ip; 410*cdf0e10cSrcweir 411*cdf0e10cSrcweir if (c == '\n') 412*cdf0e10cSrcweir { 413*cdf0e10cSrcweir while (s->inp + 1 >= s->inl && fillbuf(s) != EOF); 414*cdf0e10cSrcweir 415*cdf0e10cSrcweir if (s->inp[1] == '\r') 416*cdf0e10cSrcweir { 417*cdf0e10cSrcweir memmove(s->inp + 1, s->inp + 2, s->inl - s->inp + 2); 418*cdf0e10cSrcweir s->inl -= 1; 419*cdf0e10cSrcweir } 420*cdf0e10cSrcweir 421*cdf0e10cSrcweir goto reswitch; 422*cdf0e10cSrcweir } 423*cdf0e10cSrcweir 424*cdf0e10cSrcweir if (c == '\r') 425*cdf0e10cSrcweir { 426*cdf0e10cSrcweir while (s->inp + 1 >= s->inl && fillbuf(s) != EOF); 427*cdf0e10cSrcweir 428*cdf0e10cSrcweir if (s->inp[1] == '\n') 429*cdf0e10cSrcweir { 430*cdf0e10cSrcweir memmove(s->inp, s->inp + 1, s->inl - s->inp + 1); 431*cdf0e10cSrcweir s->inl -= 1; 432*cdf0e10cSrcweir } 433*cdf0e10cSrcweir else 434*cdf0e10cSrcweir *s->inp = '\n'; 435*cdf0e10cSrcweir 436*cdf0e10cSrcweir state = oldstate; 437*cdf0e10cSrcweir continue; 438*cdf0e10cSrcweir } 439*cdf0e10cSrcweir 440*cdf0e10cSrcweir if (c == '?') 441*cdf0e10cSrcweir { /* check trigraph */ 442*cdf0e10cSrcweir if (trigraph(s)) 443*cdf0e10cSrcweir { 444*cdf0e10cSrcweir state = oldstate; 445*cdf0e10cSrcweir continue; 446*cdf0e10cSrcweir } 447*cdf0e10cSrcweir goto reswitch; 448*cdf0e10cSrcweir } 449*cdf0e10cSrcweir if (c == '\\') 450*cdf0e10cSrcweir { /* line-folding */ 451*cdf0e10cSrcweir if (foldline(s)) 452*cdf0e10cSrcweir { 453*cdf0e10cSrcweir s->lineinc++; 454*cdf0e10cSrcweir state = oldstate; 455*cdf0e10cSrcweir continue; 456*cdf0e10cSrcweir } 457*cdf0e10cSrcweir goto reswitch; 458*cdf0e10cSrcweir } 459*cdf0e10cSrcweir error(WARNING, "Lexical botch in cpp"); 460*cdf0e10cSrcweir ip += runelen; 461*cdf0e10cSrcweir runelen = 1; 462*cdf0e10cSrcweir continue; 463*cdf0e10cSrcweir 464*cdf0e10cSrcweir case S_EOB: 465*cdf0e10cSrcweir s->inp = ip; 466*cdf0e10cSrcweir fillbuf(cursource); 467*cdf0e10cSrcweir state = oldstate; 468*cdf0e10cSrcweir continue; 469*cdf0e10cSrcweir 470*cdf0e10cSrcweir case S_EOF: 471*cdf0e10cSrcweir tp->type = END; 472*cdf0e10cSrcweir tp->len = 0; 473*cdf0e10cSrcweir s->inp = ip; 474*cdf0e10cSrcweir if (tp != trp->bp && (tp - 1)->type != NL && cursource->fd != -1) 475*cdf0e10cSrcweir error(WARNING, "No newline at end of file"); 476*cdf0e10cSrcweir trp->lp = tp + 1; 477*cdf0e10cSrcweir return nmac; 478*cdf0e10cSrcweir 479*cdf0e10cSrcweir case S_STNL: 480*cdf0e10cSrcweir error(ERROR, "Unterminated string or char const"); 481*cdf0e10cSrcweir case S_NL: 482*cdf0e10cSrcweir tp->t = ip; 483*cdf0e10cSrcweir tp->type = NL; 484*cdf0e10cSrcweir tp->len = 1; 485*cdf0e10cSrcweir tp->wslen = 0; 486*cdf0e10cSrcweir s->lineinc++; 487*cdf0e10cSrcweir s->inp = ip + 1; 488*cdf0e10cSrcweir trp->lp = tp + 1; 489*cdf0e10cSrcweir return nmac; 490*cdf0e10cSrcweir 491*cdf0e10cSrcweir case S_EOFSTR: 492*cdf0e10cSrcweir error(FATAL, "EOF in string or char constant"); 493*cdf0e10cSrcweir break; 494*cdf0e10cSrcweir 495*cdf0e10cSrcweir case S_COMNL: 496*cdf0e10cSrcweir s->lineinc++; 497*cdf0e10cSrcweir state = COM2; 498*cdf0e10cSrcweir ip += runelen; 499*cdf0e10cSrcweir runelen = 1; 500*cdf0e10cSrcweir continue; 501*cdf0e10cSrcweir 502*cdf0e10cSrcweir case S_EOFCOM: 503*cdf0e10cSrcweir error(WARNING, "EOF inside comment"); 504*cdf0e10cSrcweir --ip; 505*cdf0e10cSrcweir case S_COMMENT: 506*cdf0e10cSrcweir if (!Cflag) 507*cdf0e10cSrcweir { 508*cdf0e10cSrcweir tp->t = ++ip; 509*cdf0e10cSrcweir tp->t[-1] = ' '; 510*cdf0e10cSrcweir tp->wslen = 1; 511*cdf0e10cSrcweir state = START; 512*cdf0e10cSrcweir continue; 513*cdf0e10cSrcweir } 514*cdf0e10cSrcweir else 515*cdf0e10cSrcweir { 516*cdf0e10cSrcweir runelen = 1; 517*cdf0e10cSrcweir s->lineinc = 0;; 518*cdf0e10cSrcweir tp->type = COMMENT; 519*cdf0e10cSrcweir tp->flag |= XTWS; 520*cdf0e10cSrcweir } 521*cdf0e10cSrcweir } 522*cdf0e10cSrcweir break; 523*cdf0e10cSrcweir } 524*cdf0e10cSrcweir ip += runelen; 525*cdf0e10cSrcweir runelen = 1; 526*cdf0e10cSrcweir tp->len = ip - tp->t; 527*cdf0e10cSrcweir tp++; 528*cdf0e10cSrcweir } 529*cdf0e10cSrcweir } 530*cdf0e10cSrcweir 531*cdf0e10cSrcweir /* have seen ?; handle the trigraph it starts (if any) else 0 */ 532*cdf0e10cSrcweir int 533*cdf0e10cSrcweir trigraph(Source * s) 534*cdf0e10cSrcweir { 535*cdf0e10cSrcweir uchar c; 536*cdf0e10cSrcweir 537*cdf0e10cSrcweir while (s->inp + 2 >= s->inl && fillbuf(s) != EOF); 538*cdf0e10cSrcweir ; 539*cdf0e10cSrcweir if (s->inp[1] != '?') 540*cdf0e10cSrcweir return 0; 541*cdf0e10cSrcweir c = 0; 542*cdf0e10cSrcweir switch (s->inp[2]) 543*cdf0e10cSrcweir { 544*cdf0e10cSrcweir case '=': 545*cdf0e10cSrcweir c = '#'; 546*cdf0e10cSrcweir break; 547*cdf0e10cSrcweir case '(': 548*cdf0e10cSrcweir c = '['; 549*cdf0e10cSrcweir break; 550*cdf0e10cSrcweir case '/': 551*cdf0e10cSrcweir c = '\\'; 552*cdf0e10cSrcweir break; 553*cdf0e10cSrcweir case ')': 554*cdf0e10cSrcweir c = ']'; 555*cdf0e10cSrcweir break; 556*cdf0e10cSrcweir case '\'': 557*cdf0e10cSrcweir c = '^'; 558*cdf0e10cSrcweir break; 559*cdf0e10cSrcweir case '<': 560*cdf0e10cSrcweir c = '{'; 561*cdf0e10cSrcweir break; 562*cdf0e10cSrcweir case '!': 563*cdf0e10cSrcweir c = '|'; 564*cdf0e10cSrcweir break; 565*cdf0e10cSrcweir case '>': 566*cdf0e10cSrcweir c = '}'; 567*cdf0e10cSrcweir break; 568*cdf0e10cSrcweir case '-': 569*cdf0e10cSrcweir c = '~'; 570*cdf0e10cSrcweir break; 571*cdf0e10cSrcweir } 572*cdf0e10cSrcweir if (c) 573*cdf0e10cSrcweir { 574*cdf0e10cSrcweir *s->inp = c; 575*cdf0e10cSrcweir memmove(s->inp + 1, s->inp + 3, s->inl - s->inp + 2); 576*cdf0e10cSrcweir s->inl -= 2; 577*cdf0e10cSrcweir } 578*cdf0e10cSrcweir return c; 579*cdf0e10cSrcweir } 580*cdf0e10cSrcweir 581*cdf0e10cSrcweir int 582*cdf0e10cSrcweir foldline(Source * s) 583*cdf0e10cSrcweir { 584*cdf0e10cSrcweir int n = 1; 585*cdf0e10cSrcweir 586*cdf0e10cSrcweir /* skip pending wihite spaces */ 587*cdf0e10cSrcweir while ((s->inp[n] == ' ') || (s->inp[n] == '\t')) 588*cdf0e10cSrcweir { 589*cdf0e10cSrcweir n++; 590*cdf0e10cSrcweir if ((s->inp + n >= s->inl) && (fillbuf(s) == EOF)) 591*cdf0e10cSrcweir break; 592*cdf0e10cSrcweir } 593*cdf0e10cSrcweir 594*cdf0e10cSrcweir /* refill buffer */ 595*cdf0e10cSrcweir while (s->inp + (n + 1) >= s->inl && fillbuf(s) != EOF); 596*cdf0e10cSrcweir 597*cdf0e10cSrcweir /* skip DOS line ends */ 598*cdf0e10cSrcweir if (((s->inp[n] == '\r') && (s->inp[n+1] == '\n')) || 599*cdf0e10cSrcweir ((s->inp[n] == '\n') && (s->inp[n+1] == '\r'))) 600*cdf0e10cSrcweir n++; 601*cdf0e10cSrcweir 602*cdf0e10cSrcweir if ((s->inp[n] == '\n') || (s->inp[n] == '\r')) 603*cdf0e10cSrcweir { 604*cdf0e10cSrcweir memmove(s->inp, s->inp + n + 1, s->inl - s->inp + n + 2); 605*cdf0e10cSrcweir s->inl -= n + 1; 606*cdf0e10cSrcweir return 1; 607*cdf0e10cSrcweir } 608*cdf0e10cSrcweir return 0; 609*cdf0e10cSrcweir } 610*cdf0e10cSrcweir 611*cdf0e10cSrcweir int 612*cdf0e10cSrcweir fillbuf(Source * s) 613*cdf0e10cSrcweir { 614*cdf0e10cSrcweir int n; 615*cdf0e10cSrcweir 616*cdf0e10cSrcweir if (s->fd < 0 || (n = read(s->fd, (char *) s->inl, INS / 8)) <= 0) 617*cdf0e10cSrcweir n = 0; 618*cdf0e10cSrcweir s->inl += n; 619*cdf0e10cSrcweir s->inl[0] = s->inl[1] = s->inl[2] = s->inl[3] = EOB; 620*cdf0e10cSrcweir if (n == 0) 621*cdf0e10cSrcweir { 622*cdf0e10cSrcweir s->inl[0] = s->inl[1] = s->inl[2] = s->inl[3] = EOFC; 623*cdf0e10cSrcweir return EOF; 624*cdf0e10cSrcweir } 625*cdf0e10cSrcweir return 0; 626*cdf0e10cSrcweir } 627*cdf0e10cSrcweir 628*cdf0e10cSrcweir /* 629*cdf0e10cSrcweir * Push down to new source of characters. 630*cdf0e10cSrcweir * If fd>0 and str==NULL, then from a file `name'; 631*cdf0e10cSrcweir * if fd==-1 and str, then from the string. 632*cdf0e10cSrcweir */ 633*cdf0e10cSrcweir Source * 634*cdf0e10cSrcweir setsource(char *name, int path, int fd, char *str, int wrap) 635*cdf0e10cSrcweir { 636*cdf0e10cSrcweir Source *s = new(Source); 637*cdf0e10cSrcweir int len; 638*cdf0e10cSrcweir 639*cdf0e10cSrcweir s->line = 1; 640*cdf0e10cSrcweir s->lineinc = 0; 641*cdf0e10cSrcweir s->fd = fd; 642*cdf0e10cSrcweir s->filename = name; 643*cdf0e10cSrcweir s->next = cursource; 644*cdf0e10cSrcweir s->ifdepth = 0; 645*cdf0e10cSrcweir s->pathdepth = path; 646*cdf0e10cSrcweir s->wrap = wrap; 647*cdf0e10cSrcweir 648*cdf0e10cSrcweir cursource = s; 649*cdf0e10cSrcweir 650*cdf0e10cSrcweir if (s->wrap) 651*cdf0e10cSrcweir genwrap(0); 652*cdf0e10cSrcweir 653*cdf0e10cSrcweir /* slop at right for EOB */ 654*cdf0e10cSrcweir if (str) 655*cdf0e10cSrcweir { 656*cdf0e10cSrcweir len = strlen(str); 657*cdf0e10cSrcweir s->inb = domalloc(len + 4); 658*cdf0e10cSrcweir s->inp = s->inb; 659*cdf0e10cSrcweir strncpy((char *) s->inp, str, len); 660*cdf0e10cSrcweir } 661*cdf0e10cSrcweir else 662*cdf0e10cSrcweir { 663*cdf0e10cSrcweir s->inb = domalloc(INS + 4); 664*cdf0e10cSrcweir s->inp = s->inb; 665*cdf0e10cSrcweir len = 0; 666*cdf0e10cSrcweir } 667*cdf0e10cSrcweir s->inl = s->inp + len; 668*cdf0e10cSrcweir s->inl[0] = s->inl[1] = EOB; 669*cdf0e10cSrcweir 670*cdf0e10cSrcweir return s; 671*cdf0e10cSrcweir } 672*cdf0e10cSrcweir 673*cdf0e10cSrcweir void 674*cdf0e10cSrcweir unsetsource(void) 675*cdf0e10cSrcweir { 676*cdf0e10cSrcweir Source *s = cursource; 677*cdf0e10cSrcweir 678*cdf0e10cSrcweir if (s->wrap) 679*cdf0e10cSrcweir genwrap(1); 680*cdf0e10cSrcweir 681*cdf0e10cSrcweir if (s->fd >= 0) 682*cdf0e10cSrcweir { 683*cdf0e10cSrcweir close(s->fd); 684*cdf0e10cSrcweir dofree(s->inb); 685*cdf0e10cSrcweir } 686*cdf0e10cSrcweir cursource = s->next; 687*cdf0e10cSrcweir dofree(s); 688*cdf0e10cSrcweir } 689