1*7ce20373SAndrew Rist /************************************************************** 2*7ce20373SAndrew Rist * 3*7ce20373SAndrew Rist * Licensed to the Apache Software Foundation (ASF) under one 4*7ce20373SAndrew Rist * or more contributor license agreements. See the NOTICE file 5*7ce20373SAndrew Rist * distributed with this work for additional information 6*7ce20373SAndrew Rist * regarding copyright ownership. The ASF licenses this file 7*7ce20373SAndrew Rist * to you under the Apache License, Version 2.0 (the 8*7ce20373SAndrew Rist * "License"); you may not use this file except in compliance 9*7ce20373SAndrew Rist * with the License. You may obtain a copy of the License at 10*7ce20373SAndrew Rist * 11*7ce20373SAndrew Rist * http://www.apache.org/licenses/LICENSE-2.0 12*7ce20373SAndrew Rist * 13*7ce20373SAndrew Rist * Unless required by applicable law or agreed to in writing, 14*7ce20373SAndrew Rist * software distributed under the License is distributed on an 15*7ce20373SAndrew Rist * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16*7ce20373SAndrew Rist * KIND, either express or implied. See the License for the 17*7ce20373SAndrew Rist * specific language governing permissions and limitations 18*7ce20373SAndrew Rist * under the License. 19*7ce20373SAndrew Rist * 20*7ce20373SAndrew Rist *************************************************************/ 21*7ce20373SAndrew Rist 22cdf0e10cSrcweir #include <stdio.h> 23cdf0e10cSrcweir #include <stdlib.h> 24cdf0e10cSrcweir #include <string.h> 25cdf0e10cSrcweir #if (defined(_WIN32) || defined(_MSDOS) || defined(__IBMC__)) 26cdf0e10cSrcweir #include <io.h> 27cdf0e10cSrcweir #else 28cdf0e10cSrcweir #include <unistd.h> 29cdf0e10cSrcweir #endif 30cdf0e10cSrcweir #include "cpp.h" 31cdf0e10cSrcweir /* 32cdf0e10cSrcweir * lexical FSM encoding 33cdf0e10cSrcweir * when in state state, and one of the characters 34cdf0e10cSrcweir * in ch arrives, enter nextstate. 35cdf0e10cSrcweir * States >= S_SELF are either final, or at least require special action. 36cdf0e10cSrcweir * In 'fsm' there is a line for each state X charset X nextstate. 37cdf0e10cSrcweir * List chars that overwrite previous entries later (e.g. C_ALPH 38cdf0e10cSrcweir * can be overridden by '_' by a later entry; and C_XX is the 39cdf0e10cSrcweir * the universal set, and should always be first. 40cdf0e10cSrcweir * States above S_SELF are represented in the big table as negative values. 41cdf0e10cSrcweir * S_SELF and S_SELFB encode the resulting token type in the upper bits. 42cdf0e10cSrcweir * These actions differ in that S_SELF doesn't have a lookahead char, 43cdf0e10cSrcweir * S_SELFB does. 44cdf0e10cSrcweir * 45cdf0e10cSrcweir * The encoding is blown out into a big table for time-efficiency. 46cdf0e10cSrcweir * Entries have 47cdf0e10cSrcweir * nextstate: 6 bits; ?\ marker: 1 bit; tokentype: 9 bits. 48cdf0e10cSrcweir */ 49cdf0e10cSrcweir 50cdf0e10cSrcweir #define MAXSTATE 32 51cdf0e10cSrcweir #define ACT(tok,act) ((tok<<7)+act) 52cdf0e10cSrcweir #define QBSBIT 0100 53cdf0e10cSrcweir #define GETACT(st) ((st>>7)&0x1ff) 54cdf0e10cSrcweir 55cdf0e10cSrcweir /* character classes */ 56cdf0e10cSrcweir #define C_WS 1 57cdf0e10cSrcweir #define C_ALPH 2 58cdf0e10cSrcweir #define C_NUM 3 59cdf0e10cSrcweir #define C_EOF 4 60cdf0e10cSrcweir #define C_XX 5 61cdf0e10cSrcweir 62cdf0e10cSrcweir enum state 63cdf0e10cSrcweir { 64cdf0e10cSrcweir START = 0, NUM1, NUM2, NUM3, ID1, ST1, ST2, ST3, COM1, COM2, COM3, COM4, 65cdf0e10cSrcweir CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1, 66cdf0e10cSrcweir CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1, 67cdf0e10cSrcweir S_SELF = MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR, 68cdf0e10cSrcweir S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME 69cdf0e10cSrcweir }; 70cdf0e10cSrcweir 71cdf0e10cSrcweir int tottok; 72cdf0e10cSrcweir int tokkind[256]; 73cdf0e10cSrcweir struct fsm 74cdf0e10cSrcweir { 75cdf0e10cSrcweir int state; /* if in this state */ 76cdf0e10cSrcweir uchar ch[4]; /* and see one of these characters */ 77cdf0e10cSrcweir int nextstate; /* enter this state if +ve */ 78cdf0e10cSrcweir }; 79cdf0e10cSrcweir 80cdf0e10cSrcweir /*const*/ struct fsm fsm[] = { 81cdf0e10cSrcweir /* start state */ 82cdf0e10cSrcweir {START, {C_XX}, ACT(UNCLASS, S_SELF)}, 83cdf0e10cSrcweir {START, {' ', '\t', '\v'}, WS1}, 84cdf0e10cSrcweir {START, {C_NUM}, NUM1}, 85cdf0e10cSrcweir {START, {'.'}, NUM3}, 86cdf0e10cSrcweir {START, {C_ALPH}, ID1}, 87cdf0e10cSrcweir {START, {'L'}, ST1}, 88cdf0e10cSrcweir {START, {'"'}, ST2}, 89cdf0e10cSrcweir {START, {'\''}, CC1}, 90cdf0e10cSrcweir {START, {'/'}, COM1}, 91cdf0e10cSrcweir {START, {EOFC}, S_EOF}, 92cdf0e10cSrcweir {START, {'\n'}, S_NL}, 93cdf0e10cSrcweir {START, {'-'}, MINUS1}, 94cdf0e10cSrcweir {START, {'+'}, PLUS1}, 95cdf0e10cSrcweir {START, {'<'}, LT1}, 96cdf0e10cSrcweir {START, {'>'}, GT1}, 97cdf0e10cSrcweir {START, {'='}, ASG1}, 98cdf0e10cSrcweir {START, {'!'}, NOT1}, 99cdf0e10cSrcweir {START, {'&'}, AND1}, 100cdf0e10cSrcweir {START, {'|'}, OR1}, 101cdf0e10cSrcweir {START, {'#'}, SHARP1}, 102cdf0e10cSrcweir {START, {'%'}, PCT1}, 103cdf0e10cSrcweir {START, {'['}, ACT(SBRA, S_SELF)}, 104cdf0e10cSrcweir {START, {']'}, ACT(SKET, S_SELF)}, 105cdf0e10cSrcweir {START, {'('}, ACT(LP, S_SELF)}, 106cdf0e10cSrcweir {START, {')'}, ACT(RP, S_SELF)}, 107cdf0e10cSrcweir {START, {'*'}, STAR1}, 108cdf0e10cSrcweir {START, {','}, ACT(COMMA, S_SELF)}, 109cdf0e10cSrcweir {START, {'?'}, ACT(QUEST, S_SELF)}, 110cdf0e10cSrcweir {START, {':'}, ACT(COLON, S_SELF)}, 111cdf0e10cSrcweir {START, {';'}, ACT(SEMIC, S_SELF)}, 112cdf0e10cSrcweir {START, {'{'}, ACT(CBRA, S_SELF)}, 113cdf0e10cSrcweir {START, {'}'}, ACT(CKET, S_SELF)}, 114cdf0e10cSrcweir {START, {'~'}, ACT(TILDE, S_SELF)}, 115cdf0e10cSrcweir {START, {'^'}, CIRC1}, 116cdf0e10cSrcweir 117cdf0e10cSrcweir /* saw a digit */ 118cdf0e10cSrcweir {NUM1, {C_XX}, ACT(NUMBER, S_SELFB)}, 119cdf0e10cSrcweir {NUM1, {C_NUM, C_ALPH, '.'}, NUM1}, 120cdf0e10cSrcweir {NUM1, {'E', 'e'}, NUM2}, 121cdf0e10cSrcweir {NUM1, {'_'}, ACT(NUMBER, S_SELFB)}, 122cdf0e10cSrcweir 123cdf0e10cSrcweir /* saw possible start of exponent, digits-e */ 124cdf0e10cSrcweir {NUM2, {C_XX}, ACT(NUMBER, S_SELFB)}, 125cdf0e10cSrcweir {NUM2, {'+', '-'}, NUM1}, 126cdf0e10cSrcweir {NUM2, {C_NUM, C_ALPH}, NUM1}, 127cdf0e10cSrcweir {NUM2, {'_'}, ACT(NUMBER, S_SELFB)}, 128cdf0e10cSrcweir 129cdf0e10cSrcweir /* saw a '.', which could be a number or an operator */ 130cdf0e10cSrcweir {NUM3, {C_XX}, ACT(DOT, S_SELFB)}, 131cdf0e10cSrcweir {NUM3, {'.'}, DOTS1}, 132cdf0e10cSrcweir {NUM3, {C_NUM}, NUM1}, 133cdf0e10cSrcweir 134cdf0e10cSrcweir {DOTS1, {C_XX}, ACT(UNCLASS, S_SELFB)}, 135cdf0e10cSrcweir {DOTS1, {C_NUM}, NUM1}, 136cdf0e10cSrcweir {DOTS1, {'.'}, ACT(ELLIPS, S_SELF)}, 137cdf0e10cSrcweir 138cdf0e10cSrcweir /* saw a letter or _ */ 139cdf0e10cSrcweir {ID1, {C_XX}, ACT(NAME, S_NAME)}, 140cdf0e10cSrcweir {ID1, {C_ALPH, C_NUM}, ID1}, 141cdf0e10cSrcweir 142cdf0e10cSrcweir /* saw L (start of wide string?) */ 143cdf0e10cSrcweir {ST1, {C_XX}, ACT(NAME, S_NAME)}, 144cdf0e10cSrcweir {ST1, {C_ALPH, C_NUM}, ID1}, 145cdf0e10cSrcweir {ST1, {'"'}, ST2}, 146cdf0e10cSrcweir {ST1, {'\''}, CC1}, 147cdf0e10cSrcweir 148cdf0e10cSrcweir /* saw " beginning string */ 149cdf0e10cSrcweir {ST2, {C_XX}, ST2}, 150cdf0e10cSrcweir {ST2, {'"'}, ACT(STRING, S_SELF)}, 151cdf0e10cSrcweir {ST2, {'\\'}, ST3}, 152cdf0e10cSrcweir {ST2, {'\n'}, S_STNL}, 153cdf0e10cSrcweir {ST2, {EOFC}, S_EOFSTR}, 154cdf0e10cSrcweir 155cdf0e10cSrcweir /* saw \ in string */ 156cdf0e10cSrcweir {ST3, {C_XX}, ST2}, 157cdf0e10cSrcweir {ST3, {'\n'}, S_STNL}, 158cdf0e10cSrcweir {ST3, {EOFC}, S_EOFSTR}, 159cdf0e10cSrcweir 160cdf0e10cSrcweir /* saw ' beginning character const */ 161cdf0e10cSrcweir {CC1, {C_XX}, CC1}, 162cdf0e10cSrcweir {CC1, {'\''}, ACT(CCON, S_SELF)}, 163cdf0e10cSrcweir {CC1, {'\\'}, CC2}, 164cdf0e10cSrcweir {CC1, {'\n'}, S_STNL}, 165cdf0e10cSrcweir {CC1, {EOFC}, S_EOFSTR}, 166cdf0e10cSrcweir 167cdf0e10cSrcweir /* saw \ in ccon */ 168cdf0e10cSrcweir {CC2, {C_XX}, CC1}, 169cdf0e10cSrcweir {CC2, {'\n'}, S_STNL}, 170cdf0e10cSrcweir {CC2, {EOFC}, S_EOFSTR}, 171cdf0e10cSrcweir 172cdf0e10cSrcweir /* saw /, perhaps start of comment */ 173cdf0e10cSrcweir {COM1, {C_XX}, ACT(SLASH, S_SELFB)}, 174cdf0e10cSrcweir {COM1, {'='}, ACT(ASSLASH, S_SELF)}, 175cdf0e10cSrcweir {COM1, {'*'}, COM2}, 176cdf0e10cSrcweir {COM1, {'/'}, COM4}, 177cdf0e10cSrcweir 178cdf0e10cSrcweir /* saw / followed by *, start of comment */ 179cdf0e10cSrcweir {COM2, {C_XX}, COM2}, 180cdf0e10cSrcweir {COM2, {'\n'}, S_COMNL}, 181cdf0e10cSrcweir {COM2, {'*'}, COM3}, 182cdf0e10cSrcweir {COM2, {EOFC}, S_EOFCOM}, 183cdf0e10cSrcweir 184cdf0e10cSrcweir /* saw the * possibly ending a comment */ 185cdf0e10cSrcweir {COM3, {C_XX}, COM2}, 186cdf0e10cSrcweir {COM3, {'\n'}, S_COMNL}, 187cdf0e10cSrcweir {COM3, {'*'}, COM3}, 188cdf0e10cSrcweir {COM3, {'/'}, S_COMMENT}, 189cdf0e10cSrcweir 190cdf0e10cSrcweir /* // comment */ 191cdf0e10cSrcweir {COM4, {C_XX}, COM4}, 192cdf0e10cSrcweir {COM4, {'\n'}, S_NL}, 193cdf0e10cSrcweir {COM4, {EOFC}, S_EOFCOM}, 194cdf0e10cSrcweir 195cdf0e10cSrcweir /* saw white space, eat it up */ 196cdf0e10cSrcweir {WS1, {C_XX}, S_WS}, 197cdf0e10cSrcweir {WS1, {'\t', '\v', ' '}, WS1}, 198cdf0e10cSrcweir 199cdf0e10cSrcweir /* saw -, check --, -=, -> */ 200cdf0e10cSrcweir {MINUS1, {C_XX}, ACT(MINUS, S_SELFB)}, 201cdf0e10cSrcweir {MINUS1, {'-'}, ACT(MMINUS, S_SELF)}, 202cdf0e10cSrcweir {MINUS1, {'='}, ACT(ASMINUS, S_SELF)}, 203cdf0e10cSrcweir {MINUS1, {'>'}, ACT(ARROW, S_SELF)}, 204cdf0e10cSrcweir 205cdf0e10cSrcweir /* saw +, check ++, += */ 206cdf0e10cSrcweir {PLUS1, {C_XX}, ACT(PLUS, S_SELFB)}, 207cdf0e10cSrcweir {PLUS1, {'+'}, ACT(PPLUS, S_SELF)}, 208cdf0e10cSrcweir {PLUS1, {'='}, ACT(ASPLUS, S_SELF)}, 209cdf0e10cSrcweir 210cdf0e10cSrcweir /* saw <, check <<, <<=, <= */ 211cdf0e10cSrcweir {LT1, {C_XX}, ACT(LT, S_SELFB)}, 212cdf0e10cSrcweir {LT1, {'<'}, LT2}, 213cdf0e10cSrcweir {LT1, {'='}, ACT(LEQ, S_SELF)}, 214cdf0e10cSrcweir {LT2, {C_XX}, ACT(LSH, S_SELFB)}, 215cdf0e10cSrcweir {LT2, {'='}, ACT(ASLSH, S_SELF)}, 216cdf0e10cSrcweir 217cdf0e10cSrcweir /* saw >, check >>, >>=, >= */ 218cdf0e10cSrcweir {GT1, {C_XX}, ACT(GT, S_SELFB)}, 219cdf0e10cSrcweir {GT1, {'>'}, GT2}, 220cdf0e10cSrcweir {GT1, {'='}, ACT(GEQ, S_SELF)}, 221cdf0e10cSrcweir {GT2, {C_XX}, ACT(RSH, S_SELFB)}, 222cdf0e10cSrcweir {GT2, {'='}, ACT(ASRSH, S_SELF)}, 223cdf0e10cSrcweir 224cdf0e10cSrcweir /* = */ 225cdf0e10cSrcweir {ASG1, {C_XX}, ACT(ASGN, S_SELFB)}, 226cdf0e10cSrcweir {ASG1, {'='}, ACT(EQ, S_SELF)}, 227cdf0e10cSrcweir 228cdf0e10cSrcweir /* ! */ 229cdf0e10cSrcweir {NOT1, {C_XX}, ACT(NOT, S_SELFB)}, 230cdf0e10cSrcweir {NOT1, {'='}, ACT(NEQ, S_SELF)}, 231cdf0e10cSrcweir 232cdf0e10cSrcweir /* & */ 233cdf0e10cSrcweir {AND1, {C_XX}, ACT(AND, S_SELFB)}, 234cdf0e10cSrcweir {AND1, {'&'}, ACT(LAND, S_SELF)}, 235cdf0e10cSrcweir {AND1, {'='}, ACT(ASAND, S_SELF)}, 236cdf0e10cSrcweir 237cdf0e10cSrcweir /* | */ 238cdf0e10cSrcweir {OR1, {C_XX}, ACT(OR, S_SELFB)}, 239cdf0e10cSrcweir {OR1, {'|'}, ACT(LOR, S_SELF)}, 240cdf0e10cSrcweir {OR1, {'='}, ACT(ASOR, S_SELF)}, 241cdf0e10cSrcweir 242cdf0e10cSrcweir /* # */ 243cdf0e10cSrcweir {SHARP1, {C_XX}, ACT(SHARP, S_SELFB)}, 244cdf0e10cSrcweir {SHARP1, {'#'}, ACT(DSHARP, S_SELF)}, 245cdf0e10cSrcweir 246cdf0e10cSrcweir /* % */ 247cdf0e10cSrcweir {PCT1, {C_XX}, ACT(PCT, S_SELFB)}, 248cdf0e10cSrcweir {PCT1, {'='}, ACT(ASPCT, S_SELF)}, 249cdf0e10cSrcweir 250cdf0e10cSrcweir /* * */ 251cdf0e10cSrcweir {STAR1, {C_XX}, ACT(STAR, S_SELFB)}, 252cdf0e10cSrcweir {STAR1, {'='}, ACT(ASSTAR, S_SELF)}, 253cdf0e10cSrcweir 254cdf0e10cSrcweir /* ^ */ 255cdf0e10cSrcweir {CIRC1, {C_XX}, ACT(CIRC, S_SELFB)}, 256cdf0e10cSrcweir {CIRC1, {'='}, ACT(ASCIRC, S_SELF)}, 257cdf0e10cSrcweir 258cdf0e10cSrcweir {-1, "", 0} 259cdf0e10cSrcweir }; 260cdf0e10cSrcweir 261cdf0e10cSrcweir /* first index is char, second is state */ 262cdf0e10cSrcweir /* increase #states to power of 2 to encourage use of shift */ 263cdf0e10cSrcweir short bigfsm[256][MAXSTATE]; 264cdf0e10cSrcweir 265cdf0e10cSrcweir void expandlex(void)266cdf0e10cSrcweir expandlex(void) 267cdf0e10cSrcweir { 268cdf0e10cSrcweir /* const */ struct fsm *fp; 269cdf0e10cSrcweir int i, j, nstate; 270cdf0e10cSrcweir 271cdf0e10cSrcweir for (fp = fsm; fp->state >= 0; fp++) 272cdf0e10cSrcweir { 273cdf0e10cSrcweir for (i = 0; fp->ch[i]; i++) 274cdf0e10cSrcweir { 275cdf0e10cSrcweir nstate = fp->nextstate; 276cdf0e10cSrcweir if (nstate >= S_SELF) 277cdf0e10cSrcweir nstate = ~nstate; 278cdf0e10cSrcweir switch (fp->ch[i]) 279cdf0e10cSrcweir { 280cdf0e10cSrcweir 281cdf0e10cSrcweir case C_XX: /* random characters */ 282cdf0e10cSrcweir for (j = 0; j < 256; j++) 283cdf0e10cSrcweir bigfsm[j][fp->state] = (short) nstate; 284cdf0e10cSrcweir continue; 285cdf0e10cSrcweir case C_ALPH: 286cdf0e10cSrcweir for (j = 0; j < 256; j++) 287cdf0e10cSrcweir #ifdef S390 288cdf0e10cSrcweir if( isalpha( j ) || (j == '_') ) 289cdf0e10cSrcweir #else 290cdf0e10cSrcweir if (('a' <= j && j <= 'z') || ('A' <= j && j <= 'Z') 291cdf0e10cSrcweir || j == '_') 292cdf0e10cSrcweir #endif 293cdf0e10cSrcweir bigfsm[j][fp->state] = (short) nstate; 294cdf0e10cSrcweir continue; 295cdf0e10cSrcweir case C_NUM: 296cdf0e10cSrcweir for (j = '0'; j <= '9'; j++) 297cdf0e10cSrcweir bigfsm[j][fp->state] = (short) nstate; 298cdf0e10cSrcweir continue; 299cdf0e10cSrcweir default: 300cdf0e10cSrcweir bigfsm[fp->ch[i]][fp->state] = (short) nstate; 301cdf0e10cSrcweir } 302cdf0e10cSrcweir } 303cdf0e10cSrcweir } 304cdf0e10cSrcweir 305cdf0e10cSrcweir /* 306cdf0e10cSrcweir * install special cases for ? (trigraphs), \ (splicing), runes, and 307cdf0e10cSrcweir * EOB 308cdf0e10cSrcweir */ 309cdf0e10cSrcweir for (i = 0; i < MAXSTATE; i++) 310cdf0e10cSrcweir { 311cdf0e10cSrcweir for (j = 0; j < 0xFF; j++) 312cdf0e10cSrcweir if (j == '?' || j == '\\' || j == '\n' || j == '\r') 313cdf0e10cSrcweir { 314cdf0e10cSrcweir if (bigfsm[j][i] > 0) 315cdf0e10cSrcweir bigfsm[j][i] = ~bigfsm[j][i]; 316cdf0e10cSrcweir bigfsm[j][i] &= ~QBSBIT; 317cdf0e10cSrcweir } 318cdf0e10cSrcweir bigfsm[EOB][i] = ~S_EOB; 319cdf0e10cSrcweir if (bigfsm[EOFC][i] >= 0) 320cdf0e10cSrcweir bigfsm[EOFC][i] = ~S_EOF; 321cdf0e10cSrcweir } 322cdf0e10cSrcweir } 323cdf0e10cSrcweir 324cdf0e10cSrcweir void fixlex(void)325cdf0e10cSrcweir fixlex(void) 326cdf0e10cSrcweir { 327cdf0e10cSrcweir /* do C++ comments? */ 328cdf0e10cSrcweir if ((Cplusplus == 0) || (Cflag != 0)) 329cdf0e10cSrcweir bigfsm['/'][COM1] = bigfsm['x'][COM1]; 330cdf0e10cSrcweir } 331cdf0e10cSrcweir 332cdf0e10cSrcweir /* 333cdf0e10cSrcweir * fill in a row of tokens from input, terminated by NL or END 334cdf0e10cSrcweir * First token is put at trp->lp. 335cdf0e10cSrcweir * Reset is non-zero when the input buffer can be "rewound." 336cdf0e10cSrcweir * The value is a flag indicating that possible macros have 337cdf0e10cSrcweir * been seen in the row. 338cdf0e10cSrcweir */ 339cdf0e10cSrcweir int gettokens(Tokenrow * trp,int reset)340cdf0e10cSrcweir gettokens(Tokenrow * trp, int reset) 341cdf0e10cSrcweir { 342cdf0e10cSrcweir register int c, state, oldstate; 343cdf0e10cSrcweir register uchar *ip; 344cdf0e10cSrcweir register Token *tp, *maxp; 345cdf0e10cSrcweir int runelen; 346cdf0e10cSrcweir Source *s = cursource; 347cdf0e10cSrcweir int nmac = 0; 348cdf0e10cSrcweir 349cdf0e10cSrcweir tp = trp->lp; 350cdf0e10cSrcweir ip = s->inp; 351cdf0e10cSrcweir if (reset) 352cdf0e10cSrcweir { 353cdf0e10cSrcweir s->lineinc = 0; 354cdf0e10cSrcweir if (ip >= s->inl) 355cdf0e10cSrcweir { /* nothing in buffer */ 356cdf0e10cSrcweir s->inl = s->inb; 357cdf0e10cSrcweir fillbuf(s); 358cdf0e10cSrcweir ip = s->inp = s->inb; 359cdf0e10cSrcweir } 360cdf0e10cSrcweir else 361cdf0e10cSrcweir if (ip >= s->inb + (3 * INS / 4)) 362cdf0e10cSrcweir { 363cdf0e10cSrcweir memmove(s->inb, ip, 4 + s->inl - ip); 364cdf0e10cSrcweir s->inl = s->inb + (s->inl - ip); 365cdf0e10cSrcweir ip = s->inp = s->inb; 366cdf0e10cSrcweir } 367cdf0e10cSrcweir } 368cdf0e10cSrcweir maxp = &trp->bp[trp->max]; 369cdf0e10cSrcweir runelen = 1; 370cdf0e10cSrcweir for (;;) 371cdf0e10cSrcweir { 372cdf0e10cSrcweir continue2: 373cdf0e10cSrcweir if (tp >= maxp) 374cdf0e10cSrcweir { 375cdf0e10cSrcweir trp->lp = tp; 376cdf0e10cSrcweir tp = growtokenrow(trp); 377cdf0e10cSrcweir maxp = &trp->bp[trp->max]; 378cdf0e10cSrcweir } 379cdf0e10cSrcweir tp->type = UNCLASS; 380cdf0e10cSrcweir tp->t = ip; 381cdf0e10cSrcweir tp->wslen = 0; 382cdf0e10cSrcweir tp->flag = 0; 383cdf0e10cSrcweir state = START; 384cdf0e10cSrcweir for (;;) 385cdf0e10cSrcweir { 386cdf0e10cSrcweir oldstate = state; 387cdf0e10cSrcweir 388cdf0e10cSrcweir c = *ip; 389cdf0e10cSrcweir 390cdf0e10cSrcweir if ((state = bigfsm[c][state]) >= 0) 391cdf0e10cSrcweir { 392cdf0e10cSrcweir ip += runelen; 393cdf0e10cSrcweir runelen = 1; 394cdf0e10cSrcweir continue; 395cdf0e10cSrcweir } 396cdf0e10cSrcweir state = ~state; 397cdf0e10cSrcweir reswitch: 398cdf0e10cSrcweir switch (state & 0177) 399cdf0e10cSrcweir { 400cdf0e10cSrcweir case S_SELF: 401cdf0e10cSrcweir ip += runelen; 402cdf0e10cSrcweir runelen = 1; 403cdf0e10cSrcweir case S_SELFB: 404cdf0e10cSrcweir tp->type = (unsigned char) GETACT(state); 405cdf0e10cSrcweir tp->len = ip - tp->t; 406cdf0e10cSrcweir tp++; 407cdf0e10cSrcweir goto continue2; 408cdf0e10cSrcweir 409cdf0e10cSrcweir case S_NAME: /* like S_SELFB but with nmac check */ 410cdf0e10cSrcweir tp->type = NAME; 411cdf0e10cSrcweir tp->len = ip - tp->t; 412cdf0e10cSrcweir nmac |= quicklook(tp->t[0], tp->len > 1 ? tp->t[1] : 0); 413cdf0e10cSrcweir tp++; 414cdf0e10cSrcweir goto continue2; 415cdf0e10cSrcweir 416cdf0e10cSrcweir case S_WS: 417cdf0e10cSrcweir tp->wslen = ip - tp->t; 418cdf0e10cSrcweir tp->t = ip; 419cdf0e10cSrcweir state = START; 420cdf0e10cSrcweir continue; 421cdf0e10cSrcweir 422cdf0e10cSrcweir default: 423cdf0e10cSrcweir if ((state & QBSBIT) == 0) 424cdf0e10cSrcweir { 425cdf0e10cSrcweir ip += runelen; 426cdf0e10cSrcweir runelen = 1; 427cdf0e10cSrcweir continue; 428cdf0e10cSrcweir } 429cdf0e10cSrcweir state &= ~QBSBIT; 430cdf0e10cSrcweir s->inp = ip; 431cdf0e10cSrcweir 432cdf0e10cSrcweir if (c == '\n') 433cdf0e10cSrcweir { 434cdf0e10cSrcweir while (s->inp + 1 >= s->inl && fillbuf(s) != EOF); 435cdf0e10cSrcweir 436cdf0e10cSrcweir if (s->inp[1] == '\r') 437cdf0e10cSrcweir { 438cdf0e10cSrcweir memmove(s->inp + 1, s->inp + 2, s->inl - s->inp + 2); 439cdf0e10cSrcweir s->inl -= 1; 440cdf0e10cSrcweir } 441cdf0e10cSrcweir 442cdf0e10cSrcweir goto reswitch; 443cdf0e10cSrcweir } 444cdf0e10cSrcweir 445cdf0e10cSrcweir if (c == '\r') 446cdf0e10cSrcweir { 447cdf0e10cSrcweir while (s->inp + 1 >= s->inl && fillbuf(s) != EOF); 448cdf0e10cSrcweir 449cdf0e10cSrcweir if (s->inp[1] == '\n') 450cdf0e10cSrcweir { 451cdf0e10cSrcweir memmove(s->inp, s->inp + 1, s->inl - s->inp + 1); 452cdf0e10cSrcweir s->inl -= 1; 453cdf0e10cSrcweir } 454cdf0e10cSrcweir else 455cdf0e10cSrcweir *s->inp = '\n'; 456cdf0e10cSrcweir 457cdf0e10cSrcweir state = oldstate; 458cdf0e10cSrcweir continue; 459cdf0e10cSrcweir } 460cdf0e10cSrcweir 461cdf0e10cSrcweir if (c == '?') 462cdf0e10cSrcweir { /* check trigraph */ 463cdf0e10cSrcweir if (trigraph(s)) 464cdf0e10cSrcweir { 465cdf0e10cSrcweir state = oldstate; 466cdf0e10cSrcweir continue; 467cdf0e10cSrcweir } 468cdf0e10cSrcweir goto reswitch; 469cdf0e10cSrcweir } 470cdf0e10cSrcweir if (c == '\\') 471cdf0e10cSrcweir { /* line-folding */ 472cdf0e10cSrcweir if (foldline(s)) 473cdf0e10cSrcweir { 474cdf0e10cSrcweir s->lineinc++; 475cdf0e10cSrcweir state = oldstate; 476cdf0e10cSrcweir continue; 477cdf0e10cSrcweir } 478cdf0e10cSrcweir goto reswitch; 479cdf0e10cSrcweir } 480cdf0e10cSrcweir error(WARNING, "Lexical botch in cpp"); 481cdf0e10cSrcweir ip += runelen; 482cdf0e10cSrcweir runelen = 1; 483cdf0e10cSrcweir continue; 484cdf0e10cSrcweir 485cdf0e10cSrcweir case S_EOB: 486cdf0e10cSrcweir s->inp = ip; 487cdf0e10cSrcweir fillbuf(cursource); 488cdf0e10cSrcweir state = oldstate; 489cdf0e10cSrcweir continue; 490cdf0e10cSrcweir 491cdf0e10cSrcweir case S_EOF: 492cdf0e10cSrcweir tp->type = END; 493cdf0e10cSrcweir tp->len = 0; 494cdf0e10cSrcweir s->inp = ip; 495cdf0e10cSrcweir if (tp != trp->bp && (tp - 1)->type != NL && cursource->fd != -1) 496cdf0e10cSrcweir error(WARNING, "No newline at end of file"); 497cdf0e10cSrcweir trp->lp = tp + 1; 498cdf0e10cSrcweir return nmac; 499cdf0e10cSrcweir 500cdf0e10cSrcweir case S_STNL: 501cdf0e10cSrcweir error(ERROR, "Unterminated string or char const"); 502cdf0e10cSrcweir case S_NL: 503cdf0e10cSrcweir tp->t = ip; 504cdf0e10cSrcweir tp->type = NL; 505cdf0e10cSrcweir tp->len = 1; 506cdf0e10cSrcweir tp->wslen = 0; 507cdf0e10cSrcweir s->lineinc++; 508cdf0e10cSrcweir s->inp = ip + 1; 509cdf0e10cSrcweir trp->lp = tp + 1; 510cdf0e10cSrcweir return nmac; 511cdf0e10cSrcweir 512cdf0e10cSrcweir case S_EOFSTR: 513cdf0e10cSrcweir error(FATAL, "EOF in string or char constant"); 514cdf0e10cSrcweir break; 515cdf0e10cSrcweir 516cdf0e10cSrcweir case S_COMNL: 517cdf0e10cSrcweir s->lineinc++; 518cdf0e10cSrcweir state = COM2; 519cdf0e10cSrcweir ip += runelen; 520cdf0e10cSrcweir runelen = 1; 521cdf0e10cSrcweir continue; 522cdf0e10cSrcweir 523cdf0e10cSrcweir case S_EOFCOM: 524cdf0e10cSrcweir error(WARNING, "EOF inside comment"); 525cdf0e10cSrcweir --ip; 526cdf0e10cSrcweir case S_COMMENT: 527cdf0e10cSrcweir if (!Cflag) 528cdf0e10cSrcweir { 529cdf0e10cSrcweir tp->t = ++ip; 530cdf0e10cSrcweir tp->t[-1] = ' '; 531cdf0e10cSrcweir tp->wslen = 1; 532cdf0e10cSrcweir state = START; 533cdf0e10cSrcweir continue; 534cdf0e10cSrcweir } 535cdf0e10cSrcweir else 536cdf0e10cSrcweir { 537cdf0e10cSrcweir runelen = 1; 538cdf0e10cSrcweir s->lineinc = 0;; 539cdf0e10cSrcweir tp->type = COMMENT; 540cdf0e10cSrcweir tp->flag |= XTWS; 541cdf0e10cSrcweir } 542cdf0e10cSrcweir } 543cdf0e10cSrcweir break; 544cdf0e10cSrcweir } 545cdf0e10cSrcweir ip += runelen; 546cdf0e10cSrcweir runelen = 1; 547cdf0e10cSrcweir tp->len = ip - tp->t; 548cdf0e10cSrcweir tp++; 549cdf0e10cSrcweir } 550cdf0e10cSrcweir } 551cdf0e10cSrcweir 552cdf0e10cSrcweir /* have seen ?; handle the trigraph it starts (if any) else 0 */ 553cdf0e10cSrcweir int trigraph(Source * s)554cdf0e10cSrcweir trigraph(Source * s) 555cdf0e10cSrcweir { 556cdf0e10cSrcweir uchar c; 557cdf0e10cSrcweir 558cdf0e10cSrcweir while (s->inp + 2 >= s->inl && fillbuf(s) != EOF); 559cdf0e10cSrcweir ; 560cdf0e10cSrcweir if (s->inp[1] != '?') 561cdf0e10cSrcweir return 0; 562cdf0e10cSrcweir c = 0; 563cdf0e10cSrcweir switch (s->inp[2]) 564cdf0e10cSrcweir { 565cdf0e10cSrcweir case '=': 566cdf0e10cSrcweir c = '#'; 567cdf0e10cSrcweir break; 568cdf0e10cSrcweir case '(': 569cdf0e10cSrcweir c = '['; 570cdf0e10cSrcweir break; 571cdf0e10cSrcweir case '/': 572cdf0e10cSrcweir c = '\\'; 573cdf0e10cSrcweir break; 574cdf0e10cSrcweir case ')': 575cdf0e10cSrcweir c = ']'; 576cdf0e10cSrcweir break; 577cdf0e10cSrcweir case '\'': 578cdf0e10cSrcweir c = '^'; 579cdf0e10cSrcweir break; 580cdf0e10cSrcweir case '<': 581cdf0e10cSrcweir c = '{'; 582cdf0e10cSrcweir break; 583cdf0e10cSrcweir case '!': 584cdf0e10cSrcweir c = '|'; 585cdf0e10cSrcweir break; 586cdf0e10cSrcweir case '>': 587cdf0e10cSrcweir c = '}'; 588cdf0e10cSrcweir break; 589cdf0e10cSrcweir case '-': 590cdf0e10cSrcweir c = '~'; 591cdf0e10cSrcweir break; 592cdf0e10cSrcweir } 593cdf0e10cSrcweir if (c) 594cdf0e10cSrcweir { 595cdf0e10cSrcweir *s->inp = c; 596cdf0e10cSrcweir memmove(s->inp + 1, s->inp + 3, s->inl - s->inp + 2); 597cdf0e10cSrcweir s->inl -= 2; 598cdf0e10cSrcweir } 599cdf0e10cSrcweir return c; 600cdf0e10cSrcweir } 601cdf0e10cSrcweir 602cdf0e10cSrcweir int foldline(Source * s)603cdf0e10cSrcweir foldline(Source * s) 604cdf0e10cSrcweir { 605cdf0e10cSrcweir int n = 1; 606cdf0e10cSrcweir 607cdf0e10cSrcweir /* skip pending wihite spaces */ 608cdf0e10cSrcweir while ((s->inp[n] == ' ') || (s->inp[n] == '\t')) 609cdf0e10cSrcweir { 610cdf0e10cSrcweir n++; 611cdf0e10cSrcweir if ((s->inp + n >= s->inl) && (fillbuf(s) == EOF)) 612cdf0e10cSrcweir break; 613cdf0e10cSrcweir } 614cdf0e10cSrcweir 615cdf0e10cSrcweir /* refill buffer */ 616cdf0e10cSrcweir while (s->inp + (n + 1) >= s->inl && fillbuf(s) != EOF); 617cdf0e10cSrcweir 618cdf0e10cSrcweir /* skip DOS line ends */ 619cdf0e10cSrcweir if (((s->inp[n] == '\r') && (s->inp[n+1] == '\n')) || 620cdf0e10cSrcweir ((s->inp[n] == '\n') && (s->inp[n+1] == '\r'))) 621cdf0e10cSrcweir n++; 622cdf0e10cSrcweir 623cdf0e10cSrcweir if ((s->inp[n] == '\n') || (s->inp[n] == '\r')) 624cdf0e10cSrcweir { 625cdf0e10cSrcweir memmove(s->inp, s->inp + n + 1, s->inl - s->inp + n + 2); 626cdf0e10cSrcweir s->inl -= n + 1; 627cdf0e10cSrcweir return 1; 628cdf0e10cSrcweir } 629cdf0e10cSrcweir return 0; 630cdf0e10cSrcweir } 631cdf0e10cSrcweir 632cdf0e10cSrcweir int fillbuf(Source * s)633cdf0e10cSrcweir fillbuf(Source * s) 634cdf0e10cSrcweir { 635cdf0e10cSrcweir int n; 636cdf0e10cSrcweir 637cdf0e10cSrcweir if (s->fd < 0 || (n = read(s->fd, (char *) s->inl, INS / 8)) <= 0) 638cdf0e10cSrcweir n = 0; 639cdf0e10cSrcweir s->inl += n; 640cdf0e10cSrcweir s->inl[0] = s->inl[1] = s->inl[2] = s->inl[3] = EOB; 641cdf0e10cSrcweir if (n == 0) 642cdf0e10cSrcweir { 643cdf0e10cSrcweir s->inl[0] = s->inl[1] = s->inl[2] = s->inl[3] = EOFC; 644cdf0e10cSrcweir return EOF; 645cdf0e10cSrcweir } 646cdf0e10cSrcweir return 0; 647cdf0e10cSrcweir } 648cdf0e10cSrcweir 649cdf0e10cSrcweir /* 650cdf0e10cSrcweir * Push down to new source of characters. 651cdf0e10cSrcweir * If fd>0 and str==NULL, then from a file `name'; 652cdf0e10cSrcweir * if fd==-1 and str, then from the string. 653cdf0e10cSrcweir */ 654cdf0e10cSrcweir Source * setsource(char * name,int path,int fd,char * str,int wrap)655cdf0e10cSrcweir setsource(char *name, int path, int fd, char *str, int wrap) 656cdf0e10cSrcweir { 657cdf0e10cSrcweir Source *s = new(Source); 658cdf0e10cSrcweir int len; 659cdf0e10cSrcweir 660cdf0e10cSrcweir s->line = 1; 661cdf0e10cSrcweir s->lineinc = 0; 662cdf0e10cSrcweir s->fd = fd; 663cdf0e10cSrcweir s->filename = name; 664cdf0e10cSrcweir s->next = cursource; 665cdf0e10cSrcweir s->ifdepth = 0; 666cdf0e10cSrcweir s->pathdepth = path; 667cdf0e10cSrcweir s->wrap = wrap; 668cdf0e10cSrcweir 669cdf0e10cSrcweir cursource = s; 670cdf0e10cSrcweir 671cdf0e10cSrcweir if (s->wrap) 672cdf0e10cSrcweir genwrap(0); 673cdf0e10cSrcweir 674cdf0e10cSrcweir /* slop at right for EOB */ 675cdf0e10cSrcweir if (str) 676cdf0e10cSrcweir { 677cdf0e10cSrcweir len = strlen(str); 678cdf0e10cSrcweir s->inb = domalloc(len + 4); 679cdf0e10cSrcweir s->inp = s->inb; 680cdf0e10cSrcweir strncpy((char *) s->inp, str, len); 681cdf0e10cSrcweir } 682cdf0e10cSrcweir else 683cdf0e10cSrcweir { 684cdf0e10cSrcweir s->inb = domalloc(INS + 4); 685cdf0e10cSrcweir s->inp = s->inb; 686cdf0e10cSrcweir len = 0; 687cdf0e10cSrcweir } 688cdf0e10cSrcweir s->inl = s->inp + len; 689cdf0e10cSrcweir s->inl[0] = s->inl[1] = EOB; 690cdf0e10cSrcweir 691cdf0e10cSrcweir return s; 692cdf0e10cSrcweir } 693cdf0e10cSrcweir 694cdf0e10cSrcweir void unsetsource(void)695cdf0e10cSrcweir unsetsource(void) 696cdf0e10cSrcweir { 697cdf0e10cSrcweir Source *s = cursource; 698cdf0e10cSrcweir 699cdf0e10cSrcweir if (s->wrap) 700cdf0e10cSrcweir genwrap(1); 701cdf0e10cSrcweir 702cdf0e10cSrcweir if (s->fd >= 0) 703cdf0e10cSrcweir { 704cdf0e10cSrcweir close(s->fd); 705cdf0e10cSrcweir dofree(s->inb); 706cdf0e10cSrcweir } 707cdf0e10cSrcweir cursource = s->next; 708cdf0e10cSrcweir dofree(s); 709cdf0e10cSrcweir } 710