# *************************************************************
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# *************************************************************

import sys, os.path
from globals import *
import macroparser

class EOF(Exception):
    """Raised by nextPos() when a scan position runs past the end of the buffer."""

    def __init__ (self):
        pass

    # NOTE(review): named 'str' rather than '__str__', so str(exc) will NOT
    # call this; it is only reachable as an explicit exc.str() call.
    def str (self):
        return "end of file"

class BOF(Exception):
    """Raised by prevPos() when a scan position runs before the start of the buffer."""

    def __init__ (self):
        pass

    # NOTE(review): same naming quirk as EOF.str above — not a real __str__.
    def str (self):
        return "beginning of file"


def removeHeaderQuotes (orig):
    """Strip one pair of surrounding quotes from an include-file name.

    Handles both '"name"' and '<name>' forms; any other string (including
    ones of length <= 2) is returned unchanged.
    """
    if len(orig) <= 2:
        return orig
    elif orig[0] == orig[-1] == '"':
        return orig[1:-1]
    elif orig[0] == '<' and orig[-1] == '>':
        return orig[1:-1]

    return orig


def dumpTokens (tokens, toError=False):
    """Pretty-print a flat token stream with scope-based indentation.

    '{' and '<' open a scope (indent increases), '}' and '>' close one,
    and ';' terminates the current line.  Output goes to stdout, or to
    stderr when toError is True.
    """

    scope = 0
    indent = "    "
    line = ''       # tokens accumulated for the current output line
    chars = ''      # the whole formatted dump, flushed once at the end

    for token in tokens:
        if token in '{<':
            # Open a new scope: flush any pending line, print the opener,
            # then indent everything that follows one level deeper.
            if len(line) > 0:
                chars += indent*scope + line + "\n"
                line = ''
            chars += indent*scope + token + "\n"
            scope += 1

        elif token in '}>':
            # Close the current scope; the closer is printed at the outer level.
            if len(line) > 0:
                chars += indent*scope + line + "\n"
                line = ''
            scope -= 1
            chars += indent*scope + token

        elif token == ';':
            # Statement terminator ends the pending line (or is appended
            # directly after a scope closer when no line is pending).
            if len(line) > 0:
                chars += indent*scope + line + ";\n"
                line = ''
            else:
                chars += ";\n"
        elif len(token) > 0:
            line += token + ' '

    # Flush whatever is still pending.
    if len(line) > 0:
        chars += line
    chars += "\n"
    if toError:
        sys.stderr.write(chars)
    else:
        sys.stdout.write(chars)


class HeaderData(object):
    """Cached result of tokenizing one header file (see SrcLexer.headerCache)."""

    def __init__ (self):
        self.defines = {}   # macro name -> macro object collected from the header
        self.tokens = []    # token stream produced for the header


class SrcLexer(object):
    """Lexicographical analyzer for .src format.

The role of a lexer is to parse the source file and break it into 
appropriate tokens.  Such tokens are later passed to a parser to
build the syntax tree.
"""
    # Class-wide cache of already-tokenized headers, keyed by header path,
    # shared by every SrcLexer instance in the process.
    headerCache = {}

    # Conditional-compilation visibility states (see visibilityStack):
    #   VISIBLE        - current #if/#ifdef branch is active
    #   INVISIBLE_PRE  - branch inactive, but a later #elif/#else may activate it
    #   INVISIBLE_POST - a branch already matched; remaining branches stay inactive
    VISIBLE = 0
    INVISIBLE_PRE = 1
    INVISIBLE_POST = 2

    def __init__ (self, chars, filepath = None):
        self.filepath = filepath
        self.parentLexer = None     # set by handleMacroInclude for nested lexers
        self.chars = chars          # the full source text being tokenized
        self.bufsize = len(self.chars)

        # TODO: use parameters for this
        # Properties that can be copied.
        self.headerDict = dict ()   # headers already expanded (path -> True)
        self.debug = False
        self.debugMacro = False
        self.includeDirs = list ()  # directories searched by #include handling
        self.expandHeaders = True   # when False, #include directives are ignored
        self.inMacroDefine = False  # True while lexing a macro body (disables '#')
        self.stopOnHeader = False   # passed to error() when an include is missing

    def copyProperties (self, other):
        """Copy properties from another instance of SrcLexer."""

        # TODO: use parameters for this
        # NOTE(review): headerDict is shared by reference (child updates are
        # visible to the parent), while includeDirs is deliberately a copy.
        self.headerDict = other.headerDict
        self.debug = other.debug
        self.debugMacro = other.debugMacro
        self.includeDirs = other.includeDirs[:]
        self.expandHeaders = other.expandHeaders
        self.inMacroDefine = other.inMacroDefine
        self.stopOnHeader = other.stopOnHeader

    def init (self):
        """Reset all per-run lexer state; called at the start of tokenize()."""
        self.firstNonBlank = ''     # first non-blank character of the current line
        self.token = ''             # token currently being accumulated
        self.tokens = []            # completed token stream
        self.defines = {}           # macro name -> macro object
        self.visibilityStack = []   # one VISIBLE/INVISIBLE_* entry per open #if

    def getTokens (self):
        """Return the token list produced by the last tokenize() run."""
        return self.tokens

    def getDefines (self):
        """Return the macro definitions collected by the last tokenize() run."""
        return self.defines

    def nextPos (self, i):
        """Return the next buffer position after i, skipping CR (0x0D) bytes.

        Raises EOF when the end of the buffer is passed.
        """
        while True:
            i += 1
            try:
                c = self.chars[i]
            except IndexError:
                raise EOF

            if ord(c) in [0x0D]:
                # Carriage returns are invisible to the lexer (DOS line ends).
                continue
            break
        return i

    def prevPos (self, i):
        """Return the previous buffer position before i, skipping CR (0x0D) bytes.

        Raises BOF when the start of the buffer is passed.
        """
        while True:
            i -= 1
            try:
                c = self.chars[i]
            except IndexError:
                # NOTE(review): only triggers once i goes below -len(chars);
                # small negative indices still wrap around in Python.
                raise BOF

            if ord(c) in [0x0D]:
                continue
            break
        return i

    def isCodeVisible (self):
        """Return True when every enclosing #if/#ifdef branch is active."""
        if len(self.visibilityStack) == 0:
            return True
        for item in self.visibilityStack:
            if item != SrcLexer.VISIBLE:
                return False
        return True

    def tokenize (self):
        """Scan self.chars and populate self.tokens and self.defines.

        Dispatches each character to the handler methods below; handlers
        return the position at which scanning should resume.
        """
        self.init()

        # NOTE(review): raises IndexError when self.chars is empty — the
        # first self.chars[0] access happens before any EOF check.
        i = 0
        while True:
            c = self.chars[i]

            if self.firstNonBlank == '' and not c in [' ', "\n", "\t"]:
                # Store the first non-blank in a line.
                self.firstNonBlank = c
            elif c == "\n":
                self.firstNonBlank = ''

            if c == '#':
                i = self.pound(i)
            elif c == '/':
                i = self.slash(i)
            elif c == "\n":
                i = self.lineBreak(i)
            elif c == '"':
                i = self.doubleQuote(i)
            elif c in [' ', "\t"]:
                i = self.blank(i)
            elif c in ";()[]{}<>,=+-*":
                # Any outstanding single-character token.
                i = self.anyToken(i, c)
            elif self.isCodeVisible():
                self.token += c

            try:
                i = self.nextPos(i)
            except EOF:
                break

        # Flush the last pending token, if any.
        if len(self.token):
            self.tokens.append(self.token)

        if not self.parentLexer and self.debug:
            progress ("-"*68 + "\n")
            progress ("All defines found in this translation unit:\n")
            keys = self.defines.keys()
            keys.sort()
            for key in keys:
                progress ("@ %s\n"%key)

    def dumpTokens (self, toError=False):
        """Pretty-print this lexer's token stream (see module-level dumpTokens)."""
        dumpTokens(self.tokens, toError)


    def maybeAddToken (self):
        """Flush the currently accumulated token, if non-empty, into the stream."""
        if len(self.token) > 0:
            self.tokens.append(self.token)
            self.token = ''


    #--------------------------------------------------------------------
    # character handlers
    #
    # Each handler receives the current buffer position and returns the
    # position from which tokenize() should continue (it advances via
    # nextPos afterwards).

    def blank (self, i):
        """Whitespace terminates the current token."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        return i


    def pound (self, i):
        """Handle a preprocessor directive starting at '#'.

        Parses '#<command> <argument...>' up to the (unescaped) end of line,
        then dispatches on the command: define/include are delegated to the
        handleMacro* methods; ifdef/ifndef/if/elif/else/endif manipulate
        visibilityStack; undef/error/pragma are ignored.  Any unknown
        command aborts the program.
        """

        if self.inMacroDefine:
            return i

        # Only honor '#' when it is the first non-blank character of the line.
        if not self.firstNonBlank == '#':
            return i

        self.maybeAddToken()
        # We are in preprocessing mode.

        # Get the macro command name '#<command> .....'

        # NOTE(review): 'define' is never used below.
        command, define, buf = '', '', ''
        firstNonBlank = False
        while True:
            try:
                i = self.nextPos(i)
                c = self.chars[i]
                if c == '\\' and self.chars[self.nextPos(i)] == "\n":
                    # Line continuation: skip the backslash-newline pair.
                    i = self.nextPos(i)
                    continue
            except EOF:
                break

            if c == "\n":
                # End of directive; a bare '#word' line becomes the command.
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                # Step back so tokenize() sees the newline itself.
                i = self.prevPos(i)
                break
            elif c in [' ', "\t"]:
                if not firstNonBlank:
                    # Ignore any leading blanks after the '#'.
                    continue

                # First blank separates command from argument; later blanks
                # are collapsed into single spaces inside the argument.
                if len(command) == 0:
                    command = buf
                    buf = ''
                else:
                    buf += ' '
            elif c == '(':
                # '(' immediately after the command name also terminates it
                # (function-like macro definitions: '#define NAME(args)').
                if len(buf) > 0 and len(command) == 0:
                    command = buf
                buf += c
            else:
                if not firstNonBlank:
                    firstNonBlank = True
                buf += c

        if command == 'define':
            self.handleMacroDefine(buf)
        elif command == 'include':
            self.handleMacroInclude(buf)
        elif command == 'ifdef':
            defineName = buf.strip()
            if self.defines.has_key(defineName):
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'ifndef':
            defineName = buf.strip()
            if self.defines.has_key(defineName):
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)
            else:
                self.visibilityStack.append(SrcLexer.VISIBLE)

        elif command == 'if':
            if self.evalCodeVisibility(buf):
                self.visibilityStack.append(SrcLexer.VISIBLE)
            else:
                self.visibilityStack.append(SrcLexer.INVISIBLE_PRE)

        elif command == 'elif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')

            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                # A branch already matched; all further branches are dead.
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            elif self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                # Evaluate only if the current visibility is false.
                if self.evalCodeVisibility(buf):
                    self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'else':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')

            # NOTE(review): these are two sequential 'if's, not if/elif, but
            # VISIBLE -> INVISIBLE_POST in the first cannot match the second,
            # so the net effect is still a two-way toggle.
            if self.visibilityStack[-1] == SrcLexer.VISIBLE:
                self.visibilityStack[-1] = SrcLexer.INVISIBLE_POST
            if self.visibilityStack[-1] == SrcLexer.INVISIBLE_PRE:
                self.visibilityStack[-1] = SrcLexer.VISIBLE

        elif command == 'endif':
            if len(self.visibilityStack) == 0:
                raise ParseError ('')
            self.visibilityStack.pop()

        elif command == 'undef':
            pass
        elif command in ['error', 'pragma']:
            pass
        else:
            # Unknown preprocessor command: dump it and bail out hard.
            print "'%s' '%s'"%(command, buf)
            print self.filepath
            sys.exit(0)

        return i


    def evalCodeVisibility (self, buf):
        """Evaluate a #if/#elif expression; unparseable expressions count as true.

        NOTE(review): uses bare eval() on source text with a blanket except —
        acceptable only because input is trusted build-tree sources.
        """
        try:
            return eval(buf)
        except:
            return True

    def handleMacroDefine (self, buf):
        """Parse a '#define' body and record the resulting macro in self.defines."""

        mparser = macroparser.MacroParser(buf)
        mparser.debug = self.debugMacro
        mparser.parse()
        macro = mparser.getMacro()
        if macro:
            # Macro may be None when the body failed to parse; skip it then.
            self.defines[macro.name] = macro

    def handleMacroInclude (self, buf):
        """Handle a '#include' directive: locate, lex and cache the header.

        Searches self.includeDirs for the header, skips headers already
        expanded in this translation unit, serves repeat requests from the
        class-wide headerCache, and otherwise recursively tokenizes the
        header with a child SrcLexer, merging its defines into self.defines.
        """

        # Strip excess string if any.
        pos = buf.find(' ')
        if pos >= 0:
            buf = buf[:pos]
        headerSub = removeHeaderQuotes(buf)

        if not self.expandHeaders:
            # We don't want to expand headers. Bail out.
            if self.debug:
                progress ("%s ignored\n"%headerSub)
            return

        defines = {}
        headerPath = None
        for includeDir in self.includeDirs:
            hpath = includeDir + '/' + headerSub
            # Never include the file being lexed itself.
            if os.path.isfile(hpath) and hpath != self.filepath:
                headerPath = hpath
                break

        if not headerPath:
            # error() decides (via stopOnHeader) whether this is fatal.
            error("included header file " + headerSub + " not found\n", self.stopOnHeader)
            return

        if self.debug:
            progress ("%s found\n"%headerPath)

        if headerPath in self.headerDict:
            if self.debug:
                progress ("%s already included\n"%headerPath)
            return

        if SrcLexer.headerCache.has_key(headerPath):
            # Cache hit: only the defines are replayed; tokens are not needed.
            if self.debug:
                progress ("%s in cache\n"%headerPath)
            for key in SrcLexer.headerCache[headerPath].defines.keys():
                self.defines[key] = SrcLexer.headerCache[headerPath].defines[key]
            return

        # NOTE(review): file handle is never explicitly closed; relies on
        # CPython refcounting to release it.
        chars = open(headerPath, 'r').read()
        mclexer = SrcLexer(chars, headerPath)
        mclexer.copyProperties(self)
        mclexer.parentLexer = self
        mclexer.tokenize()
        hdrData = HeaderData()
        hdrData.tokens = mclexer.getTokens()
        headerDefines = mclexer.getDefines()
        for key in headerDefines.keys():
            defines[key] = headerDefines[key]
            hdrData.defines[key] = headerDefines[key]

        self.headerDict[headerPath] = True
        SrcLexer.headerCache[headerPath] = hdrData

        # Update the list of headers that have already been expanded.
        for key in mclexer.headerDict.keys():
            self.headerDict[key] = True

        if self.debug:
            progress ("defines found in header %s:\n"%headerSub)
            for key in defines.keys():
                progress ("    '%s'\n"%key)

        for key in defines.keys():
            self.defines[key] = defines[key]


    def slash (self, i):
        """Handle '/': line comment, block comment, or a plain '/' token."""
        if not self.isCodeVisible():
            return i

        if i < self.bufsize - 1 and self.chars[i+1] == '/':
            # Parse line comment.
            line = ''
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if ord(c) in [0x0A, 0x0D]:
                    # Stop just before the line end so tokenize() handles it.
                    return i - 1
                line += c
                i += 1
            # Comment ran to end of buffer: discard any pending token.
            self.token = ''
        elif i < self.bufsize - 1 and self.chars[i+1] == '*':
            # Parse block comment; resume at the closing '/'.
            comment = ''
            i += 2
            while i < self.bufsize:
                c = self.chars[i]
                if c == '/' and self.chars[i-1] == '*':
                    return i
                comment += c
                i += 1
        else:
            return self.anyToken(i, '/')

        return i


    def lineBreak (self, i):
        """A newline terminates the current token."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()

        return i


    def doubleQuote (self, i):
        """Scan a double-quoted string literal and emit it (quotes included)
        as a single token.  No escape handling is performed."""
        if not self.isCodeVisible():
            return i

        literal = ''
        i += 1
        while i < self.bufsize:
            c = self.chars[i]
            if c == '"':
                self.tokens.append('"'+literal+'"')
                break
            literal += c
            i += 1

        return i


    def anyToken (self, i, token):
        """Emit a single-character token, flushing any pending token first."""
        if not self.isCodeVisible():
            return i

        self.maybeAddToken()
        self.token = token
        self.maybeAddToken()
        return i