| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
"""Parser classes for Cheetah's Compiler |
|---|
| 4 |
|
|---|
| 5 |
|
|---|
| 6 |
|
|---|
| 7 |
|
|---|
| 8 |
|
|---|
| 9 |
|
|---|
| 10 |
|
|---|
| 11 |
|
|---|
| 12 |
|
|---|
| 13 |
|
|---|
| 14 |
|
|---|
| 15 |
|
|---|
| 16 |
|
|---|
| 17 |
|
|---|
| 18 |
__author__ = "Tavis Rudd <tavis@damnsimple.com>" |
|---|
| 19 |
__revision__ = "$Revision: 1.130 $"[11:-2] |
|---|
| 20 |
|
|---|
| 21 |
import os |
|---|
| 22 |
import sys |
|---|
| 23 |
import re |
|---|
| 24 |
from re import DOTALL, MULTILINE |
|---|
| 25 |
from types import StringType, ListType, TupleType, ClassType, TypeType |
|---|
| 26 |
import time |
|---|
| 27 |
from tokenize import pseudoprog |
|---|
| 28 |
import inspect |
|---|
| 29 |
import new |
|---|
| 30 |
import traceback |
|---|
| 31 |
|
|---|
| 32 |
from Cheetah.SourceReader import SourceReader |
|---|
| 33 |
from Cheetah import Filters |
|---|
| 34 |
from Cheetah import ErrorCatchers |
|---|
| 35 |
from Cheetah.Unspecified import Unspecified |
|---|
| 36 |
|
|---|
| 37 |
|
|---|
| 38 |
def escapeRegexChars(txt,
                     escapeRE=re.compile(r'([\$\^\*\+\.\?\{\}\[\]\(\)\|\\])')):
    """Return `txt` with every regex metacharacter prefixed by a backslash.

    `escapeRE` is compiled once, at definition time, and matches the
    characters that get escaped: $ ^ * + . ? { } [ ] ( ) | and backslash.
    """
    def addBackslash(match):
        return '\\' + match.group(1)

    return escapeRE.sub(addBackslash, txt)
|---|
| 44 |
|
|---|
| 45 |
def group(*choices):
    """Return a capturing regex group matching any one of `choices`."""
    return '(' + '|'.join(choices) + ')'


def nongroup(*choices):
    """Return a non-capturing regex group matching any one of `choices`."""
    return '(?:' + '|'.join(choices) + ')'


def namedGroup(name, *choices):
    """Return a regex group called `name` matching any one of `choices`.

    The previous spelling, '(P:<name>...)', was not named-group syntax at
    all: it produced a plain group that required the literal text 'P:<name>'.
    """
    return '(?P<' + name + '>' + '|'.join(choices) + ')'


def any(*choices):
    """Return a regex matching zero or more repetitions of `choices`.

    NOTE: intentionally keeps its historical name even though it shadows
    the builtin any() inside this module.
    """
    # star-args call instead of apply(), which only exists in Python 2
    return group(*choices) + '*'


def maybe(*choices):
    """Return a regex matching zero or one occurrence of `choices`."""
    return group(*choices) + '?'
|---|
| 50 |
|
|---|
| 51 |
|
|---|
| 52 |
|
|---|
| 53 |
|
|---|
| 54 |
# Cache modes. The names mirror the named groups of the cache-token regex
# built in _LowLevelParser._makeCheetahVarREs().
NO_CACHE, STATIC_CACHE, REFRESH_CACHE = 0, 1, 2

# Scopes for a set operation -- presumably consumed by the #set directive
# handling elsewhere in the package (TODO confirm).
SET_LOCAL, SET_GLOBAL, SET_MODULE = 0, 1, 2
|---|
| 61 |
|
|---|
| 62 |
|
|---|
| 63 |
|
|---|
| 64 |
|
|---|
| 65 |
|
|---|
| 66 |
# Characters that may start an identifier: ASCII letters and underscore.
identchars = ("abcdefghijklmnopqrstuvwxyz"
              "ABCDEFGHIJKLMNOPQRSTUVWXYZ_")
# Characters that may appear anywhere in an identifier.
namechars = "".join((identchars, "0123456789"))
|---|
| 69 |
|
|---|
| 70 |
|
|---|
| 71 |
# Python operator and delimiter tables used to classify tokens.
powerOp = '**'
unaryArithOps = ('+', '-', '~')
# '*' was missing, so matchOperator() rejected plain multiplication
binaryArithOps = ('+', '-', '*', '/', '//', '%')
shiftOps = ('>>', '<<')
bitwiseOps = ('&', '|', '^')
assignOp = '='
# '//=' was missing, so matchAssignmentOperator() rejected floor-division
# augmented assignment
augAssignOps = ('+=', '-=', '/=', '//=', '*=', '**=', '^=', '%=',
                '>>=', '<<=', '&=', '|=',)
assignmentOps = (assignOp,) + augAssignOps

compOps = ('<', '>', '==', '!=', '<=', '>=', '<>', 'is', 'in',)
booleanOps = ('and', 'or', 'not')
operators = ((powerOp,) + unaryArithOps + binaryArithOps
             + shiftOps + bitwiseOps + assignmentOps
             + compOps + booleanOps)

# NOTE: the historical spelling 'delimeters' is kept because it is the
# module-level name other code may reference.
delimeters = ('(', ')', '{', '}', '[', ']',
              ',', '.', ':', ';', '=', '`') + augAssignOps
|---|
| 89 |
|
|---|
| 90 |
|
|---|
| 91 |
# Reserved words of the Python grammar this parser was written against.
keywords = tuple((
    'and del for is raise '
    'assert elif from lambda return '
    'break else global not try '
    'class except if or while '
    'continue exec import pass '
    'def finally in print'
).split())
|---|
| 98 |
|
|---|
| 99 |
single3 = "'''"
double3 = '"""'

# Every way a triple-quoted string literal can open: each prefix appears
# first in its single-quoted and then in its double-quoted form.
tripleQuotedStringStarts = (
    "'''", '"""',
    "r'''", 'r"""',
    "R'''", 'R"""',
    "u'''", 'u"""',
    "U'''", 'U"""',
    "ur'''", 'ur"""',
    "Ur'''", 'Ur"""',
    "uR'''", 'uR"""',
    "UR'''", 'UR"""',
)

# opener -> closing quotes; relies on the single/double alternation above
tripleQuotedStringPairs = dict(
    zip(tripleQuotedStringStarts,
        (single3, double3) * (len(tripleQuotedStringStarts) // 2)))

# opening bracket -> closing bracket, and the reverse lookup
closurePairsRev = {'(': ')', '[': ']', '{': '}'}
closurePairs = dict(zip(closurePairsRev.values(), closurePairsRev.keys()))
|---|
| 121 |
|
|---|
| 122 |
|
|---|
| 123 |
|
|---|
| 124 |
|
|---|
| 125 |
def makeTripleQuoteRe(start, end):
    """Compile a regex matching a whole triple-quoted string literal.

    The pattern is the escaped opener `start`, the shortest possible run of
    any characters (newlines included, because of DOTALL), and the escaped
    closer `end`.
    """
    opener = escapeRegexChars(start)
    closer = escapeRegexChars(end)
    return re.compile('(?:%s).*?(?:%s)' % (opener, closer), re.DOTALL)


# opener -> compiled regex for the complete literal, one per known opener
tripleQuotedStringREs = {}
for start, end in tripleQuotedStringPairs.items():
    tripleQuotedStringREs[start] = makeTripleQuoteRe(start, end)
|---|
| 133 |
|
|---|
| 134 |
# Regex source fragments (plain strings, not compiled)
WS = r'[ \f\t]*'
EOL = r'\r\n|\n|\r'
EOLZ = '|'.join((EOL, r'\Z'))
# True at the very start of the text or when the previous character is not
# a backslash; this is exactly the string nongroup(r'(?<=\A)', r'(?<!\\)')
# produces.
escCharLookBehind = r'(?:(?<=\A)|(?<!\\))'
nameCharLookAhead = r'(?=[A-Za-z_])'

# Compiled helpers
identRE = re.compile(r'[a-zA-Z_][a-zA-Z_0-9]*')
EOLre = re.compile(r'(?:\r\n|\r|\n)')
|---|
| 141 |
|
|---|
| 142 |
# One or more letters/underscores followed by '@'; the name is group 1.
# The range used to read 'A-z', which also admits the ASCII characters
# between 'Z' and 'a' ('[', '\', ']', '^' and '`').
specialVarRE = re.compile(r'([a-zA-Z_]+)@')
|---|
| 143 |
|
|---|
| 144 |
|
|---|
| 145 |
# Directive name -> name of the parser method that consumes it.
# NOTE(review): a value of None presumably means the directive is handled
# by generic code elsewhere in the package -- confirm against the
# high-level parser.
directiveNamesAndParsers = {
    # importing and extending
    'import': None,
    'from': None,
    'extends': 'eatExtends',
    'implements': 'eatImplements',

    # output, filtering and caching
    'slurp': 'eatSlurp',
    'raw': 'eatRaw',
    'include': 'eatInclude',
    'cache': 'eatCache',
    'filter': 'eatFilter',
    'echo': None,
    'silent': None,

    'call': 'eatCall',
    'arg': 'eatCallArg',

    'capture': 'eatCapture',

    # declaration, assignment and deletion
    'attr': 'eatAttr',
    'def': 'eatDef',
    'block': 'eatBlock',
    '@': 'eatDecorator',
    'defmacro': 'eatDefMacro',

    'closure': 'eatClosure',

    'set': 'eatSet',
    'del': None,

    # flow control
    'if': 'eatIf',
    'while': None,
    'for': None,
    'else': None,
    'elif': None,
    'pass': None,
    'break': None,
    'continue': None,
    'stop': None,
    'return': None,
    'yield': None,

    # little wrappers
    'repeat': None,
    'unless': None,

    # error handling
    'assert': None,
    'raise': None,
    'try': None,
    'except': None,
    'finally': None,
    'errorCatcher': 'eatErrorCatcher',

    # instructions to the parser and compiler
    'breakpoint': 'eatBreakPoint',
    'compiler': 'eatCompiler',
    'compiler-settings': 'eatCompilerSettings',

    # misc
    'shBang': 'eatShbang',
    'encoding': 'eatEncoding',

    'end': 'eatEndDirective',
}
|---|
| 214 |
|
|---|
| 215 |
# Name accepted after '#end' -> name of the method handling it.
# Only 'def' has a dedicated handler; every other entry maps to None.
endDirectiveNamesAndHandlers = dict.fromkeys((
    'block',
    'closure',
    'cache',
    'call',
    'capture',
    'filter',
    'errorCatcher',
    'while',
    'for',
    'if',
    'try',
    'repeat',
    'unless',
), None)
endDirectiveNamesAndHandlers['def'] = 'handleEndDef'
|---|
| 231 |
|
|---|
| 232 |
|
|---|
| 233 |
|
|---|
| 234 |
|
|---|
| 235 |
|
|---|
| 236 |
|
|---|
| 237 |
class ParseError(ValueError):
    """Error raised when Cheetah source cannot be parsed.

    `stream` is the reader being parsed; it must provide pos(), setPos(),
    filename(), splitlines(), getRowColLine() and support len().

    `msg` is the headline of the report and `extMsg` optional text appended
    after the source excerpt. When `lineno` (1-based) is given, the report
    points at that line and at column `col` (0 if omitted) instead of at
    the stream's current position.
    """

    def __init__(self, stream, msg='Invalid Syntax', extMsg='', lineno=None, col=None):
        self.stream = stream
        # a position at or past the end is pulled back onto the last char
        if stream.pos() >= len(stream):
            stream.setPos(len(stream) - 1)
        self.msg = msg
        self.extMsg = extMsg
        self.lineno = lineno
        self.col = col

    def __str__(self):
        return self.report()

    def report(self):
        """Return the message, the location, and the offending line with up
        to three lines of source before and after it and a caret under the
        reported column.
        """
        stream = self.stream
        if stream.filename():
            f = " in file %s" % stream.filename()
        else:
            f = ''

        lines = stream.splitlines()
        if self.lineno:
            row, col, line = self.lineno, (self.col or 0), lines[self.lineno - 1]
        else:
            row, col, line = stream.getRowColLine()

        rowFormat = "%(row)-4d|%(line)s\n"
        current = max(row - 1, 0)        # 0-based index of the reported line
        firstPrev = max(current - 3, 0)  # index 0 (line 1) is valid context
        firstNext = max(row, 0)

        parts = ["\n\n%s\n" % self.msg,
                 "Line %i, column %i%s\n\n" % (row, col, f),
                 'Line|Cheetah Code\n',
                 '----|-------------------------------------------------------------\n']
        for offset, text in enumerate(lines[firstPrev:current]):
            parts.append(rowFormat % {'row': firstPrev + offset + 1, 'line': text})
        parts.append(rowFormat % {'row': row, 'line': line})
        # 5 = width of the 'NNNN|' gutter in front of every source line
        parts.append(' ' * 5 + ' ' * (col - 1) + "^\n")
        for offset, text in enumerate(lines[firstNext:firstNext + 3]):
            parts.append(rowFormat % {'row': firstNext + offset + 1, 'line': text})

        if self.extMsg:
            parts.append(self.extMsg + '\n')

        return ''.join(parts)
|---|
| 298 |
|
|---|
| 299 |
class ForbiddenSyntax(ParseError):
    """ParseError subclass; common base of the Forbidden* errors."""


class ForbiddenExpression(ForbiddenSyntax):
    """ForbiddenSyntax raised for an expression."""


class ForbiddenDirective(ForbiddenSyntax):
    """ForbiddenSyntax raised for a directive."""
|---|
| 302 |
|
|---|
| 303 |
class CheetahVariable:
    """Plain record describing one Cheetah variable reference.

    Stores its four constructor arguments unchanged as attributes of the
    same names: nameChunks, useNameMapper, cacheToken and rawSource.
    """

    def __init__(self, nameChunks, useNameMapper=True, cacheToken=None,
                 rawSource=None):
        self.rawSource = rawSource
        self.cacheToken = cacheToken
        self.useNameMapper = useNameMapper
        self.nameChunks = nameChunks


class Placeholder(CheetahVariable):
    """A CheetahVariable under a distinct type name; adds no behavior."""
|---|
| 312 |
|
|---|
| 313 |
class ArgList:
    """Used by _LowLevelParser.getArgList()

    Accumulates argument names and the source text of their default values.
    `i` is the index of the argument that addToDefVal() currently appends
    to; next() moves it on to the following argument.
    """

    def __init__(self):
        self.argNames = []
        self.defVals = []
        self.i = 0

    def addArgName(self, name):
        """Register a new argument that has no default value yet."""
        self.argNames.append(name)
        self.defVals.append(None)

    def next(self):
        """Make the following argument the target of addToDefVal()."""
        self.i += 1

    def addToDefVal(self, token):
        """Append `token` to the default value of the current argument."""
        i = self.i
        if self.defVals[i] is None:
            self.defVals[i] = ''
        self.defVals[i] += token

    def merge(self):
        """Return a list of (name, default) pairs with whitespace stripped.

        `default` is None for an argument without one. String defaults are
        also stripped in place in self.defVals.
        """
        defVals = self.defVals
        for i, val in enumerate(defVals):
            if isinstance(val, str):
                defVals[i] = val.strip()

        # addArgName() keeps both lists the same length, so zip() pairs them
        # exactly as the Python-2-only map(None, ...) did.
        return list(zip([name.strip() for name in self.argNames], defVals))

    def __str__(self):
        return str(self.merge())
|---|
| 344 |
|
|---|
| 345 |
class _LowLevelParser(SourceReader): |
|---|
| 346 |
"""This class implements the methods to match or extract ('get*') the basic |
|---|
| 347 |
elements of Cheetah's grammar. It does NOT handle any code generation or |
|---|
| 348 |
state management. |
|---|
| 349 |
""" |
|---|
| 350 |
|
|---|
| 351 |
_settingsManager = None |
|---|
| 352 |
|
|---|
| 353 |
def setSettingsManager(self, settingsManager):
    """Store the object that all the settings accessors of this parser
    delegate to.
    """
    self._settingsManager = settingsManager
|---|
| 355 |
|
|---|
| 356 |
def setting(self, key, default=Unspecified):
    """Return the value of setting `key` from the settings manager.

    `default` is forwarded to the manager only when the caller supplied
    one; otherwise the manager's own handling of a missing key applies.
    """
    manager = self._settingsManager
    if default is not Unspecified:
        return manager.setting(key, default=default)
    return manager.setting(key)
|---|
| 361 |
|
|---|
| 362 |
def setSetting(self, key, val):
    """Set setting `key` to `val` on the settings manager."""
    manager = self._settingsManager
    manager.setSetting(key, val)
|---|
| 364 |
|
|---|
| 365 |
def settings(self):
    """Return whatever the settings manager's settings() returns."""
    manager = self._settingsManager
    return manager.settings()
|---|
| 367 |
|
|---|
| 368 |
def updateSettings(self, settings):
    """Pass `settings` on to the settings manager's updateSettings()."""
    manager = self._settingsManager
    manager.updateSettings(settings)
|---|
| 370 |
|
|---|
| 371 |
def _initializeSettings(self): |
|---|
| 372 |
self._settingsManager._initializeSettings() |
|---|
| 373 |
|
|---|
| 374 |
def configureParser(self):
    """Is called by the Compiler instance after the parser has had a
    settingsManager assigned with self.setSettingsManager()

    Builds every token regex from the current settings, then prepares the
    two attributes matchTopLevelToken() works with:

    _possibleNonStrConstantChars -- first characters of all start tokens,
        used as a cheap pre-filter before any matcher is run
    _nonStrConstMatchers -- the matchers, in the order they are tried
    """
    self._makeCheetahVarREs()
    self._makeCommentREs()
    self._makeDirectiveREs()
    self._makePspREs()

    startTokens = [self.setting(name) for name in (
        'commentStartToken',
        'multiLineCommentStartToken',
        'cheetahVarStartToken',
        'directiveStartToken',
        'PSPStartToken')]
    # matchEOLSlurpToken is one of the matchers below, so its token has to
    # pass the pre-filter too; without this an EOLSlurpToken whose first
    # character differs from all the other start tokens could never match.
    slurpToken = self.setting('EOLSlurpToken')
    if slurpToken:
        startTokens.append(slurpToken)
    self._possibleNonStrConstantChars = ''.join(
        [token[0] for token in startTokens])

    self._nonStrConstMatchers = [
        self.matchCommentStartToken,
        self.matchMultiLineCommentStartToken,
        self.matchVariablePlaceholderStart,
        self.matchExpressionPlaceholderStart,
        self.matchDirective,
        self.matchPSPStartToken,
        self.matchEOLSlurpToken,
    ]
|---|
| 397 |
|
|---|
| 398 |
|
|---|
| 399 |
|
|---|
| 400 |
def _makeCheetahVarREs(self):
    """Setup the regexs for Cheetah $var parsing.

    Sets cacheTokenRE, silentPlaceholderTokenRE, cheetahVarStartRE,
    cheetahVarStartToken, cheetahVarStartTokenRE,
    cheetahVarInExpressionStartTokenRE, expressionPlaceholderStartRE and
    EOLSlurpRE (None when the EOLSlurpToken setting is empty).
    """
    inlineWS = r'[ \t\f]*'
    openEnclosure = r'(?:\{|\(|\[)' + inlineWS   # '{', '(' or '[' + blanks
    nameLookAhead = r'(?=[A-Za-z_])'

    # cache token: '*<interval>*' (refresh), '*' (static) or nothing
    num = r'[0-9\.]+'
    interval = (r'(?P<interval>'
                + '|'.join([num + unit for unit in ('s', 'm', 'h', 'd', 'w', '')])
                + ')')
    cacheToken = ''.join([
        r'(?:',
        r'(?P<REFRESH_CACHE>\*', interval, r'\*)',
        '|',
        r'(?P<STATIC_CACHE>\*)',
        '|',
        r'(?P<NO_CACHE>)',
        ')'])
    self.cacheTokenRE = re.compile(cacheToken)

    # silence token: '!' or nothing
    silentPlaceholderToken = ''.join([
        r'(?:',
        r'(?P<SILENT>', escapeRegexChars('!'), ')',
        '|',
        r'(?P<NOT_SILENT>)',
        ')'])
    self.silentPlaceholderTokenRE = re.compile(silentPlaceholderToken)

    varStartEsc = escapeRegexChars(self.setting('cheetahVarStartToken'))

    # start token + silence token + cache token + optional enclosure,
    # directly followed by the first character of a name
    self.cheetahVarStartRE = re.compile(''.join([
        escCharLookBehind,
        r'(?P<startToken>', varStartEsc, ')',
        r'(?P<silenceToken>', silentPlaceholderToken, ')',
        r'(?P<cacheToken>', cacheToken, ')',
        r'(?P<enclosure>|(?:', openEnclosure, '))',
        nameLookAhead]))

    # only the start token; whatever may follow it is merely looked at
    validCharsLookAhead = r'(?=[A-Za-z_\*!\{\(\[])'
    self.cheetahVarStartToken = self.setting('cheetahVarStartToken')
    self.cheetahVarStartTokenRE = re.compile(
        escCharLookBehind + varStartEsc + validCharsLookAhead)

    self.cheetahVarInExpressionStartTokenRE = re.compile(
        varStartEsc + nameLookAhead)

    # start token + cache token + mandatory enclosure that is not
    # immediately closed again
    self.expressionPlaceholderStartRE = re.compile(''.join([
        escCharLookBehind,
        r'(?P<startToken>', varStartEsc, ')',
        r'(?P<cacheToken>', cacheToken, ')',
        openEnclosure,
        r'(?=[^\)\}\]])']))

    slurpToken = self.setting('EOLSlurpToken')
    if not slurpToken:
        self.EOLSlurpRE = None
    else:
        self.EOLSlurpRE = re.compile(
            escapeRegexChars(slurpToken) + inlineWS + r'(?:' + EOL + ')')
|---|
| 467 |
|
|---|
| 468 |
|
|---|
| 469 |
def _makeCommentREs(self):
    """Construct the regex bits that are used in comment parsing.

    Sets commentStartTokenRE, multiLineCommentTokenStartRE and
    multiLineCommentEndTokenRE; each one only matches its token when the
    token is not preceded by a backslash.
    """
    singleStart = escapeRegexChars(self.setting('commentStartToken'))
    self.commentStartTokenRE = re.compile(escCharLookBehind + singleStart)

    multiStart = escapeRegexChars(self.setting('multiLineCommentStartToken'))
    multiEnd = escapeRegexChars(self.setting('multiLineCommentEndToken'))
    self.multiLineCommentTokenStartRE = re.compile(
        escCharLookBehind + multiStart)
    self.multiLineCommentEndTokenRE = re.compile(
        escCharLookBehind + multiEnd)
|---|
| 483 |
|
|---|
| 484 |
def _makeDirectiveREs(self):
    """Construct the regexs that are used in directive parsing.

    Sets directiveStartTokenRE and directiveEndTokenRE. The start regex
    requires a letter, '_' or '@' after the token, optionally separated
    from it by blanks or tabs when the
    'allowWhitespaceAfterDirectiveStartToken' setting is on.
    """
    startTokenEsc = escapeRegexChars(self.setting('directiveStartToken'))
    endTokenEsc = escapeRegexChars(self.setting('directiveEndToken'))

    startPattern = escCharLookBehind + startTokenEsc
    if self.setting('allowWhitespaceAfterDirectiveStartToken'):
        startPattern += '[ \t]*'
    startPattern += r'(?=[A-Za-z_@])'

    self.directiveStartTokenRE = re.compile(startPattern)
    self.directiveEndTokenRE = re.compile(escCharLookBehind + endTokenEsc)
|---|
| 497 |
|
|---|
| 498 |
def _makePspREs(self):
    """Setup the regexs for PSP parsing.

    Sets PSPStartTokenRE and PSPEndTokenRE from the 'PSPStartToken' and
    'PSPEndToken' settings; neither matches a token that is preceded by a
    backslash.
    """
    for attrName, settingName in (('PSPStartTokenRE', 'PSPStartToken'),
                                  ('PSPEndTokenRE', 'PSPEndToken')):
        tokenEsc = escapeRegexChars(self.setting(settingName))
        setattr(self, attrName, re.compile(escCharLookBehind + tokenEsc))
|---|
| 506 |
|
|---|
| 507 |
|
|---|
| 508 |
def isLineClearToStartToken(self, pos=None):
    """Alias of self.isLineClearToPos(pos); returns its result unchanged."""
    lineIsClear = self.isLineClearToPos(pos)
    return lineIsClear
|---|
| 510 |
|
|---|
| 511 |
def matchTopLevelToken(self):
    """Returns the first match found from the following methods:
        self.matchCommentStartToken
        self.matchMultiLineCommentStartToken
        self.matchVariablePlaceholderStart
        self.matchExpressionPlaceholderStart
        self.matchDirective
        self.matchPSPStartToken
        self.matchEOLSlurpToken

    Returns None if the next character cannot start any of these tokens.
    If it can but no matcher succeeds, the (false) result of the last
    matcher tried is returned.
    """
    found = None
    # cheap pre-filter: most characters cannot start any token at all
    if self.peek() in self._possibleNonStrConstantChars:
        for tryMatch in self._nonStrConstMatchers:
            found = tryMatch()
            if found:
                return found
    return found
|---|
| 530 |
|
|---|
| 531 |
def matchPyToken(self):
    """Match one Python token at the current position.

    tokenize's pseudoprog only reports the opener of a triple-quoted
    string, so when the token is such an opener the matching regex from
    tripleQuotedStringREs is tried and, if it succeeds, its match (the
    whole string literal) is returned instead.
    """
    source, offset = self.src(), self.pos()
    match = pseudoprog.match(source, offset)

    if match and match.group() in tripleQuotedStringStarts:
        TQSmatch = tripleQuotedStringREs[match.group()].match(source, offset)
        if TQSmatch:
            return TQSmatch
    return match
|---|
| 539 |
|
|---|
| 540 |
def getPyToken(self):
    """Consume and return the Python token at the current position.

    Raises ParseError when nothing matches, or when the match is only the
    opener of a triple-quoted string, i.e. matchPyToken() could not match
    the complete literal.
    """
    token = self.matchPyToken()
    if token is None:
        raise ParseError(self)
    if token.group() in tripleQuotedStringStarts:
        raise ParseError(self, msg='Malformed triple-quoted string')
    return self.readTo(token.end())
|---|
| 547 |
|
|---|
| 548 |
def matchEOLSlurpToken(self):
    """Match the EOL slurp token at the current position.

    Returns None when there is no match or when self.EOLSlurpRE is unset.
    """
    slurpRE = self.EOLSlurpRE
    if not slurpRE:
        return None
    return slurpRE.match(self.src(), self.pos())
|---|
| 551 |
|
|---|
| 552 |
def getEOLSlurpToken(self):
    """Consume and return the EOL slurp token; raise ParseError if the
    current position does not start one.
    """
    found = self.matchEOLSlurpToken()
    if found:
        return self.readTo(found.end())
    raise ParseError(self, msg='Invalid EOL slurp token')
|---|
| 557 |
|
|---|
| 558 |
def matchCommentStartToken(self):
    """Match the single-line comment start token at the current position."""
    source, offset = self.src(), self.pos()
    return self.commentStartTokenRE.match(source, offset)
|---|
| 560 |
|
|---|
| 561 |
def getCommentStartToken(self):
    """Consume and return the single-line comment start token; raise
    ParseError if the current position does not start one.
    """
    found = self.matchCommentStartToken()
    if found:
        return self.readTo(found.end())
    raise ParseError(self, msg='Invalid single-line comment start token')
|---|
| 566 |
|
|---|
| 567 |
def matchMultiLineCommentStartToken(self):
    """Match the multi-line comment start token at the current position."""
    source, offset = self.src(), self.pos()
    return self.multiLineCommentTokenStartRE.match(source, offset)
|---|
| 569 |
|
|---|
| 570 |
def getMultiLineCommentStartToken(self):
    """Consume and return the multi-line comment start token; raise
    ParseError if the current position does not start one.
    """
    found = self.matchMultiLineCommentStartToken()
    if found:
        return self.readTo(found.end())
    raise ParseError(self, msg='Invalid multi-line comment start token')
|---|
| 575 |
|
|---|
| 576 |
def matchMultiLineCommentEndToken(self):
    """Match the multi-line comment end token at the current position."""
    source, offset = self.src(), self.pos()
    return self.multiLineCommentEndTokenRE.match(source, offset)
|---|
| 578 |
|
|---|
| 579 |
def getMultiLineCommentEndToken(self):
    """Consume and return the multi-line comment end token; raise
    ParseError if the current position does not start one.
    """
    found = self.matchMultiLineCommentEndToken()
    if found:
        return self.readTo(found.end())
    raise ParseError(self, msg='Invalid multi-line comment end token')
|---|
| 584 |
|
|---|
| 585 |
def getDottedName(self):
    """Consume and return a dotted name such as 'a.b.c'.

    A dot is only consumed when the character after it can start an
    identifier, so a trailing '.' is left in the stream. Raises ParseError
    if the current character cannot start an identifier.
    """
    srcLen = len(self)

    if self.peek() not in identchars:
        raise ParseError(self)

    chunks = []
    while self.pos() < srcLen:
        ch = self.peek()
        if ch in namechars:
            chunks.append(self.getIdentifier())
            continue
        if (ch != '.'
                or self.pos() + 1 >= srcLen
                or self.peek(1) not in identchars):
            break
        chunks.append(self.getc())

    return ''.join(chunks)
|---|
| 606 |
|
|---|
| 607 |
def matchIdentifier(self):
    """Match an identifier (identRE) at the current position."""
    source, offset = self.src(), self.pos()
    return identRE.match(source, offset)
|---|
| 609 |
|
|---|
| 610 |
def getIdentifier(self):
    """Consume and return the identifier at the current position; raise
    ParseError if there is none.
    """
    found = self.matchIdentifier()
    if found:
        return self.readTo(found.end())
    raise ParseError(self, msg='Invalid identifier')
|---|
| 615 |
|
|---|
| 616 |
def matchOperator(self):
    """Return the Python-token match at the current position if that token
    is listed in `operators`; otherwise None.
    """
    token = self.matchPyToken()
    if token and token.group() not in operators:
        return None
    return token
|---|
| 621 |
|
|---|
| 622 |
def getOperator(self):
    """Consume and return the operator at the current position; raise
    ParseError if there is none.
    """
    found = self.matchOperator()
    if found:
        return self.readTo(found.end())
    raise ParseError(self, msg='Expected operator')
|---|
| 627 |
|
|---|
| 628 |
def matchAssignmentOperator(self):
    """Return the Python-token match at the current position if that token
    is listed in `assignmentOps`; otherwise None.
    """
    token = self.matchPyToken()
    if token and token.group() not in assignmentOps:
        return None
    return token
|---|
| 633 |
|
|---|
| 634 |
def getAssignmentOperator(self):
    """Consume and return the assignment operator at the current position;
    raise ParseError if there is none.
    """
    found = self.matchAssignmentOperator()
    if found:
        return self.readTo(found.end())
    raise ParseError(self, msg='Expected assignment operator')
|---|
| 639 |
|
|---|
| 640 |
def matchDirective(self):
    """Returns False or the name of the directive matched.

    Pure look-ahead: the start token is consumed only temporarily and the
    stream position is put back before returning.
    """
    bookmark = self.pos()
    directiveName = False
    if self.matchDirectiveStartToken():
        self.getDirectiveStartToken()
        directiveName = self.matchDirectiveName()
        self.setPos(bookmark)
    return directiveName
|---|
| 650 |
|
|---|
| 651 |
def matchDirectiveName(self, directiveNameChars=identchars+'0123456789-@'):
    """Return the longest known directive name starting at the current
    position, or False if there is none. The stream position is restored.

    Characters are read for as long as they are in `directiveNameChars`;
    every prefix read so far is looked up in self._directiveNamesAndParsers.
    """
    startPos = self.pos()
    directives = self._directiveNamesAndParsers.keys()
    directiveName = False
    name = ''
    while not self.atEnd():
        c = self.getc()
        if c not in directiveNameChars:
            break
        name += c
        if name in directives:
            # prefixes only ever grow, so the latest hit is the longest one
            directiveName = name

    self.setPos(startPos)
    return directiveName
|---|
| 673 |
|
|---|
| 674 |
def matchDirectiveStartToken(self):
    """Match the directive start token at the current position."""
    source, offset = self.src(), self.pos()
    return self.directiveStartTokenRE.match(source, offset)
|---|
| 676 |
|
|---|
| 677 |
def getDirectiveStartToken(self):
    """Consume and return the directive start token; raise ParseError if
    the current position does not start one.
    """
    found = self.matchDirectiveStartToken()
    if found:
        return self.readTo(found.end())
    raise ParseError(self, msg='Invalid directive start token')
|---|
| 682 |
|
|---|
| 683 |
def matchDirectiveEndToken(self):
    """Match the directive end token at the current position."""
    source, offset = self.src(), self.pos()
    return self.directiveEndTokenRE.match(source, offset)
|---|
| 685 |
|
|---|
| 686 |
def getDirectiveEndToken(self):
    """Consume and return the directive end token; raise ParseError if the
    current position does not start one.
    """
    found = self.matchDirectiveEndToken()
    if found:
        return self.readTo(found.end())
    raise ParseError(self, msg='Invalid directive end token')
|---|
| 691 |
|
|---|
| 692 |
|
|---|
| 693 |
def matchColonForSingleLineShortFormDirective(self):
    """Return True if the current character is a ':' that is followed, on
    the same line, by something other than whitespace or a single-line
    comment; otherwise False.
    """
    if self.atEnd() or self.peek() != ':':
        return False

    restOfLine = self[self.pos()+1:self.findEOL()].strip()
    if not restOfLine:
        return False
    return not self.commentStartTokenRE.match(restOfLine)
|---|
| 704 |
|
|---|
| 705 |
def matchPSPStartToken(self):
    """Match the PSP start token at the current position."""
    source, offset = self.src(), self.pos()
    return self.PSPStartTokenRE.match(source, offset)
|---|
| 707 |
|
|---|
| 708 |
def matchPSPEndToken(self):
    """Match the PSP end token at the current position."""
    source, offset = self.src(), self.pos()
    return self.PSPEndTokenRE.match(source, offset)
|---|
| 710 |
|
|---|
| 711 |
def getPSPStartToken(self):
    """Consume and return the PSP start token; raise ParseError if the
    current position does not start one.
    """
    found = self.matchPSPStartToken()
    if found:
        return self.readTo(found.end())
    raise ParseError(self, msg='Invalid psp start token')
|---|
| 716 |
|
|---|
| 717 |
def getPSPEndToken(self):
    """Consume and return the PSP end token; raise ParseError if the
    current position does not start one.
    """
    found = self.matchPSPEndToken()
    if found:
        return self.readTo(found.end())
    raise ParseError(self, msg='Invalid psp end token')
|---|
| 722 |
|
|---|
| 723 |
def matchCheetahVarStart(self):
    """includes the enclosure and cache token"""
    source, offset = self.src(), self.pos()
    return self.cheetahVarStartRE.match(source, offset)
|---|
| 726 |
|
|---|
| 727 |
def matchCheetahVarStartToken(self):
    """Match just the $var start token at the current position.

    cheetahVarStartTokenRE consumes only the start token; silence token,
    cache token, enclosure or name after it are merely looked ahead at.
    """
    source, offset = self.src(), self.pos()
    return self.cheetahVarStartTokenRE.match(source, offset)
|---|
| 730 |
|
|---|
| 731 |
def matchCheetahVarInExpressionStartToken(self):
    """no enclosures or cache tokens allowed"""
    source, offset = self.src(), self.pos()
    return self.cheetahVarInExpressionStartTokenRE.match(source, offset)
|---|
| 734 |
|
|---|
| 735 |
def matchVariablePlaceholderStart(self):
    """includes the enclosure and cache token

    Uses the same regex (cheetahVarStartRE) as matchCheetahVarStart().
    """
    source, offset = self.src(), self.pos()
    return self.cheetahVarStartRE.match(source, offset)
|---|
| 738 |
|
|---|
| 739 |
def matchExpressionPlaceholderStart(self):
    """includes the enclosure and cache token"""
    source, offset = self.src(), self.pos()
    return self.expressionPlaceholderStartRE.match(source, offset)
|---|
| 742 |
|
|---|
| 743 |
def getCheetahVarStartToken(self):
    """just the start token, not the enclosure or cache token

    Consumes and returns it; raises ParseError if the current position
    does not start a $var.
    """
    found = self.matchCheetahVarStartToken()
    if found:
        return self.readTo(found.end())
    raise ParseError(self, msg='Expected Cheetah $var start token')
|---|
| 749 |
|
|---|
| 750 |
|
|---|
| 751 |
def getCacheToken(self):
    """Consume and return the cache token at the current position.

    The token may be the empty string, because cacheTokenRE has an empty
    alternative. Raises ParseError only if the regex does not match;
    errors raised by the stream itself now propagate unchanged instead of
    being caught by a bare except and reported as a ParseError.
    """
    token = self.cacheTokenRE.match(self.src(), self.pos())
    if token is None:
        raise ParseError(self, msg='Expected cache token')
    self.setPos(token.end())
    return token.group()
|---|
| 758 |
|
|---|
| 759 |
def getSilentPlaceholderToken(self):
    """Consume and return the silent-placeholder token at the current
    position.

    The token may be the empty string, because silentPlaceholderTokenRE
    has an empty alternative. Raises ParseError only if the regex does not
    match; errors raised by the stream itself now propagate unchanged
    instead of being caught by a bare except and reported as a ParseError.
    """
    token = self.silentPlaceholderTokenRE.match(self.src(), self.pos())
    if token is None:
        raise ParseError(self, msg='Expected silent placeholder token')
    self.setPos(token.end())
    return token.group()
|---|
| 766 |
|
|---|
| 767 |
|
|---|
| 768 |
|
|---|
| 769 |
def getTargetVarsList(self):
    """Consume a comma/whitespace separated list of variable names and
    return the names as a list of dotted-name strings.

    Each name may be a bare identifier or a $var; for a $var the start
    token and any silence/cache tokens are consumed and discarded.
    Reading stops, without consuming, at the end of the line, at the word
    'in' followed by a space or tab, or at anything that cannot start a
    name.
    """
    varnames = []
    while not self.atEnd():
        ch = self.peek()
        if ch in ' \t\f':
            self.getWhiteSpace()
            continue
        if ch in '\r\n':
            break
        if self.startswith(','):
            self.advance()
            continue
        if self.startswith('in ') or self.startswith('in\t'):
            break

        if self.matchCheetahVarInExpressionStartToken():
            self.getCheetahVarStartToken()
            self.getSilentPlaceholderToken()
            self.getCacheToken()
        elif not self.matchIdentifier():
            break
        varnames.append(self.getDottedName())
    return varnames
|---|
| 791 |
|
|---|
| 792 |
def getCheetahVar(self, plain=False, skipStartToken=False): |
|---|
| 793 |
"""This is called when parsing inside expressions. Cache tokens are only |
|---|
| 794 |
valid in placeholders so this method discards any cache tokens found. |
|---|
| 795 |
|
|---|