Module pyparsing
[frames] | [no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2015  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.1.1" 
  61  __versionTime__ = "21 Mar 2016 05:04 UTC" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  import functools 
  74  import itertools 
  75  import traceback 
  76   
  77  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  78   
  79  __all__ = [ 
  80  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  81  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  82  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  83  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  84  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  85  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',  
  86  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  87  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  88  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  89  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
  90  'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 
  91  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  92  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  93  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  94  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  95  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  96  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
  97  ] 
  98   
  99  PY_3 = sys.version.startswith('3') 
 100  if PY_3: 
 101      _MAX_INT = sys.maxsize 
 102      basestring = str 
 103      unichr = chr 
 104      _ustr = str 
 105   
 106      # build list of single arg builtins, that can be used as parse actions 
 107      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 108   
 109  else: 
 110      _MAX_INT = sys.maxint 
 111      range = xrange 
112 113 - def _ustr(obj):
114 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 115 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 116 then < returns the unicode object | encodes it with the default encoding | ... >. 117 """ 118 if isinstance(obj,unicode): 119 return obj 120 121 try: 122 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 123 # it won't break any existing code. 124 return str(obj) 125 126 except UnicodeEncodeError: 127 # Else encode it 128 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 129 xmlcharref = Regex('&#\d+;') 130 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 131 return xmlcharref.transformString(ret)
132 133 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 134 singleArgBuiltins = [] 135 import __builtin__ 136 for fname in "sum len sorted reversed list tuple set any all min max".split(): 137 try: 138 singleArgBuiltins.append(getattr(__builtin__,fname)) 139 except AttributeError: 140 continue 141 142 _generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Return C{data} with the characters &, >, <, " and ' replaced by XML entities."""
    # the ampersand pair comes first, so that the '&' of entities produced by
    # the later replacements is not escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for raw_char, entity in replacements:
        data = data.replace(raw_char, entity)
    return data
153
class _Constants(object):
    """Empty class; it defines no attributes or behavior of its own."""

# character sets built from the standard string module
alphas = string.ascii_uppercase + string.ascii_lowercase
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
# every character of string.printable except the whitespace characters
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        # when no separate message is given, the first argument is the message
        # and there is no input string to report against
        self.msg, self.pstr = (pstr, "") if msg is None else (msg, pstr)
        self.parserElement = elem

    def __getattr__( self, aname ):
        """Compute position attributes on demand; supported names are:
            - lineno - returns the line number of the exception text
            - col / column - returns the column number of the exception text
            - line - returns the line containing the exception text
           Any other name raises C{AttributeError}.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Return the stripped line on which the exception occurred, with
           C{markerString} (if not empty) inserted at the exception column.
        """
        text = self.line
        if markerString:
            split_at = self.column - 1
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        # advertise the computed attributes alongside the regular class attributes
        return ["lineno", "col", "line"] + dir(type(self))
210
class ParseException(ParseBaseException):
    """Exception thrown when a parse expression does not match its input.

       Attributes supported by name (inherited from C{ParseBaseException}):
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
219
class ParseFatalException(ParseBaseException):
    """User-throwable exception, raised when inconsistent parse content
       is found; stops all parsing immediately."""
224
class ParseSyntaxException(ParseFatalException):
    """just like C{L{ParseFatalException}}, but thrown internally when an
       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        # rebuild this exception from the fields of the exception being wrapped
        source_text, position = pe.pstr, pe.loc
        message, element = pe.msg, pe.parserElement
        super(ParseSyntaxException, self).__init__(source_text, position, message, element)
232
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # the sequence of parser elements handed in by the caller, kept for reporting
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # wrap the trace in a 1-tuple so that a tuple-valued trace is formatted
        # as a single value instead of being unpacked as multiple % arguments
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
253
class _ParseResultsWithOffset(object):
    """Pair of values stored as the tuple C{self.tup}, indexable like a tuple;
       the second member can be replaced with C{setOffset}."""
    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        return self.tup[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self,i):
        # keep the first member, swap in the new second member
        first = self.tup[0]
        self.tup = (first, i)
263
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})
       """
    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
        # an existing instance is returned unchanged; only a freshly created
        # object is flagged for initialization by __init__
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # int/slice -> token list; anything else -> named results
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                # only the most recently stored value is reported
                return self.__tokdict[i][-1][0]
            else:
                # all stored values are reported, wrapped in a new ParseResults
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,(int,slice)):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference back to the containing results
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name,occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return ( not not self.__toklist )
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )
    def iterkeys( self ):
        """Returns all named result keys."""
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def itervalues( self ):
        """Returns all named result values."""
        return (self[k] for k in self.iterkeys())

    def iteritems( self ):
        """Returns all named result keys and values as (key, value) pairs."""
        return ((k, self[k]) for k in self.iterkeys())

    if PY_3:
        keys = iterkeys
        values = itervalues
        items = iteritems
    else:
        def keys( self ):
            """Returns all named result keys."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result keys and values as a list of tuples."""
            return list(self.iteritems())

    def haskeys( self ):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop( self, *args, **kwargs):
        """Removes and returns item at specified index (default=last).
           Supports both list and dict semantics for pop(). If passed no
           argument or an integer argument, it will use list semantics
           and pop tokens from the list of parsed tokens. If passed a
           non-integer argument (most likely a string), it will use dict
           semantics and pop the corresponding value from any defined
           results names. A second default return value argument is
           supported, just as in dict.pop()."""
        if not args:
            args = [-1]
        for k,v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                        len(args) == 1 or
                        args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name,occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append( self, item ):
        """Add single element to end of ParseResults list of elements."""
        self.__toklist.append(item)

    def extend( self, itemseq ):
        """Add sequence of elements to end of ParseResults list of elements."""
        if isinstance(itemseq, ParseResults):
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear( self ):
        """Clear all elements and results names."""
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__( self, name ):
        # attribute access is a lookup by results name; an undefined name
        # yields an empty string instead of raising
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            # named results coming from other are shifted past our current tokens
            offset = len(self.__toklist)
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        if isinstance(other,int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # this may raise a TypeError - so be it
            return other + self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens: nested
           C{ParseResults} are converted to lists, all other tokens are returned as stored."""
        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

    def asDict( self ):
        """Returns the named parse results as a nested dictionary."""
        if PY_3:
            item_fn = self.items
        else:
            item_fn = self.iteritems

        def toItem(obj):
            if isinstance(obj, ParseResults):
                if obj.haskeys():
                    return obj.asDict()
                else:
                    return [toItem(v) for v in obj]
            else:
                return obj

        return dict((k,toItem(v)) for k,v in item_fn())

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        # NOTE(review): the listing this was recovered from collapses runs of
        # whitespace - confirm the width of this indent literal against the real file
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                # nested results render themselves, tagged with their name if they have one
                out += [ res.asXML(namedItems.get(i),
                                    namedItemsOnly and doctag is None,
                                    nextLevelIndent,
                                    formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = namedItems.get(i)
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object sub is stored, if any
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be indexed on Python 3, so take the first
               # (and only) entry through an iterator
               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        # NOTE(review): the listing this was recovered from collapses runs of
        # whitespace - confirm the width of the ' ' indent literals against the real file
        out = []
        NL = '\n'
        out.append( indent+_ustr(self.asList()) )
        if self.haskeys():
            items = sorted(self.items())
            for k,v in items:
                if out:
                    out.append(NL)
                out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
                if isinstance(v,ParseResults):
                    if v:
                        out.append( v.dump(indent,depth+1) )
                    else:
                        out.append(_ustr(v))
                else:
                    out.append(_ustr(v))
        elif any(isinstance(vv,ParseResults) for vv in self):
            v = self
            for i,vv in enumerate(v):
                if isinstance(vv,ParseResults):
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
                else:
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """Pretty-printer for parsed results as a list, using the C{pprint} module.
           Accepts additional positional or keyword args as defined for the
           C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __getnewargs__(self):
        return self.__toklist, self.__name, self.__asList, self.__modal

    def __dir__(self):
        return (dir(type(self)) + list(self.keys()))
# Register ParseResults as a virtual MutableMapping subclass. The ABC lives in
# collections.abc on Python 3 (the collections.MutableMapping alias was removed
# in Python 3.10); Python 2 only has it on the collections module itself.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    _MutableMapping = collections.MutableMapping
_MutableMapping.register(ParseResults)
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   A location that falls on a newline character is reported as the column just
   past the last character of the line that the newline terminates.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # rfind returns -1 when no newline precedes loc, which makes the first
    # line's columns come out 1-based; right after a newline the result is 1
    return loc - strg.rfind("\n", 0, loc)
725
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # each newline before loc starts another line
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
737
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       """
    # the line starts just after the last newline before loc (index 0 if none)
    start = strg.rfind("\n", 0, loc) + 1
    # and runs up to the first newline at or after loc (end of string if none)
    end = strg.find("\n", loc)
    return strg[start:] if end < 0 else strg[start:end]
747
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression, the location and the (line,col) of that location in C{instring}."""
    prefix = "Match " + _ustr(expr) + " at loc " + _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print (prefix + position)
750
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression together with the list form of the tokens in C{toks}."""
    matched_tokens = str(toks.asList())
    print ("Matched " + _ustr(expr) + " -> " + matched_tokens)
753
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the string form of the exception C{exc}."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
756
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

       Accepts any positional arguments, ignores them and returns C{None}."""
    return None
760 761 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 762 #~ 'decorator to trim function calls to match the arity of the target' 763 #~ def _trim_arity(func, maxargs=3): 764 #~ if func in singleArgBuiltins: 765 #~ return lambda s,l,t: func(t) 766 #~ limit = 0 767 #~ foundArity = False 768 #~ def wrapper(*args): 769 #~ nonlocal limit,foundArity 770 #~ while 1: 771 #~ try: 772 #~ ret = func(*args[limit:]) 773 #~ foundArity = True 774 #~ return ret 775 #~ except TypeError: 776 #~ if limit == maxargs or foundArity: 777 #~ raise 778 #~ limit += 1 779 #~ continue 780 #~ return wrapper 781 782 # this version is Python 2.x-3.x cross-compatible 783 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap C{func} so that it can be called with a full argument list even
       when it accepts fewer arguments.

       The wrapper first calls C{func} with all arguments; each time that call
       raises C{TypeError} at the call line itself, it drops one more leading
       argument and retries, giving up after more than C{maxargs} arguments have
       been dropped. Once a call has succeeded, later C{TypeError}s are
       re-raised unchanged. A C{func} found in C{singleArgBuiltins} is wrapped
       directly as a function of the last of three arguments.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists serve as mutable cells that wrapper() can update
    limit = [0]
    foundArity = [False]
    def wrapper(*args):
        while 1:
            try:
                # the trailing comment on the next line is a sentinel: the handler
                # below compares it with the source text of the innermost traceback
                # frame, so this line must stay exactly as it is
                ret = func(*args[limit[0]:]) #~@$^*)+_(&%#!=-`~;:"[]{}
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # source text of the innermost frame of the traceback
                        exc_source_line = traceback.extract_tb(tb)[-1][-1]
                        if not exc_source_line.endswith('#~@$^*)+_(&%#!=-`~;:"[]{}'):
                            # raised somewhere other than the sentinel line
                            raise
                    finally:
                        del tb

                # drop one more leading argument and try again
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise
    return wrapper
814 -class ParserElement(object):
815 """Abstract base level parser element class.""" 816 DEFAULT_WHITE_CHARS = " \n\t\r" 817 verbose_stacktrace = False 818 819 @staticmethod
820 - def setDefaultWhitespaceChars( chars ):
821 """Overrides the default whitespace chars 822 """ 823 ParserElement.DEFAULT_WHITE_CHARS = chars
824 825 @staticmethod
826 - def inlineLiteralsUsing(cls):
827 """ 828 Set class to be used for inclusion of string literals into a parser. 829 """ 830 ParserElement.literalStringClass = cls
831
832 - def __init__( self, savelist=False ):
833 self.parseAction = list() 834 self.failAction = None 835 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 836 self.strRepr = None 837 self.resultsName = None 838 self.saveAsList = savelist 839 self.skipWhitespace = True 840 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 841 self.copyDefaultWhiteChars = True 842 self.mayReturnEmpty = False # used when checking for left-recursion 843 self.keepTabs = False 844 self.ignoreExprs = list() 845 self.debug = False 846 self.streamlined = False 847 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 848 self.errmsg = "" 849 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 850 self.debugActions = ( None, None, None ) #custom debug actions 851 self.re = None 852 self.callPreparse = True # used to avoid redundant calls to preParse 853 self.callDuringTry = False
854
855 - def copy( self ):
856 """Make a copy of this C{ParserElement}. Useful for defining different parse actions 857 for the same parsing pattern, using copies of the original parse element.""" 858 cpy = copy.copy( self ) 859 cpy.parseAction = self.parseAction[:] 860 cpy.ignoreExprs = self.ignoreExprs[:] 861 if self.copyDefaultWhiteChars: 862 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 863 return cpy
864
865 - def setName( self, name ):
866 """Define name for this expression, for use in debugging.""" 867 self.name = name 868 self.errmsg = "Expected " + self.name 869 if hasattr(self,"exception"): 870 self.exception.msg = self.errmsg 871 return self
872
873 - def setResultsName( self, name, listAllMatches=False ):
874 """Define name for referencing matching tokens as a nested attribute 875 of the returned parse results. 876 NOTE: this returns a *copy* of the original C{ParserElement} object; 877 this is so that the client can define a basic element, such as an 878 integer, and reference it in multiple places with different names. 879 880 You can also set results names using the abbreviated syntax, 881 C{expr("name")} in place of C{expr.setResultsName("name")} - 882 see L{I{__call__}<__call__>}. 883 """ 884 newself = self.copy() 885 if name.endswith("*"): 886 name = name[:-1] 887 listAllMatches=True 888 newself.resultsName = name 889 newself.modalResults = not listAllMatches 890 return newself
891
def setBreak(self,breakFlag = True):
    """Arrange for the Python C{pdb} debugger to be entered just before this
    element is parsed.  Pass C{breakFlag=True} to enable, C{False} to
    disable.  Returns C{self}.
    """
    if not breakFlag:
        # undo one level of wrapping, if a wrapper is currently installed
        current = self._parse
        if hasattr( current, "_originalParseMethod" ):
            self._parse = current._originalParseMethod
        return self

    wrappedParse = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrappedParse( instring, loc, doActions, callPreParse )
    # remember what was wrapped so that setBreak(False) can restore it
    breaker._originalParseMethod = wrappedParse
    self._parse = breaker
    return self
909
def setParseAction( self, *fns, **kwargs ):
    """Replace this element's parse actions with the given callables.

    Each parse action is called after a successful match as
    C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    A parse action that modifies the tokens can return them; the returned
    value then replaces the original tokens.  Otherwise it does not need to
    return anything.

    The optional keyword argument C{callDuringTry} (default C{False}) is
    stored on the element as C{callDuringTry}.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    wrapped = list( map( _trim_arity, fns ) )
    self.parseAction = wrapped
    self.callDuringTry = kwargs.get( "callDuringTry", False )
    return self
930
def addParseAction( self, *fns, **kwargs ):
    """Append parse actions to this expression's existing list of parse
    actions.  See L{I{setParseAction}<setParseAction>}.

    C{callDuringTry} stays enabled once it has been set by any call."""
    self.parseAction.extend( map( _trim_arity, fns ) )
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get( "callDuringTry", False )
    return self
936
def addCondition(self, *fns, **kwargs):
    """Add one or more boolean predicate functions to this expression's list
    of parse actions.  See L{I{setParseAction}<setParseAction>} for the
    accepted call signatures.

    Each predicate is wrapped in a parse action that raises
    C{ParseException} when the predicate returns a false value and returns
    the tokens unchanged otherwise.

    Optional keyword arguments:
     - C{message} - text of the raised exception
       (default C{"failed user-defined condition"})
     - C{callDuringTry} - OR'ed into the element's C{callDuringTry} flag

    Returns C{self}."""
    msg = kwargs.get("message") or "failed user-defined condition"

    def makeConditionAction(fn):
        # Bind fn per predicate.  Defining the action directly inside the
        # loop below would make every action see the *last* fn (closures
        # look the loop variable up when called, not when defined).
        predicate = _trim_arity(fn)   # wrap once instead of on every call
        def pa(s,l,t):
            if not bool(predicate(s,l,t)):
                raise ParseException(s,l,msg)
            return t
        return pa

    for fn in fns:
        self.parseAction.append(makeConditionAction(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def setFailAction( self, fn ):
    """Define action to perform if parsing fails at this expression.
    Fail action fn is a callable function that takes the arguments
    C{fn(s,loc,expr,err)} where:
     - s = string being parsed
     - loc = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err = the exception thrown
    The function returns no value.  It may raise C{L{ParseFatalException}}
    if it is desired to stop parsing immediately.

    Returns C{self}."""
    self.failAction = fn
    return self
963
def _skipIgnorables( self, instring, loc ):
    """Return the location reached after skipping every ignorable expression
    that matches at C{loc}.

    Each expression in C{ignoreExprs} is applied repeatedly until it stops
    matching; the whole list is then retried for as long as the previous
    pass skipped something."""
    skippedAny = True
    while skippedAny:
        skippedAny = False
        for ignorable in self.ignoreExprs:
            try:
                while True:
                    loc, _unused = ignorable._parse( instring, loc )
                    skippedAny = True
            except ParseException:
                # this ignorable no longer matches here; try the next one
                pass
    return loc
976
def preParse( self, instring, loc ):
    """Return the location at which matching should start: C{loc} advanced
    past any ignorable expressions and then, if C{skipWhitespace} is set,
    past any characters found in C{whiteChars}."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    skippable = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc
988
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation of the matching step: consumes nothing and
    returns C{loc} unchanged together with an empty token list."""
    return loc, []
991
def postParse( self, instring, loc, tokenlist ):
    """Base implementation of the post-match hook: returns C{tokenlist}
    unchanged."""
    return tokenlist
994 995 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Core (non-memoizing) parse step for this element.

    Steps, in order:
     1. optionally run C{preParse} (skipped when either the C{callPreParse}
        argument or the element's C{callPreparse} attribute is false);
     2. call C{parseImpl} at the pre-parsed location, converting an
        C{IndexError} into a C{ParseException} located at the end of input;
     3. pass the tokens through C{postParse} and wrap them in
        C{ParseResults};
     4. run the parse actions if there are any and C{doActions} or
        C{callDuringTry} is true; a non-C{None} return value replaces the
        current results.

    When C{debug} is set, the configured C{debugActions} (start, success,
    exception) are invoked at the corresponding points; C{failAction}, if
    any, is called when C{parseImpl} raises a C{ParseBaseException}.

    Returns a C{(loc, ParseResults)} tuple; raises whatever
    C{ParseBaseException} the match or a parse action raises.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no debug hooks and no fail action
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # parseImpl runs at preloc, so the end-of-input guard must test
        # preloc: skipping trailing whitespace can move preloc to
        # len(instring) while loc is still inside the string, and elements
        # with mayIndexError=False would then leak a raw IndexError.
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1067
def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} with C{doActions=False} and return the
    location following the match.  A C{ParseFatalException} raised by the
    attempt is converted into an ordinary C{ParseException} at C{loc}."""
    try:
        endLoc, _tokens = self._parse( instring, loc, doActions=False )[:2]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return endLoc
1073
def canParseNext(self, instring, loc):
    """Return C{True} if C{tryParse} succeeds at C{loc}, C{False} if it
    raises C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
        return True
    except (ParseException, IndexError):
        return False
1081 1082 # this method gets repeatedly called during backtracking with the same arguments - 1083 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end for C{_parseNoCache}.

    Results are stored in C{ParserElement._exprArgCache}, keyed on
    C{(self, instring, loc, callPreParse, doActions)}.  A cached success is
    returned as C{(loc, copy of the results)}; a cached exception is raised
    again."""
    cache = ParserElement._exprArgCache
    key = (self,instring,loc,callPreParse,doActions)
    if key in cache:
        cached = cache[ key ]
        if isinstance(cached, Exception):
            raise cached
        endLoc, results = cached[0], cached[1]
        return (endLoc, results.copy())

    try:
        outcome = self._parseNoCache( instring, loc, doActions, callPreParse )
        # store a copy, so that the results handed to this caller are not
        # the same object as the cached ones
        cache[ key ] = (outcome[0],outcome[1].copy())
        return outcome
    except ParseBaseException as pe:
        # clear the traceback before keeping the exception in the cache
        pe.__traceback__ = None
        cache[ key ] = pe
        raise
# default parse entry point is the non-memoizing implementation
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
# (keys are the lookup tuples built by _parseCache; values are either a
# (loc, results) tuple or the exception raised for that key)
_exprArgCache = {}
@staticmethod
def resetCache():
    """Discard every entry of the shared C{_exprArgCache}."""
    ParserElement._exprArgCache.clear()
_packratEnabled = False
@staticmethod
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Both valid results
    and parsing exceptions are memoized.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the class method C{ParserElement.enablePackrat()}.  If
    your program uses C{psyco} to "compile as you go", you must call
    C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
    Python will crash.  For best results, call C{enablePackrat()} immediately
    after importing pyparsing.

    Calling this method again once packrat parsing is enabled has no effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    # route every subsequent _parse call through the memoizing front end
    ParserElement._parse = ParserElement._parseCache
1130
def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        endLoc, results = self._parse( instring, 0 )
        if parseAll:
            # require that only ignorables/whitespace remain after the match
            endLoc = self.preParse( instring, endLoc )
            endOfInput = Empty() + StringEnd()
            endOfInput._parse( instring, endLoc )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return results
1175
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator: each item yielded is a C{(tokens, start, end)} tuple.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every loop iteration
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                # pre-parsing was just done by hand, so tell _parse to skip it
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character past the pre-parsed position
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # nextloc (lower-case l) is loc after skipping
                        # ignorables/whitespace; nextLoc is the end of the match
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc: step forward so the loop terminates
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1225
def transformString( self, instring ):
    """Extension to C{L{scanString}}, to modify matching text with modified tokens that may
    be returned from a parse action.  To use C{transformString}, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking C{transformString()} on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  C{transformString()} returns the resulting transformed string.

    Note that calling this method sets C{keepTabs} on the element."""
    pieces = []
    copiedUpTo = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for tokens, start, end in self.scanString( instring ):
            # text between the previous match and this one is copied verbatim
            pieces.append( instring[copiedUpTo:start] )
            if tokens:
                if isinstance(tokens,ParseResults):
                    pieces.extend( tokens.asList() )
                elif isinstance(tokens,list):
                    pieces.extend( tokens )
                else:
                    pieces.append( tokens )
            copiedUpTo = end
        pieces.append( instring[copiedUpTo:] )
        nonEmpty = [piece for piece in pieces if piece]
        return "".join(map(_ustr,_flatten(nonEmpty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1258
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to C{L{scanString}}, simplifying the access to the tokens found
    to match the given parse expression: returns a C{ParseResults} holding the
    tokens of every match, without the start/end locations.  May be called with
    optional C{maxMatches} argument, to clip searching after 'n' matches are found.
    """
    try:
        found = [ tokens for tokens, _start, _end in self.scanString( instring, maxMatches ) ]
        return ParseResults( found )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1272
def __add__(self, other ):
    """Implementation of + operator - returns C{L{And}}.

    A string operand is first converted with C{literalStringClass}.  Any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1282
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with C{literalStringClass}.  Any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1292
def __sub__(self, other):
    """Implementation of - operator, returns C{L{And}} with error stop
    (an C{And._ErrorStop()} is inserted between the two operands).

    A string operand is first converted with C{literalStringClass}.  Any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1302
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with C{literalStringClass}.  Any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1312
1313 - def __mul__(self,other):
1314 """Implementation of * operator, allows use of C{expr * 3} in place of 1315 C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer 1316 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples 1317 may also include C{None} as in: 1318 - C{expr*(n,None)} or C{expr*(n,)} is equivalent 1319 to C{expr*n + L{ZeroOrMore}(expr)} 1320 (read as "at least n instances of C{expr}") 1321 - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 1322 (read as "0 to n instances of C{expr}") 1323 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} 1324 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} 1325 1326 Note that C{expr*(None,n)} does not raise an exception if 1327 more than n exprs exist in the input stream; that is, 1328 C{expr*(None,n)} does not enforce a maximum number of expr 1329 occurrences. If this behavior is desired, then write 1330 C{expr*(None,n) + ~expr} 1331 1332 """ 1333 if isinstance(other,int): 1334 minElements, optElements = other,0 1335 elif isinstance(other,tuple): 1336 other = (other + (None, None))[:2] 1337 if other[0] is None: 1338 other = (0, other[1]) 1339 if isinstance(other[0],int) and other[1] is None: 1340 if other[0] == 0: 1341 return ZeroOrMore(self) 1342 if other[0] == 1: 1343 return OneOrMore(self) 1344 else: 1345 return self*other[0] + ZeroOrMore(self) 1346 elif isinstance(other[0],int) and isinstance(other[1],int): 1347 minElements, optElements = other 1348 optElements -= minElements 1349 else: 1350 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 1351 else: 1352 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 1353 1354 if minElements < 0: 1355 raise ValueError("cannot multiply ParserElement by negative value") 1356 if optElements < 0: 1357 raise ValueError("second tuple value must be greater or equal to first tuple value") 1358 if minElements == optElements == 0: 1359 raise ValueError("cannot 
multiply ParserElement by 0 or (0,0)") 1360 1361 if (optElements): 1362 def makeOptionalList(n): 1363 if n>1: 1364 return Optional(self + makeOptionalList(n-1)) 1365 else: 1366 return Optional(self)
1367 if minElements: 1368 if minElements == 1: 1369 ret = self + makeOptionalList(optElements) 1370 else: 1371 ret = And([self]*minElements) + makeOptionalList(optElements) 1372 else: 1373 ret = makeOptionalList(optElements) 1374 else: 1375 if minElements == 1: 1376 ret = self 1377 else: 1378 ret = And([self]*minElements) 1379 return ret 1380
def __rmul__(self, other):
    """Implementation of * operator when the left operand is not a
    C{ParserElement}; delegates to C{__mul__}, so C{3 * expr} behaves like
    C{expr * 3}."""
    return self.__mul__(other)
1383
def __or__(self, other ):
    """Implementation of | operator - returns C{L{MatchFirst}}.

    A string operand is first converted with C{literalStringClass}.  Any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1393
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with C{literalStringClass}.  Any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1403
def __xor__(self, other ):
    """Implementation of ^ operator - returns C{L{Or}}.

    A string operand is first converted with C{literalStringClass}.  Any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1413
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with C{literalStringClass}.  Any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1423
def __and__(self, other ):
    """Implementation of & operator - returns C{L{Each}}.

    A string operand is first converted with C{literalStringClass}.  Any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1433
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with C{literalStringClass}.  Any
    other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
    C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1443
def __invert__( self ):
    """Implementation of ~ operator - returns C{L{NotAny}} wrapping this
    expression."""
    return NotAny( self )
1447
1448 - def __call__(self, name=None):
1449 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}:: 1450 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 1451 could be written as:: 1452 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 1453 1454 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be 1455 passed as C{True}. 1456 1457 If C{name} is omitted, same as calling C{L{copy}}. 1458 """ 1459 if name is not None: 1460 return self.setResultsName(name) 1461 else: 1462 return self.copy()
1463
def suppress( self ):
    """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
       cluttering up returned output.

       Returns a new C{Suppress} expression wrapping this one.
    """
    return Suppress( self )
1469
def leaveWhitespace( self ):
    """Disables the skipping of whitespace before matching the characters in the
       C{ParserElement}'s defined pattern.  This is normally only used internally by
       the pyparsing module, but may be needed in some whitespace-sensitive grammars.

       Returns C{self}.
    """
    self.skipWhitespace = False
    return self
1477
def setWhitespaceChars( self, chars ):
    """Overrides the default whitespace chars for this element and
       (re-)enables whitespace skipping.  Returns C{self}.
    """
    self.skipWhitespace = True
    self.whiteChars = chars
    # copy() resets whiteChars to the default only while this flag is set
    self.copyDefaultWhiteChars = False
    return self
1485
def parseWithTabs( self ):
    """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
       Must be called before C{parseString} when the input grammar contains elements that
       match C{<TAB>} characters.  Returns C{self}."""
    self.keepTabs = True
    return self
1492
def ignore( self, other ):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.

       A string is wrapped in C{Suppress}.  A C{Suppress} instance is added
       only if it is not already in the list; any other expression is copied
       and the copy is wrapped in C{Suppress}.  Returns C{self}.
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    ignorables = self.ignoreExprs
    if not isinstance( other, Suppress ):
        ignorables.append( Suppress( other.copy() ) )
    elif other not in ignorables:
        ignorables.append(other)
    return self
1507
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
       using the given callbacks for the start, success and exception events.
       A false value for any callback selects the corresponding module default.
       Returns C{self}."""
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1515
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
       Set C{flag} to True to enable (installing the default debug actions),
       False to disable.  Returns C{self}."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1524
def __str__( self ):
    """Return the element's C{name}.  The base initializer deliberately does
    not define C{name}; it has to be set by a subclass or by C{setName}."""
    return self.name
1527
def __repr__( self ):
    """Return the same text as the string conversion of this element
    (via C{_ustr})."""
    return _ustr(self)
1530
def streamline( self ):
    """Mark this element as streamlined and clear C{strRepr}.
    Returns C{self}."""
    self.streamlined = True
    self.strRepr = None
    return self
1535
def checkRecursion( self, parseElementList ):
    """Recursion-check hook; the base implementation does nothing."""
    pass
1538
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    The base implementation only calls C{checkRecursion} with a fresh empty
    list."""
    # NOTE(review): validateTrace is a mutable default; it is not read or
    # modified here, so it is harmless in this method.
    self.checkRecursion( [] )
1542
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       C{parseAll} is passed through to C{L{parseString}}, whose result is
       returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # not a file-like object: treat it as a filename.  The with-block
        # guarantees the handle is closed even if read() raises.
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1562
def __eq__(self,other):
    """Equality test.

     - against another C{ParserElement}: true for the same object or for
       equal instance attributes (C{vars});
     - against a string: true if this expression parses the whole string
       (C{parseAll=True});
     - against anything else: the comparison is delegated to the parent
       class via C{super}."""
    if isinstance(other, ParserElement):
        return self is other or vars(self) == vars(other)
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other
1574
def __ne__(self,other):
    """Inequality test; the negation of C{self == other}."""
    return not (self == other)
1577
def __hash__(self):
    """Hash by object identity (C{id}), independent of the attribute-based
    C{__eq__}."""
    return hash(id(self))
1580
def __req__(self,other):
    """Reflected equality helper; same result as C{self == other}."""
    return self == other
1583
def __rne__(self,other):
    """Reflected inequality helper; the negation of C{self == other}."""
    return not (self == other)
1586
def runTests(self, tests, parseAll=False):
    """Execute the parse expression on a series of test strings, printing each
       test followed by either the dumped parse results or a marker showing
       where the parse failed.  Quick and easy way to run a parse expression
       against a list of sample strings.

       Parameters:
        - tests - a list of separate test strings, or a multiline string of test strings
        - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests
    """
    if isinstance(tests, basestring):
        tests = map(str.strip, tests.splitlines())
    for testString in tests:
        report = [testString]
        try:
            parsed = self.parseString(testString, parseAll=parseAll)
            report.append(parsed.dump())
        except ParseException as pe:
            if '\n' in testString:
                # multi-line test: show the offending line with a caret under the column
                report.append(line(pe.loc, testString))
                report.append(' '*(col(pe.loc,testString)-1) + '^')
            else:
                report.append(' '*pe.loc + '^')
            report.append(str(pe))
        report.append('')
        print('\n'.join(report))
1611
class Token(ParserElement):
    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
    def __init__( self ):
        """Initialize the base C{ParserElement} with C{savelist=False}."""
        super(Token,self).__init__( savelist=False )
1617
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        """Set up the element with the fixed name C{"Empty"}; it is flagged
        as possibly returning empty and as not raising C{IndexError}."""
        super(Empty,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
1626
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        """Set up the element with the fixed name C{"NoMatch"} and the
        error message C{"Unmatchable token"}."""
        super(NoMatch,self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl( self, instring, loc, doActions=True ):
        """Always raise C{ParseException} at C{loc}."""
        raise ParseException(instring, loc, self.errmsg, self)
1639
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        """Store the string to match.  An empty string triggers a
        C{SyntaxWarning} and turns the instance into an C{L{Empty}}."""
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            # re-class the instance so that it behaves like Empty from now on
            self.__class__ = Empty
        quoted = '"%s"' % _ustr(self.match)
        self.name = quoted
        self.errmsg = "Expected " + quoted
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc + length, match string)} if the input at C{loc}
        starts with the literal; raise C{ParseException} otherwise."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen==1 or instring.startswith(self.match,loc):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
_L = Literal
ParserElement.literalStringClass = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{L{Literal}}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
       matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        """Store the keyword text and the set of identifier characters that
        may not adjoin a match.  With C{caseless}, both the keyword and
        C{identChars} are upper-cased for comparison."""
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # empty keyword: only warn; firstMatchChar stays undefined
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at C{loc}.

        Besides the text itself matching, the character right after the
        match (if any) and the character right before C{loc} (if any) must
        not be in C{identChars}.  Returns C{(loc + length, keyword)} or
        raises C{ParseException}."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Copy this keyword; the copy's C{identChars} is reset to the
        current C{Keyword.DEFAULT_KEYWORD_CHARS}."""
        c = super(Keyword,self).copy()
        # NOTE(review): this discards any custom identChars given to the
        # constructor and stores a string where __init__ stores a set -
        # confirm that this is intended.
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
1725
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        """Initialize the underlying C{Literal} with the upper-cased string
        and keep the original spelling as the value to return."""
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        quoted = "'%s'" % matchString
        self.name = quoted
        self.errmsg = "Expected " + quoted

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice at C{loc} with the stored
        upper-cased literal; return C{(loc + length, original spelling)} or
        raise C{ParseException}."""
        end = loc+self.matchLen
        if instring[ loc:end ].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
1742
class CaselessKeyword(Keyword):
    """Caseless variant of C{Keyword}.

    Constructed like C{Keyword}, always passing C{caseless=True}.  The
    token returned on a match is C{self.match}, not the input text.
    """
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at C{loc}, ignoring case.

        The upper-cased input slice must equal C{self.caselessmatch}, and
        neither the following nor the preceding character (upper-cased)
        may be in C{self.identChars}.  The preceding-character test was
        missing here although the caseless branch of C{Keyword.parseImpl}
        performs it; without it the keyword could match the tail of a
        longer identifier.

        Returns C{(loc + self.matchLen, self.match)}; raises
        C{ParseException} otherwise.
        """
        end = loc + self.matchLen
        if ( (instring[ loc:end ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars) and
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)
1752
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction. An optional
       C{excludeChars} parameter can list characters that might be found in
       the input C{bodyChars} string; useful to define a word of all printables
       except for one or two characters, for instance.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        """Build the character sets and length limits, and, for the plain
        default-length case, a compiled regular expression used as a fast
        path by C{parseImpl}.

        Raises C{ValueError} if C{min} is less than 1.
        """
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            # no separate body set: body characters are the initial characters
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # the regex fast path is only built when no space is among the
        # characters and all length arguments are left at their defaults
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-by-character matcher; a bare
                # except here would also have swallowed KeyboardInterrupt
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at C{loc} and return C{(end, matched_text)}.

        Uses the compiled regex when one is available, otherwise scans
        character by character.  Raises C{ParseException} on failure.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                raise ParseException(instring, loc, self.errmsg, self)

            loc = result.end()
            return loc, result.group()

        if not(instring[ loc ] in self.initChars):
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, the word must not continue past the limit
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # a keyword-style word may not touch body characters on either side
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string if it can be produced, otherwise a
        cached C{W:(...)} summary of the character sets."""
        try:
            return super(Word,self).__str__()
        except Exception:
            # presumably the base __str__ fails while no name has been
            # assigned yet (__init__ calls _ustr(self) before setting name)
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first 4 characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1875
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    # type of a compiled regular expression object, used to recognize
    # already-compiled patterns passed to the constructor
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled regular expression
        object, which is then used directly.  Any other type raises
        C{ValueError}; a string that fails to compile issues a warning
        and re-raises the compile error.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # use the pattern's source text; str() of a compiled pattern
            # gives its repr-like form, not the regular expression itself
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the regular expression at C{loc}.

        Returns C{(end, results)} where C{results} is a C{ParseResults}
        holding the matched text, with every named group of the match
        also stored under its group name.  Raises C{ParseException} if
        the expression does not match.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        """Return the base-class string if it can be produced, otherwise a
        cached C{Re:(...)} form built from the pattern."""
        try:
            return super(Regex,self).__str__()
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are no longer swallowed
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1939
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        r"""Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
            - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

            Raises C{SyntaxError} (after issuing a warning) if C{quoteChar} or
            C{endQuoteChar} is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # escape character as it must appear inside the negated character
        # class of the pattern (nothing when no escape character is used)
        if escChar is not None:
            escCharInClass = _escapeRegexRangeChars(escChar)
        else:
            escCharInClass = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharInClass )
        else:
            # single-line strings additionally exclude line breaks
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharInClass )
        if len(self.endQuoteChar) > 1:
            # allow any proper prefix of a multi-character end quote as long
            # as the character after it breaks the end-quote sequence
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc} and return C{(end, text)}.

        When C{self.unquoteResults} is set, the quotes are stripped and
        escaped whitespace, escaped characters and escaped quotes are
        converted as configured.  Raises C{ParseException} when there is
        no quoted string at C{loc}.
        """
        # test the first quote character before running the regex
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_map = {
                        r'\t' : '\t',
                        r'\n' : '\n',
                        r'\f' : '\f',
                        r'\r' : '\r',
                    }
                    for wslit,wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape sequence in a
                    # normal string literal and triggers a warning on
                    # newer Python versions
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base-class string if it can be produced, otherwise a
        cached description naming the start and end quote strings."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are no longer swallowed
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2063
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the disallowed characters and the length limits.

        Raises C{ValueError} if C{min} is less than 1.
        """
        super(CharsNotIn,self).__init__()
        # whitespace is not skipped before matching
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from C{loc} until a disallowed character, the
        maximum length, or the end of input is reached.

        Returns C{(end, matched_text)}; raises C{ParseException} if the
        first character is disallowed or fewer than C{self.minLen}
        characters were consumed.
        """
        if instring[loc] in self.notChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and \
              (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string if it can be produced, otherwise a
        cached C{!W:(...)} summary of the disallowed characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are no longer swallowed
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2126
class White(Token):
    """Matcher for significant whitespace.  pyparsing grammars normally skip
       whitespace; use this class where a whitespace structure has to be
       matched explicitly.  Construct it with a string of the whitespace
       characters to match (default C{" \\t\\r\\n"}) and optional C{min},
       C{max}, and C{exact} arguments, as defined for the C{L{Word}} class."""
    # display names used to build the expression name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # the characters to be matched must not be skipped as leading
        # whitespace, so take them out of the skippable set
        skippable = "".join(ch for ch in self.whiteChars if ch not in self.matchWhite)
        self.setWhitespaceChars(skippable)
        self.name = "".join(White.whiteStrs[ch] for ch in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both limits
        if exact > 0:
            self.maxLen = self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters at C{loc}.

        Returns C{(end, matched_text)}; raises C{ParseException} if the
        character at C{loc} is not one of them or the run is shorter than
        C{self.minLen}.
        """
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
2174
class _PositionToken(Token):
    """Base class for the position-testing tokens defined after it
    (C{GoToColumn}, C{LineStart}, C{LineEnd}, ...)."""
    def __init__(self):
        super(_PositionToken, self).__init__()
        # the expression is named after its concrete class
        self.name = type(self).__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
2182
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; useful when scraping tabular reports."""
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target column
        is reached, a non-space character is hit, or the input ends."""
        if col(loc, instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between C{loc} and the target column together
        with the new location; raise C{ParseException} if C{loc} is
        already past that column."""
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - current
        return target, instring[loc:target]
2205
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newlines are significant here, so they must not be skipped as whitespace
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Advance C{loc} by one when the base-class pre-parse stops on a
        newline character; otherwise return C{loc} unchanged.

        The bounds check guards the case where pre-parsing reaches the end
        of the input, where indexing C{instring} used to raise
        C{IndexError}.
        """
        preloc = super(LineStart,self).preParse(instring,loc)
        if preloc < len(instring) and instring[preloc] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when C{loc} is 0, equals the
        pre-parsed start of the input, or directly follows a newline;
        raise C{ParseException} otherwise."""
        if not( loc==0 or
            (loc == self.preParse( instring, 0 )) or
            (instring[loc-1] == "\n") ): #col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2225
class LineEnd(_PositionToken):
    """Matches when the current position is at the end of a line within the parse string"""
    def __init__(self):
        super(LineEnd, self).__init__()
        # newlines are what this token looks for, so they must not be
        # skipped as whitespace
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(loc+1, "\\n")} on a newline, C{(loc+1, [])} exactly at
        the end of the input, and raise C{ParseException} otherwise."""
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
2243
class StringStart(_PositionToken):
    """Matches when the current position is at the beginning of the parse string"""
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list at location 0, or at the location
        that pre-parsing from 0 arrives at (i.e. everything before C{loc}
        is whitespace and ignorables); raise C{ParseException} otherwise."""
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2256
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when C{loc} is at or beyond the
        end of the input; raise C{ParseException} when input remains.

        Exactly at the end the returned location is C{loc+1}; beyond the
        end C{loc} is returned unchanged.  The former trailing C{else}
        branch could never run (the three comparisons cover every case)
        and has been removed.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc+1, []
        return loc, []
2272
class WordStart(_PositionToken):
    """Matches when the current position is at the beginning of a Word and
       is not preceded by any character from a given set of C{wordChars}
       (default=C{printables}).  Use C{WordStart(alphanums)} to emulate the
       C{\\b} behavior of regular expressions.  C{WordStart} also matches at
       the beginning of the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return C{(loc, [])} at a word start; raise C{ParseException} when
        the previous character is a word character or the current one is
        not.  Location 0 always matches."""
        if loc != 0 and (instring[loc - 1] in self.wordChars or
                         instring[loc] not in self.wordChars):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2291
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of C{wordChars}
       (default=C{printables}).  To emulate the C{\\b} behavior of regular expressions,
       use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        # whitespace is not skipped before testing the position
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return C{(loc, [])} at a word end; raise C{ParseException} when
        the current character is a word character or the previous one is
        not.  A location at or past the end of the input always matches.

        Location 0 of a non-empty input never matches: no character
        precedes it.  Previously C{instring[loc-1]} wrapped around to the
        last character of the input there, so the outcome depended on
        unrelated text.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            if (loc == 0 or
                instring[loc] in self.wordChars or
                instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2312
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    # collections.Sequence is only an alias of collections.abc.Sequence and
    # is no longer available in Python 3.10+; fall back to the old name
    # where collections.abc does not exist
    try:
        from collections.abc import Sequence as _Sequence
    except ImportError:
        _Sequence = collections.Sequence

    def __init__( self, exprs, savelist = False ):
        """Store the contained expressions in C{self.exprs}.

        C{exprs} may be a single string, a generator, a sequence, any
        other iterable, or a single expression.  Strings (a lone string,
        or a sequence consisting only of strings) are converted with
        C{ParserElement.literalStringClass}, the same class the C{^=} and
        C{|=} operators of the subclasses use for string operands.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement.literalStringClass( exprs ) ]
        elif isinstance( exprs, self._Sequence ):
            # if sequence of strings provided, wrap with the literal class
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(ParserElement.literalStringClass, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                # not iterable: treat it as a single expression
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the contained expression(s) at index C{i}."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, clear the cached
        string representation, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the original contained expressions stay untouched
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as an ignorable expression on this expression
        and on every contained expression; returns C{self}.

        A C{Suppress} that is already in C{self.ignoreExprs} is not
        registered a second time.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        # pass on the entry the base class just stored
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base-class string if it can be produced, otherwise a
        cached C{ClassName:(exprs)} form."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are no longer swallowed
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline this expression and all contained expressions, merge
        directly nested expressions of the same class, refresh
        C{self.errmsg}, and return C{self}."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class C{setResultsName} and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=None ):
        """Validate every contained expression, passing along the trace
        extended by C{self}, then run C{checkRecursion} on this expression.

        C{validateTrace} defaults to an empty trace; the argument itself
        is never modified.
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are
        copies as well."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2422
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker element named C{'-'}.  Once C{And.parseImpl} has passed
        it, failures of the following expressions are raised as
        C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        # the sequence can only match empty if every element can
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows the first expression
        self.setWhitespaceChars( self.exprs[0].whiteChars )
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse all contained expressions in order and return the final
        location with the accumulated results.

        After an C{_ErrorStop} marker, a C{ParseBaseException} or
        C{IndexError} from a later expression is re-raised as
        C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            # only merge results that carry tokens or named values
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} to this sequence (C{+=}) and return C{self}.

        A string operand is converted with
        C{ParserElement.literalStringClass}, matching what C{Or.__ixor__}
        and C{MatchFirst.__ior__} do; previously C{Literal} was hard-coded
        here.
        """
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Run C{checkRecursion} on the contained expressions, stopping after
        the first one that cannot match empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return C{self.name} when set, otherwise a cached C{{a b c}} form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
2487
class Or(ParseExpression):
    """Requires at least one of the given C{ParseExpression}s to match.
       When several match, the one matching the longest string is used.
       May be constructed using the C{'^'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives at all the expression is flagged as
        # possibly empty
        self.mayReturnEmpty = (any(alt.mayReturnEmpty for alt in self.exprs)
                               if self.exprs else True)

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative, then parse with the candidates in order of
        decreasing match end and return the first successful result.

        If nothing parses, the exception that got furthest into the input
        is raised with its message replaced by C{self.errmsg}; with no
        recorded failure a C{ParseException} is raised.
        """
        farthestLoc = -1
        farthestErr = None
        candidates = []
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > farthestLoc:
                    farthestErr, farthestLoc = err, err.loc
            except IndexError:
                # running off the end counts as a failure at end of input
                if len(instring) > farthestLoc:
                    farthestErr = ParseException(instring, len(instring), alt.errmsg, self)
                    farthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alt))

        # stable sort: equally long matches keep their listed order
        for _, alt in sorted(candidates, key=lambda found: -found[0]):
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > farthestLoc:
                    farthestErr, farthestLoc = err, err.loc

        if farthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        farthestErr.msg = self.errmsg
        raise farthestErr


    def __ixor__(self, other ):
        """Append C{other} as a further alternative (C{^=}); a string is
        first converted with C{ParserElement.literalStringClass}."""
        addition = other
        if isinstance(addition, basestring):
            addition = ParserElement.literalStringClass(addition)
        return self.append(addition)

    def __str__( self ):
        """Return C{self.name} when set, otherwise a cached C{{a ^ b}} form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = (_ustr(alt) for alt in self.exprs)
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run C{checkRecursion} on every alternative with C{self} added to
        the list of elements seen so far."""
        seen = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(seen)
2557
class MatchFirst(ParseExpression):
    """Requires at least one of the given C{ParseExpression}s to match.
       When several could match, the first one listed wins.
       May be constructed using the C{'|'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst, self).__init__(exprs, savelist)
        # with no alternatives at all the expression is flagged as
        # possibly empty
        self.mayReturnEmpty = (any(alt.mayReturnEmpty for alt in self.exprs)
                               if self.exprs else True)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses.

        If none does, the exception that got furthest into the input is
        raised with its message replaced by C{self.errmsg}; with no
        recorded failure a C{ParseException} is raised.
        """
        farthestLoc = -1
        farthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > farthestLoc:
                    farthestErr, farthestLoc = err, err.loc
            except IndexError:
                # running off the end counts as a failure at end of input
                if len(instring) > farthestLoc:
                    farthestErr = ParseException(instring, len(instring), alt.errmsg, self)
                    farthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if farthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        farthestErr.msg = self.errmsg
        raise farthestErr

    def __ior__(self, other ):
        """Append C{other} as a further alternative (C{|=}); a string is
        first converted with C{ParserElement.literalStringClass}."""
        addition = other
        if isinstance(addition, basestring):
            addition = ParserElement.literalStringClass(addition)
        return self.append(addition)

    def __str__( self ):
        """Return C{self.name} when set, otherwise a cached C{{a | b}} form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = (_ustr(alt) for alt in self.exprs)
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run C{checkRecursion} on every alternative with C{self} added to
        the list of elements seen so far."""
        seen = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(seen)
2613
2614 2615 -class Each(ParseExpression):
2616 """Requires all given C{ParseExpression}s to be found, but in any order. 2617 Expressions may be separated by whitespace. 2618 May be constructed using the C{'&'} operator. 2619 """
2620 - def __init__( self, exprs, savelist = True ):
2621 super(Each,self).__init__(exprs, savelist) 2622 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 2623 self.skipWhitespace = True 2624 self.initExprGroups = True
2625
2626 - def parseImpl( self, instring, loc, doActions=True ):
2627 if self.initExprGroups: 2628 self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional)) 2629 opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] 2630 opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] 2631 self.optionals = opt1 + opt2 2632 self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] 2633 self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] 2634 self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] 2635 self.required += self.multirequired 2636 self.initExprGroups = False 2637 tmpLoc = loc 2638 tmpReqd = self.required[:] 2639 tmpOpt = self.optionals[:] 2640 matchOrder = [] 2641 2642 keepMatching = True 2643 while keepMatching: 2644 tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 2645 failed = [] 2646 for e in tmpExprs: 2647 try: 2648 tmpLoc = e.tryParse( instring, tmpLoc ) 2649 except ParseException: 2650 failed.append(e) 2651 else: 2652 matchOrder.append(self.opt1map.get(id(e),e)) 2653 if e in tmpReqd: 2654 tmpReqd.remove(e) 2655 elif e in tmpOpt: 2656 tmpOpt.remove(e) 2657 if len(failed) == len(tmpExprs): 2658 keepMatching = False 2659 2660 if tmpReqd: 2661 missing = ", ".join(_ustr(e) for e in tmpReqd) 2662 raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 2663 2664 # add any unmatched Optionals, in case they have default values defined 2665 matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] 2666 2667 resultlist = [] 2668 for e in matchOrder: 2669 loc,results = e._parse(instring,loc,doActions) 2670 resultlist.append(results) 2671 2672 finalResults = ParseResults() 2673 for r in resultlist: 2674 dups = {} 2675 for k in r.keys(): 2676 if k in finalResults: 2677 tmp = ParseResults(finalResults[k]) 2678 tmp += ParseResults(r[k]) 2679 dups[k] = tmp 2680 finalResults += ParseResults(r) 2681 for k,v in 
dups.items(): 2682 finalResults[k] = v 2683 return loc, finalResults
2684
def __str__(self):
    """Return the user-assigned name if one exists; otherwise build (once)
    and return a C{"{a & b & ...}"} rendering of the contained expressions."""
    if not hasattr(self, "name"):
        if self.strRepr is None:
            # cache the rendering so repeated str() calls are cheap
            rendered = " & ".join(_ustr(sub) for sub in self.exprs)
            self.strRepr = "{" + rendered + "}"
        return self.strRepr
    return self.name
2693
def checkRecursion(self, parseElementList):
    """Ask every contained expression to check for recursion, handing each
    one a fresh trail consisting of C{parseElementList} followed by this
    element."""
    trail = parseElementList + [self]
    for sub in self.exprs:
        sub.checkRecursion(trail)
2698
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored as C{self.expr}, which may be
    C{None} (for example a C{Forward} that has not been assigned yet).
    """
    def __init__( self, expr, savelist=False ):
        """Store C{expr} (a plain string is wrapped in a C{Literal}) and, when
        it is not C{None}, copy its parsing flags onto this element."""
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate parsing to the contained expression.

        Raises C{ParseException} when no expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element and on a private copy
        of the contained expression. Returns C{self}."""
        self.skipWhitespace = False
        # Guard BEFORE copying: the original copied first and therefore raised
        # AttributeError when no expression had been set.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as an ignorable expression on this element and
        forward the registered entry to the contained expression.

        A C{Suppress} that is already registered is not added again.
        Returns C{self}.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the contained expression. Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element already appears
        in C{parseElementList}; otherwise continue the check in the contained
        expression with this element appended to the trail."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the contained expression, then run the recursion check.

        The list default is only ever copied, never mutated, so sharing it
        across calls is harmless here.
        """
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string if it can be produced, otherwise a
        cached C{"ClassName:(expr)"} rendering."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # Base __str__ could not produce a string; fall through to the
            # generated form. Narrowed from a bare except so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2770
class FollowedBy(ParseElementEnhance):
    """Positive lookahead. Checks that the wrapped expression matches at the
    current position, without consuming any input: C{parseImpl} returns the
    location it was given together with an empty token list."""

    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # tryParse raises if the lookahead does not match; its result is not needed
        self.expr.tryParse(instring, loc)
        return (loc, [])
2784
class NotAny(ParseElementEnhance):
    """Negative lookahead. Succeeds, consuming nothing and returning an empty
    token list, only when the wrapped expression can *not* be parsed at the
    current position. C{NotAny} itself does not skip leading whitespace.
    May be constructed using the '~' operator."""

    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # Set the flag directly rather than calling self.leaveWhitespace():
        # the setting must NOT propagate to the wrapped expression.
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        if not self.expr.canParseNext(instring, loc):
            return (loc, [])
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__(self):
        """Return the assigned name, or a cached C{"~{expr}"} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "~{" + _ustr(self.expr) + "}"
            return self.strRepr
        return self.name
2812
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=None) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)
    """

    def __init__(self, expr, stopOn=None):
        super(OneOrMore, self).__init__(expr)
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = Literal(sentinel)
        # stored negated, so "not at the sentinel" can be tested with tryParse
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl(self, instring, loc, doActions=True):
        """Match the expression once (mandatory), then keep matching until it
        fails; returns the final location and the accumulated tokens."""
        parse_one = self.expr._parse
        skip_ignorables = self._skipIgnorables
        guard = None
        if self.not_ender is not None:
            guard = self.not_ender.tryParse

        # the first match is required; fail right away if positioned on the
        # stopOn sentinel
        if guard is not None:
            guard(instring, loc)
        loc, tokens = parse_one(instring, loc, doActions, callPreParse=False)

        try:
            have_ignorables = bool(self.ignoreExprs)
            while True:
                if guard is not None:
                    guard(instring, loc)
                if have_ignorables:
                    start = skip_ignorables(instring, loc)
                else:
                    start = loc
                loc, more = parse_one(instring, start, doActions)
                if more or more.haskeys():
                    tokens += more
        except (ParseException, IndexError):
            # the repetition ends at the first failed attempt
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name, or a cached C{"{expr}..."} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr
        return self.name

    def setResultsName(self, name, listAllMatches=False):
        """Same as the inherited method, but the returned element always has
        C{saveAsList} switched on."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2872
class ZeroOrMore(OneOrMore):
    """Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=None) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)
    """

    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Run the one-or-more parse; if even the first match fails, succeed
        with no tokens at the original location."""
        try:
            matched = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            return loc, []
        return matched

    def __str__(self):
        """Return the assigned name, or a cached C{"[expr]..."} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr
        return self.name
2900
class _NullToken(object):
    """Placeholder object that is falsy and renders as the empty string."""

    def __bool__(self):
        return False

    # Python 2 spelling of the truthiness hook
    __nonzero__ = __bool__

    def __str__(self):
        return ""


# unique sentinel used as the "no default supplied" marker
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    Parameters:
     - expr - expression that may match zero or one time
     - default (optional) - value to be returned if the optional expression
          is not found.
    """

    def __init__(self, expr, default=_optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the wrapped expression; on failure stay at C{loc} and return
        either the default value (also stored under the wrapped expression's
        results name, when it has one) or no tokens at all."""
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                # no default was supplied
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([fallback])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def __str__(self):
        """Return the assigned name, or a cached C{"[expr]"} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr
        return self.name
2944
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=False) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=None) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=None) define expressions that are not allowed to be
          included in the skipped text; scanning stops at the first position
          where this expression matches
    """

    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string failOn is promoted to a Literal
        self.failOn = Literal(failOn) if isinstance(failOn, basestring) else failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward one character at a time from C{loc} until the target
        expression parses; return the skipped text (plus the target's tokens
        when C{include} was requested).

        Raises C{ParseException} if the scan runs past the end of the input.
        """
        begin = loc
        limit = len(instring)
        target_parse = self.expr._parse
        fail_probe = None
        if self.failOn is not None:
            fail_probe = self.failOn.canParseNext
        ignore_probe = None
        if self.ignoreExpr is not None:
            ignore_probe = self.ignoreExpr.tryParse

        cursor = loc
        while cursor <= limit:
            # stop scanning as soon as the failOn expression matches here
            if fail_probe is not None and fail_probe(instring, cursor):
                break

            if ignore_probe is not None:
                # hop over as many consecutive ignorable matches as possible
                while True:
                    try:
                        cursor = ignore_probe(instring, cursor)
                    except ParseBaseException:
                        break

            try:
                target_parse(instring, cursor, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # target not here; move on by one character
                cursor += 1
            else:
                # target found
                break
        else:
            # loop condition went false: input exhausted without a match
            raise ParseException(instring, loc, self.errmsg, self)

        # assemble the result
        loc = cursor
        skipresult = ParseResults(instring[begin:loc])

        if self.includeMatch:
            loc, mat = target_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
3017
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Assign C{other} as the contained expression (a plain string is
        converted with C{ParserElement.literalStringClass}) and copy its
        parsing flags onto this element. Returns C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of C{<<}; same effect, returns C{self}."""
        return self << other

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the contained
        expression is left untouched. Returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression at most once. The flag is set
        before descending so a recursive grammar cannot loop back into this
        element. Returns C{self}."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression unless this element is already
        on the trace, then run the recursion check.

        The list default is only ever copied, never mutated.
        """
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name, or C{"<ClassName>: ..."}.

        The contained expression is deliberately not rendered. The statements
        that used to follow the return below (a temporary class swap to
        C{_ForwardNoRecurse}) were unreachable and have been removed; per the
        original comment, that approach was stubbed out because it
        "creates awful memory and perf issues".
        """
        if hasattr(self,"name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """Copy this element. When no expression has been assigned yet, return
        a new C{Forward} that refers to this one, so a later assignment to the
        original is seen through the copy."""
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
3094
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the literal C{"..."}."""

    def __str__(self):
        return "..."
3098
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""

    def __init__(self, expr, savelist=False):
        # NOTE: savelist is accepted but not passed on to the base class;
        # saveAsList is forced off instead.
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
3104
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """

    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # Turn off whitespace skipping inside the contained expression first,
        # then switch it back on for the Combine itself (order matters).
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        """Register an ignorable expression. In adjacent mode it is registered
        on this element only; otherwise the inherited behaviour applies.
        Returns C{self}."""
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the token list's contents with one joined string, keeping
        whatever else the copied results object carries."""
        joined = "".join(tokenlist._asStringList(self.joinString))
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and retToks.haskeys():
            return [retToks]
        return retToks
3136
class Group(TokenConverter):
    """Converter that returns the matched tokens nested inside a single list -
    useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""

    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        grouped = [tokenlist]
        return grouped
3145
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as an item key.
    """

    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add one named entry per non-empty token group, keyed by the group's
        first token (integer keys are converted to stripped strings)."""
        for index, tok in enumerate(tokenlist):
            size = len(tok)
            if size == 0:
                continue

            ikey = tok[0]
            if isinstance(ikey, int):
                ikey = _ustr(ikey).strip()

            if size == 1:
                # key only: the value is the empty string
                value = ""
            elif size == 2 and not isinstance(tok[1], ParseResults):
                # key plus one plain token: store that token directly
                value = tok[1]
            else:
                # everything after the key; unwrap when exactly one unnamed item remains
                dictvalue = tok.copy()
                del dictvalue[0]
                if len(dictvalue) != 1 or (isinstance(dictvalue, ParseResults) and dictvalue.haskeys()):
                    value = dictvalue
                else:
                    value = dictvalue[0]

            tokenlist[ikey] = _ParseResultsWithOffset(value, index)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
3178
class Suppress(TokenConverter):
    """Converter that discards the tokens of a parsed expression."""

    def postParse(self, instring, loc, tokenlist):
        # whatever was matched, hand back nothing
        return []

    def suppress(self):
        """Already suppressing, so just return C{self}."""
        return self
3187
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.

    A second call raises C{ParseException} until C{reset()} is invoked.
    """

    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        # only marked as used once the wrapped call has returned
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to run again."""
        self.called = False
3202
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an ">>entering" line to C{sys.stderr} before
    calling the action and a "<<leaving" line afterwards, showing either the
    return value or the exception (which is then re-raised).
    """
    f = _trim_arity(f)
    def z(*paArgs):
        # Use __name__, which exists on both Python 2 and 3; the original
        # used func_name, which is Python 2 only and raised AttributeError
        # on Python 3.
        thisFunc = f.__name__
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is taken to be the bound instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.

    With C{combine=False} (the default) the delimiters are suppressed and the
    items are returned as separate tokens. With C{combine=True} items and
    delimiters are wrapped in a C{Combine}, giving a single token string with
    the delimiters included.
    """
    itemText = _ustr(expr)
    dlName = itemText + " [" + _ustr(delim) + " " + itemText + "]..."
    if not combine:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    else:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    return listExpr.setName(dlName)
3241
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens are returned as a list of the expr tokens - the leading
    count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # rebuild the array expression to match the count just parsed
        n = t[0]
        if n:
            body = Group(And([expr] * n))
        else:
            body = Group(empty)
        arrayExpr << body
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression gains no parse action
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')
def _flatten(L):
    """Return a new flat list with every nested C{list} in C{L} expanded in
    place, depth-first. Only C{list} instances are expanded; other
    containers (tuples, strings, ...) are kept as single items."""
    flat = []
    pending = [iter(L)]
    while pending:
        # resume the innermost iterator; descend when a nested list appears
        for item in pending[-1]:
            if isinstance(item, list):
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # innermost iterator exhausted
            pending.pop()
    return flat
3270
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # re-point the repeater at literal(s) equal to what was just matched
        if not t:
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # several tokens: flatten, then require each one in sequence
        flat = _flatten(t.asList())
        rep << And(Literal(tok) for tok in flat)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()

    def copyTokenToRepeater(s, l, t):
        # remember what the first expression produced ...
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # ... and reject the repeat unless it produced the same tokens
            if _flatten(t.asList()) != expected:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def _escapeRegexRangeChars(s):
    """Prefix each of the characters C{\\ ^ - ]} in C{s} with C{_bslash}, and
    spell newline and tab as the two-character sequences C{\\n} and C{\\t}."""
    # the backslash must be handled first so later insertions are not re-escaped
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, _bslash + special)
    for raw, spelled in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw, spelled)
    return _ustr(s)
3332
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    An empty or unsupported C{strs} yields C{NoMatch()}; an unsupported type
    additionally issues a C{SyntaxWarning}.
    """
    # collections.Sequence was removed in Python 3.10; the ABCs live in
    # collections.abc on Python 3 and directly in collections on Python 2.
    try:
        from collections.abc import Sequence as _Sequence
    except ImportError:
        from collections import Sequence as _Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, _Sequence):
        symbols = list(strs)
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix, so longer alternatives are
    # always tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # position i is settled only when nothing after it conflicted
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class suffices
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # narrowed from a bare except so KeyboardInterrupt/SystemExit
            # propagate; any ordinary failure still falls back to MatchFirst
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
3396
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Builds C{Dict(ZeroOrMore(Group(key + value)))}, so the
    C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens are nested in the proper order.
    The key pattern can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text.  The value pattern can include
    named results, so that the C{Dict} results can include named token fields.
    """
    entry = Group(key + value)
    entries = ZeroOrMore(entry)
    return Dict(entries)
3406
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text.  By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values."""
    # zero-width markers that record the parse location before and after expr
    startMarker = Empty().setParseAction(lambda s, loc, t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    matchExpr = startMarker("_original_start") + expr + endMarker("_original_end")

    if not asString:
        def extractText(s, l, t):
            # replace the token list in place with the raw slice; the marker
            # names are popped, other results names stay on t
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
    else:
        def extractText(s, l, t):
            return s[t._original_start:t._original_end]

    matchExpr.setParseAction(extractText)
    return matchExpr
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions: wraps
    C{expr} in a C{TokenConverter} whose parse action returns only the first
    token."""
    unwrapped = TokenConverter(expr)
    return unwrapped.setParseAction(lambda t: t[0])
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.
    This helper adds the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}
    """
    # zero-width expression whose only token is the current parse location
    locator = Empty().setParseAction(lambda s, l, t: l)
    startLoc = locator("locn_start")
    # the end marker must not skip whitespace
    endLoc = locator.copy().leaveWhitespace()("locn_end")
    return Group(startLoc + expr("value") + endLoc)
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the "[...]" character-set syntax accepted by srange()
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Take the hex digits as everything after the first 'x'/'X'. The previous
# lstrip(r'\0x') stripped a *set* of characters, so it left the 'X' in place
# for "\X41" and stripped an all-zero value such as "\x00" down to "", and
# int() then raised ValueError in both cases.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split("x",1)[1],16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be::
        a single character
        an escaped character with a leading backslash (such as \- or \])
        an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
        an escaped octal character with a leading '\0' (\041, which is a '!' character)
        a range of any of the above, separated by a dash ('a-z', etc.)
        any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

    Returns the empty string if C{s} cannot be parsed or expanded.
    """
    def _expanded(part):
        # a ParseResults part is a (first, last) range; anything else is a single char
        if not isinstance(part, ParseResults):
            return part
        return ''.join(unichr(c) for c in range(ord(part[0]), ord(part[1]) + 1))

    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # Deliberately best-effort: any parse/expansion failure yields "".
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
        return ""
3486
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
    column in the input text.  The returned action raises C{ParseException}
    when the match does not start at column C{n}.
    """
    def verifyCol(strg, locn, toks):
        actual = col(locn, strg)
        if actual != n:
            raise ParseException(strg, locn, "matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.

    The returned action ignores its arguments and yields a one-element list
    holding C{replStr}.
    """
    action = lambda s, l, t: [replStr]
    return action
3501
def removeQuotes(s, l, t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
    To use, add this parse action to quoted string using::
        quotedString.setParseAction( removeQuotes )

    Drops the first and last character of the first token.
    """
    quoted = t[0]
    return quoted[1:-1]
3508
def upcaseTokens(s, l, t):
    """Helper parse action to convert tokens to upper case."""
    return [_ustr(tok).upper() for tok in t]
3512
def downcaseTokens(s, l, t):
    """Helper parse action to convert tokens to lower case."""
    return [_ustr(tok).lower() for tok in t]
3516
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} may be a string (turned into a C{Keyword}, caseless unless
    C{xml} is true) or an existing expression, whose C{name} is then used.
    Returns the pair C{(openTag, closeTag)}; both carry a C{tag} attribute
    holding the tag name.
    """
    if not isinstance(tagStr, basestring):
        resname = tagStr.name
    else:
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attribute = Group(tagAttrName + Suppress("=") + tagAttrValue)
    else:
        # HTML: values may be quoted or bare, or missing altogether;
        # attribute names are lower-cased
        printablesLessRAbrack = printables.replace(">", "")
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printablesLessRAbrack)
        attribute = Group(tagAttrName.setParseAction(downcaseTokens) +
                          Optional(Suppress("=") + tagAttrValue))

    attributes = Dict(ZeroOrMore(attribute))
    # "empty" result: True when the tag is self-closing ("/>")
    selfClosing = Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s, l, t: t[0] == '/')
    openTag = Suppress("<") + tagStr("tag") + attributes + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # e.g. "ns:item" -> "NsItem", giving results names "startNsItem"/"endNsItem"
    camelName = "".join(resname.replace(":", " ").title().split())
    openTag = openTag.setResultsName("start" + camelName).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + camelName).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3545
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.

    Returns the C{(openTag, closeTag)} pair built by C{_makeTags} in its
    non-XML mode.
    """
    htmlTags = _makeTags( tagStr, False )
    return htmlTags
3549
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.

    Returns the C{(openTag, closeTag)} pair built by C{_makeTags} in its
    XML mode.
    """
    xmlTags = _makeTags( tagStr, True )
    return xmlTags
3553
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       Call C{withAttribute} with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second or third form.
       If any positional name-value tuples are given, keyword arguments are ignored.
       Start tags built by C{L{makeHTMLTags}} store attribute names in lower case, so
       give names in lower case when filtering those tags.

       If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.

       The returned parse action raises C{ParseException} when a required attribute
       is missing or has a different value, and returns C{None} otherwise.
       """
    pairs = args[:] if args else attrDict.items()
    # unpack eagerly so a malformed pair fails here rather than at parse time
    required = [(name,value) for name,value in pairs]

    def pa(s,l,tokens):
        for name,expected in required:
            if name not in tokens:
                raise ParseException(s,l,"no matching attribute " + name)
            if expected != withAttribute.ANY_VALUE and tokens[name] != expected:
                details = (name, tokens[name], expected)
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % details)
    return pa

# unique sentinel meaning "attribute must be present, any value accepted"
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """Simplified version of C{L{withAttribute}} when matching on a div class - made
       difficult because C{class} is a reserved word in Python.

       Parameters:
        - classname - required value of the C{class} attribute
        - namespace - optional namespace prefix; when given, the attribute
          tested is C{<namespace>:class} instead of C{class} (default='')

       Returns the parse action produced by C{withAttribute}.
       """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})

# associativity markers used in infixNotation operator definitions;
# each one is a distinct sentinel object
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()

def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          nested expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple (or list) of two expressions,
              for the two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Raises C{ValueError} if numTerms is 3 and opExpr is not a sequence of two
       expressions, if numTerms is not 1, 2 or 3, or if rightLeftAssoc is
       neither C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the optional parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate before formatting the term name, so that a malformed
            # ternary definition reports this ValueError instead of failing
            # inside the string formatting
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            # wrap in a tuple so that a tuple-valued opExpr cannot be mistaken
            # for the formatting arguments
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: terms are simply juxtaposed
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    # no operator expression: terms are simply juxtaposed
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # each precedence level matches its own operator form, or falls
        # through to the previous (tighter-binding) level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# operatorPrecedence is a second name bound to the infixNotation function
operatorPrecedence = infixNotation

# quoted string expressions: the quotes may enclose doubled quote characters,
# \x hex escapes, and backslash-escaped characters, but no bare newlines
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
    ).setName("string enclosed in single quotes")
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
    ).setName("quotedString using single or double quotes")
# a quoted string immediately preceded by the letter u
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.  In that case both delimiters must be
       strings.

       Use the C{ignoreExpr} argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
       The default is L{quotedString}, but if no expressions are to be ignored,
       then pass C{None} for this argument.

       Raises C{ValueError} if C{opener} equals C{closer}, or if no content
       expression is given and the delimiters are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t:t[0].strip()
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer) == 1

        if singleCharDelims:
            # single-character delimiters can simply be excluded from the content characters
            excluded = opener+closer+whiteChars
            if ignoreExpr is not None:
                content = Combine(OneOrMore(~ignoreExpr +
                                CharsNotIn(excluded,exact=1))
                            ).setParseAction(stripToken)
            else:
                content = empty.copy()+CharsNotIn(excluded).setParseAction(stripToken)
        else:
            # multi-character delimiters need a negative lookahead before each content character
            if ignoreExpr is not None:
                content = Combine(OneOrMore(~ignoreExpr +
                                ~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whiteChars,exact=1))
                            ).setParseAction(stripToken)
            else:
                content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whiteChars,exact=1))
                            ).setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is not None:
        items = ZeroOrMore( ignoreExpr | ret | content )
    else:
        items = ZeroOrMore( ret | content )
    ret <<= Group( Suppress(opener) + items + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
3732
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that C{blockStatementExpr} is modified in place: it is told to
       ignore backslash line continuations.
    """
    def checkPeerIndent(s,l,t):
        # at end of input there is no column to compare
        if l >= len(s):
            return
        column = col(l,s)
        expected = indentStack[-1]
        if column == expected:
            return
        if column > expected:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( column )

    def checkUnindent(s,l,t):
        # at end of input there is no column to compare
        if l >= len(s):
            return
        column = col(l,s)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    if indent:
        leadingNL = Optional(NL)
        statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
        smExpr = Group( leadingNL + INDENT + statements + UNDENT )
    else:
        leadingNL = Optional(NL)
        statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
        smExpr = Group( leadingNL + statements )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

# character sets given as ranges of \0x## hex codes in the 0xa1-0xff range
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# HTML-style open/close tag expressions matching any tag name
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
commonHTMLEntity = Regex(
    '&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");"
    ).setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters.

    Looks up the C{entity} named result of C{t} in C{_htmlEntityMap} and
    returns the mapped character, or C{None} if the name is not in the map.
    """
    entityName = t.entity
    return _htmlEntityMap.get(entityName)

# Comment expressions are common and easy to get wrong, so ready-made
# versions are provided here.
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
    ).setName("C++ style comment")

# javaStyleComment is the same expression object as cppStyleComment
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="")
    ).setName("commaSeparatedList")


if __name__ == "__main__":

    # small demonstration grammar: SELECT <columns> FROM <tables>
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) ).setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) ).setName("tables")
    simpleSQL = ( selectToken +
                  ( '*' | columnNameList ).setResultsName( "columns" ) +
                  fromToken +
                  tableNameList.setResultsName( "tables" ) )

    # the sample statements include several deliberately malformed ones
    simpleSQL.runTests("""\
        SELECT * from XYZZY, ABC
        select * from SYS.XYZZY
        Select A from Sys.dual
        Select AA,BB,CC from Sys.dual
        Select A, B, C from Sys.dual
        Select A, B, C from Sys.dual
        Xelect A, B, C from Sys.dual
        Select A, B, C frox Sys.dual
        Select
        Select ^^^ frox Sys.dual
        Select A, B, C from Sys.dual, Table2""")