| Package pyparsing ::
        Module pyparsing |  | 
   1   
   2   
   3   
   4   
   5   
   6   
   7   
   8   
   9   
  10   
  11   
  12   
  13   
  14   
  15   
  16   
  17   
  18   
  19   
  20   
  21   
  22   
  23   
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.0.2" 
  61  __versionTime__ = "13 April 2014 11:10" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73   
  74   
  75  __all__ = [ 
  76  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  77  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  78  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  79  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  80  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  81  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 
  82  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  83  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  84  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  85  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
  86  'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 
  87  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  88  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  89  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  90  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  91  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  92  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 
  93  ] 
  94   
  95  PY_3 = sys.version.startswith('3') 
  96  if PY_3: 
  97      _MAX_INT = sys.maxsize 
  98      basestring = str 
  99      unichr = chr 
 100      _ustr = str 
 101   
 102       
 103      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 104   
 105  else: 
 106      _MAX_INT = sys.maxint 
 107      range = xrange 
 108   
 110          """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 
 111             str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 
 112             then < returns the unicode object | encodes it with the default encoding | ... >. 
 113          """ 
 114          if isinstance(obj,unicode): 
 115              return obj 
 116   
 117          try: 
 118               
 119               
 120              return str(obj) 
 121   
 122          except UnicodeEncodeError: 
 123               
 124               
 125               
 126               
 127               
 128              return unicode(obj) 
  129               
 130               
 131               
 132               
 133               
 134               
 135   
 136       
 137      singleArgBuiltins = [] 
 138      import __builtin__ 
 139      for fname in "sum len sorted reversed list tuple set any all min max".split(): 
 140          try: 
 141              singleArgBuiltins.append(getattr(__builtin__,fname)) 
 142          except AttributeError: 
 143              continue 
 144               
 145  _generatorType = type((y for y in range(1))) 
 146    
 148      """Escape &, <, >, ", ', etc. in a string of data.""" 
 149   
 150       
 151      from_symbols = '&><"\'' 
 152      to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split()) 
 153      for from_,to_ in zip(from_symbols, to_symbols): 
 154          data = data.replace(from_, to_) 
 155      return data 
  156   
 159   
 160  alphas = string.ascii_lowercase + string.ascii_uppercase 
 161  nums       = "0123456789" 
 162  hexnums    = nums + "ABCDEFabcdef" 
 163  alphanums  = alphas + nums 
 164  _bslash    = chr(92) 
 165  printables = "".join(c for c in string.printable if c not in string.whitespace) 
 166   
 168      """base exception class for all parsing runtime exceptions""" 
 169       
 170       
 171 -    def __init__( self, pstr, loc=0, msg=None, elem=None ): 
  172          self.loc = loc 
 173          if msg is None: 
 174              self.msg = pstr 
 175              self.pstr = "" 
 176          else: 
 177              self.msg = msg 
 178              self.pstr = pstr 
 179          self.parserElement = elem 
  180   
 182          """supported attributes by name are: 
 183              - lineno - returns the line number of the exception text 
 184              - col - returns the column number of the exception text 
 185              - line - returns the line containing the exception text 
 186          """ 
 187          if( aname == "lineno" ): 
 188              return lineno( self.loc, self.pstr ) 
 189          elif( aname in ("col", "column") ): 
 190              return col( self.loc, self.pstr ) 
 191          elif( aname == "line" ): 
 192              return line( self.loc, self.pstr ) 
 193          else: 
 194              raise AttributeError(aname) 
  195   
 197          return "%s (at char %d), (line:%d, col:%d)" % \ 
 198                  ( self.msg, self.loc, self.lineno, self.column ) 
  212          return "loc msg pstr parserElement lineno col line " \ 
 213                 "markInputline __str__ __repr__".split() 
   214   
 216      """exception thrown when parse expressions don't match class; 
 217         supported attributes by name are: 
 218          - lineno - returns the line number of the exception text 
 219          - col - returns the column number of the exception text 
 220          - line - returns the line containing the exception text 
 221      """ 
 222      pass 
  223   
 225      """user-throwable exception thrown when inconsistent parse content 
 226         is found; stops all parsing immediately""" 
 227      pass 
  228   
 230      """just like C{L{ParseFatalException}}, but thrown internally when an 
 231         C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because 
 232         an unbacktrackable syntax error has been found""" 
  236   
 237   
 238       
 239          
 240           
 241           
 242          
 243          
 244          
 245          
 246       
 247           
 248           
 249   
 251      """exception thrown by C{validate()} if the grammar could be improperly recursive""" 
 252 -    def __init__( self, parseElementList ): 
  253          self.parseElementTrace = parseElementList 
  254   
 256          return "RecursiveGrammarException: %s" % self.parseElementTrace 
   257   
 264          return repr(self.tup) 
  266          self.tup = (self.tup[0],i) 
  269      """Structured parse results, to provide multiple means of access to the parsed data: 
 270         - as a list (C{len(results)}) 
 271         - by list index (C{results[0], results[1]}, etc.) 
 272         - by attribute (C{results.<resultsName>}) 
 273         """ 
 274 -    def __new__(cls, toklist, name=None, asList=True, modal=True ): 
  275          if isinstance(toklist, cls): 
 276              return toklist 
 277          retobj = object.__new__(cls) 
 278          retobj.__doinit = True 
 279          return retobj 
  280   
 281       
 282       
 283 -    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ): 
  284          if self.__doinit: 
 285              self.__doinit = False 
 286              self.__name = None 
 287              self.__parent = None 
 288              self.__accumNames = {} 
 289              if isinstance(toklist, list): 
 290                  self.__toklist = toklist[:] 
 291              elif isinstance(toklist, _generatorType): 
 292                  self.__toklist = list(toklist) 
 293              else: 
 294                  self.__toklist = [toklist] 
 295              self.__tokdict = dict() 
 296   
 297          if name is not None and name: 
 298              if not modal: 
 299                  self.__accumNames[name] = 0 
 300              if isinstance(name,int): 
 301                  name = _ustr(name)  
 302              self.__name = name 
 303              if not toklist in (None,'',[]): 
 304                  if isinstance(toklist,basestring): 
 305                      toklist = [ toklist ] 
 306                  if asList: 
 307                      if isinstance(toklist,ParseResults): 
 308                          self[name] = _ParseResultsWithOffset(toklist.copy(),0) 
 309                      else: 
 310                          self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 
 311                      self[name].__name = name 
 312                  else: 
 313                      try: 
 314                          self[name] = toklist[0] 
 315                      except (KeyError,TypeError,IndexError): 
 316                          self[name] = toklist 
  317   
 319          if isinstance( i, (int,slice) ): 
 320              return self.__toklist[i] 
 321          else: 
 322              if i not in self.__accumNames: 
 323                  return self.__tokdict[i][-1][0] 
 324              else: 
 325                  return ParseResults([ v[0] for v in self.__tokdict[i] ]) 
  326   
 328          if isinstance(v,_ParseResultsWithOffset): 
 329              self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 
 330              sub = v[0] 
 331          elif isinstance(k,int): 
 332              self.__toklist[k] = v 
 333              sub = v 
 334          else: 
 335              self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 
 336              sub = v 
 337          if isinstance(sub,ParseResults): 
 338              sub.__parent = wkref(self) 
  339   
 341          if isinstance(i,(int,slice)): 
 342              mylen = len( self.__toklist ) 
 343              del self.__toklist[i] 
 344   
 345               
 346              if isinstance(i, int): 
 347                  if i < 0: 
 348                      i += mylen 
 349                  i = slice(i, i+1) 
 350               
 351              removed = list(range(*i.indices(mylen))) 
 352              removed.reverse() 
 353               
 354              for name in self.__tokdict: 
 355                  occurrences = self.__tokdict[name] 
 356                  for j in removed: 
 357                      for k, (value, position) in enumerate(occurrences): 
 358                          occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 
 359          else: 
 360              del self.__tokdict[i] 
  361   
 363          return k in self.__tokdict 
  364   
 365 -    def __len__( self ): return len( self.__toklist ) 
  366 -    def __bool__(self): return len( self.__toklist ) > 0 
  367      __nonzero__ = __bool__ 
 368 -    def __iter__( self ): return iter( self.__toklist ) 
  369 -    def __reversed__( self ): return iter( self.__toklist[::-1] ) 
  371          """Returns all named result keys.""" 
 372          if hasattr(self.__tokdict, "iterkeys"): 
 373              return self.__tokdict.iterkeys() 
 374          else: 
 375              return iter(self.__tokdict) 
  376   
 378          """Returns all named result values.""" 
 379          return (self[k] for k in self.iterkeys()) 
  380               
 382          return ((k, self[k]) for k in self.iterkeys()) 
  383   
 384      if PY_3: 
 385          keys = iterkeys 
 386          values = itervalues 
 387          items = iteritems 
 388      else: 
 390              """Returns all named result keys.""" 
 391              return list(self.iterkeys()) 
  392   
 394              """Returns all named result values.""" 
 395              return list(self.itervalues()) 
  396                   
 398              """Returns all named result keys and values as a list of tuples.""" 
 399              return list(self.iteritems()) 
  400   
 402          """Since keys() returns an iterator, this method is helpful in bypassing 
 403             code that looks for the existence of any defined results names.""" 
 404          return bool(self.__tokdict) 
  405           
 406 -    def pop( self, *args, **kwargs): 
  407          """Removes and returns item at specified index (default=last). 
 408             Supports both list and dict semantics for pop(). If passed no 
 409             argument or an integer argument, it will use list semantics 
 410             and pop tokens from the list of parsed tokens. If passed a  
 411             non-integer argument (most likely a string), it will use dict 
 412             semantics and pop the corresponding value from any defined  
 413             results names. A second default return value argument is  
 414             supported, just as in dict.pop().""" 
 415          if not args: 
 416              args = [-1] 
 417          if 'default' in kwargs: 
 418              args.append(kwargs['default']) 
 419          if (isinstance(args[0], int) or  
 420                          len(args) == 1 or  
 421                          args[0] in self): 
 422              ret = self[index] 
 423              del self[index] 
 424              return ret 
 425          else: 
 426              defaultvalue = args[1] 
 427              return defaultvalue 
  428   
 429 -    def get(self, key, defaultValue=None): 
  430          """Returns named result matching the given key, or if there is no 
 431             such name, then returns the given C{defaultValue} or C{None} if no 
 432             C{defaultValue} is specified.""" 
 433          if key in self: 
 434              return self[key] 
 435          else: 
 436              return defaultValue 
  437   
 438 -    def insert( self, index, insStr ): 
  439          """Inserts new element at location index in the list of parsed tokens.""" 
 440          self.__toklist.insert(index, insStr) 
 441           
 442          for name in self.__tokdict: 
 443              occurrences = self.__tokdict[name] 
 444              for k, (value, position) in enumerate(occurrences): 
 445                  occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) 
  446   
 448          """Add single element to end of ParseResults list of elements.""" 
 449          self.__toklist.append(item) 
  450   
 452          """Add sequence of elements to end of ParseResults list of elements.""" 
 453          if isinstance(itemseq, ParseResults): 
 454              self += itemseq 
 455          else: 
 456              self.__toklist.extend(itemseq) 
  457   
 459          """Clear all elements and results names.""" 
 460          del self.__toklist[:] 
 461          self.__tokdict.clear() 
  462   
 464          try: 
 465              return self[name] 
 466          except KeyError: 
 467              return "" 
 468               
 469          if name in self.__tokdict: 
 470              if name not in self.__accumNames: 
 471                  return self.__tokdict[name][-1][0] 
 472              else: 
 473                  return ParseResults([ v[0] for v in self.__tokdict[name] ]) 
 474          else: 
 475              return "" 
  476   
 478          ret = self.copy() 
 479          ret += other 
 480          return ret 
  481   
 483          if other.__tokdict: 
 484              offset = len(self.__toklist) 
 485              addoffset = ( lambda a: (a<0 and offset) or (a+offset) ) 
 486              otheritems = other.__tokdict.items() 
 487              otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 
 488                                  for (k,vlist) in otheritems for v in vlist] 
 489              for k,v in otherdictitems: 
 490                  self[k] = v 
 491                  if isinstance(v[0],ParseResults): 
 492                      v[0].__parent = wkref(self) 
 493               
 494          self.__toklist += other.__toklist 
 495          self.__accumNames.update( other.__accumNames ) 
 496          return self 
  497   
 499          if isinstance(other,int) and other == 0: 
 500              return self.copy() 
  501           
 503          return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) ) 
  504   
 506          out = [] 
 507          for i in self.__toklist: 
 508              if isinstance(i, ParseResults): 
 509                  out.append(_ustr(i)) 
 510              else: 
 511                  out.append(repr(i)) 
 512          return '[' + ', '.join(out) + ']' 
  513   
 515          out = [] 
 516          for item in self.__toklist: 
 517              if out and sep: 
 518                  out.append(sep) 
 519              if isinstance( item, ParseResults ): 
 520                  out += item._asStringList() 
 521              else: 
 522                  out.append( _ustr(item) ) 
 523          return out 
  524   
 526          """Returns the parse results as a nested list of matching tokens, all converted to strings.""" 
 527          out = [] 
 528          for res in self.__toklist: 
 529              if isinstance(res,ParseResults): 
 530                  out.append( res.asList() ) 
 531              else: 
 532                  out.append( res ) 
 533          return out 
  534   
 536          """Returns the named parse results as dictionary.""" 
 537          if PY_3: 
 538              return dict( self.items() ) 
 539          else: 
 540              return dict( self.iteritems() ) 
  541   
 543          """Returns a new copy of a C{ParseResults} object.""" 
 544          ret = ParseResults( self.__toklist ) 
 545          ret.__tokdict = self.__tokdict.copy() 
 546          ret.__parent = self.__parent 
 547          ret.__accumNames.update( self.__accumNames ) 
 548          ret.__name = self.__name 
 549          return ret 
  550   
 551 -    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ): 
  552          """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" 
 553          nl = "\n" 
 554          out = [] 
 555          namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() 
 556                                                              for v in vlist) 
 557          nextLevelIndent = indent + "  " 
 558   
 559           
 560          if not formatted: 
 561              indent = "" 
 562              nextLevelIndent = "" 
 563              nl = "" 
 564   
 565          selfTag = None 
 566          if doctag is not None: 
 567              selfTag = doctag 
 568          else: 
 569              if self.__name: 
 570                  selfTag = self.__name 
 571   
 572          if not selfTag: 
 573              if namedItemsOnly: 
 574                  return "" 
 575              else: 
 576                  selfTag = "ITEM" 
 577   
 578          out += [ nl, indent, "<", selfTag, ">" ] 
 579   
 580          worklist = self.__toklist 
 581          for i,res in enumerate(worklist): 
 582              if isinstance(res,ParseResults): 
 583                  if i in namedItems: 
 584                      out += [ res.asXML(namedItems[i], 
 585                                          namedItemsOnly and doctag is None, 
 586                                          nextLevelIndent, 
 587                                          formatted)] 
 588                  else: 
 589                      out += [ res.asXML(None, 
 590                                          namedItemsOnly and doctag is None, 
 591                                          nextLevelIndent, 
 592                                          formatted)] 
 593              else: 
 594                   
 595                  resTag = None 
 596                  if i in namedItems: 
 597                      resTag = namedItems[i] 
 598                  if not resTag: 
 599                      if namedItemsOnly: 
 600                          continue 
 601                      else: 
 602                          resTag = "ITEM" 
 603                  xmlBodyText = _xml_escape(_ustr(res)) 
 604                  out += [ nl, nextLevelIndent, "<", resTag, ">", 
 605                                                  xmlBodyText, 
 606                                                  "</", resTag, ">" ] 
 607   
 608          out += [ nl, indent, "</", selfTag, ">" ] 
 609          return "".join(out) 
  610   
 612          for k,vlist in self.__tokdict.items(): 
 613              for v,loc in vlist: 
 614                  if sub is v: 
 615                      return k 
 616          return None 
  617   
 619          """Returns the results name for this token expression.""" 
 620          if self.__name: 
 621              return self.__name 
 622          elif self.__parent: 
 623              par = self.__parent() 
 624              if par: 
 625                  return par.__lookup(self) 
 626              else: 
 627                  return None 
 628          elif (len(self) == 1 and 
 629                 len(self.__tokdict) == 1 and 
 630                 self.__tokdict.values()[0][0][1] in (0,-1)): 
 631              return self.__tokdict.keys()[0] 
 632          else: 
 633              return None 
  634   
 635 -    def dump(self,indent='',depth=0): 
  636          """Diagnostic method for listing out the contents of a C{ParseResults}. 
 637             Accepts an optional C{indent} argument so that this string can be embedded 
 638             in a nested display of other data.""" 
 639          out = [] 
 640          out.append( indent+_ustr(self.asList()) ) 
 641          items = sorted(self.items()) 
 642          for k,v in items: 
 643              if out: 
 644                  out.append('\n') 
 645              out.append( "%s%s- %s: " % (indent,('  '*depth), k) ) 
 646              if isinstance(v,ParseResults): 
 647                  if v.haskeys(): 
 648                      out.append( v.dump(indent,depth+1) ) 
 649                  else: 
 650                      out.append(_ustr(v)) 
 651              else: 
 652                  out.append(_ustr(v)) 
 653          return "".join(out) 
  654   
 655 -    def pprint(self, *args, **kwargs): 
  656          """Pretty-printer for parsed results as a list, using the C{pprint} module. 
 657             Accepts additional positional or keyword args as defined for the  
 658             C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})""" 
 659          pprint.pprint(self.asList(), *args, **kwargs) 
  660   
 661       
 663          return ( self.__toklist, 
 664                   ( self.__tokdict.copy(), 
 665                     self.__parent is not None and self.__parent() or None, 
 666                     self.__accumNames, 
 667                     self.__name ) ) 
  668   
 670          self.__toklist = state[0] 
 671          (self.__tokdict, 
 672           par, 
 673           inAccumNames, 
 674           self.__name) = state[1] 
 675          self.__accumNames = {} 
 676          self.__accumNames.update(inAccumNames) 
 677          if par is not None: 
 678              self.__parent = wkref(par) 
 679          else: 
 680              self.__parent = None 
  681   
  684   
 685  collections.MutableMapping.register(ParseResults) 
 686   
 688      """Returns current column within a string, counting newlines as line separators. 
 689     The first column is number 1. 
 690   
 691     Note: the default parsing behavior is to expand tabs in the input string 
 692     before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 
 693     on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 
 694     consistent view of the parsed string, the parse location, and line and column 
 695     positions within the parsed string. 
 696     """ 
 697      return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc) 
  698   
 700      """Returns current line number within a string, counting newlines as line separators. 
 701     The first line is number 1. 
 702   
 703     Note: the default parsing behavior is to expand tabs in the input string 
 704     before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 
 705     on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 
 706     consistent view of the parsed string, the parse location, and line and column 
 707     positions within the parsed string. 
 708     """ 
 709      return strg.count("\n",0,loc) + 1 
  710   
 711 -def line( loc, strg ): 
  712      """Returns the line of text containing loc within a string, counting newlines as line separators. 
 713         """ 
 714      lastCR = strg.rfind("\n", 0, loc) 
 715      nextCR = strg.find("\n", loc) 
 716      if nextCR >= 0: 
 717          return strg[lastCR+1:nextCR] 
 718      else: 
 719          return strg[lastCR+1:] 
  720   
 722      print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))) 
  723   
 725      print ("Matched " + _ustr(expr) + " -> " + str(toks.asList())) 
  726   
 728      print ("Exception raised:" + _ustr(exc)) 
  729   
 731      """'Do-nothing' debug action, to suppress debugging output during parsing.""" 
 732      pass 
  733   
 734   
 735   
 736   
 737       
 738           
 739       
 740       
 741       
 742           
 743           
 744               
 745                   
 746                   
 747                   
 748               
 749                   
 750                       
 751                   
 752                   
 753       
 754   
 755   
 756  'decorator to trim function calls to match the arity of the target' 
 758      if func in singleArgBuiltins: 
 759          return lambda s,l,t: func(t) 
 760      limit = [0] 
 761      foundArity = [False] 
 762      def wrapper(*args): 
 763          while 1: 
 764              try: 
 765                  ret = func(*args[limit[0]:]) 
 766                  foundArity[0] = True 
 767                  return ret 
 768              except TypeError: 
 769                  if limit[0] <= maxargs and not foundArity[0]: 
 770                      limit[0] += 1 
 771                      continue 
 772                  raise 
  773      return wrapper 
 774    
 776      """Abstract base level parser element class.""" 
 777      DEFAULT_WHITE_CHARS = " \n\t\r" 
 778      verbose_stacktrace = False 
 779   
 784      setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 
 785   
 787          """ 
 788          Set class to be used for inclusion of string literals into a parser. 
 789          """ 
 790          ParserElement.literalStringClass = cls 
  791      inlineLiteralsUsing = staticmethod(inlineLiteralsUsing) 
 792   
 794          self.parseAction = list() 
 795          self.failAction = None 
 796           
 797          self.strRepr = None 
 798          self.resultsName = None 
 799          self.saveAsList = savelist 
 800          self.skipWhitespace = True 
 801          self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 
 802          self.copyDefaultWhiteChars = True 
 803          self.mayReturnEmpty = False  
 804          self.keepTabs = False 
 805          self.ignoreExprs = list() 
 806          self.debug = False 
 807          self.streamlined = False 
 808          self.mayIndexError = True  
 809          self.errmsg = "" 
 810          self.modalResults = True  
 811          self.debugActions = ( None, None, None )  
 812          self.re = None 
 813          self.callPreparse = True  
 814          self.callDuringTry = False 
  815   
 817          """Make a copy of this C{ParserElement}.  Useful for defining different parse actions 
 818             for the same parsing pattern, using copies of the original parse element.""" 
 819          cpy = copy.copy( self ) 
 820          cpy.parseAction = self.parseAction[:] 
 821          cpy.ignoreExprs = self.ignoreExprs[:] 
 822          if self.copyDefaultWhiteChars: 
 823              cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 
 824          return cpy 
  825   
 827          """Define name for this expression, for use in debugging.""" 
 828          self.name = name 
 829          self.errmsg = "Expected " + self.name 
 830          if hasattr(self,"exception"): 
 831              self.exception.msg = self.errmsg 
 832          return self 
  833   
 835          """Define name for referencing matching tokens as a nested attribute 
 836             of the returned parse results. 
 837             NOTE: this returns a *copy* of the original C{ParserElement} object; 
 838             this is so that the client can define a basic element, such as an 
 839             integer, and reference it in multiple places with different names. 
 840              
 841             You can also set results names using the abbreviated syntax, 
 842             C{expr("name")} in place of C{expr.setResultsName("name")} -  
 843             see L{I{__call__}<__call__>}. 
 844          """ 
 845          newself = self.copy() 
 846          if name.endswith("*"): 
 847              name = name[:-1] 
 848              listAllMatches=True 
 849          newself.resultsName = name 
 850          newself.modalResults = not listAllMatches 
 851          return newself 
  852   
 854          """Method to invoke the Python pdb debugger when this element is 
 855             about to be parsed. Set C{breakFlag} to True to enable, False to 
 856             disable. 
 857          """ 
 858          if breakFlag: 
 859              _parseMethod = self._parse 
 860              def breaker(instring, loc, doActions=True, callPreParse=True): 
 861                  import pdb 
 862                  pdb.set_trace() 
 863                  return _parseMethod( instring, loc, doActions, callPreParse ) 
  864              breaker._originalParseMethod = _parseMethod 
 865              self._parse = breaker 
 866          else: 
 867              if hasattr(self._parse,"_originalParseMethod"): 
 868                  self._parse = self._parse._originalParseMethod 
 869          return self 
  870   
 872          """Define action to perform when successfully matching parse element definition. 
 873             Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)}, 
 874             C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where: 
 875              - s   = the original string being parsed (see note below) 
 876              - loc = the location of the matching substring 
 877              - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object 
 878             If the functions in fns modify the tokens, they can return them as the return 
 879             value from fn, and the modified list of tokens will replace the original. 
 880             Otherwise, fn does not need to return any value. 
 881   
 882             Note: the default parsing behavior is to expand tabs in the input string 
 883             before starting the parsing process.  See L{I{parseString}<parseString>} for more information 
 884             on parsing strings containing C{<TAB>}s, and suggested methods to maintain a 
 885             consistent view of the parsed string, the parse location, and line and column 
 886             positions within the parsed string. 
 887             """ 
 888          self.parseAction = list(map(_trim_arity, list(fns))) 
 889          self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 
 890          return self 
  891   
 893          """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" 
 894          self.parseAction += list(map(_trim_arity, list(fns))) 
 895          self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 
 896          return self 
  897   
 899          """Define action to perform if parsing fails at this expression. 
 900             Fail action fn is a callable function that takes the arguments
 901             C{fn(s,loc,expr,err)} where: 
 902              - s = string being parsed 
 903              - loc = location where expression match was attempted and failed 
 904              - expr = the parse expression that failed 
 905              - err = the exception thrown 
 906             The function returns no value.  It may throw C{L{ParseFatalException}} 
 907             if it is desired to stop parsing immediately.""" 
 908          self.failAction = fn 
 909          return self 
  910   
 912          exprsFound = True 
 913          while exprsFound: 
 914              exprsFound = False 
 915              for e in self.ignoreExprs: 
 916                  try: 
 917                      while 1: 
 918                          loc,dummy = e._parse( instring, loc ) 
 919                          exprsFound = True 
 920                  except ParseException: 
 921                      pass 
 922          return loc 
  923   
 925          if self.ignoreExprs: 
 926              loc = self._skipIgnorables( instring, loc ) 
 927   
 928          if self.skipWhitespace: 
 929              wt = self.whiteChars 
 930              instrlen = len(instring) 
 931              while loc < instrlen and instring[loc] in wt: 
 932                  loc += 1 
 933   
 934          return loc 
  935   
 936 -    def parseImpl( self, instring, loc, doActions=True ): 
  938   
 939 -    def postParse( self, instring, loc, tokenlist ): 
  941   
 942       
 943 -    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ): 
  944          debugging = ( self.debug )  
 945   
 946          if debugging or self.failAction: 
 947               
 948              if (self.debugActions[0] ): 
 949                  self.debugActions[0]( instring, loc, self ) 
 950              if callPreParse and self.callPreparse: 
 951                  preloc = self.preParse( instring, loc ) 
 952              else: 
 953                  preloc = loc 
 954              tokensStart = preloc 
 955              try: 
 956                  try: 
 957                      loc,tokens = self.parseImpl( instring, preloc, doActions ) 
 958                  except IndexError: 
 959                      raise ParseException( instring, len(instring), self.errmsg, self ) 
 960              except ParseBaseException as err: 
 961                   
 962                  if self.debugActions[2]: 
 963                      self.debugActions[2]( instring, tokensStart, self, err ) 
 964                  if self.failAction: 
 965                      self.failAction( instring, tokensStart, self, err ) 
 966                  raise 
 967          else: 
 968              if callPreParse and self.callPreparse: 
 969                  preloc = self.preParse( instring, loc ) 
 970              else: 
 971                  preloc = loc 
 972              tokensStart = preloc 
 973              if self.mayIndexError or loc >= len(instring): 
 974                  try: 
 975                      loc,tokens = self.parseImpl( instring, preloc, doActions ) 
 976                  except IndexError: 
 977                      raise ParseException( instring, len(instring), self.errmsg, self ) 
 978              else: 
 979                  loc,tokens = self.parseImpl( instring, preloc, doActions ) 
 980   
 981          tokens = self.postParse( instring, loc, tokens ) 
 982   
 983          retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults ) 
 984          if self.parseAction and (doActions or self.callDuringTry): 
 985              if debugging: 
 986                  try: 
 987                      for fn in self.parseAction: 
 988                          tokens = fn( instring, tokensStart, retTokens ) 
 989                          if tokens is not None: 
 990                              retTokens = ParseResults( tokens, 
 991                                                        self.resultsName, 
 992                                                        asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
 993                                                        modal=self.modalResults ) 
 994                  except ParseBaseException as err: 
 995                       
 996                      if (self.debugActions[2] ): 
 997                          self.debugActions[2]( instring, tokensStart, self, err ) 
 998                      raise 
 999              else: 
1000                  for fn in self.parseAction: 
1001                      tokens = fn( instring, tokensStart, retTokens ) 
1002                      if tokens is not None: 
1003                          retTokens = ParseResults( tokens, 
1004                                                    self.resultsName, 
1005                                                    asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
1006                                                    modal=self.modalResults ) 
1007   
1008          if debugging: 
1009               
1010              if (self.debugActions[1] ): 
1011                  self.debugActions[1]( instring, tokensStart, loc, self, retTokens ) 
1012   
1013          return loc, retTokens 
 1014   
1020   
1021       
1022       
1023 -    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ): 
 1024          lookup = (self,instring,loc,callPreParse,doActions) 
1025          if lookup in ParserElement._exprArgCache: 
1026              value = ParserElement._exprArgCache[ lookup ] 
1027              if isinstance(value, Exception): 
1028                  raise value 
1029              return (value[0],value[1].copy()) 
1030          else: 
1031              try: 
1032                  value = self._parseNoCache( instring, loc, doActions, callPreParse ) 
1033                  ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy()) 
1034                  return value 
1035              except ParseBaseException as pe: 
1036                  pe.__traceback__ = None 
1037                  ParserElement._exprArgCache[ lookup ] = pe 
1038                  raise 
 1039   
1040      _parse = _parseNoCache 
1041   
1042       
1043      _exprArgCache = {} 
1046      resetCache = staticmethod(resetCache) 
1047   
1048      _packratEnabled = False 
1050          """Enables "packrat" parsing, which adds memoizing to the parsing logic. 
1051             Repeated parse attempts at the same string location (which happens 
1052             often in many complex grammars) can immediately return a cached value, 
1053             instead of re-executing parsing/validating code.  Memoizing is done of 
1054             both valid results and parsing exceptions. 
1055   
1056             This speedup may break existing programs that use parse actions that 
1057             have side-effects.  For this reason, packrat parsing is disabled when 
1058             you first import pyparsing.  To activate the packrat feature, your 
1059             program must call the class method C{ParserElement.enablePackrat()}.  If 
1060             your program uses C{psyco} to "compile as you go", you must call 
1061             C{enablePackrat} before calling C{psyco.full()}.  If you do not do this, 
1062             Python will crash.  For best results, call C{enablePackrat()} immediately 
1063             after importing pyparsing. 
1064          """ 
1065          if not ParserElement._packratEnabled: 
1066              ParserElement._packratEnabled = True 
1067              ParserElement._parse = ParserElement._parseCache 
 1068      enablePackrat = staticmethod(enablePackrat) 
1069   
1071          """Execute the parse expression with the given string. 
1072             This is the main interface to the client code, once the complete 
1073             expression has been built. 
1074   
1075             If you want the grammar to require that the entire input string be 
1076             successfully parsed, then set C{parseAll} to True (equivalent to ending 
1077             the grammar with C{L{StringEnd()}}). 
1078   
1079             Note: C{parseString} implicitly calls C{expandtabs()} on the input string, 
1080             in order to report proper column numbers in parse actions. 
1081             If the input string contains tabs and 
1082             the grammar uses parse actions that use the C{loc} argument to index into the 
1083             string being parsed, you can ensure you have a consistent view of the input 
1084             string by: 
1085              - calling C{parseWithTabs} on your grammar before calling C{parseString} 
1086                (see L{I{parseWithTabs}<parseWithTabs>}) 
1087              - define your parse action using the full C{(s,loc,toks)} signature, and 
1088                reference the input string using the parse action's C{s} argument 
1089              - explicitly expand the tabs in your input string before calling
1090                C{parseString} 
1091          """ 
1092          ParserElement.resetCache() 
1093          if not self.streamlined: 
1094              self.streamline() 
1095               
1096          for e in self.ignoreExprs: 
1097              e.streamline() 
1098          if not self.keepTabs: 
1099              instring = instring.expandtabs() 
1100          try: 
1101              loc, tokens = self._parse( instring, 0 ) 
1102              if parseAll: 
1103                  loc = self.preParse( instring, loc ) 
1104                  se = Empty() + StringEnd() 
1105                  se._parse( instring, loc ) 
1106          except ParseBaseException as exc: 
1107              if ParserElement.verbose_stacktrace: 
1108                  raise 
1109              else: 
1110                   
1111                  raise exc 
1112          else: 
1113              return tokens 
 1114   
1116          """Scan the input string for expression matches.  Each match will return the 
1117             matching tokens, start location, and end location.  May be called with optional 
1118             C{maxMatches} argument, to clip scanning after 'n' matches are found.  If 
1119             C{overlap} is specified, then overlapping matches will be reported. 
1120   
1121             Note that the start and end locations are reported relative to the string 
1122             being parsed.  See L{I{parseString}<parseString>} for more information on parsing 
1123             strings with embedded tabs.""" 
1124          if not self.streamlined: 
1125              self.streamline() 
1126          for e in self.ignoreExprs: 
1127              e.streamline() 
1128   
1129          if not self.keepTabs: 
1130              instring = _ustr(instring).expandtabs() 
1131          instrlen = len(instring) 
1132          loc = 0 
1133          preparseFn = self.preParse 
1134          parseFn = self._parse 
1135          ParserElement.resetCache() 
1136          matches = 0 
1137          try: 
1138              while loc <= instrlen and matches < maxMatches: 
1139                  try: 
1140                      preloc = preparseFn( instring, loc ) 
1141                      nextLoc,tokens = parseFn( instring, preloc, callPreParse=False ) 
1142                  except ParseException: 
1143                      loc = preloc+1 
1144                  else: 
1145                      if nextLoc > loc: 
1146                          matches += 1 
1147                          yield tokens, preloc, nextLoc 
1148                          if overlap: 
1149                              nextloc = preparseFn( instring, loc ) 
1150                              if nextloc > loc: 
1151                                  loc = nextLoc 
1152                              else: 
1153                                  loc += 1 
1154                          else: 
1155                              loc = nextLoc 
1156                      else: 
1157                          loc = preloc+1 
1158          except ParseBaseException as exc: 
1159              if ParserElement.verbose_stacktrace: 
1160                  raise 
1161              else: 
1162                   
1163                  raise exc 
 1164   
1197   
1199          """Another extension to C{L{scanString}}, simplifying the access to the tokens found 
1200             to match the given parse expression.  May be called with optional 
1201             C{maxMatches} argument, to clip searching after 'n' matches are found. 
1202          """ 
1203          try: 
1204              return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ]) 
1205          except ParseBaseException as exc: 
1206              if ParserElement.verbose_stacktrace: 
1207                  raise 
1208              else: 
1209                   
1210                  raise exc 
 1211   
1213          """Implementation of + operator - returns C{L{And}}""" 
1214          if isinstance( other, basestring ): 
1215              other = ParserElement.literalStringClass( other ) 
1216          if not isinstance( other, ParserElement ): 
1217              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1218                      SyntaxWarning, stacklevel=2) 
1219              return None 
1220          return And( [ self, other ] ) 
 1221   
1223          """Implementation of + operator when left operand is not a C{L{ParserElement}}""" 
1224          if isinstance( other, basestring ): 
1225              other = ParserElement.literalStringClass( other ) 
1226          if not isinstance( other, ParserElement ): 
1227              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1228                      SyntaxWarning, stacklevel=2) 
1229              return None 
1230          return other + self 
 1231   
1233          """Implementation of - operator, returns C{L{And}} with error stop""" 
1234          if isinstance( other, basestring ): 
1235              other = ParserElement.literalStringClass( other ) 
1236          if not isinstance( other, ParserElement ): 
1237              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1238                      SyntaxWarning, stacklevel=2) 
1239              return None 
1240          return And( [ self, And._ErrorStop(), other ] ) 
 1241   
1243          """Implementation of - operator when left operand is not a C{L{ParserElement}}""" 
1244          if isinstance( other, basestring ): 
1245              other = ParserElement.literalStringClass( other ) 
1246          if not isinstance( other, ParserElement ): 
1247              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1248                      SyntaxWarning, stacklevel=2) 
1249              return None 
1250          return other - self 
 1251   
1253          """Implementation of * operator, allows use of C{expr * 3} in place of 
1254             C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
1255             tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples 
1256             may also include C{None} as in: 
1257              - C{expr*(n,None)} or C{expr*(n,)} is equivalent 
1258                to C{expr*n + L{ZeroOrMore}(expr)} 
1259                (read as "at least n instances of C{expr}") 
1260              - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 
1261                (read as "0 to n instances of C{expr}") 
1262              - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} 
1263              - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} 
1264   
1265             Note that C{expr*(None,n)} does not raise an exception if 
1266             more than n exprs exist in the input stream; that is, 
1267             C{expr*(None,n)} does not enforce a maximum number of expr 
1268             occurrences.  If this behavior is desired, then write 
1269             C{expr*(None,n) + ~expr} 
1270   
1271          """ 
1272          if isinstance(other,int): 
1273              minElements, optElements = other,0 
1274          elif isinstance(other,tuple): 
1275              other = (other + (None, None))[:2] 
1276              if other[0] is None: 
1277                  other = (0, other[1]) 
1278              if isinstance(other[0],int) and other[1] is None: 
1279                  if other[0] == 0: 
1280                      return ZeroOrMore(self) 
1281                  if other[0] == 1: 
1282                      return OneOrMore(self) 
1283                  else: 
1284                      return self*other[0] + ZeroOrMore(self) 
1285              elif isinstance(other[0],int) and isinstance(other[1],int): 
1286                  minElements, optElements = other 
1287                  optElements -= minElements 
1288              else: 
1289                  raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 
1290          else: 
1291              raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 
1292   
1293          if minElements < 0: 
1294              raise ValueError("cannot multiply ParserElement by negative value") 
1295          if optElements < 0: 
1296              raise ValueError("second tuple value must be greater or equal to first tuple value") 
1297          if minElements == optElements == 0: 
1298              raise ValueError("cannot multiply ParserElement by 0 or (0,0)") 
1299   
1300          if (optElements): 
1301              def makeOptionalList(n): 
1302                  if n>1: 
1303                      return Optional(self + makeOptionalList(n-1)) 
1304                  else: 
1305                      return Optional(self) 
 1306              if minElements: 
1307                  if minElements == 1: 
1308                      ret = self + makeOptionalList(optElements) 
1309                  else: 
1310                      ret = And([self]*minElements) + makeOptionalList(optElements) 
1311              else: 
1312                  ret = makeOptionalList(optElements) 
1313          else: 
1314              if minElements == 1: 
1315                  ret = self 
1316              else: 
1317                  ret = And([self]*minElements) 
1318          return ret 
1319   
1322   
1324          """Implementation of | operator - returns C{L{MatchFirst}}""" 
1325          if isinstance( other, basestring ): 
1326              other = ParserElement.literalStringClass( other ) 
1327          if not isinstance( other, ParserElement ): 
1328              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1329                      SyntaxWarning, stacklevel=2) 
1330              return None 
1331          return MatchFirst( [ self, other ] ) 
 1332   
1334          """Implementation of | operator when left operand is not a C{L{ParserElement}}""" 
1335          if isinstance( other, basestring ): 
1336              other = ParserElement.literalStringClass( other ) 
1337          if not isinstance( other, ParserElement ): 
1338              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1339                      SyntaxWarning, stacklevel=2) 
1340              return None 
1341          return other | self 
 1342   
1344          """Implementation of ^ operator - returns C{L{Or}}""" 
1345          if isinstance( other, basestring ): 
1346              other = ParserElement.literalStringClass( other ) 
1347          if not isinstance( other, ParserElement ): 
1348              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1349                      SyntaxWarning, stacklevel=2) 
1350              return None 
1351          return Or( [ self, other ] ) 
 1352   
1354          """Implementation of ^ operator when left operand is not a C{L{ParserElement}}""" 
1355          if isinstance( other, basestring ): 
1356              other = ParserElement.literalStringClass( other ) 
1357          if not isinstance( other, ParserElement ): 
1358              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1359                      SyntaxWarning, stacklevel=2) 
1360              return None 
1361          return other ^ self 
 1362   
1364          """Implementation of & operator - returns C{L{Each}}""" 
1365          if isinstance( other, basestring ): 
1366              other = ParserElement.literalStringClass( other ) 
1367          if not isinstance( other, ParserElement ): 
1368              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1369                      SyntaxWarning, stacklevel=2) 
1370              return None 
1371          return Each( [ self, other ] ) 
 1372   
1374          """Implementation of & operator when left operand is not a C{L{ParserElement}}""" 
1375          if isinstance( other, basestring ): 
1376              other = ParserElement.literalStringClass( other ) 
1377          if not isinstance( other, ParserElement ): 
1378              warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 
1379                      SyntaxWarning, stacklevel=2) 
1380              return None 
1381          return other & self 
 1382   
1384          """Implementation of ~ operator - returns C{L{NotAny}}""" 
1385          return NotAny( self ) 
 1386   
1388          """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}:: 
1389               userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 
1390             could be written as:: 
1391               userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 
1392                
1393             If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be 
1394             passed as C{True}. 
1395              
1396             If C{name} is omitted, same as calling C{L{copy}}. 
1397             """ 
1398          if name is not None: 
1399              return self.setResultsName(name) 
1400          else: 
1401              return self.copy() 
 1402   
1404          """Suppresses the output of this C{ParserElement}; useful to keep punctuation from 
1405             cluttering up returned output. 
1406          """ 
1407          return Suppress( self ) 
 1408   
1410          """Disables the skipping of whitespace before matching the characters in the 
1411             C{ParserElement}'s defined pattern.  This is normally only used internally by 
1412             the pyparsing module, but may be needed in some whitespace-sensitive grammars. 
1413          """ 
1414          self.skipWhitespace = False 
1415          return self 
 1416   
1418          """Overrides the default whitespace chars 
1419          """ 
1420          self.skipWhitespace = True 
1421          self.whiteChars = chars 
1422          self.copyDefaultWhiteChars = False 
1423          return self 
 1424   
1426          """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string. 
1427             Must be called before C{parseString} when the input grammar contains elements that 
1428             match C{<TAB>} characters.""" 
1429          self.keepTabs = True 
1430          return self 
 1431   
1433          """Define expression to be ignored (e.g., comments) while doing pattern 
1434             matching; may be called repeatedly, to define multiple comment or other 
1435             ignorable patterns. 
1436          """ 
1437          if isinstance( other, Suppress ): 
1438              if other not in self.ignoreExprs: 
1439                  self.ignoreExprs.append( other.copy() ) 
1440          else: 
1441              self.ignoreExprs.append( Suppress( other.copy() ) ) 
1442          return self 
 1443   
1444 -    def setDebugActions( self, startAction, successAction, exceptionAction ): 
 1445          """Enable display of debugging messages while doing pattern matching.""" 
1446          self.debugActions = (startAction or _defaultStartDebugAction, 
1447                               successAction or _defaultSuccessDebugAction, 
1448                               exceptionAction or _defaultExceptionDebugAction) 
1449          self.debug = True 
1450          return self 
 1451   
1453          """Enable display of debugging messages while doing pattern matching. 
1454             Set C{flag} to True to enable, False to disable.""" 
1455          if flag: 
1456              self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction ) 
1457          else: 
1458              self.debug = False 
1459          return self 
 1460   
1463   
1466   
1468          self.streamlined = True 
1469          self.strRepr = None 
1470          return self 
 1471   
1474   
1475 -    def validate( self, validateTrace=[] ): 
 1476          """Check defined expressions for valid structure, check for infinite recursive definitions.""" 
1477          self.checkRecursion( [] ) 
 1478   
1479 -    def parseFile( self, file_or_filename, parseAll=False ): 
 1480          """Execute the parse expression on the given file or filename. 
1481             If a filename is specified (instead of a file object), 
1482             the entire file is opened, read, and closed before parsing. 
1483          """ 
1484          try: 
1485              file_contents = file_or_filename.read() 
1486          except AttributeError: 
1487              f = open(file_or_filename, "r") 
1488              file_contents = f.read() 
1489              f.close() 
1490          try: 
1491              return self.parseString(file_contents, parseAll) 
1492          except ParseBaseException as exc: 
1493              if ParserElement.verbose_stacktrace: 
1494                  raise 
1495              else: 
1496                   
1497                  raise exc 
 1498   
1500          if isinstance(other, ParserElement): 
1501              return self is other or self.__dict__ == other.__dict__ 
1502          elif isinstance(other, basestring): 
1503              try: 
1504                  self.parseString(_ustr(other), parseAll=True) 
1505                  return True 
1506              except ParseBaseException: 
1507                  return False 
1508          else: 
1509              return super(ParserElement,self)==other 
 1510   
1512          return not (self == other) 
 1513   
1515          return hash(id(self)) 
 1516   
1518          return self == other 
 1519   
1521          return not (self == other) 
 1522   
1523   
1524 -class Token(ParserElement): 
 1525      """Abstract C{ParserElement} subclass, for defining atomic matching patterns.""" 
1528   
1530          s = super(Token,self).setName(name) 
1531          self.errmsg = "Expected " + self.name 
1532          return s 
  1533   
1534   
1536      """An empty token, will always match.""" 
1538          super(Empty,self).__init__() 
1539          self.name = "Empty" 
1540          self.mayReturnEmpty = True 
1541          self.mayIndexError = False 
  1542   
1543   
1545      """A token that will never match.""" 
1547          super(NoMatch,self).__init__() 
1548          self.name = "NoMatch" 
1549          self.mayReturnEmpty = True 
1550          self.mayIndexError = False 
1551          self.errmsg = "Unmatchable token" 
 1552   
1553 -    def parseImpl( self, instring, loc, doActions=True ): 
  1555   
1556   
1558      """Token to exactly match a specified string.""" 
1560          super(Literal,self).__init__() 
1561          self.match = matchString 
1562          self.matchLen = len(matchString) 
1563          try: 
1564              self.firstMatchChar = matchString[0] 
1565          except IndexError: 
1566              warnings.warn("null string passed to Literal; use Empty() instead", 
1567                              SyntaxWarning, stacklevel=2) 
1568              self.__class__ = Empty 
1569          self.name = '"%s"' % _ustr(self.match) 
1570          self.errmsg = "Expected " + self.name 
1571          self.mayReturnEmpty = False 
1572          self.mayIndexError = False 
 1573   
1574       
1575       
1576       
1577       
1578 -    def parseImpl( self, instring, loc, doActions=True ): 
 1579          if (instring[loc] == self.firstMatchChar and 
1580              (self.matchLen==1 or instring.startswith(self.match,loc)) ): 
1581              return loc+self.matchLen, self.match 
1582          raise ParseException(instring, loc, self.errmsg, self) 
  1583  _L = Literal 
1584  ParserElement.literalStringClass = Literal 
1585   
1587      """Token to exactly match a specified string as a keyword, that is, it must be 
1588         immediately followed by a non-keyword character.  Compare with C{L{Literal}}:: 
1589           Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}. 
1590           Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} 
1591         Accepts two optional constructor arguments in addition to the keyword string: 
1592         C{identChars} is a string of characters that would be valid identifier characters, 
1593         defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive 
1594         matching, default is C{False}. 
1595      """ 
1596      DEFAULT_KEYWORD_CHARS = alphanums+"_$" 
1597   
1599          super(Keyword,self).__init__() 
1600          self.match = matchString 
1601          self.matchLen = len(matchString) 
1602          try: 
1603              self.firstMatchChar = matchString[0] 
1604          except IndexError: 
1605              warnings.warn("null string passed to Keyword; use Empty() instead", 
1606                              SyntaxWarning, stacklevel=2) 
1607          self.name = '"%s"' % self.match 
1608          self.errmsg = "Expected " + self.name 
1609          self.mayReturnEmpty = False 
1610          self.mayIndexError = False 
1611          self.caseless = caseless 
1612          if caseless: 
1613              self.caselessmatch = matchString.upper() 
1614              identChars = identChars.upper() 
1615          self.identChars = set(identChars) 
 1616   
1617 -    def parseImpl( self, instring, loc, doActions=True ): 
 1618          if self.caseless: 
1619              if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 
1620                   (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and 
1621                   (loc == 0 or instring[loc-1].upper() not in self.identChars) ): 
1622                  return loc+self.matchLen, self.match 
1623          else: 
1624              if (instring[loc] == self.firstMatchChar and 
1625                  (self.matchLen==1 or instring.startswith(self.match,loc)) and 
1626                  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and 
1627                  (loc == 0 or instring[loc-1] not in self.identChars) ): 
1628                  return loc+self.matchLen, self.match 
1629          raise ParseException(instring, loc, self.errmsg, self) 
 1630   
1635   
1640      setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) 
 1641   
1643      """Token to match a specified string, ignoring case of letters. 
1644         Note: the matched results will always be in the case of the given 
1645         match string, NOT the case of the input text. 
1646      """ 
1648          super(CaselessLiteral,self).__init__( matchString.upper() ) 
1649           
1650          self.returnString = matchString 
1651          self.name = "'%s'" % self.returnString 
1652          self.errmsg = "Expected " + self.name 
 1653   
1654 -    def parseImpl( self, instring, loc, doActions=True ): 
 1655          if instring[ loc:loc+self.matchLen ].upper() == self.match: 
1656              return loc+self.matchLen, self.returnString 
1657          raise ParseException(instring, loc, self.errmsg, self) 
  1658   
1662   
1663 -    def parseImpl( self, instring, loc, doActions=True ): 
 1664          if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 
1665               (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ): 
1666              return loc+self.matchLen, self.match 
1667          raise ParseException(instring, loc, self.errmsg, self) 
  1668   
1670      """Token for matching words composed of allowed character sets. 
1671         Defined with string containing all allowed initial characters, 
1672         an optional string containing allowed body characters (if omitted, 
1673         defaults to the initial character set), and an optional minimum, 
1674         maximum, and/or exact length.  The default value for C{min} is 1 (a 
1675         minimum value < 1 is not valid); the default values for C{max} and C{exact} 
1676         are 0, meaning no maximum or exact length restriction. An optional 
1677         C{exclude} parameter can list characters that might be found in  
1678         the input C{bodyChars} string; useful to define a word of all printables 
1679         except for one or two characters, for instance. 
1680      """ 
1681 -    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ): 
 1682          super(Word,self).__init__() 
1683          if excludeChars: 
1684              initChars = ''.join(c for c in initChars if c not in excludeChars) 
1685              if bodyChars: 
1686                  bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) 
1687          self.initCharsOrig = initChars 
1688          self.initChars = set(initChars) 
1689          if bodyChars : 
1690              self.bodyCharsOrig = bodyChars 
1691              self.bodyChars = set(bodyChars) 
1692          else: 
1693              self.bodyCharsOrig = initChars 
1694              self.bodyChars = set(initChars) 
1695   
1696          self.maxSpecified = max > 0 
1697   
1698          if min < 1: 
1699              raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 
1700   
1701          self.minLen = min 
1702   
1703          if max > 0: 
1704              self.maxLen = max 
1705          else: 
1706              self.maxLen = _MAX_INT 
1707   
1708          if exact > 0: 
1709              self.maxLen = exact 
1710              self.minLen = exact 
1711   
1712          self.name = _ustr(self) 
1713          self.errmsg = "Expected " + self.name 
1714          self.mayIndexError = False 
1715          self.asKeyword = asKeyword 
1716   
1717          if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0): 
1718              if self.bodyCharsOrig == self.initCharsOrig: 
1719                  self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 
1720              elif len(self.bodyCharsOrig) == 1: 
1721                  self.reString = "%s[%s]*" % \ 
1722                                        (re.escape(self.initCharsOrig), 
1723                                        _escapeRegexRangeChars(self.bodyCharsOrig),) 
1724              else: 
1725                  self.reString = "[%s][%s]*" % \ 
1726                                        (_escapeRegexRangeChars(self.initCharsOrig), 
1727                                        _escapeRegexRangeChars(self.bodyCharsOrig),) 
1728              if self.asKeyword: 
1729                  self.reString = r"\b"+self.reString+r"\b" 
1730              try: 
1731                  self.re = re.compile( self.reString ) 
1732              except: 
1733                  self.re = None 
 1734   
1735 -    def parseImpl( self, instring, loc, doActions=True ): 
 1736          if self.re: 
1737              result = self.re.match(instring,loc) 
1738              if not result: 
1739                  raise ParseException(instring, loc, self.errmsg, self) 
1740   
1741              loc = result.end() 
1742              return loc, result.group() 
1743   
1744          if not(instring[ loc ] in self.initChars): 
1745              raise ParseException(instring, loc, self.errmsg, self) 
1746   
1747          start = loc 
1748          loc += 1 
1749          instrlen = len(instring) 
1750          bodychars = self.bodyChars 
1751          maxloc = start + self.maxLen 
1752          maxloc = min( maxloc, instrlen ) 
1753          while loc < maxloc and instring[loc] in bodychars: 
1754              loc += 1 
1755   
1756          throwException = False 
1757          if loc - start < self.minLen: 
1758              throwException = True 
1759          if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: 
1760              throwException = True 
1761          if self.asKeyword: 
1762              if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars): 
1763                  throwException = True 
1764   
1765          if throwException: 
1766              raise ParseException(instring, loc, self.errmsg, self) 
1767   
1768          return loc, instring[start:loc] 
 1769   
1771          try: 
1772              return super(Word,self).__str__() 
1773          except: 
1774              pass 
1775   
1776   
1777          if self.strRepr is None: 
1778   
1779              def charsAsStr(s): 
1780                  if len(s)>4: 
1781                      return s[:4]+"..." 
1782                  else: 
1783                      return s 
 1784   
1785              if ( self.initCharsOrig != self.bodyCharsOrig ): 
1786                  self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) ) 
1787              else: 
1788                  self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) 
1789   
1790          return self.strRepr 
 1791   
1792   
1794      """Token for matching strings that match a given regular expression. 
1795         Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. 
1796      """ 
1797      compiledREtype = type(re.compile("[A-Z]")) 
1798 -    def __init__( self, pattern, flags=0): 
 1799          """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.""" 
1800          super(Regex,self).__init__() 
1801   
1802          if isinstance(pattern, basestring): 
1803              if len(pattern) == 0: 
1804                  warnings.warn("null string passed to Regex; use Empty() instead", 
1805                          SyntaxWarning, stacklevel=2) 
1806   
1807              self.pattern = pattern 
1808              self.flags = flags 
1809   
1810              try: 
1811                  self.re = re.compile(self.pattern, self.flags) 
1812                  self.reString = self.pattern 
1813              except sre_constants.error: 
1814                  warnings.warn("invalid pattern (%s) passed to Regex" % pattern, 
1815                      SyntaxWarning, stacklevel=2) 
1816                  raise 
1817   
1818          elif isinstance(pattern, Regex.compiledREtype): 
1819              self.re = pattern 
1820              self.pattern = \ 
1821              self.reString = str(pattern) 
1822              self.flags = flags 
1823               
1824          else: 
1825              raise ValueError("Regex may only be constructed with a string or a compiled RE object") 
1826   
1827          self.name = _ustr(self) 
1828          self.errmsg = "Expected " + self.name 
1829          self.mayIndexError = False 
1830          self.mayReturnEmpty = True 
 1831   
1832 -    def parseImpl( self, instring, loc, doActions=True ): 
 1833          result = self.re.match(instring,loc) 
1834          if not result: 
1835              raise ParseException(instring, loc, self.errmsg, self) 
1836   
1837          loc = result.end() 
1838          d = result.groupdict() 
1839          ret = ParseResults(result.group()) 
1840          if d: 
1841              for k in d: 
1842                  ret[k] = d[k] 
1843          return loc,ret 
 1844   
1846          try: 
1847              return super(Regex,self).__str__() 
1848          except: 
1849              pass 
1850   
1851          if self.strRepr is None: 
1852              self.strRepr = "Re:(%s)" % repr(self.pattern) 
1853   
1854          return self.strRepr 
  1855   
1856   
1858      """Token for matching strings that are delimited by quoting characters. 
1859      """ 
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)

           Surrounding whitespace is stripped from C{quoteChar} and C{endQuoteChar}.
           If either is empty after stripping, a C{SyntaxWarning} is issued and
           C{SyntaxError} is raised.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Start of the pattern: the opening quote, then a non-capturing group
        # whose first alternative is "any character except the first
        # end-quote character and the escape character".  The single-line
        # variant additionally excludes \n and \r.
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        # For a multi-character end quote, also accept each proper prefix of
        # the end quote (longest first) followed by a character that is not
        # the next end-quote character.
        if len(self.endQuoteChar) > 1:
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        # Alternative: the escaped-quote sequence itself.
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        # Alternative: the escape character followed by any one character.
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # Pattern used by parseImpl to strip the escape character in front
            # of the first character of either quote string; only '^' and '-'
            # are escaped inside the character class.
            charset = ''.join(set(self.quoteChar[0]+self.endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-')
            self.escCharReplacePattern = re.escape(self.escChar)+("([%s])" % charset)
        # Close the repeated group and require the full end quote.
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True
 1933   
1934 -    def parseImpl( self, instring, loc, doActions=True ): 
 1935          result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None 
1936          if not result: 
1937              raise ParseException(instring, loc, self.errmsg, self) 
1938   
1939          loc = result.end() 
1940          ret = result.group() 
1941   
1942          if self.unquoteResults: 
1943   
1944               
1945              ret = ret[self.quoteCharLen:-self.endQuoteCharLen] 
1946   
1947              if isinstance(ret,basestring): 
1948                   
1949                  if self.escChar: 
1950                      ret = re.sub(self.escCharReplacePattern,"\g<1>",ret) 
1951   
1952                   
1953                  if self.escQuote: 
1954                      ret = ret.replace(self.escQuote, self.endQuoteChar) 
1955   
1956          return loc, ret 
 1957   
1959          try: 
1960              return super(QuotedString,self).__str__() 
1961          except: 
1962              pass 
1963   
1964          if self.strRepr is None: 
1965              self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) 
1966   
1967          return self.strRepr 
  1968   
1969   
1971      """Token for matching words composed of characters *not* in a given set. 
1972         Defined with string containing all disallowed characters, and an optional 
1973         minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a 
1974         minimum value < 1 is not valid); the default values for C{max} and C{exact} 
1975         are 0, meaning no maximum or exact length restriction. 
1976      """ 
1977 -    def __init__( self, notChars, min=1, max=0, exact=0 ): 
 1978          super(CharsNotIn,self).__init__() 
1979          self.skipWhitespace = False 
1980          self.notChars = notChars 
1981   
1982          if min < 1: 
1983              raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") 
1984   
1985          self.minLen = min 
1986   
1987          if max > 0: 
1988              self.maxLen = max 
1989          else: 
1990              self.maxLen = _MAX_INT 
1991   
1992          if exact > 0: 
1993              self.maxLen = exact 
1994              self.minLen = exact 
1995   
1996          self.name = _ustr(self) 
1997          self.errmsg = "Expected " + self.name 
1998          self.mayReturnEmpty = ( self.minLen == 0 ) 
1999          self.mayIndexError = False 
 2000   
2001 -    def parseImpl( self, instring, loc, doActions=True ): 
 2002          if instring[loc] in self.notChars: 
2003              raise ParseException(instring, loc, self.errmsg, self) 
2004   
2005          start = loc 
2006          loc += 1 
2007          notchars = self.notChars 
2008          maxlen = min( start+self.maxLen, len(instring) ) 
2009          while loc < maxlen and \ 
2010                (instring[loc] not in notchars): 
2011              loc += 1 
2012   
2013          if loc - start < self.minLen: 
2014              raise ParseException(instring, loc, self.errmsg, self) 
2015   
2016          return loc, instring[start:loc] 
 2017   
2019          try: 
2020              return super(CharsNotIn, self).__str__() 
2021          except: 
2022              pass 
2023   
2024          if self.strRepr is None: 
2025              if len(self.notChars) > 4: 
2026                  self.strRepr = "!W:(%s...)" % self.notChars[:4] 
2027              else: 
2028                  self.strRepr = "!W:(%s)" % self.notChars 
2029   
2030          return self.strRepr 
  2031   
2033      """Special matching class for matching whitespace.  Normally, whitespace is ignored 
2034         by pyparsing grammars.  This class is included when some whitespace structures 
2035         are significant.  Define with a string containing the whitespace characters to be 
2036         matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments, 
2037         as defined for the C{L{Word}} class.""" 
2038      whiteStrs = { 
2039          " " : "<SPC>", 
2040          "\t": "<TAB>", 
2041          "\n": "<LF>", 
2042          "\r": "<CR>", 
2043          "\f": "<FF>", 
2044          } 
2045 -    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): 
 2046          super(White,self).__init__() 
2047          self.matchWhite = ws 
2048          self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) ) 
2049           
2050          self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite)) 
2051          self.mayReturnEmpty = True 
2052          self.errmsg = "Expected " + self.name 
2053   
2054          self.minLen = min 
2055   
2056          if max > 0: 
2057              self.maxLen = max 
2058          else: 
2059              self.maxLen = _MAX_INT 
2060   
2061          if exact > 0: 
2062              self.maxLen = exact 
2063              self.minLen = exact 
 2064   
2065 -    def parseImpl( self, instring, loc, doActions=True ): 
 2066          if not(instring[ loc ] in self.matchWhite): 
2067              raise ParseException(instring, loc, self.errmsg, self) 
2068          start = loc 
2069          loc += 1 
2070          maxloc = start + self.maxLen 
2071          maxloc = min( maxloc, len(instring) ) 
2072          while loc < maxloc and instring[loc] in self.matchWhite: 
2073              loc += 1 
2074   
2075          if loc - start < self.minLen: 
2076              raise ParseException(instring, loc, self.errmsg, self) 
2077   
2078          return loc, instring[start:loc] 
  2079   
2080   
2083          super(_PositionToken,self).__init__() 
2084          self.name=self.__class__.__name__ 
2085          self.mayReturnEmpty = True 
2086          self.mayIndexError = False 
 2089      """Token to advance to a specific column of input text; useful for tabular report scraping.""" 
2093   
2095          if col(loc,instring) != self.col: 
2096              instrlen = len(instring) 
2097              if self.ignoreExprs: 
2098                  loc = self._skipIgnorables( instring, loc ) 
2099              while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col : 
2100                  loc += 1 
2101          return loc 
 2102   
2103 -    def parseImpl( self, instring, loc, doActions=True ): 
 2104          thiscol = col( loc, instring ) 
2105          if thiscol > self.col: 
2106              raise ParseException( instring, loc, "Text not in expected column", self ) 
2107          newloc = loc + self.col - thiscol 
2108          ret = instring[ loc: newloc ] 
2109          return newloc, ret 
  2110   
2112      """Matches if current position is at the beginning of a line within the parse string""" 
2117   
2119          preloc = super(LineStart,self).preParse(instring,loc) 
2120          if instring[preloc] == "\n": 
2121              loc += 1 
2122          return loc 
 2123   
2124 -    def parseImpl( self, instring, loc, doActions=True ): 
 2125          if not( loc==0 or 
2126              (loc == self.preParse( instring, 0 )) or 
2127              (instring[loc-1] == "\n") ):  
2128              raise ParseException(instring, loc, self.errmsg, self) 
2129          return loc, [] 
  2130   
2132      """Matches if current position is at the end of a line within the parse string""" 
2137   
2138 -    def parseImpl( self, instring, loc, doActions=True ): 
 2139          if loc<len(instring): 
2140              if instring[loc] == "\n": 
2141                  return loc+1, "\n" 
2142              else: 
2143                  raise ParseException(instring, loc, self.errmsg, self) 
2144          elif loc == len(instring): 
2145              return loc+1, [] 
2146          else: 
2147              raise ParseException(instring, loc, self.errmsg, self) 
  2148   
2150      """Matches if current position is at the beginning of the parse string""" 
2154   
2155 -    def parseImpl( self, instring, loc, doActions=True ): 
 2156          if loc != 0: 
2157               
2158              if loc != self.preParse( instring, 0 ): 
2159                  raise ParseException(instring, loc, self.errmsg, self) 
2160          return loc, [] 
  2161   
2163      """Matches if current position is at the end of the parse string""" 
2167   
2168 -    def parseImpl( self, instring, loc, doActions=True ): 
 2169          if loc < len(instring): 
2170              raise ParseException(instring, loc, self.errmsg, self) 
2171          elif loc == len(instring): 
2172              return loc+1, [] 
2173          elif loc > len(instring): 
2174              return loc, [] 
2175          else: 
2176              raise ParseException(instring, loc, self.errmsg, self) 
  2177   
2179      """Matches if the current position is at the beginning of a Word, and 
2180         is not preceded by any character in a given set of C{wordChars} 
2181         (default=C{printables}). To emulate the C{\b} behavior of regular expressions, 
2182         use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of 
2183         the string being parsed, or at the beginning of a line. 
2184      """ 
2186          super(WordStart,self).__init__() 
2187          self.wordChars = set(wordChars) 
2188          self.errmsg = "Not at the start of a word" 
 2189   
2190 -    def parseImpl(self, instring, loc, doActions=True ): 
 2191          if loc != 0: 
2192              if (instring[loc-1] in self.wordChars or 
2193                  instring[loc] not in self.wordChars): 
2194                  raise ParseException(instring, loc, self.errmsg, self) 
2195          return loc, [] 
  2196   
2198      """Matches if the current position is at the end of a Word, and 
2199         is not followed by any character in a given set of C{wordChars} 
2200         (default=C{printables}). To emulate the C{\b} behavior of regular expressions, 
2201         use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of 
2202         the string being parsed, or at the end of a line. 
2203      """ 
2205          super(WordEnd,self).__init__() 
2206          self.wordChars = set(wordChars) 
2207          self.skipWhitespace = False 
2208          self.errmsg = "Not at the end of a word" 
 2209   
2210 -    def parseImpl(self, instring, loc, doActions=True ): 
 2211          instrlen = len(instring) 
2212          if instrlen>0 and loc<instrlen: 
2213              if (instring[loc] in self.wordChars or 
2214                  instring[loc-1] not in self.wordChars): 
2215                  raise ParseException(instring, loc, self.errmsg, self) 
2216          return loc, [] 
  2217   
2218   
2220      """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" 
2221 -    def __init__( self, exprs, savelist = False ): 
 2222          super(ParseExpression,self).__init__(savelist) 
2223          if isinstance( exprs, _generatorType ): 
2224              exprs = list(exprs) 
2225   
2226          if isinstance( exprs, basestring ): 
2227              self.exprs = [ Literal( exprs ) ] 
2228          elif isinstance( exprs, collections.Sequence ): 
2229               
2230              if all(isinstance(expr, basestring) for expr in exprs): 
2231                  exprs = map(Literal, exprs) 
2232              self.exprs = list(exprs) 
2233          else: 
2234              try: 
2235                  self.exprs = list( exprs ) 
2236              except TypeError: 
2237                  self.exprs = [ exprs ] 
2238          self.callPreparse = False 
 2239   
2241          return self.exprs[i] 
 2242   
2244          self.exprs.append( other ) 
2245          self.strRepr = None 
2246          return self 
 2247   
2249          """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on 
2250             all contained expressions.""" 
2251          self.skipWhitespace = False 
2252          self.exprs = [ e.copy() for e in self.exprs ] 
2253          for e in self.exprs: 
2254              e.leaveWhitespace() 
2255          return self 
 2256   
2258          if isinstance( other, Suppress ): 
2259              if other not in self.ignoreExprs: 
2260                  super( ParseExpression, self).ignore( other ) 
2261                  for e in self.exprs: 
2262                      e.ignore( self.ignoreExprs[-1] ) 
2263          else: 
2264              super( ParseExpression, self).ignore( other ) 
2265              for e in self.exprs: 
2266                  e.ignore( self.ignoreExprs[-1] ) 
2267          return self 
 2268   
2270          try: 
2271              return super(ParseExpression,self).__str__() 
2272          except: 
2273              pass 
2274   
2275          if self.strRepr is None: 
2276              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) ) 
2277          return self.strRepr 
 2278   
2280          super(ParseExpression,self).streamline() 
2281   
2282          for e in self.exprs: 
2283              e.streamline() 
2284   
2285           
2286           
2287           
2288          if ( len(self.exprs) == 2 ): 
2289              other = self.exprs[0] 
2290              if ( isinstance( other, self.__class__ ) and 
2291                    not(other.parseAction) and 
2292                    other.resultsName is None and 
2293                    not other.debug ): 
2294                  self.exprs = other.exprs[:] + [ self.exprs[1] ] 
2295                  self.strRepr = None 
2296                  self.mayReturnEmpty |= other.mayReturnEmpty 
2297                  self.mayIndexError  |= other.mayIndexError 
2298   
2299              other = self.exprs[-1] 
2300              if ( isinstance( other, self.__class__ ) and 
2301                    not(other.parseAction) and 
2302                    other.resultsName is None and 
2303                    not other.debug ): 
2304                  self.exprs = self.exprs[:-1] + other.exprs[:] 
2305                  self.strRepr = None 
2306                  self.mayReturnEmpty |= other.mayReturnEmpty 
2307                  self.mayIndexError  |= other.mayIndexError 
2308   
2309          return self 
 2310   
2314   
2315 -    def validate( self, validateTrace=[] ): 
 2316          tmp = validateTrace[:]+[self] 
2317          for e in self.exprs: 
2318              e.validate(tmp) 
2319          self.checkRecursion( [] ) 
 2320           
 2325   
2326 -class And(ParseExpression): 
 2327      """Requires all given C{ParseExpression}s to be found in the given order. 
2328         Expressions may be separated by whitespace. 
2329         May be constructed using the C{'+'} operator. 
2330      """ 
2331   
2337   
2338 -    def __init__( self, exprs, savelist = True ): 
 2339          super(And,self).__init__(exprs, savelist) 
2340          self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 
2341          self.setWhitespaceChars( exprs[0].whiteChars ) 
2342          self.skipWhitespace = exprs[0].skipWhitespace 
2343          self.callPreparse = True 
 2344   
    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in order, starting at C{loc}.

        Returns C{(end_loc, results)}, where C{results} starts as the result
        of the first expression and accumulates the tokens of the remaining
        ones.  After an C{And._ErrorStop} marker has been seen, a failure in
        any later expression is raised as C{ParseSyntaxException}.
        """
        # The first expression is parsed with callPreParse=False.
        # NOTE(review): presumably because this And has already pre-parsed the
        # input itself (callPreparse is set to True in __init__) - confirm.
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            # The marker itself parses nothing; it only switches on the
            # error-stop mode for the expressions that follow it.
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    # already the right exception type; pass through unchanged
                    raise
                except ParseBaseException as pe:
                    # drop the traceback reference before wrapping the error
                    pe.__traceback__ = None
                    raise ParseSyntaxException(pe)
                except IndexError:
                    # running off the end of the input is reported at len(instring)
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that hold tokens or named entries
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist
 2369   
2371          if isinstance( other, basestring ): 
2372              other = Literal( other ) 
2373          return self.append( other )  
 2374   
2376          subRecCheckList = parseElementList[:] + [ self ] 
2377          for e in self.exprs: 
2378              e.checkRecursion( subRecCheckList ) 
2379              if not e.mayReturnEmpty: 
2380                  break 
 2381   
2383          if hasattr(self,"name"): 
2384              return self.name 
2385   
2386          if self.strRepr is None: 
2387              self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}" 
2388   
2389          return self.strRepr 
  2390   
2391   
2392 -class Or(ParseExpression): 
 2393      """Requires that at least one C{ParseExpression} is found. 
2394         If two expressions match, the expression that matches the longest string will be used. 
2395         May be constructed using the C{'^'} operator. 
2396      """ 
2397 -    def __init__( self, exprs, savelist = False ): 
 2398          super(Or,self).__init__(exprs, savelist) 
2399          if self.exprs: 
2400              self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) 
2401          else: 
2402              self.mayReturnEmpty = True 
 2403   
2404 -    def parseImpl( self, instring, loc, doActions=True ): 
 2405          maxExcLoc = -1 
2406          maxMatchLoc = -1 
2407          maxException = None 
2408          for e in self.exprs: 
2409              try: 
2410                  loc2 = e.tryParse( instring, loc ) 
2411              except ParseException as err: 
2412                  err.__traceback__ = None 
2413                  if err.loc > maxExcLoc: 
2414                      maxException = err 
2415                      maxExcLoc = err.loc 
2416              except IndexError: 
2417                  if len(instring) > maxExcLoc: 
2418                      maxException = ParseException(instring,len(instring),e.errmsg,self) 
2419                      maxExcLoc = len(instring) 
2420              else: 
2421                  if loc2 > maxMatchLoc: 
2422                      maxMatchLoc = loc2 
2423                      maxMatchExp = e 
2424   
2425          if maxMatchLoc < 0: 
2426              if maxException is not None: 
2427                  raise maxException 
2428              else: 
2429                  raise ParseException(instring, loc, "no defined alternatives to match", self) 
2430   
2431          return maxMatchExp._parse( instring, loc, doActions ) 
 2432   
2434          if isinstance( other, basestring ): 
2435              other = ParserElement.literalStringClass( other ) 
2436          return self.append( other )  
 2437   
2439          if hasattr(self,"name"): 
2440              return self.name 
2441   
2442          if self.strRepr is None: 
2443              self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}" 
2444   
2445          return self.strRepr 
 2446   
2448          subRecCheckList = parseElementList[:] + [ self ] 
2449          for e in self.exprs: 
2450              e.checkRecursion( subRecCheckList ) 
  2451   
2452   
2454      """Requires that at least one C{ParseExpression} is found. 
2455         If two expressions match, the first one listed is the one that will match. 
2456         May be constructed using the C{'|'} operator. 
2457      """ 
2458 -    def __init__( self, exprs, savelist = False ): 
 2459          super(MatchFirst,self).__init__(exprs, savelist) 
2460          if self.exprs: 
2461              self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) 
2462          else: 
2463              self.mayReturnEmpty = True 
 2464   
2465 -    def parseImpl( self, instring, loc, doActions=True ): 
 2466          maxExcLoc = -1 
2467          maxException = None 
2468          for e in self.exprs: 
2469              try: 
2470                  ret = e._parse( instring, loc, doActions ) 
2471                  return ret 
2472              except ParseException as err: 
2473                  if err.loc > maxExcLoc: 
2474                      maxException = err 
2475                      maxExcLoc = err.loc 
2476              except IndexError: 
2477                  if len(instring) > maxExcLoc: 
2478                      maxException = ParseException(instring,len(instring),e.errmsg,self) 
2479                      maxExcLoc = len(instring) 
2480   
2481           
2482          else: 
2483              if maxException is not None: 
2484                  raise maxException 
2485              else: 
2486                  raise ParseException(instring, loc, "no defined alternatives to match", self) 
 2487   
2489          if isinstance( other, basestring ): 
2490              other = ParserElement.literalStringClass( other ) 
2491          return self.append( other )  
 2492   
2494          if hasattr(self,"name"): 
2495              return self.name 
2496   
2497          if self.strRepr is None: 
2498              self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}" 
2499   
2500          return self.strRepr 
 2501   
2503          subRecCheckList = parseElementList[:] + [ self ] 
2504          for e in self.exprs: 
2505              e.checkRecursion( subRecCheckList ) 
  2506   
2507   
2508 -class Each(ParseExpression): 
 2509      """Requires all given C{ParseExpression}s to be found, but in any order. 
2510         Expressions may be separated by whitespace. 
2511         May be constructed using the C{'&'} operator. 
2512      """ 
2513 -    def __init__( self, exprs, savelist = True ): 
 2514          super(Each,self).__init__(exprs, savelist) 
2515          self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 
2516          self.skipWhitespace = True 
2517          self.initExprGroups = True 
 2518   
2519 -    def parseImpl( self, instring, loc, doActions=True ): 
 2520          if self.initExprGroups: 
2521              opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] 
2522              opt2 = [ e for e in self.exprs if e.mayReturnEmpty and e not in opt1 ] 
2523              self.optionals = opt1 + opt2 
2524              self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] 
2525              self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] 
2526              self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] 
2527              self.required += self.multirequired 
2528              self.initExprGroups = False 
2529          tmpLoc = loc 
2530          tmpReqd = self.required[:] 
2531          tmpOpt  = self.optionals[:] 
2532          matchOrder = [] 
2533   
2534          keepMatching = True 
2535          while keepMatching: 
2536              tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 
2537              failed = [] 
2538              for e in tmpExprs: 
2539                  try: 
2540                      tmpLoc = e.tryParse( instring, tmpLoc ) 
2541                  except ParseException: 
2542                      failed.append(e) 
2543                  else: 
2544                      matchOrder.append(e) 
2545                      if e in tmpReqd: 
2546                          tmpReqd.remove(e) 
2547                      elif e in tmpOpt: 
2548                          tmpOpt.remove(e) 
2549              if len(failed) == len(tmpExprs): 
2550                  keepMatching = False 
2551   
2552          if tmpReqd: 
2553              missing = ", ".join(_ustr(e) for e in tmpReqd) 
2554              raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 
2555   
2556           
2557          matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] 
2558   
2559          resultlist = [] 
2560          for e in matchOrder: 
2561              loc,results = e._parse(instring,loc,doActions) 
2562              resultlist.append(results) 
2563   
2564          finalResults = ParseResults([]) 
2565          for r in resultlist: 
2566              dups = {} 
2567              for k in r.keys(): 
2568                  if k in finalResults: 
2569                      tmp = ParseResults(finalResults[k]) 
2570                      tmp += ParseResults(r[k]) 
2571                      dups[k] = tmp 
2572              finalResults += ParseResults(r) 
2573              for k,v in dups.items(): 
2574                  finalResults[k] = v 
2575          return loc, finalResults 
 2576   
2578          if hasattr(self,"name"): 
2579              return self.name 
2580   
2581          if self.strRepr is None: 
2582              self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}" 
2583   
2584          return self.strRepr 
 2585   
2587          subRecCheckList = parseElementList[:] + [ self ] 
2588          for e in self.exprs: 
2589              e.checkRecursion( subRecCheckList ) 
  2590   
2591   
2593      """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.""" 
2594 -    def __init__( self, expr, savelist=False ): 
 2595          super(ParseElementEnhance,self).__init__(savelist) 
2596          if isinstance( expr, basestring ): 
2597              expr = Literal(expr) 
2598          self.expr = expr 
2599          self.strRepr = None 
2600          if expr is not None: 
2601              self.mayIndexError = expr.mayIndexError 
2602              self.mayReturnEmpty = expr.mayReturnEmpty 
2603              self.setWhitespaceChars( expr.whiteChars ) 
2604              self.skipWhitespace = expr.skipWhitespace 
2605              self.saveAsList = expr.saveAsList 
2606              self.callPreparse = expr.callPreparse 
2607              self.ignoreExprs.extend(expr.ignoreExprs) 
 2608   
2609 -    def parseImpl( self, instring, loc, doActions=True ): 
 2610          if self.expr is not None: 
2611              return self.expr._parse( instring, loc, doActions, callPreParse=False ) 
2612          else: 
2613              raise ParseException("",loc,self.errmsg,self) 
 2614   
2616          self.skipWhitespace = False 
2617          self.expr = self.expr.copy() 
2618          if self.expr is not None: 
2619              self.expr.leaveWhitespace() 
2620          return self 
 2621   
2623          if isinstance( other, Suppress ): 
2624              if other not in self.ignoreExprs: 
2625                  super( ParseElementEnhance, self).ignore( other ) 
2626                  if self.expr is not None: 
2627                      self.expr.ignore( self.ignoreExprs[-1] ) 
2628          else: 
2629              super( ParseElementEnhance, self).ignore( other ) 
2630              if self.expr is not None: 
2631                  self.expr.ignore( self.ignoreExprs[-1] ) 
2632          return self 
 2633   
2639   
2641          if self in parseElementList: 
2642              raise RecursiveGrammarException( parseElementList+[self] ) 
2643          subRecCheckList = parseElementList[:] + [ self ] 
2644          if self.expr is not None: 
2645              self.expr.checkRecursion( subRecCheckList ) 
 2646   
2647 -    def validate( self, validateTrace=[] ): 
 2648          tmp = validateTrace[:]+[self] 
2649          if self.expr is not None: 
2650              self.expr.validate(tmp) 
2651          self.checkRecursion( [] ) 
 2652   
2654          try: 
2655              return super(ParseElementEnhance,self).__str__() 
2656          except: 
2657              pass 
2658   
2659          if self.strRepr is None and self.expr is not None: 
2660              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) ) 
2661          return self.strRepr 
  2662   
2663   
2665      """Lookahead matching of the given parse expression.  C{FollowedBy} 
2666      does *not* advance the parsing position within the input string, it only 
2667      verifies that the specified parse expression matches at the current 
2668      position.  C{FollowedBy} always returns a null token list.""" 
2672   
2673 -    def parseImpl( self, instring, loc, doActions=True ): 
 2674          self.expr.tryParse( instring, loc ) 
2675          return loc, [] 
  2676   
2677   
2678 -class NotAny(ParseElementEnhance): 
 2679      """Lookahead to disallow matching with the given parse expression.  C{NotAny} 
2680      does *not* advance the parsing position within the input string, it only 
2681      verifies that the specified parse expression does *not* match at the current 
2682      position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny} 
2683      always returns a null token list.  May be constructed using the '~' operator.""" 
2685          super(NotAny,self).__init__(expr) 
2686           
2687          self.skipWhitespace = False   
2688          self.mayReturnEmpty = True 
2689          self.errmsg = "Found unwanted token, "+_ustr(self.expr) 
 2690   
2691 -    def parseImpl( self, instring, loc, doActions=True ): 
 2692          try: 
2693              self.expr.tryParse( instring, loc ) 
2694          except (ParseException,IndexError): 
2695              pass 
2696          else: 
2697              raise ParseException(instring, loc, self.errmsg, self) 
2698          return loc, [] 
 2699   
2701          if hasattr(self,"name"): 
2702              return self.name 
2703   
2704          if self.strRepr is None: 
2705              self.strRepr = "~{" + _ustr(self.expr) + "}" 
2706   
2707          return self.strRepr 
  2708   
2709   
2711      """Optional repetition of zero or more of the given expression.""" 
2715   
2716 -    def parseImpl( self, instring, loc, doActions=True ): 
 2717          tokens = [] 
2718          try: 
2719              loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) 
2720              hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) 
2721              while 1: 
2722                  if hasIgnoreExprs: 
2723                      preloc = self._skipIgnorables( instring, loc ) 
2724                  else: 
2725                      preloc = loc 
2726                  loc, tmptokens = self.expr._parse( instring, preloc, doActions ) 
2727                  if tmptokens or tmptokens.haskeys(): 
2728                      tokens += tmptokens 
2729          except (ParseException,IndexError): 
2730              pass 
2731   
2732          return loc, tokens 
 2733   
2735          if hasattr(self,"name"): 
2736              return self.name 
2737   
2738          if self.strRepr is None: 
2739              self.strRepr = "[" + _ustr(self.expr) + "]..." 
2740   
2741          return self.strRepr 
 2742   
 2747   
2748   
2750      """Repetition of one or more of the given expression.""" 
2751 -    def parseImpl( self, instring, loc, doActions=True ): 
 2752           
2753          loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) 
2754          try: 
2755              hasIgnoreExprs = ( len(self.ignoreExprs) > 0 ) 
2756              while 1: 
2757                  if hasIgnoreExprs: 
2758                      preloc = self._skipIgnorables( instring, loc ) 
2759                  else: 
2760                      preloc = loc 
2761                  loc, tmptokens = self.expr._parse( instring, preloc, doActions ) 
2762                  if tmptokens or tmptokens.haskeys(): 
2763                      tokens += tmptokens 
2764          except (ParseException,IndexError): 
2765              pass 
2766   
2767          return loc, tokens 
 2768   
2770          if hasattr(self,"name"): 
2771              return self.name 
2772   
2773          if self.strRepr is None: 
2774              self.strRepr = "{" + _ustr(self.expr) + "}..." 
2775   
2776          return self.strRepr 
 2777   
 2782   
2789   
2790  _optionalNotMatched = _NullToken() 
2792      """Optional matching of the given expression. 
2793         A default return string can also be specified, if the optional expression 
2794         is not found. 
2795      """ 
2797          super(Optional,self).__init__( expr, savelist=False ) 
2798          self.defaultValue = default 
2799          self.mayReturnEmpty = True 
 2800   
2801 -    def parseImpl( self, instring, loc, doActions=True ): 
 2802          try: 
2803              loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False ) 
2804          except (ParseException,IndexError): 
2805              if self.defaultValue is not _optionalNotMatched: 
2806                  if self.expr.resultsName: 
2807                      tokens = ParseResults([ self.defaultValue ]) 
2808                      tokens[self.expr.resultsName] = self.defaultValue 
2809                  else: 
2810                      tokens = [ self.defaultValue ] 
2811              else: 
2812                  tokens = [] 
2813          return loc, tokens 
 2814   
2816          if hasattr(self,"name"): 
2817              return self.name 
2818   
2819          if self.strRepr is None: 
2820              self.strRepr = "[" + _ustr(self.expr) + "]" 
2821   
2822          return self.strRepr 
  2823   
2824   
2825 -class SkipTo(ParseElementEnhance): 
 2826      """Token for skipping over all undefined text until the matched expression is found. 
2827         If C{include} is set to true, the matched expression is also parsed (the skipped text 
2828         and matched expression are returned as a 2-element list).  The C{ignore} 
2829         argument is used to define grammars (typically quoted strings and comments) that 
2830         might contain false matches. 
2831      """ 
2832 -    def __init__( self, other, include=False, ignore=None, failOn=None ): 
 2833          super( SkipTo, self ).__init__( other ) 
2834          self.ignoreExpr = ignore 
2835          self.mayReturnEmpty = True 
2836          self.mayIndexError = False 
2837          self.includeMatch = include 
2838          self.asList = False 
2839          if failOn is not None and isinstance(failOn, basestring): 
2840              self.failOn = Literal(failOn) 
2841          else: 
2842              self.failOn = failOn 
2843          self.errmsg = "No match found for "+_ustr(self.expr) 
 2844   
2845 -    def parseImpl( self, instring, loc, doActions=True ): 
 2846          startLoc = loc 
2847          instrlen = len(instring) 
2848          expr = self.expr 
2849          failParse = False 
2850          while loc <= instrlen: 
2851              try: 
2852                  if self.failOn: 
2853                      try: 
2854                          self.failOn.tryParse(instring, loc) 
2855                      except ParseBaseException: 
2856                          pass 
2857                      else: 
2858                          failParse = True 
2859                          raise ParseException(instring, loc, "Found expression " + str(self.failOn)) 
2860                      failParse = False 
2861                  if self.ignoreExpr is not None: 
2862                      while 1: 
2863                          try: 
2864                              loc = self.ignoreExpr.tryParse(instring,loc) 
2865                               
2866                          except ParseBaseException: 
2867                              break 
2868                  expr._parse( instring, loc, doActions=False, callPreParse=False ) 
2869                  skipText = instring[startLoc:loc] 
2870                  if self.includeMatch: 
2871                      loc,mat = expr._parse(instring,loc,doActions,callPreParse=False) 
2872                      if mat: 
2873                          skipRes = ParseResults( skipText ) 
2874                          skipRes += mat 
2875                          return loc, [ skipRes ] 
2876                      else: 
2877                          return loc, [ skipText ] 
2878                  else: 
2879                      return loc, [ skipText ] 
2880              except (ParseException,IndexError): 
2881                  if failParse: 
2882                      raise 
2883                  else: 
2884                      loc += 1 
2885          raise ParseException(instring, loc, self.errmsg, self) 
  2886   
2887 -class Forward(ParseElementEnhance): 
 2888      """Forward declaration of an expression to be defined later - 
2889         used for recursive grammars, such as algebraic infix notation. 
2890         When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator. 
2891   
2892         Note: take care when assigning to C{Forward} not to overlook precedence of operators. 
2893         Specifically, '|' has a lower precedence than '<<', so that:: 
2894            fwdExpr << a | b | c 
2895         will actually be evaluated as:: 
2896            (fwdExpr << a) | b | c 
2897         thereby leaving b and c out as parseable alternatives.  It is recommended that you 
2898         explicitly group the values inserted into the C{Forward}:: 
2899            fwdExpr << (a | b | c) 
2900         Converting to use the '<<=' operator instead will avoid this problem. 
2901      """ 
2904   
2906          if isinstance( other, basestring ): 
2907              other = ParserElement.literalStringClass(other) 
2908          self.expr = other 
2909          self.mayReturnEmpty = other.mayReturnEmpty 
2910          self.strRepr = None 
2911          self.mayIndexError = self.expr.mayIndexError 
2912          self.mayReturnEmpty = self.expr.mayReturnEmpty 
2913          self.setWhitespaceChars( self.expr.whiteChars ) 
2914          self.skipWhitespace = self.expr.skipWhitespace 
2915          self.saveAsList = self.expr.saveAsList 
2916          self.ignoreExprs.extend(self.expr.ignoreExprs) 
2917          return self 
 2918           
2920          return self << other 
 2921       
2923          self.skipWhitespace = False 
2924          return self 
 2925   
2927          if not self.streamlined: 
2928              self.streamlined = True 
2929              if self.expr is not None: 
2930                  self.expr.streamline() 
2931          return self 
 2932   
2933 -    def validate( self, validateTrace=[] ): 
 2934          if self not in validateTrace: 
2935              tmp = validateTrace[:]+[self] 
2936              if self.expr is not None: 
2937                  self.expr.validate(tmp) 
2938          self.checkRecursion([]) 
 2939   
2941          if hasattr(self,"name"): 
2942              return self.name 
2943   
2944          self._revertClass = self.__class__ 
2945          self.__class__ = _ForwardNoRecurse 
2946          try: 
2947              if self.expr is not None: 
2948                  retString = _ustr(self.expr) 
2949              else: 
2950                  retString = "None" 
2951          finally: 
2952              self.__class__ = self._revertClass 
2953          return self.__class__.__name__ + ": " + retString 
 2954   
2956          if self.expr is not None: 
2957              return super(Forward,self).copy() 
2958          else: 
2959              ret = Forward() 
2960              ret <<= self 
2961              return ret 
  2962   
2966   
2968      """Abstract subclass of C{ParseExpression}, for converting parsed results.""" 
2969 -    def __init__( self, expr, savelist=False ): 
  2972   
2973 -class Upcase(TokenConverter): 
 2974      """Converter to upper case all matching tokens.""" 
2976          super(Upcase,self).__init__(*args) 
2977          warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", 
2978                         DeprecationWarning,stacklevel=2) 
 2979   
2980 -    def postParse( self, instring, loc, tokenlist ): 
 2981          return list(map( str.upper, tokenlist )) 
  2982   
2983   
2985      """Converter to concatenate all matching tokens to a single string. 
2986         By default, the matching patterns must also be contiguous in the input string; 
2987         this can be disabled by specifying C{'adjacent=False'} in the constructor. 
2988      """ 
2989 -    def __init__( self, expr, joinString="", adjacent=True ): 
 2990          super(Combine,self).__init__( expr ) 
2991           
2992          if adjacent: 
2993              self.leaveWhitespace() 
2994          self.adjacent = adjacent 
2995          self.skipWhitespace = True 
2996          self.joinString = joinString 
2997          self.callPreparse = True 
 2998   
3005   
3006 -    def postParse( self, instring, loc, tokenlist ): 
 3007          retToks = tokenlist.copy() 
3008          del retToks[:] 
3009          retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults) 
3010   
3011          if self.resultsName and retToks.haskeys(): 
3012              return [ retToks ] 
3013          else: 
3014              return retToks 
  3015   
3016 -class Group(TokenConverter): 
 3017      """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.""" 
3019          super(Group,self).__init__( expr ) 
3020          self.saveAsList = True 
 3021   
3022 -    def postParse( self, instring, loc, tokenlist ): 
 3023          return [ tokenlist ] 
  3024   
3025 -class Dict(TokenConverter): 
 3026      """Converter to return a repetitive expression as a list, but also as a dictionary. 
3027         Each element can also be referenced using the first token in the expression as its key. 
3028         Useful for tabular report scraping when the first column can be used as a item key. 
3029      """ 
3031          super(Dict,self).__init__( expr ) 
3032          self.saveAsList = True 
 3033   
3034 -    def postParse( self, instring, loc, tokenlist ): 
 3035          for i,tok in enumerate(tokenlist): 
3036              if len(tok) == 0: 
3037                  continue 
3038              ikey = tok[0] 
3039              if isinstance(ikey,int): 
3040                  ikey = _ustr(tok[0]).strip() 
3041              if len(tok)==1: 
3042                  tokenlist[ikey] = _ParseResultsWithOffset("",i) 
3043              elif len(tok)==2 and not isinstance(tok[1],ParseResults): 
3044                  tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i) 
3045              else: 
3046                  dictvalue = tok.copy()  
3047                  del dictvalue[0] 
3048                  if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()): 
3049                      tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i) 
3050                  else: 
3051                      tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i) 
3052   
3053          if self.resultsName: 
3054              return [ tokenlist ] 
3055          else: 
3056              return tokenlist 
  3057   
3058   
3060      """Converter for ignoring the results of a parsed expression.""" 
3061 -    def postParse( self, instring, loc, tokenlist ): 
 3063   
 3066   
3067   
3069      """Wrapper for parse actions, to ensure they are only called once.""" 
3071          self.callable = _trim_arity(methodCall) 
3072          self.called = False 
 3074          if not self.called: 
3075              results = self.callable(s,l,t) 
3076              self.called = True 
3077              return results 
3078          raise ParseException(s,l,"") 
  3081   
def traceParseAction(f):
    """Decorator for debugging parse actions.

    Wraps C{f} so that each call writes an "entering" line to C{sys.stderr}
    (function name, current source line, location and tokens) and a
    "leaving" line with either the return value or the exception raised.
    Exceptions are re-raised unchanged.
    """
    # Capture the name before wrapping: afterwards f is the adapter returned
    # by _trim_arity, not the user's function.  Use __name__ rather than
    # func_name, which Python 3 functions do not have; callables without a
    # __name__ fall back to their class name.
    funcName = getattr(f, "__name__", f.__class__.__name__)
    f = _trim_arity(f)
    def z(*paArgs):
        thisFunc = funcName
        # the last three arguments are always (s, l, t); an extra leading
        # argument is the instance of a bound-method style call
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = funcName
    except (AttributeError, TypeError):
        # best effort only - the wrapper still works under its own name
        pass
    return z
3103   
3104   
3105   
3106   
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to C{True}, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    dlName = "%s [%s %s]..." % (_ustr(expr), _ustr(delim), _ustr(expr))
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
 3120   
def countedArray( expr, intExpr=None ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # redefine the array expression each time a count is parsed
        n = t[0]
        if n:
            counted = Group(And([expr]*n))
        else:
            counted = Group(empty)
        arrayExpr << counted
        return []

    if intExpr is not None:
        # copy so the caller's expression is left untouched
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr )
3140   
3142      ret = [] 
3143      for i in L: 
3144          if isinstance(i,list): 
3145              ret.extend(_flatten(i)) 
3146          else: 
3147              ret.append(i) 
3148      return ret 
 3149   
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several tokens: match each of them in sequence
            flatTokens = _flatten(t.asList())
            rep << And( [ Literal(tok) for tok in flatTokens ] )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3175   
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()

    def copyTokenToRepeater(s,l,t):
        # remember what expr matched, then make the repeater insist on it
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("",0,"")

        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3201   
def _escapeRegexRangeChars(s):
    r"""Escape C{s} for use inside a regular expression character class
       (C{[...]}): backslash, C{^}, C{-} and C{]} get a leading backslash, and
       newline and tab characters are spelled C{\n} and C{\t}.  The result is
       passed through C{_ustr}."""
    # The backslash must come first, otherwise the backslashes inserted for
    # the other characters would themselves be escaped again.
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, _bslash + special)
    s = s.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(s)
 3209   
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or any iterable of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       A C{SyntaxWarning} is issued when C{strs} is neither a string nor
       iterable; in that case, and whenever no symbols are given, an empty
       C{MatchFirst} is returned.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    # Always bind symbols, so that an invalid argument produces the warning
    # below rather than a NameError further down.
    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    else:
        try:
            symbols = list(strs)
        except TypeError:
            warnings.warn("Invalid argument to oneOf, expected string or list",
                    SyntaxWarning, stacklevel=2)
    if not symbols:
        return MatchFirst( [] )

    # Drop duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix (e.g. "<" before "<=") ahead of that prefix.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for position i, so move on to the next symbol
            i += 1

    if not caseless and useRegex:
        try:
            # A character class is only correct when every symbol is exactly
            # one character; comparing total lengths is fooled by empty symbols.
            if all(len(sym) == 1 for sym in symbols):
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) )
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
 3270   
def dictOf( key, value ):
    """Helper that builds a dictionary expression from a key pattern and a
       value pattern, nesting the C{L{Dict}}, C{L{ZeroOrMore}} and C{L{Group}}
       wrappers in the required order.  The key pattern may contain delimiters
       or punctuation as long as they are suppressed, so that only the
       significant key text remains.  The value pattern may carry results
       names, so the C{Dict} results can include named token fields.
    """
    # each key/value pair is grouped so that Dict sees one entry per pair
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
 3280   
def originalTextFor(expr, asString=True):
    """Helper that returns the original, untokenized input text matched by
       C{expr}, e.g. to turn the parsed fields of an HTML start tag back into
       the raw tag text, or to rejoin tokens that were separated by whitespace.

       With C{asString=True} (the default) the result is a string holding the
       matched slice of the input.  With C{asString=False} the result is a
       C{L{ParseResults}} that keeps any results names matched inside C{expr}
       and holds the original text as its single token; use this form when
       the results names defined inside C{expr} must be preserved."""
    # Empty markers whose parse actions just report the current location.
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    # the end marker must not skip ahead before reporting its location
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    def textOnly(s,l,t):
        return s[t._original_start:t._original_end]

    def textWithNames(s,l,t):
        # replace the token list in place and drop the two helper names
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    wrapped.setParseAction(textOnly if asString else textWithNames)
    return wrapped
3309   
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
       if all but one are non-empty.  Wraps C{expr} in a C{TokenConverter}
       whose parse action returns the first token."""
    firstToken = lambda t: t[0]
    return TokenConverter(expr).setParseAction(firstToken)
3314   
def locatedExpr(expr):
    """Helper that wraps C{expr} in a C{Group} annotated with the locations
       where the match starts and ends in the input string.
       The group carries these results names:
        - locn_start = location where matched expression begins
        - locn_end = location where matched expression ends
        - value = the actual parsed results

       Be careful if the input text contains C{<TAB>} characters, you may want to call
       C{L{ParserElement.parseWithTabs}}
    """
    # an Empty whose parse action simply reports the current parse location
    locator = Empty().setParseAction(lambda s,l,t: l)
    startLoc = locator("locn_start")
    # the end marker must not skip whitespace that follows the match
    endLoc = locator.copy().leaveWhitespace()("locn_end")
    return Group(startLoc + expr("value") + endLoc)
 3327   
3328   
3329   
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# Private grammar for regex-style "[...]" character sets (parsed by srange).
# backslash + one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## or \0x## -> character with that hex code.  The digits are taken after
# the x/X marker; stripping the character set r'\0x' from the left left nothing
# to convert for an all-zero code such as \x00, and did not remove an upper-case X.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split("x",1)[1],16)))
# \0## -> character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1)
# "a-z" style range, grouped so it can be told apart from single characters
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
3342   
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Returns an empty string if C{s} cannot be parsed or expanded.
    """
    # a grouped part is a two-element range; anything else is a single character
    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # best effort: malformed input yields "", but KeyboardInterrupt and
        # SystemExit are no longer swallowed
        return ""
 3365   
def matchOnlyAtCol(n):
    """Helper that creates a parse action which only accepts a match located
       at column C{n} of the input text; at any other column the parse action
       raises C{ParseException}.
    """
    def verifyCol(strg,locn,toks):
        actualCol = col(locn,strg)
        if actualCol == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
3374   
def replaceWith(replStr):
    """Helper that creates a parse action which ignores its arguments and
       always returns C{[replStr]}.  Especially useful when used with
       C{L{transformString<ParserElement.transformString>}()}.
    """
    return lambda *args: [replStr]
3382   
def removeQuotes(s,l,t):
    """Helper parse action that strips the first and last character (the
       quotation marks) from the first token of a parsed quoted string.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
 3389   
def upcaseTokens(s,l,t):
    """Helper parse action that returns every token, converted with
       C{_ustr}, in upper case."""
    return [ _ustr(tok).upper() for tok in t ]
 3393   
def downcaseTokens(s,l,t):
    """Helper parse action that returns every token, converted with
       C{_ustr}, in lower case."""
    return [ _ustr(tok).lower() for tok in t ]
 3397   
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use new helper method C{L{originalTextFor}}.
       Helper parse action that replaces the tokens with the original text
       that was parsed, overriding any nested parse actions.  The tokens
       object C{t} is modified in place and returned."""
    try:
        endLoc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    else:
        originalText = s[startLoc:endLoc]
        # empty the existing results, then append the raw slice of the input
        del t[:]
        t += ParseResults(originalText)
        return t
 3409   
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.  Walks up the call stack to the nearest
       C{_parseNoCache} frame and returns its local C{loc}; raises
       C{ParseFatalException} when no such frame is found."""
    import inspect
    frames = inspect.stack()
    try:
        # skip this function and its direct caller, then search up the stack
        # (through intervening argument normalizers) for the parsing routine
        for frameRecord in frames[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the reference to the frame records
        del frames
 3425   
3454   
3458   
3462   
def withAttribute(*args,**attrDict):
    """Helper that creates a validating parse action for start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}.  Use C{withAttribute} to
       require specific attribute values on a start tag, to avoid false matches
       on common tags such as C{<TD>} or C{<DIV>}.

       The required attribute names and values may be given as:
        - keyword arguments, as in C{(align="right")}, or
        - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       Attribute names with a namespace prefix cannot be written as plain
       keyword arguments, so use one of the other two forms for them.
       Positional name-value tuples, when given, take precedence over
       keyword arguments.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.

       The returned parse action raises C{ParseException} when a required
       attribute is missing or has a different value.
       """
    pairs = args if args else attrDict.items()
    # snapshot as a list of 2-tuples, so the closure can iterate it repeatedly
    attrs = [(name,value) for name,value in pairs]

    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue == withAttribute.ANY_VALUE:
                # presence is all that was asked for
                continue
            if tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# sentinel meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
3495   
# associativity markers used in the operator tuples passed to infixNotation
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element (operand)
          of the nested expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted); a list or tuple
              of parse actions is also accepted
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Raises C{ValueError} for a numTerms other than 1, 2 or 3, for a
       ternary operator whose opExpr is not a pair, and for an unknown
       associativity value.
    """
    ret = Forward()
    # innermost level: an operand, or a parenthesized full expression
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: operands are simply adjacent
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction( *pa )
            else:
                matchExpr.setParseAction( pa )
        # this level matches its operator form, or falls through to the
        # next tighter-binding level
        thisExpr <<= ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# alias for infixNotation
operatorPrecedence = infixNotation
3573   
# Quoted-string expressions.  Inside the quotes each pattern accepts any
# character other than the quote, CR, LF or backslash, a doubled quote,
# a \x hex escape, or a backslash followed by any character.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
).setName("string enclosed in single quotes")
# either of the two forms above, in a single pattern
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|'''
    r'''(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
).setName("quotedString using single or double quotes")
# a quoted string with a leading 'u', combined into one token
unicodeString = Combine(_L('u') + quotedString.copy())
3578   
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper that defines nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       When no C{content} expression is given, everything between the
       delimiters is captured as a list of whitespace-delimited values; in
       that case C{opener} and C{closer} must both be strings.

       C{ignoreExpr} names expressions that may contain opening or closing
       characters which must not count as nesting delimiters, such as
       quotedString or a comment expression.  Specify multiple expressions
       using an C{L{Or}} or C{L{MatchFirst}}.  Pass C{None} if nothing is to
       be ignored.

       Raises C{ValueError} if C{opener} equals C{closer}, or if C{content}
       is omitted while a delimiter is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer) == 1
        if singleCharDelims and ignoreExpr is not None:
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whiteChars,exact=1))
                        ).setParseAction(lambda t:t[0].strip()))
        elif singleCharDelims:
            content = (empty.copy()+CharsNotIn(opener+closer+whiteChars
                        ).setParseAction(lambda t:t[0].strip()))
        elif ignoreExpr is not None:
            # multi-character delimiters cannot go into a CharsNotIn set, so
            # they are excluded with negative lookaheads instead
            content = (Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(lambda t:t[0].strip()))
        else:
            content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(lambda t:t[0].strip()))

    nested = Forward()
    if ignoreExpr is None:
        items = nested | content
    else:
        items = ignoreExpr | nested | content
    nested <<= Group( Suppress(opener) + ZeroOrMore( items ) + Suppress(closer) )
    return nested
 3630   
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that C{blockStatementExpr} itself is modified: it is told to
       ignore a backslash followed by a line end.
    """
    def checkPeerIndent(s,l,t):
        # at end of input there is nothing left to compare
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            # deeper than the current level: open a new indentation level
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        if l >= len(s): return
        curCol = col(l,s)
        # Two entries are needed to compare against the enclosing level; a
        # shorter stack is reported as "not an unindent" instead of failing
        # with an IndexError on indentStack[-2].
        if not(len(indentStack) >= 2 and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if indent:
        smExpr = Group( Optional(NL) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # allow backslash line continuations inside block statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3682   
# character sets for code points 0xa1-0xff, expanded with srange
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# generic HTML tag and entity helpers
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action: replacement character for a known entity, otherwise None
# (dict.get replaces the fragile "cond and a or b" idiom)
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)

# commonly used comment expressions
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# one item of a comma-separated list: printable words (no commas), optionally
# joined by blanks/tabs that are not directly followed by a comma or line end
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
3705   
3706   
if __name__ == "__main__":

    def test( teststring ):
        """Parse C{teststring} with C{simpleSQL} and print the results, or
           the failing line with a caret under the error column."""
        try:
            parsed = simpleSQL.parseString( teststring )
            parsedList = parsed.asList()
            print (teststring + "->"   + str(parsedList))
            print ("tokens = "         + str(parsed))
            print ("tokens.columns = " + str(parsed.columns))
            print ("tokens.tables = "  + str(parsed.tables))
            print (parsed.asXML("SQL",True))
        except ParseBaseException as err:
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        print()

    # a tiny "SELECT <columns> FROM <tables>" grammar used as a smoke test
    selectToken    = CaselessLiteral( "select" )
    fromToken      = CaselessLiteral( "from" )

    ident          = Word( alphas, alphanums + "_$" )
    # dotted names are combined into one token and upper-cased
    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList  = Group( delimitedList( tableName ) )
    simpleSQL      = ( selectToken +
                     ( '*' | columnNameList ).setResultsName( "columns" ) +
                     fromToken +
                     tableNameList.setResultsName( "tables" ) )

    # valid and deliberately malformed statements, run in this order
    sampleStatements = (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2   ",
    )
    for statement in sampleStatements:
        test( statement )
3748      test( "Select A, B, C from Sys.dual, Table2   " ) 
3749