CMS 3D CMS Logo

pyparsing.py

Go to the documentation of this file.
00001 # module pyparsing.py
00002 #
00003 # Copyright (c) 2003-2006  Paul T. McGuire
00004 #
00005 # Permission is hereby granted, free of charge, to any person obtaining
00006 # a copy of this software and associated documentation files (the
00007 # "Software"), to deal in the Software without restriction, including
00008 # without limitation the rights to use, copy, modify, merge, publish,
00009 # distribute, sublicense, and/or sell copies of the Software, and to
00010 # permit persons to whom the Software is furnished to do so, subject to
00011 # the following conditions:
00012 #
00013 # The above copyright notice and this permission notice shall be
00014 # included in all copies or substantial portions of the Software.
00015 #
00016 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00017 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00018 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00019 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
00020 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00021 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00022 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00023 #
00024 #from __future__ import generators
00025 
00026 __doc__ = \
00027 """
00028 pyparsing module - Classes and methods to define and execute parsing grammars
00029 
00030 The pyparsing module is an alternative approach to creating and executing simple grammars, 
00031 vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
00032 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
00033 provides a library of classes that you use to construct the grammar directly in Python.
00034 
00035 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
00036 
00037     from pyparsing import Word, alphas
00038     
00039     # define grammar of a greeting
00040     greet = Word( alphas ) + "," + Word( alphas ) + "!" 
00041     
00042     hello = "Hello, World!"
00043     print hello, "->", greet.parseString( hello )
00044 
00045 The program outputs the following::
00046 
00047     Hello, World! -> ['Hello', ',', 'World', '!']
00048 
00049 The Python representation of the grammar is quite readable, owing to the self-explanatory 
00050 class names, and the use of '+', '|' and '^' operators.
00051 
00052 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 
00053 object with named attributes.
00054 
00055 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
00056  - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
00057  - quoted strings
00058  - embedded comments
00059 """
# Release metadata for this module.
__version__ = "1.4.3"
# Free-form date/time string that accompanies __version__.
__versionTime__ = "1 July 2006 05:32"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
00063 
00064 import string
00065 import copy,sys
00066 import warnings
00067 import re
00068 import sre_constants
00069 import xml.sax.saxutils 
00070 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
00071 
00072 def _ustr(obj):
00073     """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
00074        str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
00075        then < returns the unicode object | encodes it with the default encoding | ... >.
00076     """
00077     try:
00078         # If this works, then _ustr(obj) has the same behaviour as str(obj), so
00079         # it won't break any existing code.
00080         return str(obj)
00081         
00082     except UnicodeEncodeError, e:
00083         # The Python docs (https://docs.python.org/ref/customization.html#l2h-182)
00084         # state that "The return value must be a string object". However, does a
00085         # unicode object (being a subclass of basestring) count as a "string
00086         # object"?
00087         # If so, then return a unicode object:
00088         return unicode(obj)
00089         # Else encode it... but how? There are many choices... :)
00090         # Replace unprintables with escape codes?
00091         #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
00092         # Replace unprintables with question marks?
00093         #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
00094         # ...
00095 
00096 def _str2dict(strg):
00097     return dict( [(c,0) for c in strg] )
00098 
# Convenience character sets for building grammar expressions.
# alphas: lower-case followed by upper-case letters, as defined by the string module
alphas     = string.lowercase + string.uppercase
# nums: the decimal digits
nums       = string.digits
# hexnums: decimal digits plus the hexadecimal letters in both cases
hexnums    = nums + "ABCDEFabcdef"
# alphanums: all letters followed by all digits
alphanums  = alphas + nums    
00103 
class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions.

       Stored attributes:
        - pstr - the string that was being parsed
        - loc - the location in pstr recorded for the failure
        - msg - the error message
        - parserElement - the elem argument given to the constructor (default None)
       The attributes lineno, col (or column) and line are computed on demand
       from loc and pstr; see __getattr__.
    """
    __slots__ = ( "loc","msg","pstr","parserElement" )
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc, msg, elem=None ):
        self.loc = loc
        self.msg = msg
        self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
           Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        # call form of raise: valid in every Python version, unlike "raise X, y"
        raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.

           Returns the line with markerString inserted at the exception
           column and with leading/trailing whitespace stripped.  An empty
           markerString returns the stripped line unmarked.
        """
        line_str = self.line
        line_column = self.column - 1    # column is 1-based, slicing is 0-based
        if markerString:
            line_str = "".join( [line_str[:line_column], markerString, line_str[line_column:]])
        return line_str.strip()
00143 
class ParseException(ParseBaseException):
    """Exception thrown when a parse expression doesn't match.

       Supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
00152     
class ParseFatalException(ParseBaseException):
    """User-throwable exception, raised when inconsistent parse content
       is found; stops all parsing immediately."""
00157     
class RecursiveGrammarException(Exception):
    """Exception thrown by validate() if the grammar could be improperly recursive.

       parseElementTrace holds the list of parse elements handed to the
       constructor.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple so that a tuple-valued trace is formatted
        # as a single value instead of being unpacked as format arguments.
        return "RecursiveGrammarException: %s" % ( self.parseElementTrace, )
00165 
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

       Internally the tokens live in a list (__toklist) and the named results
       in a dict (__tokdict) that maps each name to a list of
       (value, position) tuples.
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__modal" )
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # Wrapping an existing ParseResults returns that same object; __init__
        # then runs on it again, but with __doinit already False.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__modal = modal
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not self.__name:
                self.__modal = self.__modal and modal
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if toklist not in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = (toklist.copy(),-1)
                    else:
                        self[name] = (ParseResults(toklist[0]),-1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Index by int/slice into the token list, or by results name.

           For a name, modal results return the value of the last entry stored
           under it; non-modal results return a ParseResults of all values."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if self.__modal:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        """Store v under k.

           - v is a tuple: appended as-is to the entries for name k
           - k is an int: replaces the token at that list position
           - otherwise: (v,0) is appended to the entries for name k
           A stored ParseResults value gets this object as its parent."""
        if isinstance(v,tuple):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            # bind sub here too; it was previously left unbound on this path,
            # so every integer assignment raised UnboundLocalError below
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = self

    def __delitem__( self, i ):
        del self.__toklist[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __iter__( self ): return iter( self.__toklist )
    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,v[-1][0]) for k,v in self.__tokdict.items()]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        """Attribute-style access to named results.

           Unknown results names yield "" rather than raising; names listed
           in __slots__ yield None."""
        if name not in self.__slots__:
            if name in self.__tokdict:
                if self.__modal:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __iadd__( self, other ):
        """Append other's tokens and named results to this object in place."""
        if other.__tokdict:
            offset = len(self.__toklist)
            # Shift other's positions past our existing tokens.
            # NOTE(review): with the and/or form, a negative position is passed
            # through unchanged when offset is 0 - confirm whether intended.
            addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
            otherdictitems = [(k,(v[0],addoffset(v[1])) ) for (k,vlist) in other.__tokdict.items() for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = self
        self.__toklist += other.__toklist
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        # nested ParseResults are rendered recursively, plain tokens with repr()
        parts = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                parts.append( _ustr(i) )
            else:
                parts.append( repr(i) )
        return "[" + ", ".join(parts) + "]"

    def _asStringList( self, sep='' ):
        """Return the tokens flattened into a list of strings, with sep
           inserted between top-level items when sep is non-empty."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults become nested lists; tokens are returned unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__modal = self.__modal
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() for v in vlist ] )
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        # tag for this element: explicit doctag, else our own results name
        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i], namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
                else:
                    out += [ res.asXML(None, namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">", xmlBodyText, "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object sub is stored (by identity)
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            return self.__parent.__lookup(self)
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            # list(...) keeps the indexing valid whether values()/keys()
            # return a list or a view object
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append('\n')
                    out.append( v.dump(indent,depth+1) )
                    out.append('\n')
                else:
                    out.append(str(v))
            else:
                out.append(str(v))
        out.append('\n')
        out.append( indent+str(self.asList()) )
        return "".join(out)
00430     
def col (loc,strg):
    """Returns the column of position loc within strg, where lines are
   separated by newline characters.  Columns are numbered starting at 1.
   """
    # rfind yields -1 when no newline precedes loc, which makes the
    # first character of the string column 1
    precedingNewline = strg.rfind("\n", 0, loc)
    return loc - precedingNewline
00436 
def lineno(loc,strg):
    """Returns the line number of position loc within strg, where lines are
   separated by newline characters.  Lines are numbered starting at 1.
   """
    newlinesBefore = strg.count("\n", 0, loc)
    return newlinesBefore + 1
00442 
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

       The returned text excludes the terminating newline.  When loc is the
       position of a newline character, the line that newline ends is returned.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() signals "not found" with -1; index 0 is a valid hit (input that
    # starts with a newline), so the test must be >= 0, not > 0
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
00452 
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: prints the expression,
       the location, and that location as a (line,column) pair."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print("Match %s at loc %s %s" % ( expr, loc, position ))
00455 
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: prints the
       expression and the matched tokens as a list."""
    matched = toks.asList()
    print("Matched %s -> %s" % ( expr, matched ))
00458     
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run when a match attempt raises: prints the exception."""
    print("Exception raised: %s" % ( exc, ))
00461 
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and ignores them."""
    return None
00465 
00466 class ParserElement(object):
00467     """Abstract base level parser element class."""
00468     DEFAULT_WHITE_CHARS = " \n\t\r"
00469     
    def setDefaultWhitespaceChars( chars ):
        """Overrides the default whitespace chars.

           Rebinds the class attribute ParserElement.DEFAULT_WHITE_CHARS to
           chars.  Called on the class (static method), not on an instance.
        """
        ParserElement.DEFAULT_WHITE_CHARS = chars
    # pre-decorator spelling of @staticmethod
    setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
00475     
00476     def __init__( self, savelist=False ):
00477         self.parseAction = list()
00478         self.failAction = None
00479         #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
00480         self.strRepr = None
00481         self.resultsName = None
00482         self.saveAsList = savelist
00483         self.skipWhitespace = True
00484         self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
00485         self.copyDefaultWhiteChars = True
00486         self.mayReturnEmpty = False
00487         self.keepTabs = False
00488         self.ignoreExprs = list()
00489         self.debug = False
00490         self.streamlined = False
00491         self.mayIndexError = True
00492         self.errmsg = ""
00493         self.modalResults = True
00494         self.debugActions = ( None, None, None )
00495         self.re = None
00496 
00497     def copy( self ):
00498         """Make a copy of this ParserElement.  Useful for defining different parse actions
00499            for the same parsing pattern, using copies of the original parse element."""
00500         cpy = copy.copy( self )
00501         cpy.parseAction = self.parseAction[:]
00502         cpy.ignoreExprs = self.ignoreExprs[:]
00503         if self.copyDefaultWhiteChars:
00504             cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
00505         return cpy
00506 
00507     def setName( self, name ):
00508         """Define name for this expression, for use in debugging."""
00509         self.name = name
00510         self.errmsg = "Expected " + self.name
00511         return self
00512 
00513     def setResultsName( self, name, listAllMatches=False ):
00514         """Define name for referencing matching tokens as a nested attribute 
00515            of the returned parse results.
00516            NOTE: this returns a *copy* of the original ParserElement object;
00517            this is so that the client can define a basic element, such as an
00518            integer, and reference it in multiple places with different names.
00519         """
00520         newself = self.copy()
00521         newself.resultsName = name
00522         newself.modalResults = not listAllMatches
00523         return newself
00524 
    def normalizeParseActionArgs( f ):
        """Internal method used to decorate parse actions that take fewer than 3 arguments,
           so that all parse actions can be called as f(s,l,t).

           f may be a function, a bound method, or an object with a __call__
           method.  A callable that declares *args, or that takes exactly 3
           arguments, is returned unchanged; anything else is wrapped in an
           adapter that accepts (s,l,t) and forwards only the trailing
           arguments."""
        # co_flags bit that is set on code objects whose function declares *args
        STAR_ARGS = 4
        try:
            if f.func_code.co_flags & STAR_ARGS:
                return f
            numargs = f.func_code.co_argcount
            # for a method, co_argcount includes self - don't count it
            if hasattr(f,"im_self"):
                numargs -= 1
        except AttributeError:
            try:
                # not a function, must be a callable object, get info from the
                # im_func binding of its bound __call__ method
                if f.__call__.im_func.func_code.co_flags & STAR_ARGS:
                    return f
                numargs = f.__call__.im_func.func_code.co_argcount
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            except AttributeError:
                # not a bound method, get info directly from __call__ method
                if f.__call__.func_code.co_flags & STAR_ARGS:
                    return f
                numargs = f.__call__.func_code.co_argcount
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1

        #~ print "adding function %s with %d args" % (f.func_name,numargs)
        if numargs == 3:
            return f
        else:
            if numargs == 2:
                def tmp(s,l,t):
                    return f(l,t)
            elif numargs == 1:
                def tmp(s,l,t):
                    return f(t)
            # NOTE: this branch is taken for every count other than 1, 2 or 3,
            # not only for 0
            else: #~ numargs == 0:
                def tmp(s,l,t):
                    return f()
            return tmp
    # pre-decorator spelling of @staticmethod
    normalizeParseActionArgs = staticmethod(normalizeParseActionArgs)
00567             
00568     def setParseAction( self, *fns ):
00569         """Define action to perform when successfully matching parse element definition.
00570            Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
00571            fn(loc,toks), fn(toks), or just fn(), where:
00572             - s   = the original string being parsed
00573             - loc = the location of the matching substring
00574             - toks = a list of the matched tokens, packaged as a ParseResults object
00575            If the functions in fns modify the tokens, they can return them as the return
00576            value from fn, and the modified list of tokens will replace the original.
00577            Otherwise, fn does not need to return any value."""
00578         self.parseAction = [self.normalizeParseActionArgs(f) for f in list(fns)]
00579         return self
00580 
00581     def addParseAction( self, *fns ):
00582         """Add parse action to expression's list of parse actions. See setParseAction_."""
00583         self.parseAction += [self.normalizeParseActionArgs(f) for f in list(fns)]
00584         return self
00585 
00586     def setFailAction( self, fn ):
00587         """Define action to perform if parsing fails at this expression. 
00588            Fail acton fn is a callable function that takes the arguments 
00589            fn(s,loc,expr,err) where:
00590             - s = string being parsed
00591             - loc = location where expression match was attempted and failed
00592             - expr = the parse expression that failed
00593             - err = the exception thrown
00594            The function returns no value.  It may throw ParseFatalException
00595            if it is desired to stop parsing immediately."""
00596         self.failAction = fn
00597         return self
00598         
00599     def skipIgnorables( self, instring, loc ):
00600         exprsFound = True
00601         while exprsFound:
00602             exprsFound = False
00603             for e in self.ignoreExprs:
00604                 try:
00605                     while 1:
00606                         loc,dummy = e._parse( instring, loc )
00607                         exprsFound = True
00608                 except ParseException:
00609                     pass
00610         return loc
00611 
00612     def preParse( self, instring, loc ):
00613         if self.ignoreExprs:
00614             loc = self.skipIgnorables( instring, loc )
00615         
00616         if self.skipWhitespace:
00617             wt = self.whiteChars
00618             instrlen = len(instring)
00619             while loc < instrlen and instring[loc] in wt:
00620                 loc += 1
00621                 
00622         return loc
00623 
    def parseImpl( self, instring, loc, doActions=True ):
        """Base implementation of the matching step: consumes nothing and
           returns loc unchanged together with an empty token list."""
        return loc, []
00626 
    def postParse( self, instring, loc, tokenlist ):
        """Hook applied to the tokens returned by parseImpl; this base
           implementation returns tokenlist unchanged."""
        return tokenlist
00629 
00630     #~ @profile
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Match this element against instring at loc, without memoizing.

           Steps: optional preParse, parseImpl, postParse, wrapping of the
           tokens in a ParseResults, then the parse actions (only when
           doActions is true).  Returns a (new loc, ParseResults) tuple.

           An IndexError escaping parseImpl is converted into a
           ParseException located at the end of instring."""
        debugging = ( self.debug ) #and doActions )

        # slow path: debug and/or fail callbacks have to observe the attempt
        if debugging or self.failAction:
            #~ print "Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
            if (self.debugActions[0] ):
                self.debugActions[0]( instring, loc, self )
            if callPreParse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            # note: the location *before* preParse skipping is what callbacks
            # and parse actions receive
            tokensStart = loc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseException, err:
                #~ print "Exception raised:", err
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            if callPreParse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = loc
            # guard against IndexError only when the element says it may raise
            # one, or when loc is already at/after the end of the input
            if self.mayIndexError or loc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
        
        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and doActions:
            # Each action receives the current retTokens; a return value other
            # than None replaces them (re-wrapped in a ParseResults).
            if debugging:
                try:
                    for fn in self.parseAction:
                        tokens = fn( instring, tokensStart, retTokens )
                        if tokens is not None:
                            retTokens = ParseResults( tokens, 
                                                      self.resultsName, 
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
                                                      modal=self.modalResults )
                except ParseException, err:
                    #~ print "Exception raised in user parse action:", err
                    if (self.debugActions[2] ):
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens, 
                                                  self.resultsName, 
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
                                                  modal=self.modalResults )

        if debugging:
            #~ print "Matched",self,"->",retTokens.asList()
            if (self.debugActions[1] ):
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
00702 
00703     def tryParse( self, instring, loc ):
00704         return self._parse( instring, loc, doActions=False )[0]
00705     
00706     # this method gets repeatedly called during backtracking with the same arguments -
00707     # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
00708     def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
00709         lookup = (self,instring,loc,callPreParse)
00710         if lookup in ParserElement._exprArgCache:
00711             value = ParserElement._exprArgCache[ lookup ]
00712             if isinstance(value,Exception):
00713                 if isinstance(value,ParseBaseException):
00714                     value.loc = loc
00715                 raise value
00716             return value
00717         else:
00718             try:
00719                 ParserElement._exprArgCache[ lookup ] = \
00720                     value = self._parseNoCache( instring, loc, doActions, callPreParse )
00721                 return value
00722             except ParseBaseException, pe:
00723                 ParserElement._exprArgCache[ lookup ] = pe
00724                 raise
00725 
00726     _parse = _parseNoCache
00727 
00728     # argument cache for optimizing repeated calls when backtracking through recursive expressions
00729     _exprArgCache = {}
00730     def resetCache():
00731         ParserElement._exprArgCache.clear()
00732     resetCache = staticmethod(resetCache)
00733     
00734     _packratEnabled = False
00735     def enablePackrat():
00736         """Enables "packrat" parsing, which adds memoizing to the parsing logic.
00737            Repeated parse attempts at the same string location (which happens 
00738            often in many complex grammars) can immediately return a cached value, 
00739            instead of re-executing parsing/validating code.  Memoizing is done of
00740            both valid results and parsing exceptions.
00741             
00742            This speedup may break existing programs that use parse actions that 
00743            have side-effects.  For this reason, packrat parsing is disabled when
00744            you first import pyparsing.  To activate the packrat feature, your
00745            program must call the class method ParserElement.enablePackrat().  If
00746            your program uses psyco to "compile as you go", you must call 
00747            enablePackrat before calling psyco.full().  If you do not do this,
00748            Python will crash.  For best results, call enablePackrat() immediately
00749            after importing pyparsing.
00750         """
00751         if not ParserElement._packratEnabled:
00752             ParserElement._packratEnabled = True
00753             ParserElement._parse = ParserElement._parseCache
00754     enablePackrat = staticmethod(enablePackrat)
00755 
00756     def parseString( self, instring ):
00757         """Execute the parse expression with the given string.
00758            This is the main interface to the client code, once the complete 
00759            expression has been built.
00760         """
00761         ParserElement.resetCache()
00762         if not self.streamlined:
00763             self.streamline()
00764             #~ self.saveAsList = True
00765         for e in self.ignoreExprs:
00766             e.streamline()
00767         if self.keepTabs:
00768             loc, tokens = self._parse( instring, 0 )
00769         else:
00770             loc, tokens = self._parse( instring.expandtabs(), 0 )
00771         return tokens
00772 
00773     def scanString( self, instring ):
00774         """Scan the input string for expression matches.  Each match will return the matching tokens, start location, and end location."""
00775         if not self.streamlined:
00776             self.streamline()
00777         for e in self.ignoreExprs:
00778             e.streamline()
00779         
00780         if not self.keepTabs:
00781             instring = instring.expandtabs()
00782         instrlen = len(instring)
00783         loc = 0
00784         preparseFn = self.preParse
00785         parseFn = self._parse
00786         ParserElement.resetCache()
00787         while loc <= instrlen:
00788             try:
00789                 preloc = preparseFn( instring, loc )
00790                 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
00791             except ParseException:
00792                 loc += 1
00793             else:
00794                 yield tokens, preloc, nextLoc
00795                 loc = nextLoc
00796         
00797     def transformString( self, instring ):
00798         """Extension to scanString, to modify matching text with modified tokens that may
00799            be returned from a parse action.  To use transformString, define a grammar and 
00800            attach a parse action to it that modifies the returned token list.  
00801            Invoking transformString() on a target string will then scan for matches, 
00802            and replace the matched text patterns according to the logic in the parse 
00803            action.  transformString() returns the resulting transformed string."""
00804         out = []
00805         lastE = 0
00806         # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
00807         # keep string locs straight between transformString and scanString
00808         self.keepTabs = True
00809         for t,s,e in self.scanString( instring ):
00810             out.append( instring[lastE:s] )
00811             if t:
00812                 if isinstance(t,ParseResults):
00813                     out += t.asList()
00814                 elif isinstance(t,list):
00815                     out += t
00816                 else:
00817                     out.append(t)
00818             lastE = e
00819         out.append(instring[lastE:])
00820         return "".join(out)
00821 
00822     def searchString( self, instring ):
00823         """Another extension to scanString, simplifying the access to the tokens found
00824            to match the given parse expression.
00825         """
00826         return ParseResults([ t for t,s,e in self.scanString( instring ) ])
00827             
00828     def __add__(self, other ):
00829         """Implementation of + operator - returns And"""
00830         if isinstance( other, basestring ):
00831             other = Literal( other )
00832         if not isinstance( other, ParserElement ):
00833             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00834                     SyntaxWarning, stacklevel=2)
00835         return And( [ self, other ] )
00836 
00837     def __radd__(self, other ):
00838         """Implementation of += operator"""
00839         if isinstance( other, basestring ):
00840             other = Literal( other )
00841         if not isinstance( other, ParserElement ):
00842             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00843                     SyntaxWarning, stacklevel=2)
00844         return other + self
00845 
00846     def __or__(self, other ):
00847         """Implementation of | operator - returns MatchFirst"""
00848         if isinstance( other, basestring ):
00849             other = Literal( other )
00850         if not isinstance( other, ParserElement ):
00851             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00852                     SyntaxWarning, stacklevel=2)
00853         return MatchFirst( [ self, other ] )
00854 
00855     def __ror__(self, other ):
00856         """Implementation of |= operator"""
00857         if isinstance( other, basestring ):
00858             other = Literal( other )
00859         if not isinstance( other, ParserElement ):
00860             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00861                     SyntaxWarning, stacklevel=2)
00862         return other | self
00863 
00864     def __xor__(self, other ):
00865         """Implementation of ^ operator - returns Or"""
00866         if isinstance( other, basestring ):
00867             other = Literal( other )
00868         if not isinstance( other, ParserElement ):
00869             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00870                     SyntaxWarning, stacklevel=2)
00871         return Or( [ self, other ] )
00872 
00873     def __rxor__(self, other ):
00874         """Implementation of ^= operator"""
00875         if isinstance( other, basestring ):
00876             other = Literal( other )
00877         if not isinstance( other, ParserElement ):
00878             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00879                     SyntaxWarning, stacklevel=2)
00880         return other ^ self
00881 
00882     def __and__(self, other ):
00883         """Implementation of & operator - returns Each"""
00884         if isinstance( other, basestring ):
00885             other = Literal( other )
00886         if not isinstance( other, ParserElement ):
00887             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00888                     SyntaxWarning, stacklevel=2)
00889         return Each( [ self, other ] )
00890 
00891     def __rand__(self, other ):
00892         """Implementation of right-& operator"""
00893         if isinstance( other, basestring ):
00894             other = Literal( other )
00895         if not isinstance( other, ParserElement ):
00896             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00897                     SyntaxWarning, stacklevel=2)
00898         return other & self
00899 
00900     def __invert__( self ):
00901         """Implementation of ~ operator - returns NotAny"""
00902         return NotAny( self )
00903 
00904     def suppress( self ):
00905         """Suppresses the output of this ParserElement; useful to keep punctuation from
00906            cluttering up returned output.
00907         """
00908         return Suppress( self )
00909 
00910     def leaveWhitespace( self ):
00911         """Disables the skipping of whitespace before matching the characters in the 
00912            ParserElement's defined pattern.  This is normally only used internally by
00913            the pyparsing module, but may be needed in some whitespace-sensitive grammars.
00914         """
00915         self.skipWhitespace = False
00916         return self
00917 
00918     def setWhitespaceChars( self, chars ):
00919         """Overrides the default whitespace chars
00920         """
00921         self.skipWhitespace = True
00922         self.whiteChars = chars
00923         self.copyDefaultWhiteChars = False
00924         return self
00925         
00926     def parseWithTabs( self ):
00927         """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
00928            Must be called before parseString when the input grammar contains elements that 
00929            match <TAB> characters."""
00930         self.keepTabs = True
00931         return self
00932         
00933     def ignore( self, other ):
00934         """Define expression to be ignored (e.g., comments) while doing pattern 
00935            matching; may be called repeatedly, to define multiple comment or other
00936            ignorable patterns.
00937         """
00938         if isinstance( other, Suppress ):
00939             if other not in self.ignoreExprs:
00940                 self.ignoreExprs.append( other )
00941         else:
00942             self.ignoreExprs.append( Suppress( other ) )
00943         return self
00944 
00945     def setDebugActions( self, startAction, successAction, exceptionAction ):
00946         """Enable display of debugging messages while doing pattern matching."""
00947         self.debugActions = (startAction or _defaultStartDebugAction, 
00948                              successAction or _defaultSuccessDebugAction, 
00949                              exceptionAction or _defaultExceptionDebugAction)
00950         self.debug = True
00951         return self
00952 
00953     def setDebug( self, flag=True ):
00954         """Enable display of debugging messages while doing pattern matching."""
00955         if flag:
00956             self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
00957         else:
00958             self.debug = False
00959         return self
00960 
00961     def __str__( self ):
00962         return self.name
00963 
00964     def __repr__( self ):
00965         return _ustr(self)
00966         
00967     def streamline( self ):
00968         self.streamlined = True
00969         self.strRepr = None
00970         return self
00971         
00972     def checkRecursion( self, parseElementList ):
00973         pass
00974         
00975     def validate( self, validateTrace=[] ):
00976         """Check defined expressions for valid structure, check for infinite recursive definitions."""
00977         self.checkRecursion( [] )
00978 
00979     def parseFile( self, file_or_filename ):
00980         """Execute the parse expression on the given file or filename.
00981            If a filename is specified (instead of a file object),
00982            the entire file is opened, read, and closed before parsing.
00983         """
00984         try:
00985             file_contents = file_or_filename.read()
00986         except AttributeError:
00987             f = open(file_or_filename, "rb")
00988             file_contents = f.read()
00989             f.close()
00990         return self.parseString(file_contents)
00991 
00992 
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__( self ):
        super(Token,self).__init__( savelist=False )
        # one exception object per token; subclasses fill in loc/pstr and
        # raise this same instance on every failed match
        self.myException = ParseException("",0,"",self)

    def setName(self, name):
        """Set the name via the base class, then refresh the "Expected ..."
           error message.  Returns whatever the base class setName returned.
        """
        named = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        named.myException.msg = self.errmsg
        return named
01004 
01005 
class Empty(Token):
    """An empty token, will always match."""

    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        # flags describing this token to the parsing machinery
        self.mayIndexError = False
        self.mayReturnEmpty = True
01013 
01014 
class NoMatch(Token):
    """A token that will never match."""

    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fails: raises the token's exception, stamped with the
           current location and input string.
        """
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
01030 
01031 
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # empty match string: warn, and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead", 
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, match string) when instring has the literal
           at loc; otherwise raise the token's exception.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match,loc):
                return loc+self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
01063 
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must
       be neither immediately preceded nor immediately followed by a keyword
       (identifier) character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead", 
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done entirely in upper case
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, keyword) when the keyword is found at loc
           with no identifier character directly before or after it;
           otherwise raise the token's exception.
        """
        matchEnd = loc+self.matchLen
        if self.caseless:
            matched = ( (instring[ loc:matchEnd ].upper() == self.caselessmatch) and
                        (loc >= len(instring)-self.matchLen or instring[matchEnd].upper() not in self.identChars) and
                        (loc == 0 or instring[loc-1].upper() not in self.identChars) )
        else:
            matched = ( instring[loc] == self.firstMatchChar and
                        (self.matchLen==1 or instring.startswith(self.match,loc)) and
                        (loc >= len(instring)-self.matchLen or instring[matchEnd] not in self.identChars) and
                        (loc == 0 or instring[loc-1] not in self.identChars) )
        if matched:
            return matchEnd, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        """Copy via the base class, then reset the copy's identChars to the
           class default.
        """
        dup = super(Keyword,self).copy()
        # NOTE(review): __init__ stores identChars as a _str2dict() result,
        # while this assigns the raw default string and drops any custom
        # identChars of the original - confirm this is intended.
        dup.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return dup

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
01124 
01125 
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__( self, matchString ):
        # the Literal machinery is given the upper-cased form to compare against
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, defining literal) when the upper-cased input
           slice at loc equals the upper-cased literal; otherwise raise the
           token's exception.
        """
        matchEnd = loc+self.matchLen
        if instring[ loc:matchEnd ].upper() == self.match:
            return matchEnd, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
01147 
class CaselessKeyword(Keyword):
    """Keyword that is always matched without regard to case."""

    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, keyword) when the upper-cased input slice at
           loc equals the upper-cased keyword and is not directly followed by
           an identifier character; otherwise raise the token's exception.
        """
        # NOTE(review): unlike Keyword.parseImpl's caseless branch, the
        # character before loc is not examined here - confirm this is intended.
        matchEnd = loc+self.matchLen
        if instring[ loc:matchEnd ].upper() == self.caselessmatch:
            if loc >= len(instring)-self.matchLen or instring[matchEnd].upper() not in self.identChars:
                return matchEnd, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
01161 
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0 ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)
            
        self.maxSpecified = max > 0

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = sys.maxint

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False
        
        # Shortcut: with default length limits and no space among the allowed
        # characters, matching is delegated to an equivalent compiled regex.
        # NOTE(review): self.re is assigned only inside this branch, yet
        # parseImpl reads it unconditionally - presumably a base class
        # supplies a default; confirm.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # A single allowed initial character can be matched as an
                # escaped literal.  The test has to be on initCharsOrig: a
                # multi-character initial set used as a literal would demand
                # that whole sequence rather than any one of its characters.
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            try:
                self.re = re.compile( self.reString )
            except:
                # best effort: if the pattern cannot be compiled, fall back
                # to the character-by-character scan in parseImpl
                self.re = None
        
    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, matched word) for a word starting at loc, or
           raise the token's exception when no acceptable word is there.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
            
            loc = result.end()
            return loc,result.group()
        
        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        # consume body characters up to the maximum length or end of input
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1
            
        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that keeps going past it is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string form when that succeeds; otherwise a
           "W:(...)" summary of the character sets (each shortened to four
           characters plus "..."), cached in strRepr.
        """
        try:
            return super(Word,self).__str__()
        except:
            pass

            
        if self.strRepr is None:
            
            def charsAsStr(s):
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s
            
            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
01277 
01278 
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.
           An empty pattern produces a SyntaxWarning; a pattern that fails to
           compile produces a SyntaxWarning and the compile error is re-raised.
        """
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead", 
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern, 
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the compiled pattern at loc.  Returns (end location,
           ParseResults holding the matched text), with every named group of
           the pattern also stored under its group name.  Raises the token's
           exception when the pattern does not match.
        """
        found = self.re.match(instring,loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        results = ParseResults(found.group())
        namedGroups = found.groupdict()
        for groupName in namedGroups.keys():
            results[groupName] = namedGroups[groupName]
        return found.end(), results

    def __str__( self ):
        """Return the base-class string form when that succeeds; otherwise
           "Re:(<repr of pattern>)", cached in strRepr.
        """
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
01334 
01335 
01336 class QuotedString(Token):
01337     """Token for matching strings that are delimited by quoting characters.
01338     """
01339     def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
01340         """
01341            Defined with the following parameters:
01342            - quoteChar - string of one or more characters defining the quote delimiting string
01343            - escChar - character to escape quotes, typically backslash (default=None)
01344            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
01345            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
01346            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
01347            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
01348         """
01349         super(QuotedString,self).__init__()
01350         
01351         # remove white space from quote chars - wont work anyway
01352         quoteChar = quoteChar.strip()
01353         if len(quoteChar) == 0:
01354             warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
01355             raise SyntaxError()
01356         
01357         if endQuoteChar is None:
01358             endQuoteChar = quoteChar
01359         else:
01360             endQuoteChar = endQuoteChar.strip()
01361             if len(endQuoteChar) == 0:
01362                 warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
01363                 raise SyntaxError()
01364         
01365         self.quoteChar = quoteChar
01366         self.quoteCharLen = len(quoteChar)
01367         self.firstQuoteChar = quoteChar[0]
01368         self.endQuoteChar = endQuoteChar
01369         self.endQuoteCharLen = len(endQuoteChar)
01370         self.escChar = escChar
01371         self.escQuote = escQuote
01372         self.unquoteResults = unquoteResults
01373         
01374         if multiline:
01375             self.flags = re.MULTILINE | re.DOTALL
01376             self.pattern = r'%s([^%s%s]' % \
01377                 ( re.escape(self.quoteChar),
01378                   _escapeRegexRangeChars(self.endQuoteChar[0]),
01379                   (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
01380         else:
01381             self.flags = 0
01382             self.pattern = r'%s([^%s\n\r%s]' % \
01383                 ( re.escape(self.quoteChar),
01384                   _escapeRegexRangeChars(self.endQuoteChar[0]),
01385                   (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
01386         if len(self.endQuoteChar) > 1:
01387             self.pattern += (
01388                 '|(' + ')|('.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
01389                                                _escapeRegexRangeChars(self.endQuoteChar[i])) 
01390                                     for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
01391                 )
01392         if escQuote:
01393             self.pattern += (r'|(%s)' % re.escape(escQuote))
01394         if escChar:
01395             self.pattern += (r'|(%s.)' % re.escape(escChar))
01396             self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
01397         self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
01398         
01399         try:
01400             self.re = re.compile(self.pattern, self.flags)
01401             self.reString = self.pattern
01402         except sre_constants.error,e:
01403             warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 
01404                 SyntaxWarning, stacklevel=2)
01405             raise
01406 
01407         self.name = _ustr(self)
01408         self.errmsg = "Expected " + self.name
01409         self.myException.msg = self.errmsg
01410         self.mayIndexError = False
01411         self.mayReturnEmpty = True
01412     
01413     def parseImpl( self, instring, loc, doActions=True ):
01414         result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
01415         if not result:
01416             exc = self.myException
01417             exc.loc = loc
01418             exc.pstr = instring
01419             raise exc
01420         
01421         loc = result.end()
01422         ret = result.group()
01423         
01424         if self.unquoteResults:
01425             
01426             # strip off quotes
01427             ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
01428                 
01429             if isinstance(ret,basestring):
01430                 # replace escaped characters
01431                 if self.escChar:
01432                     ret = re.sub(self.escCharReplacePattern,"\g<1>",ret)
01433 
01434                 # replace escaped quotes
01435                 if self.escQuote:
01436                     ret = ret.replace(self.escQuote, self.endQuoteChar)
01437 
01438         return loc, ret
01439     
01440     def __str__( self ):
01441         try:
01442             return super(QuotedString,self).__str__()
01443         except:
01444             pass
01445         
01446         if self.strRepr is None:
01447             self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
01448         
01449         return self.strRepr
01450 
01451 
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with a string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  A positive exact value overrides
       both min and max; a max of 0 (the default) is stored as sys.maxint.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        # resolve the length bounds; a positive exact wins over min/max
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = sys.maxint

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.myException.msg = self.errmsg
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of characters that are not in notChars, starting at loc.
           Returns (new location, matched text); raises self.myException if the
           first character is disallowed or the run is shorter than minLen."""
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        # never scan beyond maxLen characters or past the end of the input
        limit = min( begin+self.maxLen, len(instring) )
        loc += 1
        while loc < limit:
            if instring[loc] in excluded:
                break
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]

    def __str__( self ):
        """Return the base-class string form when available, else a cached
           "!W:(...)" description showing at most the first four disallowed
           characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is not None:
            return self.strRepr

        if len(self.notChars) > 4:
            self.strRepr = "!W:(%s...)" % self.notChars[:4]
        else:
            self.strRepr = "!W:(%s)" % self.notChars
        return self.strRepr
01517 
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments;
       a positive exact overrides min and max, and max=0 is stored as sys.maxint."""
    # display labels used to build the element name, one per supported character
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # characters that are to be matched must not be skipped as leading whitespace
        skippable = [ ch for ch in self.whiteChars if ch not in self.matchWhite ]
        self.setWhitespaceChars( "".join(skippable) )
        labels = [ White.whiteStrs[ch] for ch in self.matchWhite ]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg

        # resolve the length bounds; a positive exact wins over min/max
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = sys.maxint

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of characters from matchWhite starting at loc.
           Returns (new location, matched text); raises self.myException if the
           first character is not in matchWhite or the run is shorter than minLen."""
        if instring[loc] not in self.matchWhite:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        # never scan beyond maxLen characters or past the end of the input
        limit = min( begin + self.maxLen, len(instring) )
        loc += 1
        while loc < limit:
            if instring[loc] not in self.matchWhite:
                break
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]
01574 
01575 
class PositionToken(Token):
    """Common base for the position-testing tokens defined below it
       (GoToColumn, LineStart, LineEnd, StringStart, StringEnd).  Names the
       element after its class and marks it as possibly returning empty."""
    def __init__( self ):
        super(PositionToken,self).__init__()
        cls = self.__class__
        self.name = cls.__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False
01582 
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and then whitespace characters, stopping
           as soon as the target column is reached or a non-space character is
           found.  Returns the resulting location."""
        if col( loc, instring ) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self.skipIgnorables( instring, loc )
        end = len(instring)
        while loc < end:
            if not instring[loc].isspace():
                break
            if col( loc, instring ) == self.col:
                break
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (location of the target column, text skipped to get there).
           Raises ParseException if loc is already past the target column."""
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        skipped = instring[ loc: target ]
        return target, skipped
01605 
class LineStart(PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected start of line"
        self.myException.msg = self.errmsg

    def preParse( self, instring, loc ):
        """Return loc advanced by one when the first character after the
           inherited pre-parse skipping is a newline; otherwise return loc
           unchanged."""
        preloc = super(LineStart,self).preParse(instring,loc)
        # Guard the index: when everything up to the end of the input was
        # skipped, preloc can equal len(instring), and an unguarded lookup
        # would raise a bare IndexError instead of letting parseImpl report
        # an ordinary match or ParseException.
        if preloc < len(instring) and instring[preloc] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when loc is 0 or the previous
           character is a newline (and loc is inside the input); otherwise
           raise self.myException."""
        if not( loc==0 or ( loc<len(instring) and instring[loc-1] == "\n" ) ): #col(loc, instring) != 1:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
01628 
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected end of line"
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at loc (returned as the token "\\n"), or the end of
           the input (returned as an empty token list).  In both cases the
           returned location is loc+1.  Anything else raises self.myException."""
        inputLen = len(instring)

        # exactly at the end of the input: counts as an end of line
        if loc == inputLen:
            return loc+1, []

        # inside the input: only a newline character matches
        if loc < inputLen and instring[loc] == "\n":
            return loc+1, "\n"

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
01654 
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when loc is 0, or when loc is where
           pre-parsing from position 0 ends (i.e. only skippable text precedes
           it); otherwise raise self.myException."""
        if loc != 0 and loc != self.preParse( instring, 0 ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
01672 
class StringEnd(PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed only when loc equals len(instring), returning (loc+1, []).
           Any other location, before or beyond the end, raises
           self.myException."""
        if loc == len(instring):
            return loc+1, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
01694 
01695 
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
       Holds its sub-expressions in the list self.exprs."""
    def __init__( self, exprs, savelist = False ):
        super(ParseExpression,self).__init__(savelist)
        # normalize the argument to a list of expressions
        if isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        elif isinstance( exprs, list ):
            self.exprs = exprs
        else:
            self.exprs = [ exprs ]

    def __getitem__( self, i ):
        """Return the i'th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add other to the contained expressions, discard the cached string
           representation, and return self."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions (on shallow copies of them, which replace the originals
           in self.exprs)."""
        self.skipWhitespace = False
        copies = []
        for e in self.exprs:
            copies.append( copy.copy(e) )
        self.exprs = copies
        for e in copies:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as an ignorable expression on this element and pass
           the newly registered entry on to every contained expression.  A
           Suppress that is already in ignoreExprs is not registered again."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        newest = self.ignoreExprs[-1]
        for e in self.exprs:
            e.ignore( newest )
        return self

    def __str__( self ):
        """Return the base-class string form when available, else a cached
           "ClassName:(exprs)" description."""
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is not None:
            return self.strRepr
        self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline this element and its contained expressions, then flatten
           a directly nested expression of the same class into this one."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) != 2:
            return self

        def mergeable( sub ):
            # same class, and nothing attached that flattening would lose
            return ( isinstance( sub, self.__class__ ) and
                     not sub.parseAction and
                     sub.resultsName is None and
                     not sub.debug )

        head = self.exprs[0]
        if mergeable( head ):
            self.exprs = head.exprs[:] + [ self.exprs[1] ]
            self.strRepr = None
            self.mayReturnEmpty |= head.mayReturnEmpty
            self.mayIndexError  |= head.mayIndexError

        tail = self.exprs[-1]
        if mergeable( tail ):
            self.exprs = self.exprs[:-1] + tail.exprs[:]
            self.strRepr = None
            self.mayReturnEmpty |= tail.mayReturnEmpty
            self.mayIndexError  |= tail.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class setResultsName and return its result."""
        #~ ret.saveAsList = True
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, passing along the trace
           extended with self, then run the recursion check on this element."""
        trace = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion( [] )

    #~ def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
        #~ if self.parseAction and doActions:
            #~ return self._parseNoCache( instring, loc, doActions, callPreParse )
        #~ return super(ParseExpression,self)._parseCache( instring, loc, doActions, callPreParse )
01793 
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        # the sequence can match empty only if every member can
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Take the whitespace handling from the first contained expression.
        # Use the normalized self.exprs rather than the raw argument:
        # ParseExpression.__init__ wraps a bare string in a Literal and a single
        # expression in a list, and indexing the raw argument in those cases
        # does not yield the first parser element.
        first = self.exprs[0]
        self.skipWhitespace = first.skipWhitespace
        self.setWhitespaceChars( first.whiteChars )

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in order, starting each one where
           the previous one ended.  Returns (final location, accumulated
           tokens); an exception from any contained expression propagates."""
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions )
        for e in self.exprs[1:]:
            loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that carry either tokens or named entries
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Support 'expr += other': append other (a string becomes a Literal)
           to this And in place."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on the contained expressions, stopping
           after the first one that cannot match empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if there is one, else a cached
           "{a b c}" rendering of the contained expressions."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
01837     
01838 
01839 class Or(ParseExpression):
01840     """Requires that at least one ParseExpression is found.
01841        If two expressions match, the expression that matches the longest string will be used.
01842        May be constructed using the '^' operator.
01843     """
01844     def __init__( self, exprs, savelist = False ):
01845         super(Or,self).__init__(exprs, savelist)
01846         self.mayReturnEmpty = False
01847         for e in self.exprs:
01848             if e.mayReturnEmpty:
01849                 self.mayReturnEmpty = True
01850                 break
01851     
01852     def parseImpl( self, instring, loc, doActions=True ):
01853         maxExcLoc = -1
01854         maxMatchLoc = -1
01855         for e in self.exprs:
01856             try:
01857                 loc2 = e.tryParse( instring, loc )
01858             except ParseException, err:
01859                 if err.loc > maxExcLoc:
01860                     maxException = err
01861                     maxExcLoc = err.loc
01862             except IndexError, err:
01863                 if len(instring) > maxExcLoc:
01864                     maxException = ParseException(instring,len(instring),e.errmsg,self)
01865                     maxExcLoc = len(instring)
01866             else:
01867                 if loc2 > maxMatchLoc:
01868                     maxMatchLoc = loc2
01869                     maxMatchExp = e
01870         
01871         if maxMatchLoc < 0:
01872             if self.exprs:
01873                 raise maxException
01874             else:
01875                 raise ParseException(instring, loc, "no defined alternatives to match", self)
01876 
01877         return maxMatchExp._parse( instring, loc, doActions )
01878 
01879     def __ixor__(self, other ):
01880         if isinstance( other, basestring ):
01881             other = Literal( other )
01882         return self.append( other ) #Or( [ self, other ] )
01883 
01884     def __str__( self ):
01885         if hasattr(self,"name"):
01886             return self.name
01887             
01888         if self.strRepr is None:
01889             self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
01890         
01891         return self.strRepr
01892     
01893     def checkRecursion( self, parseElementList ):
01894         subRecCheckList = parseElementList[:] + [ self ]
01895         for e in self.exprs:
01896             e.checkRecursion( subRecCheckList )
01897 
01898 
01899 class MatchFirst(ParseExpression):
01900     """Requires that at least one ParseExpression is found.
01901        If two expressions match, the first one listed is the one that will match.
01902        May be constructed using the '|' operator.
01903     """
01904     def __init__( self, exprs, savelist = False ):
01905         super(MatchFirst,self).__init__(exprs, savelist)
01906         if exprs:
01907             self.mayReturnEmpty = False
01908             for e in self.exprs:
01909                 if e.mayReturnEmpty:
01910                     self.mayReturnEmpty = True
01911                     break
01912         else:
01913             self.mayReturnEmpty = True
01914     
01915     def parseImpl( self, instring, loc, doActions=True ):
01916         maxExcLoc = -1
01917         for e in self.exprs:
01918             try:
01919                 ret = e._parse( instring, loc, doActions )
01920                 return ret
01921             except ParseException, err:
01922                 if err.loc > maxExcLoc:
01923                     maxException = err
01924                     maxExcLoc = err.loc
01925             except IndexError, err:
01926                 if len(instring) > maxExcLoc:
01927                     maxException = ParseException(instring,len(instring),e.errmsg,self)
01928                     maxExcLoc = len(instring)
01929 
01930         # only got here if no expression matched, raise exception for match that made it the furthest
01931         else:
01932             if self.exprs:
01933                 raise maxException
01934             else:
01935                 raise ParseException(instring, loc, "no defined alternatives to match", self)
01936 
01937     def __ior__(self, other ):
01938         if isinstance( other, basestring ):
01939             other = Literal( other )
01940         return self.append( other ) #MatchFirst( [ self, other ] )
01941 
01942     def __str__( self ):
01943         if hasattr(self,"name"):
01944             return self.name
01945             
01946         if self.strRepr is None:
01947             self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
01948         
01949         return self.strRepr
01950     
01951     def checkRecursion( self, parseElementList ):
01952         subRecCheckList = parseElementList[:] + [ self ]
01953         for e in self.exprs:
01954             e.checkRecursion( subRecCheckList )
01955 
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # the group can match empty only if every member can
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # Classify the members by repetition wrapper.  Iterate the normalized
        # self.exprs rather than the raw argument: ParseExpression.__init__
        # wraps a bare string in a Literal and a single expression in a list,
        # so the raw argument is not always a list of parser elements.
        members = self.exprs
        self.optionals = [ e.expr for e in members if isinstance(e,Optional) ]
        self.multioptionals = [ e.expr for e in members if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in members if isinstance(e,OneOrMore) ]
        self.required = [ e for e in members if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        # the inner expression of every OneOrMore must match at least once
        self.required += self.multirequired

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the member expressions match, then parse
           them in that order and merge their results.  Raises ParseException
           when required members are still unmatched once no candidate matches
           any more."""
        # Pass 1: discover a matching order with tryParse, using only the
        # location it returns.
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    # required/optional members are satisfied by a single match
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop once a complete sweep matches nothing
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # Pass 2: parse for real, in the order discovered above.
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        # Merge the per-expression results.  For a results name that is already
        # present, combine the existing and new values and store the
        # combination under that name after the merge.
        finalResults = ParseResults([])
        for r in resultlist:
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if there is one, else a cached
           "{a & b & c}" rendering of the contained expressions."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
02034 
02035 
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
       Wraps a single expression, stored in self.expr (which may be None)."""
    def __init__( self, expr, savelist=False ):
        super(ParseElementEnhance,self).__init__(savelist)
        # a bare string stands for a Literal
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is None:
            return
        # mirror the wrapped expression's settings
        self.mayIndexError = expr.mayIndexError
        self.skipWhitespace = expr.skipWhitespace
        self.setWhitespaceChars( expr.whiteChars )
        self.saveAsList = expr.saveAsList

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise ParseException when no
           expression has been set."""
        if self.expr is None:
            raise ParseException("",loc,self.errmsg,self)
        return self.expr._parse( instring, loc, doActions )

    def leaveWhitespace( self ):
        """Turn off whitespace skipping here and on a shallow copy of the
           wrapped expression, which replaces the original in self.expr."""
        self.skipWhitespace = False
        self.expr = copy.copy(self.expr)
        if self.expr is not None:
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as an ignorable expression on this element and pass
           the newly registered entry on to the wrapped expression.  A Suppress
           that is already in ignoreExprs is not registered again."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the wrapped expression."""
        super(ParseElementEnhance,self).streamline()
        inner = self.expr
        if inner is not None:
            inner.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if this element is already in
           parseElementList; otherwise continue the check in the wrapped
           expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        trail = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( trail )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, passing along the trace extended
           with self, then run the recursion check on this element."""
        trace = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(trace)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string form when available, else a cached
           "ClassName:(expr)" description (built only when an expression is set)."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except:
            pass

        needsBuild = self.strRepr is None and self.expr is not None
        if needsBuild:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
02103 
02104 
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Check that the wrapped expression matches at loc.  A failed match
           propagates its exception; on success the location is returned
           unchanged with an empty token list."""
        # only the success/failure of the lookahead matters; its result is discarded
        self.expr.tryParse( instring, loc )
        nothing = []
        return loc, nothing
02117 
02118 
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)
        self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) when the wrapped expression fails to match at loc
           (ParseException or IndexError); raise self.myException when it
           does match."""
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent, so the lookahead succeeds
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        """Return the assigned name if there is one, else a cached "~{expr}"
           rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr
        self.strRepr = "~{" + _ustr(self.expr) + "}"
        return self.strRepr
02154 
02155 
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression repeatedly until it fails with a
           ParseException or IndexError.  Returns the location after the last
           successful match and the accumulated tokens; with no match at all
           the original location and an empty list are returned."""
        collected = []
        try:
            loc, collected = self.expr._parse( instring, loc, doActions )
            skipFirst = ( len(self.ignoreExprs) > 0 )
            while 1:
                # step over ignorable expressions between repetitions
                nextloc = loc
                if skipFirst:
                    nextloc = self.skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, nextloc, doActions )
                # keep results that carry either tokens or named entries
                if more or more.keys():
                    collected += more
        except (ParseException,IndexError):
            # the first failure simply ends the repetition
            pass

        return loc, collected

    def __str__( self ):
        """Return the assigned name if there is one, else a cached "[expr]..."
           rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr
        self.strRepr = "[" + _ustr(self.expr) + "]..."
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class setResultsName, then mark the element it
           returns as saveAsList and return it."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
02193     
02194 
class OneOrMore(ParseElementEnhance):
    """Repetition: matches the given expression one or more times."""
    def parseImpl( self, instring, loc, doActions=True ):
        """Match self.expr once (failure here propagates to the caller),
        then keep matching until it fails; return the final location and
        all accumulated tokens."""
        # the mandatory first match sits outside the try block on purpose
        loc, matched = self.expr._parse( instring, loc, doActions )
        try:
            checkIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = loc
                if checkIgnorables:
                    nextLoc = self.skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, nextLoc, doActions )
                # keep results that carry tokens or named results
                if more or more.keys():
                    matched += more
        except (ParseException,IndexError):
            # the repetition simply ends at the first failed match
            pass

        return loc, matched

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # build the default representation once and cache it
        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "{" + _ustr(self.expr) + "}..."
        return cached

    def setResultsName( self, name, listAllMatches=False ):
        """Same as the inherited setResultsName, but the returned element
        has saveAsList set to True."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
02228 
class _NullToken(object):
    """Sentinel type whose instances are false in a boolean context and
    convert to the empty string."""
    def __bool__(self):
        return False
    # Python 2 consults __nonzero__ (not __bool__) for truth testing; without
    # this alias the sentinel would evaluate as true there.
    __nonzero__ = __bool__
    def __str__(self):
        return ""
02234 
# sentinel meaning "no default value was supplied"; compared by identity below
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       If the expression is not found, an optional default value is returned
       as the single token; without a default, no tokens are returned.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Try self.expr at loc; on failure leave loc unchanged and return
        either [defaultValue] or an empty list."""
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions )
        except (ParseException,IndexError):
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            else:
                tokens = [ self.defaultValue ]

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # build the default representation once and cache it
        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "[" + _ustr(self.expr) + "]"
        return cached
02265 
02266 
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that 
       might contain false matches.

       The skipped text is returned as the first token; when include is true and the
       target expression yields tokens, those follow as a second element.
    """
    def __init__( self, other, include=False, ignore=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # attach the ignore expression to a shallow copy of the target
            # NOTE(review): presumably done to leave the caller's expression
            # untouched - confirm that ignore() does not mutate state shared
            # with the original through the shallow copy
            self.expr = copy.copy( self.expr )
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.errmsg = "No match found for "+_ustr(self.expr)
        # one exception object, re-populated and re-raised on every failure
        self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        # remember where the skipped text begins
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        # '<=' lets the target expression be tried at the very end of the string
        while loc <= instrlen:
            try:
                loc = expr.skipIgnorables( instring, loc )
                # trial match only: parse actions are disabled and the result is discarded
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                if self.includeMatch:
                    skipText = instring[startLoc:loc]
                    # parse again, this time keeping the new location and tokens
                    loc,mat = expr._parse(instring,loc)
                    if mat:
                        return loc, [ skipText, mat ]
                    else:
                        return loc, [ skipText ]
                else:
                    # target found but not consumed: loc stays at its start
                    return loc, [ instring[startLoc:loc] ]
            except (ParseException,IndexError):
                # no match at this position - advance one character and retry
                loc += 1
        # ran off the end of the input without finding the target
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
02307 
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.
       
       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Bind the actual expression to this placeholder and return self.
        A plain string is wrapped in a Literal first."""
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        # drop any cached string representation
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        # only this element's own flag is changed; self.expr is not touched
        self.skipWhitespace = False
        return self

    def streamline( self ):
        if not self.streamlined:
            # mark as streamlined before descending into self.expr
            # NOTE(review): presumably this stops endless recursion when the
            # grammar refers back to this Forward - confirm
            self.streamlined = True
            if self.expr is not None: 
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        # the default list is never mutated: a new list is built for the recursive call
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None: 
                self.expr.validate(tmp)
        self.checkRecursion([])        
        
    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # While the contained expression is being rendered, this object is
        # temporarily re-classed as _ForwardNoRecurse so that a nested
        # reference back to it prints as "..." instead of recursing.
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None: 
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            # always restore the real class, even if rendering raised
            self.__class__ = Forward
        return "Forward: "+retString
02364 
class _ForwardNoRecurse(Forward):
    """Stand-in class that Forward.__str__ temporarily assigns to an instance;
    its __str__ returns a fixed "..." marker instead of rendering self.expr."""
    def __str__( self ):
        return "..."
02368         
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # NOTE(review): the savelist argument is accepted but not forwarded
        # (see the commented-out argument); saveAsList is always reset to False
        super(TokenConverter,self).__init__( expr )#, savelist )
        self.saveAsList = False
02374 
02375 
class Upcase(TokenConverter):
    """Converter that upper-cases all matching tokens.  Deprecated: a
    DeprecationWarning is issued whenever an instance is constructed."""
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        deprecationNote = "Upcase class is deprecated, use upcaseTokens parse action instead"
        warnings.warn(deprecationNote, DeprecationWarning, stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a list holding the upper-cased form of every token."""
        return [ string.upper(tok) for tok in tokenlist ]
02385 
02386 
class Combine(TokenConverter):
    """Converter that concatenates all matching tokens into a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions ...
        if adjacent:
            self.leaveWhitespace()
        # ... but re-enable it on the Combine itself
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString

    def ignore( self, other ):
        """Register an ignorable expression and return self.  When adjacent
        is set, ParserElement.ignore is invoked directly rather than the
        inherited implementation."""
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with one combined string token."""
        # start from a copy of the results, emptied of its tokens
        combined = tokenlist.copy()
        del combined[:]
        combined += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)

        # wrap in a list only when this element is named and named results are present
        if self.resultsName and len(combined.keys())>0:
            return [ combined ]
        return combined
02417 
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        # overrides the False set by TokenConverter.__init__
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # nest the whole result as a single element of a new list
        return [ tokenlist ]
02426         
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as an item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a keyed entry to tokenlist for each of its elements, using the
        stripped string form of the element's first token as the key."""
        for pos,entry in enumerate(tokenlist):
            key = _ustr(entry[0]).strip()
            entryLen = len(entry)
            if entryLen==1:
                # key only, no value
                tokenlist[key] = ("",pos)
            elif entryLen==2 and not isinstance(entry[1],ParseResults):
                # simple key/value pair
                tokenlist[key] = (entry[1],pos)
            else:
                # everything after the key becomes the value
                remainder = entry.copy()
                del remainder[0]
                if len(remainder)!= 1 or (isinstance(remainder,ParseResults) and remainder.keys()):
                    tokenlist[key] = (remainder,pos)
                else:
                    tokenlist[key] = (remainder[0],pos)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
02455 
02456 
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        # discard whatever the contained expression matched
        return []
    
    def suppress( self ):
        # already a Suppress - return self unchanged
        return self
02464 
02465 
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.
    Every call after the first successful one raises ParseException."""
    def __init__(self, methodCall):
        self.callable = ParserElement.normalizeParseActionArgs(methodCall)
        self.called = False
    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # flagged only after the wrapped action returned without raising
        self.called = True
        return results
02477 
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line (action name, current
    input line, location, tokens) and a "leaving" line (return value) to
    sys.stderr around each call of f, and returns f's result unchanged.
    """
    def z(*paArgs):
        # __name__ is available on plain functions and builtins alike
        # (func_name is not); fall back to the class name for other callables
        thisFunc = getattr(f, "__name__", f.__class__.__name__)
        # the last three arguments are always (s, l, t)
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is the instance of a method-style action
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        ret = f(*paArgs)
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    return z
02490         
02491 #
02492 # global helpers
02493 #
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace and
       comments, and the matching tokens are returned as a list with the delimiters
       suppressed.  Passing 'combine=True' instead returns the matching tokens as a
       single token string, with the delimiters included.
    """
    exprRepr = _ustr(expr)
    dlName = exprRepr+" ["+_ustr(delim)+" "+exprRepr+"]..."
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
02507 
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens return the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()
    def _bindArrayLength(s,l,t):
        # the count is only known at parse time, so the Forward is (re)bound here
        count = int(t[0])
        arrayExpr << (count and Group(And([expr]*count)) or Group(empty))
        # returning an empty list suppresses the count token itself
        return []
    countField = Word(nums).setParseAction(_bindArrayLength)
    return ( countField + arrayExpr )
02521     
def _escapeRegexRangeChars(s):
    # Backslash-escape the characters \ ^ - ] and spell newline and tab as
    # the two-character sequences \n and \t.
    # The backslash is handled first so the escapes added afterwards are not doubled.
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special,"\\"+special)
    for raw,spelled in (("\n",r"\n"), ("\t",r"\t")):
        s = s.replace(raw,spelled)
    return _ustr(s)
02529     
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do 
       longest-first testing when there is a conflict, regardless of the input order, 
       but returns a MatchFirst for best performance.  
       
       Parameters:
        - strs - a string of space-delimited literals, or a list (or tuple) of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       Any other type for strs triggers a SyntaxWarning and is treated as an
       empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal
    
    if isinstance(strs,(list,tuple)):
        # always work on a mutable copy: slicing a tuple would yield a tuple,
        # which the in-place reordering below cannot modify
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # keep going with no alternatives rather than failing on an unbound name
        symbols = []
        
    # Drop duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix (longest-first testing).
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for position i - move on to the next symbol
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class suffices
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # deliberate best-effort: fall through to MatchFirst, but do not
            # swallow KeyboardInterrupt/SystemExit as a bare except would
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
02588 
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the Dict results can include named token 
       fields.
    """
    # one grouped key/value entry, repeated zero or more times
    entry = Group ( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
02598 
_bslash = "\\"
# every character of string.printable that is not whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# --- grammar used by srange() to parse '[...]' character-set strings ---
# a backslash followed by one of the listed punctuation characters; the parse
# action keeps only the character after the backslash
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# printable characters except backslash and ']' (despite the name, ']' is excluded too)
_printables_less_backslash = "".join([ c for c in printables if c not in  r"\]" ])
# '\0x' followed by hex digits, converted to the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash followed by '0' and octal digits, converted to the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# 'a-z' style range, grouped as a [first, last] pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = "[" + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# a grouped range (ParseResults) expands to every character from first to last
# inclusive; anything else is passed through unchanged
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
02618         
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded 
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the input cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best-effort: malformed input yields "", but KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except
        return ""
02639 
def replaceWith(replStr):
    """Helper that builds a parse action which ignores its arguments and returns
       the given literal value as a one-element list.  Especially useful when used
       with transformString().
    """
    def _replFunc(*args):
        # a fresh list on every call, so callers never share one list object
        replacement = [replStr]
        return replacement
    return _replFunc
02647 
def removeQuotes(s,l,t):
    """Helper parse action that strips the first and last character (the quotation
       marks) from the first parsed token.  To use, add this parse action to a
       quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
02654 
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case.

       Returns a new list holding the upper-cased form of every token in t.
       Each token's own upper() method is used, so both plain and unicode
       strings are accepted.
    """
    return [ tok.upper() for tok in t ]
02658 
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case.

       Returns a new list holding the lower-cased form of every token in t.
       Each token's own lower() method is used, so both plain and unicode
       strings are accepted.
    """
    return [ tok.lower() for tok in t ]
02662 
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.
       With xml true the tag name is matched case-sensitively and attribute values must
       be double-quoted strings; otherwise the tag name is caseless, attribute names are
       lower-cased, and attribute values may be quoted or unquoted.
       Returns the pair (openTag, closeTag)."""
    tagAttrName = Word(alphanums)
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        openTag = ( Suppress("<") + Keyword(tagStr) +
                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) +
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") )
    else:
        nonClosingChars = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(nonClosingChars)
        openTag = ( Suppress("<") + Keyword(tagStr,caseless=True) +
                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) +
                Suppress("=") + tagAttrValue ))) +
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">") )
    closeTag = Combine("</" + Keyword(tagStr,caseless=not xml) + ">")

    # results-name suffix derived from the tag name, e.g. "xsl:template" -> "XslTemplate"
    nameSuffix = "".join(tagStr.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
02684 
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.
       Returns the (openTag, closeTag) pair built by _makeTags with xml disabled."""
    return _makeTags( tagStr, xml=False )
02688 
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.
       Returns the (openTag, closeTag) pair built by _makeTags with xml enabled."""
    return _makeTags( tagStr, xml=True )
02692 
# characters with codes 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xfe, expanded via srange
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xfe]")

# a backslash followed by any one character (other than newline)
_escapedChar = Regex(r"\\.")
# quoted strings: no raw newlines or backslashes inside; a doubled quote
# character or a backslash-escaped character is accepted
dblQuotedString = Regex(r'"([^"\n\r\\]|("")|(\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'([^'\n\r\\]|('')|(\\.))*'").setName("string enclosed in single quotes")
quotedString = Regex(r'''("([^"\n\r\\]|("")|(\\.))*")|('([^'\n\r\\]|('')|(\\.))*')''').setName("quotedString using single or double quotes")

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# ([\s\S]*? is a non-greedy match that also spans newlines)
cStyleComment = Regex(r"\/\*[\s\S]*?\*\/").setName("C style comment")
htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything up to (not including) the end of the current line, leading whitespace kept
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/.*").setName("// comment")
cppStyleComment = Regex(r"(\/\*[\s\S]*?\*\/)|(\/\/.*)").setName("C++ style comment")
# alias: the very same expression object as cppStyleComment
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# an unquoted list item: runs of non-comma printables, optionally joined by
# spaces/tabs provided the whitespace is not followed by a comma or end of line
_noncomma = "".join( [ c for c in printables if c != "," ] )
_commasepitem = Combine(OneOrMore(Word(_noncomma) + 
                                  Optional( Word(" \t") + 
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# items may be quoted strings or unquoted text; a missing item yields ""
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
02713 
02714 
# Self-test / demo, run only when the module is executed as a script:
# builds a tiny "SELECT columns FROM tables" grammar and parses sample strings.
if __name__ == "__main__":

    def test( teststring ):
        # Parse one string with simpleSQL (defined below, before the first
        # call) and print the results, or the error location on failure.
        print teststring,"->",
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print tokenlist
            print "tokens = ",        tokens
            print "tokens.columns =", tokens.columns
            print "tokens.tables =",  tokens.tables
            print tokens.asXML("SQL",True)
        except ParseException, err:
            # show the offending line with a caret under the error column
            print err.line
            print " "*(err.column-1) + "^"
            print err
        print

    selectToken    = CaselessLiteral( "select" )
    fromToken      = CaselessLiteral( "from" )

    ident          = Word( alphas, alphanums + "_$" )
    # dotted names are combined into one token and upper-cased
    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList  = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL      = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )
    
    # well-formed statements
    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    # malformed statements, exercising the error report in test()
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    # trailing whitespace after the table list
    test( "Select A, B, C from Sys.dual, Table2   " )

Generated on Tue Jun 9 17:36:27 2009 for CMSSW by  doxygen 1.5.4