00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 __doc__ = \
00027 """
00028 pyparsing module - Classes and methods to define and execute parsing grammars
00029
00030 The pyparsing module is an alternative approach to creating and executing simple grammars,
00031 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
00032 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
00033 provides a library of classes that you use to construct the grammar directly in Python.
00034
00035 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
00036
00037 from pyparsing import Word, alphas
00038
00039 # define grammar of a greeting
00040 greet = Word( alphas ) + "," + Word( alphas ) + "!"
00041
00042 hello = "Hello, World!"
00043 print hello, "->", greet.parseString( hello )
00044
00045 The program outputs the following::
00046
00047 Hello, World! -> ['Hello', ',', 'World', '!']
00048
00049 The Python representation of the grammar is quite readable, owing to the self-explanatory
00050 class names, and the use of '+', '|' and '^' operators.
00051
00052 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
00053 object with named attributes.
00054
00055 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
00056 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
00057 - quoted strings
00058 - embedded comments
00059 """
00060 __version__ = "1.4.3"
00061 __versionTime__ = "1 July 2006 05:32"
00062 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
00063
00064 import string
00065 import copy,sys
00066 import warnings
00067 import re
00068 import sre_constants
00069 import xml.sax.saxutils
00070
00071
def _ustr(obj):
    """Drop-in replacement for str(obj) that tries to be Unicode friendly.

    str(obj) is tried first, so any object that already converts cleanly
    behaves exactly as it would with str().  If that raises
    UnicodeEncodeError, the result of unicode(obj) is returned instead; the
    unicode object is returned as-is and no further encoding is attempted.
    """
    try:
        return str(obj)
    except UnicodeEncodeError:
        # The exception object itself is not needed, so it is not bound.
        return unicode(obj)
00089
00090
00091
00092
00093
00094
00095
def _str2dict(strg):
    """Return a dict with one key per distinct character of strg, each mapped to 0."""
    return dict.fromkeys(strg, 0)
00098
# Character-set constants used when building grammars.
# The ascii_* constants are used rather than string.lowercase/uppercase:
# those are locale-dependent and do not exist on Python 3.
alphas = string.ascii_lowercase + string.ascii_uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
00103
class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions.

    Each instance stores the string being parsed (pstr), the character
    offset the exception refers to (loc), a message (msg) and, optionally,
    the parser element involved (parserElement).
    """
    __slots__ = ( "loc","msg","pstr","parserElement" )

    def __init__( self, pstr, loc, msg, elem=None ):
        self.loc = loc
        self.msg = msg
        self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """Compute the position attributes on demand.

        Supported attributes by name are:
         - lineno - returns the line number of the exception text
         - col (or column) - returns the column number of the exception text
         - line - returns the line containing the exception text

        Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        elif aname in ("col", "column"):
            return col( self.loc, self.pstr )
        elif aname == "line":
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
        the location of the exception with a special symbol.

        The marker is inserted at the exception column; an empty
        markerString leaves the line unmarked.  The result is stripped of
        leading and trailing whitespace.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column], markerString, line_str[line_column:]])
        return line_str.strip()
00143
class ParseException(ParseBaseException):
    """Exception thrown when parse expressions don't match the input.

    Supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text
    """
    pass
00152
class ParseFatalException(ParseBaseException):
    """User-throwable exception for inconsistent parse content.

    Raising it stops all parsing immediately.
    """
00157
class RecursiveGrammarException(Exception):
    """Exception thrown by validate() if the grammar could be improperly recursive.

    The list of parse elements given to the constructor is stored on
    parseElementTrace and included in the string form.
    """

    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        prefix = "RecursiveGrammarException: "
        return prefix + "%s" % self.parseElementTrace
00165
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

    Internally the tokens live in a plain list, and the named results in a
    dict mapping each name to a list of (value, position) pairs.
    """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__modal" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # Wrapping an existing ParseResults hands back that same object;
        # __init__ then skips re-initialisation and only applies the name.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    def __init__( self, toklist, name=None, asList=True, modal=True ):
        """Store toklist (copied if it is a list, wrapped in a list
        otherwise) and, if name is given, register the tokens under it."""
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__modal = modal
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not self.__name:
                self.__modal = self.__modal and modal
            if isinstance(name,int):
                name = _ustr(name)
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    # position -1 marks an entry that names the whole list
                    if isinstance(toklist,ParseResults):
                        self[name] = (toklist.copy(),-1)
                    else:
                        self[name] = (ParseResults(toklist[0]),-1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Index by int/slice into the token list, or by results name.

        For a name, a modal result gives the last value stored under it;
        otherwise all values are returned in a new ParseResults.
        """
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        if self.__modal:
            return self.__tokdict[i][-1][0]
        return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        """Store a token (int key) or a named result (any other key).

        A tuple value is taken to be a ready-made (value, position) pair;
        any other value stored under a name gets position 0.
        """
        if isinstance(v,tuple):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            # sub must be bound on this path too, otherwise the parent
            # check below raises UnboundLocalError
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = self

    def __delitem__( self, i ):
        # only the token list is touched; named results are left as they are
        del self.__toklist[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ):
        return len( self.__toklist )

    def __iter__( self ):
        return iter( self.__toklist )

    def keys( self ):
        """Returns all named result keys."""
        return list( self.__tokdict.keys() )

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,v[-1][0]) for k,v in self.__tokdict.items()]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        """Attribute access to named results; an unknown name gives ""."""
        if name not in self.__slots__:
            if name in self.__tokdict:
                if self.__modal:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __iadd__( self, other ):
        """Append other's tokens and merge its named results into self."""
        if other.__tokdict:
            offset = len(self.__toklist)

            def addoffset( position ):
                # A negative position names a whole sub-result, which will
                # start at offset in the merged list.  Written as an explicit
                # branch: the old "cond and a or b" form gave -1 for offset 0.
                if position < 0:
                    return offset
                return position + offset

            otherdictitems = [(k,(v[0],addoffset(v[1])) )
                              for (k,vlist) in other.__tokdict.items()
                              for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = self
        self.__toklist += other.__toklist
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        parts = []
        for tok in self.__toklist:
            if isinstance(tok, ParseResults):
                parts.append( _ustr(tok) )
            else:
                parts.append( repr(tok) )
        return "[" + ", ".join(parts) + "]"

    def _asStringList( self, sep='' ):
        """Flatten the tokens into a list of strings; sep, if given, is
        inserted between this level's items (not inside nested results)."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.

        Nested ParseResults become nested lists; every other token is
        included unchanged.
        """
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object.

        The token list and the name dictionary are copied shallowly.
        """
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__modal = self.__modal
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

        Unnamed items are tagged ITEM, or left out when namedItemsOnly is
        set.  With formatted=False no newlines or indentation are emitted.
        """
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                      for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        elif self.__name:
            selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                # namedItems.get(i) is None for an unnamed nested result
                out += [ res.asXML(namedItems.get(i),
                                   namedItemsOnly and doctag is None,
                                   nextLevelIndent,
                                   formatted) ]
            else:
                resTag = namedItems.get(i)
                if not resTag:
                    if namedItemsOnly:
                        continue
                    resTag = "ITEM"
                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">", xmlBodyText, "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # reverse lookup: the name under which the object sub is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression.

        Falls back to the name the parent stores this object under, then to
        the sole name of a one-token result; otherwise returns None.
        """
        if self.__name:
            return self.__name
        elif self.__parent:
            return self.__parent.__lookup(self)
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
        Accepts an optional indent argument so that this string can be embedded
        in a nested display of other data."""
        out = []
        entries = self.items()
        entries.sort()
        for k,v in entries:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults) and v.keys():
                out.append('\n')
                out.append( v.dump(indent,depth+1) )
                out.append('\n')
            else:
                out.append(str(v))
        out.append('\n')
        out.append( indent+str(self.asList()) )
        return "".join(out)
00430
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.
    """
    # rfind gives -1 when no newline precedes loc, which makes the first
    # line count from 1 as well
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
00436
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.
    """
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before
00442
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

    The returned text does not include the terminating newline.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() reports "not found" as -1; index 0 is a real newline position
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
00452
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action for the start of a match attempt: writes the
    expression, the location, and its (line,column) to standard output."""
    # sys.stdout.write instead of a print statement, which is a syntax
    # error on Python 3
    sys.stdout.write( "Match %s at loc %s (%d,%d)\n"
                      % ( expr, loc, lineno(loc,instring), col(loc,instring) ) )
00455
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action for a successful match: writes the expression
    and toks.asList() to standard output."""
    # sys.stdout.write instead of a print statement, which is a syntax
    # error on Python 3
    sys.stdout.write( "Matched %s -> %s\n" % ( expr, toks.asList() ) )
00458
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action for a failed match: writes the exception to
    standard output."""
    # sys.stdout.write instead of a print statement, which is a syntax
    # error on Python 3; the 1-tuple keeps a tuple-valued exc in one piece
    sys.stdout.write( "Exception raised: %s\n" % ( exc, ) )
00461
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments and ignores them.
    """
    return None
00465
00466 class ParserElement(object):
00467 """Abstract base level parser element class."""
00468 DEFAULT_WHITE_CHARS = " \n\t\r"
00469
00470 def setDefaultWhitespaceChars( chars ):
00471 """Overrides the default whitespace chars
00472 """
00473 ParserElement.DEFAULT_WHITE_CHARS = chars
00474 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
00475
00476 def __init__( self, savelist=False ):
00477 self.parseAction = list()
00478 self.failAction = None
00479
00480 self.strRepr = None
00481 self.resultsName = None
00482 self.saveAsList = savelist
00483 self.skipWhitespace = True
00484 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
00485 self.copyDefaultWhiteChars = True
00486 self.mayReturnEmpty = False
00487 self.keepTabs = False
00488 self.ignoreExprs = list()
00489 self.debug = False
00490 self.streamlined = False
00491 self.mayIndexError = True
00492 self.errmsg = ""
00493 self.modalResults = True
00494 self.debugActions = ( None, None, None )
00495 self.re = None
00496
00497 def copy( self ):
00498 """Make a copy of this ParserElement. Useful for defining different parse actions
00499 for the same parsing pattern, using copies of the original parse element."""
00500 cpy = copy.copy( self )
00501 cpy.parseAction = self.parseAction[:]
00502 cpy.ignoreExprs = self.ignoreExprs[:]
00503 if self.copyDefaultWhiteChars:
00504 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
00505 return cpy
00506
00507 def setName( self, name ):
00508 """Define name for this expression, for use in debugging."""
00509 self.name = name
00510 self.errmsg = "Expected " + self.name
00511 return self
00512
00513 def setResultsName( self, name, listAllMatches=False ):
00514 """Define name for referencing matching tokens as a nested attribute
00515 of the returned parse results.
00516 NOTE: this returns a *copy* of the original ParserElement object;
00517 this is so that the client can define a basic element, such as an
00518 integer, and reference it in multiple places with different names.
00519 """
00520 newself = self.copy()
00521 newself.resultsName = name
00522 newself.modalResults = not listAllMatches
00523 return newself
00524
00525 def normalizeParseActionArgs( f ):
00526 """Internal method used to decorate parse actions that take fewer than 3 arguments,
00527 so that all parse actions can be called as f(s,l,t)."""
00528 STAR_ARGS = 4
00529 try:
00530 if f.func_code.co_flags & STAR_ARGS:
00531 return f
00532 numargs = f.func_code.co_argcount
00533 if hasattr(f,"im_self"):
00534 numargs -= 1
00535 except AttributeError:
00536 try:
00537
00538
00539 if f.__call__.im_func.func_code.co_flags & STAR_ARGS:
00540 return f
00541 numargs = f.__call__.im_func.func_code.co_argcount
00542 if hasattr(f.__call__,"im_self"):
00543 numargs -= 1
00544 except AttributeError:
00545
00546 if f.__call__.func_code.co_flags & STAR_ARGS:
00547 return f
00548 numargs = f.__call__.func_code.co_argcount
00549 if hasattr(f.__call__,"im_self"):
00550 numargs -= 1
00551
00552
00553 if numargs == 3:
00554 return f
00555 else:
00556 if numargs == 2:
00557 def tmp(s,l,t):
00558 return f(l,t)
00559 elif numargs == 1:
00560 def tmp(s,l,t):
00561 return f(t)
00562 else:
00563 def tmp(s,l,t):
00564 return f()
00565 return tmp
00566 normalizeParseActionArgs = staticmethod(normalizeParseActionArgs)
00567
00568 def setParseAction( self, *fns ):
00569 """Define action to perform when successfully matching parse element definition.
00570 Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
00571 fn(loc,toks), fn(toks), or just fn(), where:
00572 - s = the original string being parsed
00573 - loc = the location of the matching substring
00574 - toks = a list of the matched tokens, packaged as a ParseResults object
00575 If the functions in fns modify the tokens, they can return them as the return
00576 value from fn, and the modified list of tokens will replace the original.
00577 Otherwise, fn does not need to return any value."""
00578 self.parseAction = [self.normalizeParseActionArgs(f) for f in list(fns)]
00579 return self
00580
00581 def addParseAction( self, *fns ):
00582 """Add parse action to expression's list of parse actions. See setParseAction_."""
00583 self.parseAction += [self.normalizeParseActionArgs(f) for f in list(fns)]
00584 return self
00585
00586 def setFailAction( self, fn ):
00587 """Define action to perform if parsing fails at this expression.
00588 Fail acton fn is a callable function that takes the arguments
00589 fn(s,loc,expr,err) where:
00590 - s = string being parsed
00591 - loc = location where expression match was attempted and failed
00592 - expr = the parse expression that failed
00593 - err = the exception thrown
00594 The function returns no value. It may throw ParseFatalException
00595 if it is desired to stop parsing immediately."""
00596 self.failAction = fn
00597 return self
00598
00599 def skipIgnorables( self, instring, loc ):
00600 exprsFound = True
00601 while exprsFound:
00602 exprsFound = False
00603 for e in self.ignoreExprs:
00604 try:
00605 while 1:
00606 loc,dummy = e._parse( instring, loc )
00607 exprsFound = True
00608 except ParseException:
00609 pass
00610 return loc
00611
00612 def preParse( self, instring, loc ):
00613 if self.ignoreExprs:
00614 loc = self.skipIgnorables( instring, loc )
00615
00616 if self.skipWhitespace:
00617 wt = self.whiteChars
00618 instrlen = len(instring)
00619 while loc < instrlen and instring[loc] in wt:
00620 loc += 1
00621
00622 return loc
00623
00624 def parseImpl( self, instring, loc, doActions=True ):
00625 return loc, []
00626
00627 def postParse( self, instring, loc, tokenlist ):
00628 return tokenlist
00629
00630
00631 def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
00632 debugging = ( self.debug )
00633
00634 if debugging or self.failAction:
00635
00636 if (self.debugActions[0] ):
00637 self.debugActions[0]( instring, loc, self )
00638 if callPreParse:
00639 preloc = self.preParse( instring, loc )
00640 else:
00641 preloc = loc
00642 tokensStart = loc
00643 try:
00644 try:
00645 loc,tokens = self.parseImpl( instring, preloc, doActions )
00646 except IndexError:
00647 raise ParseException( instring, len(instring), self.errmsg, self )
00648 except ParseException, err:
00649
00650 if self.debugActions[2]:
00651 self.debugActions[2]( instring, tokensStart, self, err )
00652 if self.failAction:
00653 self.failAction( instring, tokensStart, self, err )
00654 raise
00655 else:
00656 if callPreParse:
00657 preloc = self.preParse( instring, loc )
00658 else:
00659 preloc = loc
00660 tokensStart = loc
00661 if self.mayIndexError or loc >= len(instring):
00662 try:
00663 loc,tokens = self.parseImpl( instring, preloc, doActions )
00664 except IndexError:
00665 raise ParseException( instring, len(instring), self.errmsg, self )
00666 else:
00667 loc,tokens = self.parseImpl( instring, preloc, doActions )
00668
00669 tokens = self.postParse( instring, loc, tokens )
00670
00671 retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
00672 if self.parseAction and doActions:
00673 if debugging:
00674 try:
00675 for fn in self.parseAction:
00676 tokens = fn( instring, tokensStart, retTokens )
00677 if tokens is not None:
00678 retTokens = ParseResults( tokens,
00679 self.resultsName,
00680 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
00681 modal=self.modalResults )
00682 except ParseException, err:
00683
00684 if (self.debugActions[2] ):
00685 self.debugActions[2]( instring, tokensStart, self, err )
00686 raise
00687 else:
00688 for fn in self.parseAction:
00689 tokens = fn( instring, tokensStart, retTokens )
00690 if tokens is not None:
00691 retTokens = ParseResults( tokens,
00692 self.resultsName,
00693 asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
00694 modal=self.modalResults )
00695
00696 if debugging:
00697
00698 if (self.debugActions[1] ):
00699 self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
00700
00701 return loc, retTokens
00702
00703 def tryParse( self, instring, loc ):
00704 return self._parse( instring, loc, doActions=False )[0]
00705
00706
00707
00708 def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
00709 lookup = (self,instring,loc,callPreParse)
00710 if lookup in ParserElement._exprArgCache:
00711 value = ParserElement._exprArgCache[ lookup ]
00712 if isinstance(value,Exception):
00713 if isinstance(value,ParseBaseException):
00714 value.loc = loc
00715 raise value
00716 return value
00717 else:
00718 try:
00719 ParserElement._exprArgCache[ lookup ] = \
00720 value = self._parseNoCache( instring, loc, doActions, callPreParse )
00721 return value
00722 except ParseBaseException, pe:
00723 ParserElement._exprArgCache[ lookup ] = pe
00724 raise
00725
00726 _parse = _parseNoCache
00727
00728
00729 _exprArgCache = {}
00730 def resetCache():
00731 ParserElement._exprArgCache.clear()
00732 resetCache = staticmethod(resetCache)
00733
00734 _packratEnabled = False
00735 def enablePackrat():
00736 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
00737 Repeated parse attempts at the same string location (which happens
00738 often in many complex grammars) can immediately return a cached value,
00739 instead of re-executing parsing/validating code. Memoizing is done of
00740 both valid results and parsing exceptions.
00741
00742 This speedup may break existing programs that use parse actions that
00743 have side-effects. For this reason, packrat parsing is disabled when
00744 you first import pyparsing. To activate the packrat feature, your
00745 program must call the class method ParserElement.enablePackrat(). If
00746 your program uses psyco to "compile as you go", you must call
00747 enablePackrat before calling psyco.full(). If you do not do this,
00748 Python will crash. For best results, call enablePackrat() immediately
00749 after importing pyparsing.
00750 """
00751 if not ParserElement._packratEnabled:
00752 ParserElement._packratEnabled = True
00753 ParserElement._parse = ParserElement._parseCache
00754 enablePackrat = staticmethod(enablePackrat)
00755
00756 def parseString( self, instring ):
00757 """Execute the parse expression with the given string.
00758 This is the main interface to the client code, once the complete
00759 expression has been built.
00760 """
00761 ParserElement.resetCache()
00762 if not self.streamlined:
00763 self.streamline()
00764
00765 for e in self.ignoreExprs:
00766 e.streamline()
00767 if self.keepTabs:
00768 loc, tokens = self._parse( instring, 0 )
00769 else:
00770 loc, tokens = self._parse( instring.expandtabs(), 0 )
00771 return tokens
00772
00773 def scanString( self, instring ):
00774 """Scan the input string for expression matches. Each match will return the matching tokens, start location, and end location."""
00775 if not self.streamlined:
00776 self.streamline()
00777 for e in self.ignoreExprs:
00778 e.streamline()
00779
00780 if not self.keepTabs:
00781 instring = instring.expandtabs()
00782 instrlen = len(instring)
00783 loc = 0
00784 preparseFn = self.preParse
00785 parseFn = self._parse
00786 ParserElement.resetCache()
00787 while loc <= instrlen:
00788 try:
00789 preloc = preparseFn( instring, loc )
00790 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
00791 except ParseException:
00792 loc += 1
00793 else:
00794 yield tokens, preloc, nextLoc
00795 loc = nextLoc
00796
00797 def transformString( self, instring ):
00798 """Extension to scanString, to modify matching text with modified tokens that may
00799 be returned from a parse action. To use transformString, define a grammar and
00800 attach a parse action to it that modifies the returned token list.
00801 Invoking transformString() on a target string will then scan for matches,
00802 and replace the matched text patterns according to the logic in the parse
00803 action. transformString() returns the resulting transformed string."""
00804 out = []
00805 lastE = 0
00806
00807
00808 self.keepTabs = True
00809 for t,s,e in self.scanString( instring ):
00810 out.append( instring[lastE:s] )
00811 if t:
00812 if isinstance(t,ParseResults):
00813 out += t.asList()
00814 elif isinstance(t,list):
00815 out += t
00816 else:
00817 out.append(t)
00818 lastE = e
00819 out.append(instring[lastE:])
00820 return "".join(out)
00821
00822 def searchString( self, instring ):
00823 """Another extension to scanString, simplifying the access to the tokens found
00824 to match the given parse expression.
00825 """
00826 return ParseResults([ t for t,s,e in self.scanString( instring ) ])
00827
00828 def __add__(self, other ):
00829 """Implementation of + operator - returns And"""
00830 if isinstance( other, basestring ):
00831 other = Literal( other )
00832 if not isinstance( other, ParserElement ):
00833 warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00834 SyntaxWarning, stacklevel=2)
00835 return And( [ self, other ] )
00836
00837 def __radd__(self, other ):
00838 """Implementation of += operator"""
00839 if isinstance( other, basestring ):
00840 other = Literal( other )
00841 if not isinstance( other, ParserElement ):
00842 warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00843 SyntaxWarning, stacklevel=2)
00844 return other + self
00845
00846 def __or__(self, other ):
00847 """Implementation of | operator - returns MatchFirst"""
00848 if isinstance( other, basestring ):
00849 other = Literal( other )
00850 if not isinstance( other, ParserElement ):
00851 warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00852 SyntaxWarning, stacklevel=2)
00853 return MatchFirst( [ self, other ] )
00854
00855 def __ror__(self, other ):
00856 """Implementation of |= operator"""
00857 if isinstance( other, basestring ):
00858 other = Literal( other )
00859 if not isinstance( other, ParserElement ):
00860 warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00861 SyntaxWarning, stacklevel=2)
00862 return other | self
00863
00864 def __xor__(self, other ):
00865 """Implementation of ^ operator - returns Or"""
00866 if isinstance( other, basestring ):
00867 other = Literal( other )
00868 if not isinstance( other, ParserElement ):
00869 warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00870 SyntaxWarning, stacklevel=2)
00871 return Or( [ self, other ] )
00872
00873 def __rxor__(self, other ):
00874 """Implementation of ^= operator"""
00875 if isinstance( other, basestring ):
00876 other = Literal( other )
00877 if not isinstance( other, ParserElement ):
00878 warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00879 SyntaxWarning, stacklevel=2)
00880 return other ^ self
00881
00882 def __and__(self, other ):
00883 """Implementation of & operator - returns Each"""
00884 if isinstance( other, basestring ):
00885 other = Literal( other )
00886 if not isinstance( other, ParserElement ):
00887 warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00888 SyntaxWarning, stacklevel=2)
00889 return Each( [ self, other ] )
00890
00891 def __rand__(self, other ):
00892 """Implementation of right-& operator"""
00893 if isinstance( other, basestring ):
00894 other = Literal( other )
00895 if not isinstance( other, ParserElement ):
00896 warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
00897 SyntaxWarning, stacklevel=2)
00898 return other & self
00899
00900 def __invert__( self ):
00901 """Implementation of ~ operator - returns NotAny"""
00902 return NotAny( self )
00903
00904 def suppress( self ):
00905 """Suppresses the output of this ParserElement; useful to keep punctuation from
00906 cluttering up returned output.
00907 """
00908 return Suppress( self )
00909
00910 def leaveWhitespace( self ):
00911 """Disables the skipping of whitespace before matching the characters in the
00912 ParserElement's defined pattern. This is normally only used internally by
00913 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
00914 """
00915 self.skipWhitespace = False
00916 return self
00917
00918 def setWhitespaceChars( self, chars ):
00919 """Overrides the default whitespace chars
00920 """
00921 self.skipWhitespace = True
00922 self.whiteChars = chars
00923 self.copyDefaultWhiteChars = False
00924 return self
00925
00926 def parseWithTabs( self ):
00927 """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
00928 Must be called before parseString when the input grammar contains elements that
00929 match <TAB> characters."""
00930 self.keepTabs = True
00931 return self
00932
00933 def ignore( self, other ):
00934 """Define expression to be ignored (e.g., comments) while doing pattern
00935 matching; may be called repeatedly, to define multiple comment or other
00936 ignorable patterns.
00937 """
00938 if isinstance( other, Suppress ):
00939 if other not in self.ignoreExprs:
00940 self.ignoreExprs.append( other )
00941 else:
00942 self.ignoreExprs.append( Suppress( other ) )
00943 return self
00944
00945 def setDebugActions( self, startAction, successAction, exceptionAction ):
00946 """Enable display of debugging messages while doing pattern matching."""
00947 self.debugActions = (startAction or _defaultStartDebugAction,
00948 successAction or _defaultSuccessDebugAction,
00949 exceptionAction or _defaultExceptionDebugAction)
00950 self.debug = True
00951 return self
00952
def setDebug(self, flag=True):
    """Switch display of debugging messages on or off for this element.

    A true flag installs the default start/success/exception debug actions
    (which also sets the debug flag); a false flag just clears the debug flag.

    Returns self, so calls can be chained.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction,
                         _defaultSuccessDebugAction,
                         _defaultExceptionDebugAction)
    return self
00960
def __str__(self):
    """Return this element's name attribute as its string form."""
    label = self.name
    return label
00963
def __repr__(self):
    """Return the text that _ustr() produces for this element."""
    text = _ustr(self)
    return text
00966
def streamline(self):
    """Mark this element as streamlined and drop its cached string form.

    Returns self, so calls can be chained.
    """
    # clearing strRepr forces the string representation to be rebuilt
    self.strRepr = None
    self.streamlined = True
    return self
00971
def checkRecursion(self, parseElementList):
    """Recursion-check hook; this implementation does nothing.

    Other classes in this file override it to walk their contained
    expressions.
    """
    return None
00974
def validate(self, validateTrace=[]):
    """Check defined expressions for valid structure and for infinite
    recursive definitions.

    This implementation ignores validateTrace and simply starts a recursion
    check with an empty element list.
    """
    seen = []
    self.checkRecursion(seen)
00978
def parseFile(self, file_or_filename):
    """Execute the parse expression on the given file or filename.

    file_or_filename may be an object with a read() method, in which case
    its contents are read directly, or a filename.  For a filename the file
    is opened in binary mode, read in full, and closed before parsing.

    Returns whatever self.parseString() returns for the file contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method: treat the argument as a filename
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # close the handle even if read() fails
            f.close()
    return self.parseString(file_contents)
00991
00992
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__(self):
        super(Token, self).__init__(savelist=False)
        # one exception instance is created up front; subclasses update its
        # loc/pstr fields and raise it on a failed match
        self.myException = ParseException("", 0, "", self)

    def setName(self, name):
        """Set the element name and refresh the matching error message.

        Returns whatever the base class setName() returns.
        """
        named = super(Token, self).setName(name)
        message = "Expected " + self.name
        self.errmsg = message
        named.myException.msg = message
        return named
01004
01005
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
01013
01014
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        message = "Unmatchable token"
        self.errmsg = message
        self.myException.msg = message

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise the stored exception at the current location."""
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
01030
01031
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__( self, matchString ):
        """Store matchString as the exact text to match.

        An empty matchString triggers a SyntaxWarning and the instance is
        re-classed as Empty.
        """
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        # A null literal has just been turned into an Empty, which matches
        # without consuming input, so it must report that it may return
        # empty; previously this was unconditionally False.
        self.mayReturnEmpty = ( self.matchLen == 0 )
        self.myException.msg = self.errmsg
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the literal at loc.

        Returns (new location, matched string); raises the stored exception
        when the text at loc does not match.
        """
        # compare the first character before the full startswith() test
        if (instring[loc] == self.firstMatchChar and
            (self.matchLen==1 or instring.startswith(self.match,loc)) ):
            return loc+self.matchLen, self.match

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
01063
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to Keyword.DEFAULT_KEYWORD_CHARS (initially all alphanumerics + "_" and "$");
       caseless allows case-insensitive matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        # Resolve the default at call time.  Binding DEFAULT_KEYWORD_CHARS in
        # the signature froze its value when the class was defined, so
        # setDefaultKeywordChars() had no effect on new keywords.
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are all done on upper-cased text
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, requiring that it is neither preceded
        nor followed by an identifier character.

        Returns (new location, keyword string); raises the stored exception
        on failure.
        """
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy of this keyword that keeps its identifier characters."""
        c = super(Keyword,self).copy()
        # Keep this keyword's own identChars; resetting them to the class
        # default silently dropped any custom set passed to the constructor.
        c.identChars = self.identChars
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
01124
01125
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        # the base Literal stores the upper-cased text used for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        message = "Expected " + self.name
        self.errmsg = message
        self.myException.msg = message

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the upper-cased input slice at loc with the stored text.

        Returns (new location, the string as originally given); raises the
        stored exception on a mismatch.
        """
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return end, self.returnString
01147
class CaselessKeyword(Keyword):
    """Case-insensitive version of Keyword.

    identChars is a string of characters that count as identifier
    characters; when omitted, the current Keyword.DEFAULT_KEYWORD_CHARS
    is used.
    """
    def __init__( self, matchString, identChars=None ):
        # Resolve the default here, at call time, so that changes made with
        # Keyword.setDefaultKeywordChars() are honoured.
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc ignoring case.

        The keyword must be neither preceded nor followed by an identifier
        character, matching the caseless branch of Keyword.parseImpl.
        Returns (new location, keyword string); raises the stored exception
        on failure.
        """
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
01161
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0 ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = sys.maxint

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False

        # Fast path: with default length limits and no space in either
        # character set, the word can be matched by one compiled regex.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # Only a single *initial* character may be emitted as an
                # escaped literal.  Testing the body length here turned
                # Word("abc", "x") into the regex abc[x]*, which requires
                # the literal text "abc" instead of any one of a, b or c.
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best effort: fall back to the character-by-character scan
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc.

        Returns (new location, matched text); raises the stored exception
        when no acceptable word starts at loc.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that keeps going past it is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        # use the assigned name when the base class can supply one
        try:
            return super(Word,self).__str__()
        except:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
01277
01278
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """

    def __init__(self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex, self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                          SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            # warn about the bad pattern, then let the original error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        message = "Expected " + self.name
        self.errmsg = message
        self.myException.msg = message
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Apply the compiled pattern at loc.

        Returns (end of match, ParseResults holding the matched text); any
        named groups of the match are also stored in the results by name.
        Raises the stored exception when the pattern does not match.
        """
        found = self.re.match(instring, loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        tokens = ParseResults(found.group())
        for groupName, groupText in found.groupdict().items():
            tokens[groupName] = groupText
        return found.end(), tokens

    def __str__(self):
        # use the assigned name when the base class can supply one
        try:
            return super(Regex, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)
        return self.strRepr
01334
01335
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
           - quoteChar - string of one or more characters defining the quote delimiting string
           - escChar - character to escape quotes, typically backslash (default=None)
           - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
           - multiline - boolean indicating whether quotes can span multiple lines (default=False)
           - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
           - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
        """
        super(QuotedString, self).__init__()

        # surrounding whitespace is stripped from the delimiters
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is not None:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()
        else:
            endQuoteChar = quoteChar

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # pieces shared by the single-line and multiline patterns
        openQuote = re.escape(self.quoteChar)
        closeFirst = _escapeRegexRangeChars(self.endQuoteChar[0])
        escClass = ''
        if escChar is not None:
            escClass = _escapeRegexRangeChars(escChar) or ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            pieces = [r'%s([^%s%s]' % (openQuote, closeFirst, escClass)]
        else:
            # single-line strings additionally exclude line breaks
            self.flags = 0
            pieces = [r'%s([^%s\n\r%s]' % (openQuote, closeFirst, escClass)]

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the
            # body, as long as the following character breaks the end quote
            partials = []
            for i in range(len(self.endQuoteChar)-1, 0, -1):
                partials.append("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                             _escapeRegexRangeChars(self.endQuoteChar[i])))
            pieces.append('|(' + ')|('.join(partials) + ')')
        if escQuote:
            pieces.append(r'|(%s)' % re.escape(escQuote))
        if escChar:
            pieces.append(r'|(%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        pieces.append(r')*%s' % re.escape(self.endQuoteChar))
        self.pattern = "".join(pieces)

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            # warn about the bad pattern, then let the original error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        message = "Expected " + self.name
        self.errmsg = message
        self.myException.msg = message
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at loc.

        Returns (end of match, text).  With unquoteResults set, the
        delimiters are removed and escape sequences are replaced in the text.
        Raises the stored exception when no quoted string starts at loc.
        """
        found = None
        # check the opening character before running the regex
        if instring[loc] == self.firstQuoteChar:
            found = self.re.match(instring, loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        loc = found.end()
        text = found.group()

        if not self.unquoteResults:
            return loc, text

        # strip off the opening and closing quotes
        text = text[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(text, basestring):
            # replace escaped characters with the bare character
            if self.escChar:
                text = re.sub(self.escCharReplacePattern, r"\g<1>", text)
            # replace escaped quotes with the end-quote text
            if self.escQuote:
                text = text.replace(self.escQuote, self.endQuoteChar)

        return loc, text

    def __str__(self):
        # use the assigned name when the base class can supply one
        try:
            return super(QuotedString, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
        return self.strRepr
01450
01451
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        # a max of 0 means "no limit"; exact, when given, overrides both
        self.minLen = min
        self.maxLen = sys.maxint
        if max > 0:
            self.maxLen = max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        message = "Expected " + self.name
        self.errmsg = message
        self.mayReturnEmpty = (self.minLen == 0)
        self.myException.msg = message
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from loc until a disallowed one is found.

        Returns (new location, matched text); raises the stored exception
        when the first character is disallowed or the run is shorter than
        the minimum length.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        loc += 1
        limit = min(start + self.maxLen, len(instring))
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]

    def __str__(self):
        # use the assigned name when the base class can supply one
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            shown = self.notChars
            if len(shown) > 4:
                self.strRepr = "!W:(%s...)" % shown[:4]
            else:
                self.strRepr = "!W:(%s)" % shown
        return self.strRepr
01517
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # display names used to build the element name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # characters this token matches must not be skipped as whitespace
        stillSkipped = [ch for ch in self.whiteChars if ch not in self.matchWhite]
        self.setWhitespaceChars("".join(stillSkipped))

        labels = [White.whiteStrs[ch] for ch in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        message = "Expected " + self.name
        self.errmsg = message
        self.myException.msg = message

        # a max of 0 means "no limit"; exact, when given, overrides both
        self.minLen = min
        self.maxLen = sys.maxint
        if max > 0:
            self.maxLen = max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters at loc.

        Returns (new location, matched text); raises the stored exception
        when the first character is not one of them or the run is shorter
        than the minimum length.
        """
        if instring[loc] not in self.matchWhite:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        loc += 1
        limit = min(start + self.maxLen, len(instring))
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]
01574
01575
class PositionToken(Token):
    """Base class for the position-testing tokens defined after it
    (GoToColumn, LineStart, LineEnd, StringStart, StringEnd)."""

    def __init__(self):
        super(PositionToken, self).__init__()
        # the default name is the concrete class name
        self.name = self.__class__.__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
01582
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""

    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target column
        is reached; returns the resulting location."""
        if col(loc, instring) == self.col:
            return loc

        if self.ignoreExprs:
            loc = self.skipIgnorables(instring, loc)
        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between loc and the target column.

        Raises ParseException when loc is already past the target column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[loc:target]
01605
class LineStart(PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # only blanks and tabs are skipped, so newlines stay visible
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected start of line"
        self.myException.msg = self.errmsg

    def preParse( self, instring, loc ):
        """Step over one position when the text after the base class's
        pre-parse skipping starts with a newline; returns the location."""
        preloc = super(LineStart,self).preParse(instring,loc)
        # Guard the index: if the base pre-parse ran to the end of the input,
        # instring[preloc] raised a bare IndexError instead of letting
        # parseImpl report a normal parse failure.
        if preloc < len(instring) and instring[preloc] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens at the start of the input or just after a
        newline; otherwise raise the stored exception."""
        if not( loc==0 or ( loc<len(instring) and instring[loc-1] == "\n" ) ):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
01628
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the parse string"""

    def __init__(self):
        super(LineEnd, self).__init__()
        # only blanks and tabs are skipped, so newlines stay visible
        self.setWhitespaceChars(" \t")
        message = "Expected end of line"
        self.errmsg = message
        self.myException.msg = message

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at loc, or the end of the input.

        Returns (loc+1, "\\n") for a newline and (loc+1, []) at the end of
        the input; raises the stored exception anywhere else.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
01654
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse string"""

    def __init__(self):
        super(StringStart, self).__init__()
        message = "Expected start of text"
        self.errmsg = message
        self.myException.msg = message

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when loc is 0, or when loc is where
        pre-parsing from position 0 ends up; otherwise raise the stored
        exception."""
        if loc != 0 and loc != self.preParse(instring, 0):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
01672
class StringEnd(PositionToken):
    """Matches if current position is at the end of the parse string"""

    def __init__(self):
        super(StringEnd, self).__init__()
        message = "Expected end of text"
        self.errmsg = message
        self.myException.msg = message

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc+1, []) when loc is exactly the end of the input;
        raise the stored exception for any other location."""
        if loc == len(instring):
            return loc + 1, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
01694
01695
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""

    def __init__(self, exprs, savelist = False):
        super(ParseExpression, self).__init__(savelist)
        # a list is stored as given; a string becomes a single Literal;
        # anything else is wrapped in a one-element list
        if isinstance(exprs, basestring):
            exprs = [Literal(exprs)]
        elif not isinstance(exprs, list):
            exprs = [exprs]
        self.exprs = exprs

    def __getitem__(self, i):
        return self.exprs[i]

    def append(self, other):
        """Add another contained expression; returns self."""
        self.exprs.append(other)
        # the cached string form no longer describes the expression
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on shallow copies of the contained expressions
        copies = [copy.copy(sub) for sub in self.exprs]
        self.exprs = copies
        for sub in copies:
            sub.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register an ignorable expression here and pass the registered
        Suppress on to every contained expression; returns self."""
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            # already registered: nothing to do
            return self

        super(ParseExpression, self).ignore(other)
        newest = self.ignoreExprs[-1]
        for sub in self.exprs:
            sub.ignore(newest)
        return self

    def __str__(self):
        # use the assigned name when the base class can supply one
        try:
            return super(ParseExpression, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline(self):
        """Streamline the contained expressions and flatten nesting.

        For a two-element expression, a first or last element of the same
        class is merged into this one, provided it has no parse actions, no
        results name, and debugging off.  Returns self.
        """
        super(ParseExpression, self).streamline()

        for sub in self.exprs:
            sub.streamline()

        if len(self.exprs) != 2:
            return self

        ownClass = self.__class__

        def mergeable(candidate):
            # only plain nested expressions of the same class can be absorbed
            return ( isinstance(candidate, ownClass) and
                     not(candidate.parseAction) and
                     candidate.resultsName is None and
                     not candidate.debug )

        head = self.exprs[0]
        if mergeable(head):
            self.exprs = head.exprs[:] + [ self.exprs[1] ]
            self.strRepr = None
            self.mayReturnEmpty |= head.mayReturnEmpty
            self.mayIndexError |= head.mayIndexError

        tail = self.exprs[-1]
        if mergeable(tail):
            self.exprs = self.exprs[:-1] + tail.exprs[:]
            self.strRepr = None
            self.mayReturnEmpty |= tail.mayReturnEmpty
            self.mayIndexError |= tail.mayIndexError

        return self

    def setResultsName(self, name, listAllMatches=False):
        """Delegate to the base class implementation and return its result."""
        return super(ParseExpression, self).setResultsName(name, listAllMatches)

    def validate(self, validateTrace=[]):
        """Validate every contained expression, then run a recursion check."""
        trace = validateTrace[:] + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self.checkRecursion( [] )
01788
01789
01790
01791
01792
01793
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        # the sequence can only match empty if every element can
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Whitespace handling follows the first contained expression.
        # setWhitespaceChars() forces skipWhitespace to True, so it has to be
        # called first; in the old order the copied skipWhitespace value was
        # immediately overwritten.  self.exprs is used rather than the raw
        # argument so a non-list argument is handled as well.
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order, starting each one where
        the previous one stopped.

        Returns (final location, accumulated tokens); any exception raised
        by a contained expression propagates.
        """
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions )
        for e in self.exprs[1:]:
            loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that have either tokens or named entries
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Support 'expr += other'; a string is converted to a Literal."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other )

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            # stop at the first element that cannot match empty
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
01837
01838
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the '^' operator.
    """

    def __init__(self, exprs, savelist = False):
        super(Or, self).__init__(exprs, savelist)
        # the alternation may match empty as soon as one alternative can
        self.mayReturnEmpty = False
        for alt in self.exprs:
            if alt.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative at loc and parse with the one whose trial
        match ends furthest along (the earliest listed wins a tie).

        When nothing matches, re-raises the failure that got furthest, or a
        ParseException if there are no alternatives at all.
        """
        furthestFailLoc = -1
        furthestMatchLoc = -1
        for alt in self.exprs:
            try:
                trialEnd = alt.tryParse(instring, loc)
            except ParseException:
                # sys.exc_info() fetches the exception portably across
                # Python versions
                err = sys.exc_info()[1]
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                # running off the end of the input counts as failing there
                inputLen = len(instring)
                if inputLen > furthestFailLoc:
                    furthestFailure = ParseException(instring,inputLen,alt.errmsg,self)
                    furthestFailLoc = inputLen
            else:
                if trialEnd > furthestMatchLoc:
                    furthestMatchLoc = trialEnd
                    bestAlt = alt

        if furthestMatchLoc < 0:
            if self.exprs:
                raise furthestFailure
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        return bestAlt._parse(instring, loc, doActions)

    def __ixor__(self, other):
        """Support 'expr ^= other'; a string is converted to a Literal."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(alt) for alt in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"
        return self.strRepr

    def checkRecursion(self, parseElementList):
        chain = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion(chain)
01897
01898
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """

    def __init__(self, exprs, savelist = False):
        super(MatchFirst, self).__init__(exprs, savelist)
        if not exprs:
            self.mayReturnEmpty = True
        else:
            # may match empty as soon as one alternative can
            self.mayReturnEmpty = False
            for alt in self.exprs:
                if alt.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses at loc.

        When nothing matches, re-raises the failure that got furthest, or a
        ParseException if there are no alternatives at all.
        """
        furthestFailLoc = -1
        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException:
                # sys.exc_info() fetches the exception portably across
                # Python versions
                err = sys.exc_info()[1]
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                # running off the end of the input counts as failing there
                inputLen = len(instring)
                if inputLen > furthestFailLoc:
                    furthestFailure = ParseException(instring,inputLen,alt.errmsg,self)
                    furthestFailLoc = inputLen

        # only reached when no alternative matched
        if self.exprs:
            raise furthestFailure
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Support 'expr |= other'; a string is converted to a Literal."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(alt) for alt in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"
        return self.strRepr

    def checkRecursion(self, parseElementList):
        chain = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion(chain)
01955
class Each(ParseExpression):
    """Requires every given expression to be found, in any order.

    Expressions may be separated by whitespace.  May be constructed using
    the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # may be empty only if every contained expression may be empty
        canBeEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                canBeEmpty = False
                break
        self.mayReturnEmpty = canBeEmpty
        self.skipWhitespace = True

        # Sort the given expressions into buckets.  For Optional, ZeroOrMore
        # and OneOrMore wrappers it is the wrapped expression that is stored.
        optionals = []
        multioptionals = []
        multirequired = []
        plainRequired = []
        for e in exprs:
            if isinstance(e,Optional):
                optionals.append( e.expr )
            if isinstance(e,ZeroOrMore):
                multioptionals.append( e.expr )
            if isinstance(e,OneOrMore):
                multirequired.append( e.expr )
            if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)):
                plainRequired.append( e )
        self.optionals = optionals
        self.multioptionals = multioptionals
        self.multirequired = multirequired
        # a OneOrMore body must be seen at least once, so it is also required
        self.required = plainRequired + multirequired

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the expressions in whatever order the input presents them.

        A first pass uses tryParse to discover the order in which the
        expressions match; a second pass re-parses in that order to collect
        the tokens.  Raises ParseException if a required expression never
        matched.
        """
        probeLoc = loc
        pendingReqd = self.required[:]
        pendingOpt = self.optionals[:]
        matchOrder = []

        # Pass 1: keep sweeping over the candidates until a whole sweep
        # produces no match at all.
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failures = 0
            for e in candidates:
                try:
                    probeLoc = e.tryParse( instring, probeLoc )
                except ParseException:
                    failures += 1
                    continue
                matchOrder.append(e)
                # one-shot expressions are retired once matched
                if e in pendingReqd:
                    pendingReqd.remove(e)
                elif e in pendingOpt:
                    pendingOpt.remove(e)
            if failures == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join( [ _ustr(e) for e in pendingReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # Pass 2: parse for real, in the discovered order.
        collected = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            collected.append(results)

        # Merge the per-expression results; a results name that occurs more
        # than once ends up holding the concatenation of all its values.
        finalResults = ParseResults([])
        for r in collected:
            alreadyNamed = finalResults.keys()
            dups = {}
            for k in r.keys():
                if k in alreadyNamed:
                    combined = ParseResults(finalResults[k])
                    combined += ParseResults(r[k])
                    dups[k] = combined
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if any, else a cached "{a & b}" rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            rendered = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " & ".join( rendered ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Propagate the recursion check to every contained expression."""
        visited = parseElementList + [ self ]
        for e in self.exprs:
            e.checkRecursion( visited )
02034
02035
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement that wraps one contained expression.

    Subclasses combine or post-process the tokens of the wrapped expression,
    which is stored in self.expr (a plain string is converted to a Literal;
    None is allowed and means "not defined yet").
    """
    def __init__( self, expr, savelist=False ):
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # inherit the parsing characteristics of the wrapped expression
            # (statement order kept exactly as in the original)
            self.mayIndexError = expr.mayIndexError
            self.skipWhitespace = expr.skipWhitespace
            self.setWhitespaceChars( expr.whiteChars )
            self.saveAsList = expr.saveAsList

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; fail if there is none."""
        if self.expr is None:
            raise ParseException("",loc,self.errmsg,self)
        return self.expr._parse( instring, loc, doActions )

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a private copy of the
        wrapped expression.  Returns self."""
        self.skipWhitespace = False
        # work on a copy so other users of the same expression are unaffected
        self.expr = copy.copy(self.expr)
        if self.expr is not None:
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register an ignorable expression here and on the wrapped expression.

        A Suppress instance that is already registered is not added again.
        Returns self.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            # forward whatever the base class actually stored
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression.  Returns self."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if this element is already on the
        path being checked; otherwise continue into the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        visited = parseElementList + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( visited )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then check this element for
        recursion.  The shared default list is only copied, never mutated."""
        trace = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(trace)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class rendering when available, otherwise a cached
        "ClassName:(expr)" description of the wrapped expression."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # Base rendering unavailable (presumably no name assigned yet);
            # fall through to the generated description.  Catching Exception
            # rather than everything lets KeyboardInterrupt/SystemExit
            # propagate instead of being silently swallowed.
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
02103
02104
class FollowedBy(ParseElementEnhance):
    """Positive lookahead on the given parse expression.

    FollowedBy does *not* advance the parsing position within the input
    string; it only verifies that the given expression matches at the
    current position.  It always returns a null token list.
    """
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Probe the expression at loc; on success report loc unchanged and
        no tokens.  A failed probe raises from tryParse."""
        self.expr.tryParse( instring, loc )
        noTokens = []
        return loc, noTokens
02117
02118
class NotAny(ParseElementEnhance):
    """Negative lookahead on the given parse expression.

    NotAny does *not* advance the parsing position within the input string;
    it only verifies that the given expression does *not* match at the
    current position.  NotAny also does *not* skip over leading whitespace,
    and it always returns a null token list.  May be constructed using the
    '~' operator.
    """
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # Only this element's own flag is cleared; leaveWhitespace() is not
        # used here, so the wrapped expression is left untouched.
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)
        # one pre-built exception instance is reused for every failure
        self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when the wrapped expression fails at loc;
        raise ParseException when it matches."""
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent - that is the success case
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        """Return the assigned name if any, else a cached "~{expr}" rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{%s}" % ( _ustr(self.expr), )

        return self.strRepr
02154
02155
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition: zero or more matches of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the expression as many times as it will match.

        The loop is ended by the first ParseException or IndexError; not
        matching at all is fine and yields an empty token list with loc
        unchanged.
        """
        tokens = []
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions )
            mustSkipIgnorables = bool( self.ignoreExprs )
            while True:
                nextLoc = loc
                if mustSkipIgnorables:
                    nextLoc = self.skipIgnorables( instring, loc )
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                # keep results that carry tokens or named values
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if any, else a cached "[expr]..." rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[%s]..." % ( _ustr(self.expr), )

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Same as the inherited setResultsName, but the element returned is
        additionally flagged to save its results as a list."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
02193
02194
class OneOrMore(ParseElementEnhance):
    """Repetition: one or more matches of the given expression."""
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the expression once (failure propagates to the caller),
        then as many further times as it will match."""
        # the mandatory first match is made outside the try block
        loc, tokens = self.expr._parse( instring, loc, doActions )
        try:
            mustSkipIgnorables = bool( self.ignoreExprs )
            while True:
                nextLoc = loc
                if mustSkipIgnorables:
                    nextLoc = self.skipIgnorables( instring, loc )
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                # keep results that carry tokens or named values
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if any, else a cached "{expr}..." rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{%s}..." % ( _ustr(self.expr), )

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Same as the inherited setResultsName, but the element returned is
        additionally flagged to save its results as a list."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
02228
class _NullToken(object):
    """Placeholder value that is false in a boolean context and prints as
    the empty string.  Used as the "no default given" sentinel of Optional."""
    def __bool__(self):
        return False
    # Python 2 looks up __nonzero__ (not __bool__) for truth testing; without
    # this alias an instance would be truthy there.
    __nonzero__ = __bool__
    def __str__(self):
        return ""

# The single shared sentinel instance; it is compared by identity.
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    A default value may be supplied; it is returned as the only token when
    the expression is not found.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the wrapped expression's result, or - when it does not
        match - the default value (if one was given) at the unchanged loc."""
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions )
        except (ParseException,IndexError):
            # identity test against the sentinel: any other value, including
            # a false one, counts as an explicit default
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            else:
                tokens = [ self.defaultValue ]

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if any, else a cached "[expr]" rendering."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[%s]" % ( _ustr(self.expr), )

        return self.strRepr
02265
02266
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched
    expression is found.

    If include is set to true, the matched expression is also consumed.
    The ignore argument is used to define grammars (typically quoted
    strings and comments) that might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # attach the ignorable to a private copy of the target so the
            # caller's expression is not modified
            self.expr = copy.copy( self.expr )
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.errmsg = "No match found for "+_ustr(self.expr)
        # one pre-built exception instance is reused for every failure
        self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time until the target matches.

        Returns (loc, [skipped_text]) positioned at the start of the target,
        or - when the target is included - positioned after it, with the
        target's tokens appended when they are non-empty.  Raises
        ParseException if the target is never found.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        while loc <= instrlen:
            try:
                loc = expr.skipIgnorables( instring, loc )
                # probe only: never run parse actions while searching
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                if self.includeMatch:
                    skipText = instring[startLoc:loc]
                    # Consume the target, honouring the caller's doActions so
                    # that parse actions do not fire during look-ahead-only
                    # parsing (previously they were always run here).
                    loc,mat = expr._parse(instring,loc,doActions)
                    if mat:
                        return loc, [ skipText, mat ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ instring[startLoc:loc] ]
            except (ParseException,IndexError):
                loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
02307
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the Forward variable
    using the '<<' operator.

    Note: take care when assigning to Forward not to overlook precedence of
    operators.  Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is
    recommended that you explicitly group the values inserted into the
    Forward::
        fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Define the forwarded expression (a string becomes a Literal).
        Returns self."""
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        # drop any cached rendering of a previous definition
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; unlike the
        inherited version the contained expression is not touched.
        Returns self."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once.  The flag is set before
        descending so a recursive grammar cannot loop back in here."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression unless this element is already
        on the trace, then check for recursion.  The shared default list is
        only copied, never mutated."""
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name if any, else "Forward: " followed by the
        rendering of the contained expression ("None" when undefined)."""
        if hasattr(self,"name"):
            return self.name

        # Temporarily retype this instance so that a reference back to it,
        # reached while rendering self.expr, prints as "..." instead of
        # recursing without end.  The instance's own class is remembered and
        # restored afterwards, so an instance of a Forward subclass is not
        # silently turned into a plain Forward.
        actualClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = actualClass
        return "Forward: "+retString
02364
class _ForwardNoRecurse(Forward):
    """Stand-in class that Forward.__str__ switches an instance to while its
    contents are being rendered; it prints as an ellipsis."""
    def __str__( self ):
        ellipsis = "..."
        return ellipsis
02368
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted for signature compatibility but is not passed
        # on; saveAsList is always reset to False below.
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
02374
02375
class Upcase(TokenConverter):
    """Converter that upper-cases all matching tokens.

    Deprecated: constructing one emits a DeprecationWarning; use the
    upcaseTokens parse action instead.
    """
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        # stacklevel=2 attributes the warning to the code creating the Upcase
        warnings.warn(
            "Upcase class is deprecated, use upcaseTokens parse action instead",
            DeprecationWarning,
            stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return the tokens converted to upper case."""
        return [ string.upper(tok) for tok in tokenlist ]
02385
02386
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.

    By default, the matching patterns must also be contiguous in the input
    string; this can be disabled by specifying 'adjacent=False' in the
    constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # For adjacent matching, whitespace skipping is switched off in the
        # contained expressions and then re-enabled on the Combine itself, so
        # leading whitespace before the whole group is still skipped.
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore( self, other ):
        """Register an ignorable expression.  When matching adjacently it is
        registered on this element only, not on the contained expression.
        Returns self."""
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens by their single concatenated string,
        keeping whatever else the copied results object carries."""
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join( tokenlist._asStringList(self.joinString) )
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [ combined ]
        return combined
02417
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Wrap the matched tokens in an enclosing one-element list."""
        grouped = [ tokenlist ]
        return grouped
02426
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a
    dictionary.

    Each element can also be referenced using the first token in the
    expression as its key.  Useful for tabular report scraping when the
    first column can be used as an item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add one named entry per element of tokenlist.

        The key is the stripped string form of the element's first token;
        the stored value is a (value, position) pair.
        """
        for position,tok in enumerate(tokenlist):
            key = _ustr(tok[0]).strip()
            count = len(tok)
            if count == 1:
                # a key with nothing after it
                value = ""
            elif count == 2 and not isinstance(tok[1],ParseResults):
                # a key followed by one plain token
                value = tok[1]
            else:
                # everything after the key
                remainder = tok.copy()
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder,ParseResults) and remainder.keys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[key] = (value,position)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
02455
02456
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        """Discard whatever was matched."""
        nothing = []
        return nothing

    def suppress( self ):
        """Already suppressed - return this same element."""
        return self
02464
02465
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.callable = ParserElement.normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        """Run the wrapped action on first use; any later call raises
        ParseException.  The wrapper is marked as used only after the
        action has returned."""
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        self.called = True
        return results
02477
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes a line to stderr before and after each call
    of f, showing the input line, the location, the tokens and the value
    returned.
    """
    def z(*paArgs):
        thisFunc = f.func_name
        # the last three arguments are always (string, location, tokens);
        # an extra leading argument is taken to be the bound instance
        s,l,t = paArgs[-3:]
        if len(paArgs) > 3:
            owner = paArgs[0].__class__.__name__
            thisFunc = owner + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        ret = f(*paArgs)
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    return z
02490
02491
02492
02493
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','.

    By default, the list elements and delimiters can have intervening
    whitespace, and comments, but this can be overridden by passing
    'combine=True' in the constructor.  If combine is set to True, the
    matching tokens are returned as a single token string, with the
    delimiters included; otherwise, the matching tokens are returned as a
    list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = exprText + " [" + _ustr(delim) + " " + exprText + "]..."
    if combine:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(dlName)
02507
def countedArray( expr ):
    """Helper to define a counted list of expressions.

    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens return the array of expr tokens as a list - the
    leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # Once the count has been parsed, define the array part of the
        # grammar to require exactly that many items (an empty group for 0).
        n = int(t[0])
        if n:
            body = Group(And([expr]*n))
        else:
            body = Group(empty)
        arrayExpr << body
        return []
    countField = Word(nums).setParseAction(countFieldParseAction)
    return ( countField + arrayExpr )
02521
def _escapeRegexRangeChars(s):
    """Escape the characters of s that are special inside a regex [...] set
    (backslash, ^, - and ]) and spell newline and tab as \\n and \\t."""
    # The backslash must be handled first so that the backslashes introduced
    # by the later substitutions are not themselves doubled.
    substitutions = (
        ("\\", "\\\\"),
        ("^",  "\\^"),
        ("-",  "\\-"),
        ("]",  "\\]"),
        ("\n", "\\n"),
        ("\t", "\\t"),
    )
    escaped = s
    for plain,replacement in substitutions:
        escaped = escaped.replace(plain,replacement)
    return _ustr(escaped)
02529
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure
    to do longest-first testing when there is a conflict, regardless of the
    input order, but returns a MatchFirst for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list (or tuple) of
       string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a MatchFirst object (if
       caseless=True, or if creating a Regex raises an exception)

    Any other type of argument produces a SyntaxWarning and an expression
    with no alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # Always work on a real list: the caller's sequence must not be
        # modified, and a tuple could not be reordered in place below.
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # fall through with no symbols instead of failing on an unbound name
        symbols = []

    # Remove duplicates, and move any symbol in front of an earlier symbol
    # that is a prefix of it, so the shorter one cannot mask the longer one.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing after position i conflicts with it
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character set suffices
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # best effort only: fall back to the MatchFirst form below
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
02588
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the
    respective patterns for the key and value.

    Takes care of defining the Dict, ZeroOrMore, and Group tokens in the
    proper order.  The key pattern can include delimiting markers or
    punctuation, as long as they are suppressed, thereby leaving the
    significant key text.  The value pattern can include named results, so
    that the Dict results can include named token fields.
    """
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
02598
# a single backslash character
_bslash = chr(92)
# every character of string.printable that is not whitespace
printables = "".join( filter( lambda ch: ch not in string.whitespace, string.printable ) )
02601
02602
# Shared, pre-named instances of the argument-less matchers defined earlier
# in this module.

# whole-string anchors
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# line anchors
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")

# the Empty matcher
empty       = Empty().setName("empty")
02608
# Grammar used by srange() to read a regex-style "[...]" character set.

# a backslash followed by one punctuation character; yields that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(
    lambda s,l,t: t[0][1] )

# printable characters other than backslash and closing bracket
_printables_less_backslash = "".join( [ ch for ch in printables if ch not in r"\]" ] )

# a backslash-0x hexadecimal escape; yields the character with that code
_escapedHexChar = Combine(
    Suppress(_bslash + "0x") + Word(hexnums)
    ).setParseAction( lambda s,l,t: unichr(int(t[0],16)) )

# a backslash-0 octal escape; yields the character with that code
_escapedOctChar = Combine(
    Suppress(_bslash) + Word("0","01234567")
    ).setParseAction( lambda s,l,t: unichr(int(t[0],8)) )

_singleChar = ( _escapedPunc
              | _escapedHexChar
              | _escapedOctChar
              | Word(_printables_less_backslash,exact=1) )

# two single characters joined by a dash, grouped as a (low, high) pair
_charRange = Group( _singleChar + Suppress("-") + _singleChar )

_reBracketExpr = ( "["
                 + Optional("^").setResultsName("negate")
                 + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body")
                 + "]" )
02616
def _expanded(p):
    """Expand one parsed part of a character set: a (low, high) range given
    as ParseResults becomes the string of all characters from low to high;
    anything else is returned unchanged."""
    if isinstance(p,ParseResults):
        chars = ''.join( [ unichr(code) for code in range(ord(p[0]),ord(p[1])+1) ] )
        # an empty expansion falls through and returns p itself
        if chars:
            return chars
    return p
02618
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.
       Borrows syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is
       the expanded character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the input cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # Deliberately forgiving: any failure to parse or expand yields "".
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return ""
02639
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal
    value.  Especially useful when used with transformString().

    The returned parse action accepts any arguments and always returns a
    fresh one-element list holding replStr.
    """
    def _replFunc(*args):
        replacement = [replStr]
        return replacement
    return _replFunc
02647
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted
    strings.  To use, add this parse action to a quoted string using::
        quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    # drop the first and the last character (the quotation marks)
    return quoted[1:-1]
02654
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case.

    Returns a new list with each token upper-cased.
    """
    # Each token's own upper() is used rather than the unbound str.upper, so
    # unicode tokens are converted too instead of raising TypeError.
    return [ tok.upper() for tok in t ]
02658
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case.

    Returns a new list with each token lower-cased.
    """
    # Each token's own lower() is used rather than the unbound str.lower, so
    # unicode tokens are converted too instead of raising TypeError.
    return [ tok.lower() for tok in t ]
02662
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions,
    given a tag name.

    With xml true the tag name is matched as given and attribute values
    must be double-quoted strings; otherwise the tag name is matched
    caselessly, attribute names are lower-cased, and attribute values may be
    quoted with either quote character or left unquoted.  Returns the pair
    (openTag, closeTag).
    """
    attrName = Word(alphanums)
    if xml:
        attrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagName = Keyword(tagStr)
        attribute = Group( attrName + Suppress("=") + attrValue )
    else:
        # an unquoted value runs up to, but does not include, the closing ">"
        valueChars = printables.replace(">", "")
        attrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(valueChars)
        tagName = Keyword(tagStr,caseless=True)
        attribute = Group( attrName.setParseAction(downcaseTokens) + Suppress("=") + attrValue )

    # an optional "/" before the closing ">" is reported as the named
    # result "empty"
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagName + Dict(ZeroOrMore(attribute)) + selfClosing + Suppress(">")
    closeTag = Combine("</" + Keyword(tagStr,caseless=not xml) + ">")

    # results are named "start<Tag>" / "end<Tag>", with the tag name
    # title-cased and any ":" removed
    nameSuffix = "".join(tagStr.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
02684
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name.  Returns the pair (openTag, closeTag)."""
    openTag, closeTag = _makeTags( tagStr, False )
    return openTag, closeTag
02688
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name.  Returns the pair (openTag, closeTag)."""
    openTag, closeTag = _makeTags( tagStr, True )
    return openTag, closeTag
02692
# 8-bit alphabetic characters, given as three hexadecimal code ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xfe]")

# a backslash followed by any one character
_escapedChar = Regex(r"\\.")

# Quoted strings on a single line; a doubled quote character or a
# backslash escape may appear inside the quotes.
dblQuotedString = Regex(
    r'"([^"\n\r\\]|("")|(\\.))*"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'([^'\n\r\\]|('')|(\\.))*'"
    ).setName("string enclosed in single quotes")
quotedString = Regex(
    r'''("([^"\n\r\\]|("")|(\\.))*")|('([^'\n\r\\]|('')|(\\.))*')'''
    ).setName("quotedString using single or double quotes")
02699
02700
# Block comments; the non-greedy [\s\S]*? lets them span several lines.
cStyleComment = Regex(r"\/\*[\s\S]*?\*\/").setName("C style comment")
htmlComment = Regex(r"<!--[\s\S]*?-->")

# Comments running to the end of the line.
dblSlashComment = Regex(r"\/\/.*").setName("// comment")
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# Either form of C++ comment; Java uses the very same expression object.
cppStyleComment = Regex(r"(\/\*[\s\S]*?\*\/)|(\/\/.*)").setName("C++ style comment")
javaStyleComment = cppStyleComment

# Everything up to the end of the current line, leading whitespace included.
restOfLine = Regex(r".*").leaveWhitespace()
# every printable character except the comma
_noncomma = printables.replace(",", "")

# One unquoted list item: runs of non-comma characters, where a run of
# blanks/tabs is accepted only if neither a comma nor the end of the line
# follows it.
_commasepitem = Combine(
    OneOrMore(
        Word(_noncomma) +
        Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")

# Comma-delimited list of quoted strings and/or unquoted items; a missing
# item yields the default "".
commaSeparatedList = delimitedList(
    Optional( quotedString | _commasepitem, default="" )
).setName("commaSeparatedList")
02713
02714
02715 if __name__ == "__main__":
02716
02717 def test( teststring ):
02718 print teststring,"->",
02719 try:
02720 tokens = simpleSQL.parseString( teststring )
02721 tokenlist = tokens.asList()
02722 print tokenlist
02723 print "tokens = ", tokens
02724 print "tokens.columns =", tokens.columns
02725 print "tokens.tables =", tokens.tables
02726 print tokens.asXML("SQL",True)
02727 except ParseException, err:
02728 print err.line
02729 print " "*(err.column-1) + "^"
02730 print err
02731 print
02732
02733 selectToken = CaselessLiteral( "select" )
02734 fromToken = CaselessLiteral( "from" )
02735
02736 ident = Word( alphas, alphanums + "_$" )
02737 columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
02738 columnNameList = Group( delimitedList( columnName ) )
02739 tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
02740 tableNameList = Group( delimitedList( tableName ) )
02741 simpleSQL = ( selectToken + \
02742 ( '*' | columnNameList ).setResultsName( "columns" ) + \
02743 fromToken + \
02744 tableNameList.setResultsName( "tables" ) )
02745
02746 test( "SELECT * from XYZZY, ABC" )
02747 test( "select * from SYS.XYZZY" )
02748 test( "Select A from Sys.dual" )
02749 test( "Select AA,BB,CC from Sys.dual" )
02750 test( "Select A, B, C from Sys.dual" )
02751 test( "Select A, B, C from Sys.dual" )
02752 test( "Xelect A, B, C from Sys.dual" )
02753 test( "Select A, B, C frox Sys.dual" )
02754 test( "Select" )
02755 test( "Select ^^^ frox Sys.dual" )
02756 test( "Select A, B, C from Sys.dual, Table2 " )