
Go to the documentation of this file.
00001 import re
00003 """ This module provides a rule-based regexp parser that automates extracting information from simple text-based files. """
def rulesRegexpCompileFunction(x):
    """Compile the regular expression of a single parsing rule.

    Intended to be applied to a list of rules with ``map``.

    ``x`` is a rule tuple, either ``(field_names, pattern)`` or
    ``(field_names, pattern, flag)``.  The returned tuple has the same
    layout, with ``pattern`` replaced by ``re.compile(pattern)``; the
    field names and the optional third element are passed through as is.
    """
    compiled = re.compile(x[1])
    # Two-element rules carry no flag; anything else is expected to
    # provide one in position 2.
    if len(x) == 2:
        return (x[0], compiled)
    return (x[0], compiled, x[2])
def rulesParser(parsing_rules, lines, compileRules = True):
    """Extract named fields from text lines using regular-expression rules.

    Each rule is a tuple ``(field_names, regexp)`` or
    ``(field_names, regexp, "req")``::

        rules = [
            ((field_name_1, field_name_2), regular_expression, "req"),
        ]

    * ``field_names`` -- dictionary keys, one per capture group of the
      regexp, in group order.  An empty name ``""`` marks a group whose
      value is not stored.
    * ``regexp`` -- a pattern string when ``compileRules`` is true,
      otherwise an already compiled pattern (anything with ``match``).
    * ``"req"`` as third element marks all fields of the rule as required.

    Every line is tested against every rule with ``match`` (i.e. anchored
    at the start of the line).  When a rule matches, each named group is
    stored under its field name; a group that did not participate in the
    match (``None``) is stored as ``""``.  A later match overwrites an
    earlier value for the same field name.

    Returns a tuple ``(info, missing_fields)``:

    * ``info`` -- dict holding every non-empty field name mentioned in
      the rules; fields that were never matched are set to ``""``.
    * ``missing_fields`` -- list of field names from ``"req"`` rules whose
      value is empty after parsing.  No exception is raised for them; the
      caller decides how to react.
    """
    if compileRules:
        parsing_rules = map(rulesRegexpCompileFunction, parsing_rules)
    # The rules are walked once per input line and once more for the
    # required-field check, so they must be re-iterable.  map() returns a
    # one-shot iterator on Python 3, and callers may pass an iterator too.
    parsing_rules = list(parsing_rules)

    info = {}
    for line in lines:
        for rule in parsing_rules:
            match = rule[1].match(line)
            if not match:
                continue
            groups = match.groups()
            for i, field_name in enumerate(rule[0]):
                # An empty field name marks a group that is only needed
                # by the regular expression itself.
                if field_name != "":
                    value = groups[i]
                    # Unmatched optional groups are stored as "".
                    # TODO: revisit if several rules share target fields.
                    info[field_name] = "" if value is None else value

    # Fill in fields that were never matched and collect required ones
    # that ended up empty.
    missing_fields = []
    for rule in parsing_rules:
        required = len(rule) == 3 and rule[2] == "req"
        for field_name in rule[0]:
            if not field_name:
                continue
            info.setdefault(field_name, "")
            if required and not info[field_name]:
                missing_fields.append(field_name)
    return (info, missing_fields)