Go to the documentation of this file.00001 import re
00002
00003 """ This module provides a rule-based regexp parser that automates the parsing of information from simple text-based files. """
00004
00005
def rulesRegexpCompileFunction(x):
    """Compile the regular expression of a single parsing rule.

    Meant to be applied to every rule of a rule list (e.g. with ``map`` or
    a list comprehension).

    x is a rule sequence: ``(field_names, regexp)`` or
    ``(field_names, regexp, flag)``.

    Returns a tuple of the same layout with ``x[1]`` replaced by
    ``re.compile(x[1])``. A two-element rule yields a two-element tuple;
    any other length yields a three-element tuple and therefore needs
    ``x[2]`` to exist.
    """
    if len(x) == 2:
        return (x[0], re.compile(x[1]))
    return (x[0], re.compile(x[1]), x[2])
00008
def rulesParser(parsing_rules, lines, compileRules = True):
    """
    Extract named fields from text lines using regular-expression rules.

    Every rule is matched (with ``match``, i.e. anchored at the start of the
    line) against every line. When a rule matches, each capture group is
    stored in the result dictionary under the field name found at the same
    position in the rule; a group that did not participate in the match is
    stored as "". A later match overwrites an earlier value of the same field.

    rules = [
      ( (field_name_1, field_name_2, ...), regular expression, /optional: "req" if the fields are required/ )
    ]

    An empty field name ("") marks a capture group whose value is not needed.

    parsing_rules -- iterable of rules in the layout above
    lines         -- iterable of strings to parse
    compileRules  -- if true, each rule is first passed through
                     rulesRegexpCompileFunction; if false, rule[1] must
                     already provide a ``match`` method (a compiled pattern)

    Returns a tuple (info, missing_fields):
      info           -- dict holding every non-empty field name of every rule;
                        fields that were never matched are present with ""
      missing_fields -- list of the field names of rules flagged "req"
                        (rule[2]) whose value in info is empty

    No exception is raised for missing required fields; the caller is
    expected to inspect missing_fields.
    """
    # The rules are walked once per line and once more afterwards, so they
    # must be a real sequence: a lazy map() object (Python 3) would be
    # exhausted after the first line.
    if compileRules:
        parsing_rules = [rulesRegexpCompileFunction(rule) for rule in parsing_rules]
    else:
        parsing_rules = list(parsing_rules)

    info = {}

    # Check every line against every rule and collect the captured groups.
    for line in lines:
        for rule in parsing_rules:
            match = rule[1].match(line)
            if not match:
                continue
            groups = match.groups()
            # The index advances for empty field names too, so that field
            # names stay aligned with their capture groups.
            for i, field_name in enumerate(rule[0]):
                # An empty field name marks an unneeded part of the regexp.
                if field_name != "":
                    value = groups[i]
                    # Unmatched optional groups are stored as "" not None.
                    info[field_name] = "" if value is None else value

    # Make sure every declared field exists, then report required fields
    # that ended up empty.
    missing_fields = []
    for rule in parsing_rules:
        required = len(rule) == 3 and rule[2] == "req"
        for field_name in rule[0]:
            if not field_name:
                continue
            if field_name not in info:
                info[field_name] = ""
            if required and not info[field_name]:
                missing_fields.append(field_name)
    return (info, missing_fields)