test
CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
parsingRulesHelper.py
Go to the documentation of this file.
1 import re
2 
3 """ The module includes rule based regexp parser to automatize the parsing of information from simple text based files """
4 
5 
6 """ a function used to compile the regexps, to be called with map """
def rulesRegexpCompileFunction(x):
    """
    Return a copy of the parsing rule `x` with its regular expression compiled.

    `x` is a rule tuple: (field_names, regexp) or (field_names, regexp, flag).
    A two element rule yields (field_names, compiled_regexp); any other rule
    yields (field_names, compiled_regexp, flag), where flag is x[2].
    Intended to be applied to every rule of a rule list (e.g. with map or a
    list comprehension) before the rules are handed to the parser.
    """
    compiled = re.compile(x[1])
    # explicit conditional expression instead of the fragile "cond and a or b" idiom
    return (x[0], compiled) if len(x) == 2 else (x[0], compiled, x[2])
8 
def rulesParser(parsing_rules, lines, compileRules = True):
    """
    Extract named fields from text lines with a list of regular expression rules.

    Each rule is a tuple:
        ( (field_name_1, field_name_2, ...), regular expression, /optionally: "req"/ )
    The regular expression (rule[1]) is tried with match() on every line, so it is
    anchored at the beginning of the line. When it matches, the i-th captured
    group is stored in the result dictionary under the i-th name of rule[0].
    An empty field name "" marks a group whose value is not needed; it still
    consumes its group position. Groups which did not participate in the match
    (None) are stored as the empty string "". If several lines or rules write
    the same field, the last match wins.

    Parameters:
        parsing_rules -- iterable of rules as described above
        lines         -- iterable of strings to be parsed
        compileRules  -- if True (default) rule[1] is a pattern string and is
                         compiled with rulesRegexpCompileFunction; if False the
                         rules must already contain compiled patterns

    Returns a tuple (info, missing_fields):
        info           -- dictionary field name -> matched string; fields that
                          were never matched are present with the value ""
        missing_fields -- list of field names of rules flagged "req" (rule[2])
                          whose value is empty. No exception is raised; the
                          caller decides what to do with the missing fields.
    """
    # The rules are traversed once per line and once more for the required
    # fields check, so they have to be a real list: a one-shot iterator (such
    # as the result of map() in Python 3) would be exhausted after the first line.
    if compileRules:
        parsing_rules = [rulesRegexpCompileFunction(rule) for rule in parsing_rules]
    else:
        parsing_rules = list(parsing_rules)

    info = {}
    # we dynamically check if a line passes any of the rules and in this way
    # put the information into the info dict
    for line in lines:
        for rule in parsing_rules:
            matched = rule[1].match(line)
            if not matched:
                continue
            groups = matched.groups()
            for i, field_name in enumerate(rule[0]):
                # we use an empty field name to mark unneeded parts of the regular expression
                if field_name != "":
                    # we do want to store None values as empty strings ""
                    #TODO: we might want to change it if we introduce multiple rules having the same result targets
                    value = groups[i]
                    info[field_name] = "" if value is None else value

    # For the values which do not exist we put "" and check for REQUIRED values
    missing_fields = []
    for rule in parsing_rules:
        is_required = len(rule) == 3 and rule[2] == "req"
        for field_name in rule[0]:
            if not field_name:
                continue
            if field_name not in info:
                info[field_name] = ""
            if is_required and not info[field_name]:
                missing_fields.append(field_name)
    return (info, missing_fields)
std::pair< typename Association::data_type::first_type, double > match(Reference key, Association association, bool bestMatchByMaxValue)
Generic matching function.
Definition: Utils.h:10