CMS 3D CMS Logo

VIDSelectorValidator.py
Go to the documentation of this file.
1 import md5
2 import ROOT
3 
4 # load FWLite C++ libraries
5 ROOT.gSystem.Load("libFWCoreFWLite.so")
6 ROOT.gSystem.Load("libDataFormatsFWLite.so")
7 ROOT.FWLiteEnabler.enable()
8 
9 #cms python data types
10 import FWCore.ParameterSet.Config as cms
11 
12 # load FWlite python libraries
13 from DataFormats.FWLite import Handle, Events
14 
15 #hasher= md5.new()
16 #
17 #hasher.update('hello world')
18 #
19 #print hasher.digest()
20 #print hasher.hexdigest()
21 
def __init__(self, selector, collection_type, collection_name):
    """Set up a validator for one selector and one input collection.

    Args:
        selector: the ID selector under test. Other methods of this
            object call it as ``selector(collection, index, event)``
            and use its ``name()``, ``md5String()`` and ``repr()``.
        collection_type: type string handed to ``Handle`` when the
            events are read.
        collection_name: product label used to fetch the collection
            from each event.
    """
    # hashlib.md5() is the supported replacement for the deprecated
    # md5.new(); it is imported locally so this block does not rely on
    # a new file-level import.
    import hashlib

    self.__hasher = hashlib.md5()
    self.__selector = selector
    self.__colltype = collection_type
    self.__collname = collection_name
    # All three sample lists start out empty so that runValidation()
    # can read every one of them even when only some of the
    # set*Files() methods have been called.
    self.__signalfiles = []
    self.__backgroundfiles = []
    self.__mixfiles = []
31 
def setSignalFiles(self, files):
    """Store a private copy of the signal-sample file list.

    Args:
        files: list of input file names for the signal sample.

    Raises:
        Exception: with args ``('BadFileInput', <message>)`` when
            ``files`` is not a ``list``.
    """
    if isinstance(files, list):
        # Copy, so later changes to the caller's list do not leak in.
        self.__signalfiles = list(files)
        return
    raise Exception('BadFileInput','You need to give "setSignalFiles" a list of strings')
36 
def setBackgroundFiles(self, files):
    """Store a private copy of the background-sample file list.

    Args:
        files: list of input file names for the background sample.

    Raises:
        Exception: with args ``('BadFileInput', <message>)`` when
            ``files`` is not a ``list``.
    """
    if isinstance(files, list):
        # Copy, so later changes to the caller's list do not leak in.
        self.__backgroundfiles = list(files)
        return
    raise Exception('BadFileInput','You need to give "setBackgroundFiles" a list of strings')
41 
def setMixFiles(self, files):
    """Store a private copy of the mixed-sample file list.

    Args:
        files: list of input file names for the mixed sample.

    Raises:
        Exception: with args ``('BadFileInput', <message>)`` when
            ``files`` is not a ``list``.
    """
    if isinstance(files, list):
        # Copy, so later changes to the caller's list do not leak in.
        self.__mixfiles = list(files)
        return
    raise Exception('BadFileInput','You need to give "setMixFiles" a list of strings')
46 
def runValidation(self):
    """Run the selector over every configured sample and print checksums.

    Three MD5 hex digests are printed, each a snapshot of the same
    running hash: after the input file names have been folded in, after
    the per-sample event digests have been folded in, and finally after
    the selector's own ``md5String()`` has been folded in.

    Raises:
        Exception: with args ``('NoInputFiles', <message>)`` when the
            signal, background and mix file lists are all empty.
    """
    # hashlib.md5() is the supported replacement for the deprecated
    # md5.new(); imported locally so no new file-level import is needed.
    # NOTE(review): update() is fed str values below, which is Python 2
    # behaviour; Python 3's hashlib needs bytes, so those values would
    # have to be encoded before this can run there.
    import hashlib

    samples = {
        'signal': self.__signalfiles,
        'background': self.__backgroundfiles,
        'mix': self.__mixfiles,
    }
    select = self.__selector

    # Single-argument print(...) parses on both Python 2 and Python 3.
    print('running validation for: %s' % (select.name()))

    if not any(samples.values()):
        raise Exception('NoInputFiles','There were no input files given, cannot validate!')

    # Samples are visited in sorted-key order so the digest does not
    # depend on dictionary ordering.
    sample_names = sorted(samples)

    # checksum of the input files
    for key in sample_names:
        self.processInputList(samples[key], key)

    print('input files checksum: %s' % (self.__hasher.hexdigest()))

    for key in sample_names:
        if not samples[key]:
            continue
        # Each sample gets its own digest; only its hex form is folded
        # into the running hash.
        local_hash = hashlib.md5()
        self.processEvents(samples[key], key, local_hash)
        self.__hasher.update(local_hash.hexdigest())

    print('event processing checksum: %s' % (self.__hasher.hexdigest()))

    self.__hasher.update(select.md5String())

    print('total checksum: %s' % (self.__hasher.hexdigest()))
77 
def processInputList(self,the_list,name):
    """Fold each input file name into the running hash and echo it.

    Args:
        the_list: iterable of file names belonging to one sample.
        name: sample label used in the printed line.
    """
    for item in the_list:
        self.__hasher.update(item)
        # Single-argument print(...) parses on both Python 2 and 3,
        # unlike the bare print statement.
        print('Input %s file: %s' % (name, item))
82 
def processEvents(self,the_list,name,hasher):
    """Run the selector over every object of one sample and hash the outcome.

    Args:
        the_list: input file names handed to ``Events``.
        name: sample label used in the printed summary.
        hasher: md5-like object (``update`` / ``hexdigest``). It receives
            the hex digest of every per-cut hash, then the pass count
            and the fail count as decimal strings.
    """
    # hashlib.md5() is the supported replacement for the deprecated
    # md5.new(); imported locally so no new file-level import is needed.
    # NOTE(review): the hashes below are fed str values, which is
    # Python 2 behaviour; Python 3's hashlib needs bytes.
    import hashlib

    def cut_lines():
        # Non-empty lines of repr(selector); each line is treated as
        # the record of one cut.
        return [line for line in repr(self.__selector).split('\n') if line != '']

    #data products
    handle, productLabel = Handle(self.__colltype), self.__collname

    #now loop over the events in each category
    events = Events(the_list)
    n_pass, n_fail = 0, 0

    # One digest per cut, seeded from the selector's repr() before any
    # object has been examined.
    initial_lines = cut_lines()
    sub_cutnames = [line.split()[2] for line in initial_lines]  # third token is the cut name
    sub_hashes = [hashlib.md5(line) for line in initial_lines]

    for event in events:
        event.getByLabel(productLabel, handle)
        for i, _ in enumerate(handle.product()):
            if self.__selector(handle.product(), i, event):
                n_pass += 1
            else:
                n_fail += 1
            # Presumably repr(selector) now reflects the decision for
            # this object; every cut line goes into that cut's digest.
            # Indexing (not zip) keeps the IndexError if the selector
            # ever reports more lines than it did initially.
            for icut, line in enumerate(cut_lines()):
                sub_hashes[icut].update(line)

    for sub_hash in sub_hashes:
        hasher.update(sub_hash.hexdigest())

    hasher.update(str(n_pass))
    hasher.update(str(n_fail))

    # Single-argument print(...) parses on both Python 2 and Python 3.
    print('%s sample pass : fail : hash -> %d : %d : %s' % (name, n_pass, n_fail, hasher.hexdigest()))
    print('%s sample cut breakdown:' % (name))
    for cut_name, sub_hash in zip(sub_cutnames, sub_hashes):
        print('\t%s hash -> %s' % (cut_name, sub_hash.hexdigest()))
#define update(a, b)
def __init__(self, selector, collection_type, collection_name)
def processEvents(self, the_list, name, hasher)
double split
Definition: MVATrainer.cc:139