CMS 3D CMS Logo

MatrixUtil.py
Go to the documentation of this file.
1 from __future__ import print_function
2 import os
class Matrix(dict):
    """Dictionary of workflows, keyed by the workflow number as a float.

    Assigning ``matrix[key] = steps`` wraps ``steps`` in a ``WF`` and stores
    it under ``float(key)``.  An existing entry is never replaced: the
    attempt is reported on stdout and otherwise ignored.
    """

    def __setitem__(self, key, value):
        """Register a new workflow.

        key   -- workflow number; anything accepted by ``float()``
        value -- iterable handed to ``WF`` as the workflow content

        Raises ValueError/TypeError if ``key`` cannot be converted to float.
        """
        number = float(key)
        # Test the normalised key, i.e. the one actually used for storage.
        # Testing the raw key would let e.g. '1.0' slip past the check and
        # silently overwrite the workflow stored under 1.0.
        if number in self:
            print("ERROR in Matrix")
            print("overwriting", key, "not allowed")
        else:
            self.update({number: WF(number, value)})

    def addOverride(self, key, override):
        """Forward ``override`` to the ``addOverride`` of workflow ``key``."""
        self[key].addOverride(override)
13 
14 #the class to collect all possible steps
class Steps(dict):
    """Dictionary collecting all known steps, with write-once keys.

    Plain item assignment refuses to replace an existing key and terminates
    the program instead; ``overwrite`` is the explicit way to alias one key
    to the value of another.
    """

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``; exit with status -9 if ``key`` exists."""
        if key not in self:
            self.update({key: value})
            # make the python file named <step>.py
            #if not '--python' in value: self[key].update({'--python':'%s.py'%(key,)})
            return

        print("ERROR in Step")
        print("overwriting", key, "not allowed")
        import sys
        sys.exit(-9)

    def overwrite(self, keypair):
        """Make ``keypair[0]`` refer to the value stored under ``keypair[1]``.

        Goes through ``dict.update`` and therefore bypasses the write-once
        check of ``__setitem__``.  Raises KeyError if ``keypair[1]`` is absent.
        """
        source_value = self[keypair[1]]
        target_key = keypair[0]
        self.update({target_key: source_value})
30 
class WF(list):
    """A workflow: a list of step names plus bookkeeping attributes.

    Attributes set by the constructor:
      num       -- the workflow number passed in as ``n``
      steps     -- resolved step entries, filled by ``interpret``
      overrides -- whatever was last given to ``addOverride`` (initially {})
    """

    def __init__(self, n, l):
        """Create workflow number ``n`` holding the items of iterable ``l``."""
        self.extend(l)
        self.num = n
        # the actual steps of this WF
        self.steps = []
        self.overrides = {}

    def addOverride(self, overrides):
        """Replace (not merge) the stored overrides with ``overrides``."""
        self.overrides = overrides

    def interpret(self, stepsDict):
        """Look every item of this workflow up in ``stepsDict``.

        Each resolved entry is printed and appended to ``self.steps``.
        Raises KeyError if an item is missing from ``stepsDict``.
        """
        for s in self:
            print('steps', s, stepsDict[s])
            # Append to the instance attribute; a bare ``steps`` is not
            # defined in this scope and raised NameError.
            self.steps.append(stepsDict[s])
45 
46 
47 
def expandLsInterval(lumis):
    """Return the range covering ``lumis[0]`` .. ``lumis[1]``, both included."""
    first = lumis[0]
    beyond_last = lumis[1] + 1
    return range(first, beyond_last)
50 
import json

# Certification JSON files for 2015 and 2016 collisions.  findFileInPath is
# defined outside this part of the file; presumably it resolves the relative
# path to an absolute one -- TODO confirm.
jsonFile2015 = findFileInPath("DPGAnalysis/Skims/data/Cert_13TeV_16Dec2015ReReco_Collisions15_25ns_50ns_JSON.txt")
jsonFile2016 = findFileInPath("DPGAnalysis/Skims/data/Cert_271036-274240_13TeV_PromptReco_Collisions16_JSON.txt")

# Both files are parsed once, at import time, into module-level objects.
with open(jsonFile2015) as data_file:
    data_json2015 = json.load(data_file)

with open(jsonFile2016) as data_file:
    data_json2016 = json.load(data_file)
61 
62 # return a portion of a golden json (the 2015 one by default):
63 # all LS of the requested runs by default; otherwise only as many LS segments as fit within maxNum
def selectedLS(list_runs=[],maxNum=-1,l_json=data_json2015):
    """Select lumi-section segments of the given runs from a certification json.

    list_runs -- run numbers (ints); only the first entry is type-checked
    maxNum    -- upper limit on the number of lumi sections, counted
                 cumulatively over all runs; -1 means no limit
    l_json    -- mapping from run number (as a string) to a list of
                 segments; the first and last element of a segment are used
                 as its bounds

    Returns a dict {run: [segment, ...]}, or None (after printing a message)
    when the input is invalid or nothing was selected.
    """
    # An empty list must not be indexed: it simply selects nothing and ends
    # in the "nothing selected" branch below instead of raising IndexError.
    if list_runs and not isinstance(list_runs[0], int):
        print("ERROR: list_runs must be a list of integers")
        return None

    local_dict = {}
    ls_count = 0

    for run in list_runs:
        run_key = str(run)
        if run_key not in l_json:
            print("run %s is NOT present in json %s\n\n"%(run, l_json))
            continue

        for LSsegment in l_json[run_key]:
            ls_count += (LSsegment[-1] - LSsegment[0] + 1)
            # Stop with this run as soon as the budget is exceeded; the
            # segment crossing the limit is not kept.
            if maxNum != -1 and ls_count > maxNum:
                break
            local_dict.setdefault(run, []).append(LSsegment)

    if local_dict:
        return local_dict

    print("No luminosity section interval passed the json and your selection; returning None")
    return None
98 
99 # print "\n\n\n THIS IS WHAT I RETURN: %s \n\n"%( selectedLS([251244,251251]) )
100 
101 
102 
103 
# Default for the ``events`` argument of the InputInfo constructor.
InputInfoNDefault = 2000000
def __init__(self,dataSet,dataSetParent='',label='',run=[],ls={},files=1000,events=InputInfoNDefault,split=10,location='CAF',ib_blacklist=None,ib_block=None) :
    """Store every constructor argument on the instance under the same name.

    No validation or copying is done.
    NOTE(review): ``run`` and ``ls`` have mutable defaults that are stored
    as-is, so instances built without these arguments share one list/dict.
    """
    # What to read.
    self.dataSet = dataSet
    self.dataSetParent = dataSetParent
    self.label = label
    self.location = location

    # Run / lumi-section selection.
    self.run = run
    self.ls = ls

    # Size-related settings.
    self.files = files
    self.events = events
    self.split = split

    # Filters applied when querying (see das() and queries()).
    self.ib_blacklist = ib_blacklist
    self.ib_block = ib_block
118 
def das(self, das_options, dataset):
    """Build the shell command that lists the input files through dasgoclient.

    das_options -- string inserted right after ``dasgoclient``
    dataset     -- passed on to ``self.queries``

    With runs selected, one dasgoclient call per query is issued.  With only
    a lumi selection, each query is piped through das-selected-lumis.py with
    the matching entry of ``self.lumis()``.  In both cases the calls are
    joined with ';' and wrapped in parentheses.  Without any selection the
    first query is used on its own.  Entries of ``self.ib_blacklist`` are
    removed with ``grep -E -v`` and the output is finally sorted.
    """
    has_runs = len(self.run) != 0

    if has_runs or self.ls:
        queries = self.queries(dataset)
        if has_runs:
            calls = ["dasgoclient %s --query '%s'" % (das_options, query) for query in queries]
        else:
            lumis = self.lumis()
            calls = []
            # Queries and lumi strings are both consumed from the back, so
            # they stay paired.
            for query in reversed(queries):
                calls.append("dasgoclient %s --query 'lumi,%s' --format json | das-selected-lumis.py %s " % (das_options, query, lumis.pop()))
        command = "({0})".format(";".join(calls))
    else:
        command = "dasgoclient %s --query '%s'" % (das_options, self.queries(dataset)[0])

    # Run filter on DAS output
    if self.ib_blacklist:
        exclusions = " ".join(["-e '{0}'".format(pattern) for pattern in self.ib_blacklist])
        command = command + " | grep -E -v " + exclusions

    from os import getenv
    if getenv("CMSSW_USE_IBEOS","false")=="true":
        sorter = " | ibeos-lfn-sort"
    else:
        sorter = " | sort -u"
    return command + sorter
141 
def lumiRanges(self):
    """Return an ``echo '{...}'`` shell command printing the lumi mask.

    With runs selected every run gets the single range [1, 268435455];
    otherwise the ranges stored in ``self.ls`` are printed as they are.
    Returns None when neither runs nor lumi sections are selected.
    """
    if len(self.run) != 0:
        entries = ['"%d":[[1,268435455]]\n' % (run,) for run in self.run]
    elif self.ls:
        entries = ['"%d" : %s\n' % (int(run), self.ls[run]) for run in self.ls.keys()]
    else:
        return None
    return "echo '{\n" + ",".join(entries) + "}'"
148 
def lumis(self):
    """Return one lumi string per run of ``self.ls``, in sorted run order.

    Within a run the entries are joined with ':'; an int entry is written
    as is, any other entry as "<entry[0]>,<entry[1]>".  Returns an empty
    list when ``self.ls`` is empty.
    """
    query_lumis = []
    if not self.ls:
        return query_lumis

    for run in sorted(self.ls.keys()):
        pieces = [
            str(rng) if isinstance(rng, int) else str(rng[0]) + "," + str(rng[1])
            for rng in self.ls[run]
        ]
        query_lumis.append(":".join(pieces))
    return query_lumis
161 
def queries(self, dataset):
    """Return the list of DAS "file ..." query strings for ``dataset``.

    The query addresses a block ("<dataset>#<ib_block>") when
    ``self.ib_block`` is set and the dataset itself otherwise.

    * With a lumi selection (``self.ls``): one query per run, in sorted run
      order and without site restriction.
    * Otherwise with a run list: one query per run, site-restricted.
    * Otherwise: a single site-restricted query.

    The site restriction defaults to T2_CH_CERN.  The environment variable
    CMSSW_DAS_QUERY_SITES replaces it when non-empty and removes it when set
    to the empty string.
    """
    query_by = "block" if self.ib_block else "dataset"
    query_source = "{0}#{1}".format(dataset, self.ib_block) if self.ib_block else dataset

    if self.ls :
        # if you have a LS list specified, still query das for the full run (multiple ls queries take forever)
        # and use step1_lumiRanges.log to run only on LS which respect your selection
        return ["file {0}={1} run={2}".format(query_by, query_source, query_run) for query_run in sorted(self.ls.keys())]

    requested_sites = os.environ.get("CMSSW_DAS_QUERY_SITES")
    if requested_sites is None:
        site = " site=T2_CH_CERN"
    elif requested_sites:
        site = " site=%s" % requested_sites
    else:
        site = ""

    if len(self.run) != 0:
        return ["file {0}={1} run={2}{3}".format(query_by, query_source, query_run, site) for query_run in self.run]
    return ["file {0}={1}{2}".format(query_by, query_source, site)]
197 
def __str__(self):
    """Describe the input: data set, optional block and run list."""
    if not self.ib_block:
        return "input from: {0} with run {1}".format(self.dataSet, self.run)
    return "input from: {0} with run {1}#{2}".format(self.dataSet, self.ib_block, self.run)
202 
203 
204 # merge dictionaries, with priority on the [0] index
def merge(dictlist,TELL=False):
    """Merge a list of dictionaries; earlier entries take priority.

    dictlist -- list of dictionaries; ``dictlist[0]`` wins on key clashes
    TELL     -- when true, print the intermediate lists while merging

    Returns a new dictionary (a shallow copy, so values are shared with the
    inputs).  The inputs themselves are not modified.  An empty ``dictlist``
    yields an empty dict.
    """
    import copy

    # Nothing to merge: the reduction below would index an empty list.
    if not dictlist:
        return {}

    remaining = dictlist
    while True:
        last = len(remaining) - 1
        if TELL: print(last, remaining)
        if last == 0:
            # ONLY ONE ITEM LEFT
            return copy.copy(remaining[0])

        # Fold the last two items into one and go round again.
        reduced = remaining[0:max(0, last - 1)]
        if TELL: print(reduced)
        # copy the last item, then let the last-but-one override it
        folded = copy.copy(remaining[last])
        folded.update(remaining[last - 1])
        reduced.append(folded)
        remaining = reduced
222 
def remove(d,key,TELL=False):
    """Return a deep copy of ``d`` without ``key``; ``d`` itself is untouched.

    With ``TELL`` true the dictionary is printed before and after the
    removal.  Raises KeyError if ``key`` is not in ``d``.
    """
    from copy import deepcopy

    trimmed = deepcopy(d)
    if TELL:
        print("original dict, BEF: %s"%d)
    del trimmed[key]
    if TELL:
        print("copy-removed dict, AFT: %s"%trimmed)
    return trimmed
230 
231 
232 
233 
# Option dictionary selecting the 'auto:run1_mc' conditions.
stCond = {'--conditions': 'auto:run1_mc'}
def Kby(N,s):
    """Return {'--relval': '<N>000,<s>'}, i.e. N with three zeros appended.

    Presumably N counts thousands of events and s is the split size -- TODO
    confirm against the consumers of '--relval'.
    """
    relval = '%s000,%s'%(N,s)
    return {'--relval': relval}
def Mby(N,s):
    """Return {'--relval': '<N>000000,<s>'}, i.e. N with six zeros appended.

    Presumably N counts millions of events and s is the split size -- TODO
    confirm against the consumers of '--relval'.
    """
    relval = '%s000000,%s'%(N,s)
    return {'--relval': relval}
239 
def changeRefRelease(steps,listOfPairs):
    """Replace reference strings in the inputs of every step, in place.

    steps       -- mapping of step name to option dictionary
    listOfPairs -- sequence of (old, new) string pairs

    Two entries of each step are rewritten when present: the ``dataSet``
    attribute of the 'INPUT' object and the '--pileup_input' string.
    """
    for step_name in steps:
        options = steps[step_name]

        if 'INPUT' in options:
            # NOTE(review): every pair is applied to the ORIGINAL data set
            # name, so when several pairs match only the last one survives,
            # whereas the pile-up replacements below accumulate.
            original_name = options['INPUT'].dataSet
            for old_ref, new_ref in listOfPairs:
                if old_ref in original_name:
                    options['INPUT'].dataSet = original_name.replace(old_ref, new_ref)

        if '--pileup_input' in options:
            for old_ref, new_ref in listOfPairs:
                if old_ref in options['--pileup_input']:
                    options['--pileup_input'] = options['--pileup_input'].replace(old_ref, new_ref)
251 
def addForAll(steps,d):
    """Update the option dictionary of every step with ``d``, in place."""
    for step_name in steps:
        options = steps[step_name]
        options.update(d)
255 
256 
def genvalid(fragment,d,suffix='all',fi='',dataSet=''):
    """Return a shallow copy of ``d`` adapted for a 'genvalid' step.

    fragment -- stored under 'cfg'
    d        -- base option dictionary; must contain '-s' when ``suffix`` is set
    suffix   -- if non-empty, 'genvalid' in the '-s' value becomes
                'genvalid_<suffix>'
    fi       -- if set, '--filein' becomes 'lhe:<fi>' (formatted with %d)
    dataSet  -- if set, '--filein' becomes 'das:<dataSet>'; wins over ``fi``
    """
    from copy import copy as shallow_copy

    options = shallow_copy(d)
    if suffix:
        sequence = options['-s']
        options['-s'] = sequence.replace('genvalid', 'genvalid_' + suffix)
    if fi:
        options['--filein'] = 'lhe:%d'%(fi,)
    if dataSet:
        # Applied last on purpose: a data set overrides an LHE input.
        options['--filein'] = 'das:%s'%(dataSet,)
    options['cfg'] = fragment
    return options
268 
269 
def queries(self, dataset)
Definition: MatrixUtil.py:162
Definition: merge.py:1
def interpret(self, stepsDict)
Definition: MatrixUtil.py:41
def __setitem__(self, key, value)
Definition: MatrixUtil.py:16
def genvalid(fragment, d, suffix='all', fi='', dataSet='')
Definition: MatrixUtil.py:257
def overwrite(self, keypair)
Definition: MatrixUtil.py:27
def replace(string, replacements)
def __init__(self, dataSet, dataSetParent='', label='', run=[], ls={}, files=1000, events=InputInfoNDefault, split=10, location='CAF', ib_blacklist=None, ib_block=None)
Definition: MatrixUtil.py:106
def __init__(self, n, l)
Definition: MatrixUtil.py:32
def lumiRanges(self)
Definition: MatrixUtil.py:142
def findFileInPath(theFile)
def expandLsInterval(lumis)
Definition: MatrixUtil.py:48
def addForAll(steps, d)
Definition: MatrixUtil.py:252
def das(self, das_options, dataset)
Definition: MatrixUtil.py:119
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def __setitem__(self, key, value)
Definition: MatrixUtil.py:4
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
def selectedLS(list_runs=[], maxNum=-1, l_json=data_json2015)
Definition: MatrixUtil.py:64
def remove(d, key, TELL=False)
Definition: MatrixUtil.py:223
def changeRefRelease(steps, listOfPairs)
Definition: MatrixUtil.py:240
#define update(a, b)
def Mby(N, s)
Definition: MatrixUtil.py:237
def addOverride(self, key, override)
Definition: MatrixUtil.py:11
def Kby(N, s)
Standard release validation samples ####.
Definition: MatrixUtil.py:235
def addOverride(self, overrides)
Definition: MatrixUtil.py:38
#define str(s)
def merge(dictlist, TELL=False)
Definition: MatrixUtil.py:205