CMS 3D CMS Logo

MatrixUtil.py
Go to the documentation of this file.
1 from __future__ import print_function
2 import os
class Matrix(dict):
    """Dict of workflows keyed by workflow number (a float).

    Assigning with `m[num] = steps` wraps the value in a WF object; a key
    may only ever be set once — attempting to overwrite prints an error
    and leaves the existing entry untouched.
    """
    def __setitem__(self, key, value):
        if key in self:
            print("ERROR in Matrix")
            print("overwriting", key, "not allowed")
        else:
            wf_num = float(key)
            self.update({wf_num: WF(wf_num, value)})

    def addOverride(self, key, override):
        # Delegate to the stored WF object.
        self[key].addOverride(override)
13 
# the class to collect all possible steps
class Steps(dict):
    """Dict of named cmsDriver steps.

    Duplicate keys are a fatal configuration error: setting an existing
    key prints a message and terminates the process.
    """
    def __setitem__(self, key, value):
        if key not in self:
            self.update({key: value})
        else:
            print("ERROR in Step")
            print("overwriting", key, "not allowed")
            import sys
            sys.exit(-9)

    def overwrite(self, keypair):
        # Alias keypair[0] to the step already stored under keypair[1].
        self.update({keypair[0]: self[keypair[1]]})
30 
class WF(list):
    """A single workflow: a list of step names plus its workflow number.

    Parameters
    ----------
    n : float  -- the workflow number
    l : list   -- the step names making up this workflow
    """
    def __init__(self, n, l):
        self.extend(l)
        self.num = n
        # the actual (resolved) steps of this WF, filled by interpret()
        self.steps = []
        self.overrides = {}

    def addOverride(self, overrides):
        self.overrides = overrides

    def interpret(self, stepsDict):
        """Resolve each step name against stepsDict into self.steps."""
        for s in self:
            print('steps', s, stepsDict[s])
            # Bug fix: was `steps.append(...)`, a NameError unless a global
            # `steps` happened to exist; the initialized-but-unused
            # self.steps is clearly the intended accumulator.
            self.steps.append(stepsDict[s])
45 
46 
47 
def expandLsInterval(lumis):
    """Expand a [first, last] lumisection pair into the inclusive range."""
    first, last = lumis[0], lumis[1]
    return range(first, last + 1)
50 
# Golden-json certification files, located via the CMSSW search path.
# NOTE(review): findFileInPath is defined/imported outside this chunk — confirm.
jsonFile2015 = findFileInPath("DPGAnalysis/Skims/data/Cert_13TeV_16Dec2015ReReco_Collisions15_25ns_50ns_JSON.txt")
jsonFile2016 = findFileInPath("DPGAnalysis/Skims/data/Cert_271036-274240_13TeV_PromptReco_Collisions16_JSON.txt")

import json
# Parsed golden-json content; per the selection code below, keys are run
# numbers as strings mapping to lists of [firstLS, lastLS] segments.
with open(jsonFile2015) as data_file:
    data_json2015 = json.load(data_file)

with open(jsonFile2016) as data_file:
    data_json2016 = json.load(data_file)
61 
# return a portion of the 2015 golden json
# LS for a full run by default; otherwise a subset of which you determined the size
def selectedLS(list_runs=[], maxNum=-1, l_json=data_json2015):
    """Return {run: [[firstLS, lastLS], ...]} for the requested runs.

    Parameters
    ----------
    list_runs : list of int -- runs to select from the json
    maxNum    : int -- approximate cap on the number of lumisections
                (-1, the default, means no cap; a full run per entry)
    l_json    : dict -- parsed golden json ({str(run): [LS segments]})

    Returns None (with a printed message) when nothing passes the selection.
    """
    # Robustness fix: the original indexed list_runs[0] unconditionally and
    # raised IndexError on an empty list.
    if not list_runs:
        print("ERROR: list_runs must be a list of integers")
        return None
    if not isinstance(list_runs[0], int):
        print("ERROR: list_runs must be a list of integers")
        return None
    local_dict = {}
    ls_count = 0

    for run in list_runs:
        if str(run) in l_json:
            runNumber = run
            for LSsegment in l_json[str(run)]:
                ls_count += (LSsegment[-1] - LSsegment[0] + 1)
                # Stop once the LS budget is exhausted (was a bitwise `&`
                # on booleans; `and` is the idiomatic, short-circuiting form).
                if ls_count > maxNum and maxNum != -1:
                    break
                if runNumber in local_dict:
                    local_dict[runNumber].append(LSsegment)
                else:
                    local_dict[runNumber] = [LSsegment]
        else:
            print("run %s is NOT present in json %s\n\n"%(run, l_json))

    if len(local_dict) > 0:
        return local_dict
    else:
        print("No luminosity section interval passed the json and your selection; returning None")
        return None
98 
99 # print "\n\n\n THIS IS WHAT I RETURN: %s \n\n"%( selectedLS([251244,251251]) )
100 
101 
102 
103 
# Default number of events requested for an InputInfo (2 million).
InputInfoNDefault=2000000
106  def __init__(self,dataSet,dataSetParent='',label='',run=[],ls={},files=1000,events=InputInfoNDefault,split=10,location='CAF',ib_blacklist=None,ib_block=None,skimEvents=False) :
107  self.run = run
108  self.ls = ls
109  self.files = files
110  self.events = events
111  self.location = location
112  self.label = label
113  self.dataSet = dataSet
114  self.split = split
115  self.ib_blacklist = ib_blacklist
116  self.ib_block = ib_block
117  self.dataSetParent = dataSetParent
118  self.skimEvents = skimEvents
119 
    def das(self, das_options, dataset):
        """Build the shell command that resolves this input into file names.

        The command pipes `dasgoclient` (or `das-up-to-nevents.py` when
        skimming by event count) through optional blacklist filtering and
        sorting helpers; the caller executes the returned string in a shell.
        """
        if not self.skimEvents and (len(self.run) != 0 or self.ls):
            # run- or LS-restricted query: one das query per run
            queries = self.queries(dataset)
            if len(self.run) != 0:
                command = ";".join(["dasgoclient %s --query '%s'" % (das_options, query) for query in queries])
            else:
                # LS selection: pair each per-run query with its lumi list
                # and filter via das-selected-lumis.py
                lumis = self.lumis()
                commands = []
                while queries:
                    commands.append("dasgoclient %s --query 'lumi,%s' --format json | das-selected-lumis.py %s " % (das_options, queries.pop(), lumis.pop()))
                command = ";".join(commands)
            # group the joined commands into a subshell
            command = "({0})".format(command)
        elif not self.skimEvents:
            command = "dasgoclient %s --query '%s'" % (das_options, self.queries(dataset)[0])
        elif self.skimEvents:
            from os import getenv
            if getenv("JENKINS_PREFIX") is not None:
                # to be assured that whatever happens the files are only those at CERN
                command = "das-up-to-nevents.py -d %s -e %d -pc"%(dataset,self.events)
            else:
                command = "das-up-to-nevents.py -d %s -e %d"%(dataset,self.events)
        # Run filter on DAS output
        if self.ib_blacklist:
            command += " | grep -E -v "
            command += " ".join(["-e '{0}'".format(pattern) for pattern in self.ib_blacklist])
        if not self.skimEvents:
            from os import getenv
            if getenv("CMSSW_USE_IBEOS","false")=="true":
                # route file access through ibeos when the IB EOS cache is enabled
                return "export CMSSW_USE_IBEOS=true; " + command + " | ibeos-lfn-sort"
            return command + " | sort -u"
        else:
            return command
152 
153  def lumiRanges(self):
154  if len(self.run) != 0:
155  return "echo '{\n"+",".join(('"%d":[[1,268435455]]\n'%(x,) for x in self.run))+"}'"
156  if self.ls :
157  return "echo '{\n"+",".join(('"%d" : %s\n'%( int(x),self.ls[x]) for x in self.ls.keys()))+"}'"
158  return None
159 
160  def lumis(self):
161  query_lumis = []
162  if self.ls:
163  for run in sorted(self.ls.keys()):
164  run_lumis = []
165  for rng in self.ls[run]:
166  if isinstance(rng, int):
167  run_lumis.append(str(rng))
168  else:
169  run_lumis.append(str(rng[0])+","+str(rng[1]))
170  query_lumis.append(":".join(run_lumis))
171  return query_lumis
172 
173  def queries(self, dataset):
174  query_by = "block" if self.ib_block else "dataset"
175  query_source = "{0}#{1}".format(dataset, self.ib_block) if self.ib_block else dataset
176 
177  if self.ls :
178  the_queries = []
179  #for query_run in self.ls.keys():
180  # print "run is %s"%(query_run)
181  # if you have a LS list specified, still query das for the full run (multiple ls queries take forever)
182  # and use step1_lumiRanges.log to run only on LS which respect your selection
183 
184  # DO WE WANT T2_CERN ?
185  return ["file {0}={1} run={2}".format(query_by, query_source, query_run) for query_run in sorted(self.ls.keys())]
186  #return ["file {0}={1} run={2} site=T2_CH_CERN".format(query_by, query_source, query_run) for query_run in self.ls.keys()]
187 
188 
189  #
190  #for a_range in self.ls[query_run]:
191  # # print "a_range is %s"%(a_range)
192  # the_queries += ["file {0}={1} run={2} lumi={3} ".format(query_by, query_source, query_run, query_ls) for query_ls in expandLsInterval(a_range) ]
193  #print the_queries
194  return the_queries
195 
196  site = " site=T2_CH_CERN"
197  if "CMSSW_DAS_QUERY_SITES" in os.environ:
198  if os.environ["CMSSW_DAS_QUERY_SITES"]:
199  site = " site=%s" % os.environ["CMSSW_DAS_QUERY_SITES"]
200  else:
201  site = ""
202  if len(self.run) != 0:
203  return ["file {0}={1} run={2}{3}".format(query_by, query_source, query_run, site) for query_run in self.run]
204  #return ["file {0}={1} run={2} ".format(query_by, query_source, query_run) for query_run in self.run]
205  else:
206  return ["file {0}={1}{2}".format(query_by, query_source, site)]
207  #return ["file {0}={1} ".format(query_by, query_source)]
208 
209  def __str__(self):
210  if self.ib_block:
211  return "input from: {0} with run {1}#{2}".format(self.dataSet, self.ib_block, self.run)
212  return "input from: {0} with run {1}".format(self.dataSet, self.run)
213 
214 
# merge dictionaries, with priority on the [0] index
def merge(dictlist, TELL=False):
    """Recursively merge a list of dicts; earlier entries win on key clashes.

    Returns a new dict (shallow copies); the inputs are not modified.
    Set TELL=True for debug printout of each recursion step.
    """
    import copy
    last = len(dictlist) - 1
    if TELL:
        print(last, dictlist)
    if last == 0:
        # single item left: return a shallow copy of it
        return copy.copy(dictlist[0])
    # fold the last two items into one (the earlier one taking priority)
    # and recurse on the shortened list
    remaining = dictlist[0:max(0, last - 1)]
    if TELL:
        print(remaining)
    folded = copy.copy(dictlist[last])
    folded.update(dictlist[last - 1])
    remaining.append(folded)
    return merge(remaining, TELL)
233 
def remove(d, key, TELL=False):
    """Return a deep copy of d with `key` deleted; d itself is untouched."""
    import copy
    clone = copy.deepcopy(d)
    if TELL:
        print("original dict, BEF: %s" % d)
    del clone[key]
    if TELL:
        print("copy-removed dict, AFT: %s" % clone)
    return clone
241 
242 
243 
244 
# Standard conditions option shared by Run-1 MC steps.
stCond={'--conditions':'auto:run1_mc'}
def Kby(N, s):
    """--relval shorthand: N thousand events total, s events per job."""
    return {'--relval': '%s000,%s' % (N, s)}
def Mby(N, s):
    """--relval shorthand: N million events total, s events per job."""
    return {'--relval': '%s000000,%s' % (N, s)}
250 
def changeRefRelease(steps, listOfPairs):
    """Rewrite reference-release strings in every step, in place.

    For each (old, new) pair, substitutes `old` with `new` in the step's
    INPUT dataset name and in its '--pileup_input' option.
    """
    for name in steps:
        step = steps[name]
        if 'INPUT' in step:
            oldDataSet = step['INPUT'].dataSet
            for ref, newRef in listOfPairs:
                if ref in oldDataSet:
                    step['INPUT'].dataSet = oldDataSet.replace(ref, newRef)
        if '--pileup_input' in step:
            for ref, newRef in listOfPairs:
                if ref in step['--pileup_input']:
                    step['--pileup_input'] = step['--pileup_input'].replace(ref, newRef)
262 
def addForAll(steps, d):
    """Merge the options in d into every step, in place."""
    for name in steps:
        steps[name].update(d)
266 
267 
def genvalid(fragment, d, suffix='all', fi='', dataSet=''):
    """Build a generator-validation step config from template dict d.

    Returns a shallow copy of d with the '-s' genvalid sequence suffixed,
    the input set from an LHE article number (fi) or a dataset name
    (dataSet takes precedence), and 'cfg' set to the given fragment.
    """
    import copy
    cfg = copy.copy(d)
    if suffix:
        cfg['-s'] = cfg['-s'].replace('genvalid', 'genvalid_' + suffix)
    if fi:
        cfg['--filein'] = 'lhe:%d' % (fi,)
    if dataSet:
        cfg['--filein'] = 'das:%s' % (dataSet,)
    cfg['cfg'] = fragment
    return cfg
279 
def check_dups(input):
    """Return the set of elements that occur more than once in `input`."""
    # (parameter name kept for interface compatibility, though it shadows
    # the builtin `input`)
    seen = set()
    dups = set()
    for item in input:
        if item in seen:
            dups.add(item)
        else:
            seen.add(item)
    return dups
def queries(self, dataset)
Definition: MatrixUtil.py:173
Definition: merge.py:1
def interpret(self, stepsDict)
Definition: MatrixUtil.py:41
def __setitem__(self, key, value)
Definition: MatrixUtil.py:16
def genvalid(fragment, d, suffix='all', fi='', dataSet='')
Definition: MatrixUtil.py:268
def overwrite(self, keypair)
Definition: MatrixUtil.py:27
def replace(string, replacements)
def __init__(self, n, l)
Definition: MatrixUtil.py:32
def lumiRanges(self)
Definition: MatrixUtil.py:153
def findFileInPath(theFile)
def expandLsInterval(lumis)
Definition: MatrixUtil.py:48
def addForAll(steps, d)
Definition: MatrixUtil.py:263
def das(self, das_options, dataset)
Definition: MatrixUtil.py:120
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def __setitem__(self, key, value)
Definition: MatrixUtil.py:4
def check_dups(input)
Definition: MatrixUtil.py:280
static std::string join(char **cmd)
Definition: RemoteFile.cc:21
def selectedLS(list_runs=[], maxNum=-1, l_json=data_json2015)
Definition: MatrixUtil.py:64
def __init__(self, dataSet, dataSetParent='', label='', run=[], ls={}, files=1000, events=InputInfoNDefault, split=10, location='CAF', ib_blacklist=None, ib_block=None, skimEvents=False)
Definition: MatrixUtil.py:106
def remove(d, key, TELL=False)
Definition: MatrixUtil.py:234
def changeRefRelease(steps, listOfPairs)
Definition: MatrixUtil.py:251
#define update(a, b)
def Mby(N, s)
Definition: MatrixUtil.py:248
def addOverride(self, key, override)
Definition: MatrixUtil.py:11
def Kby(N, s)
Definition: MatrixUtil.py:246
def addOverride(self, overrides)
Definition: MatrixUtil.py:38
#define str(s)
def merge(dictlist, TELL=False)
Definition: MatrixUtil.py:216