CMS 3D CMS Logo

MatrixUtil.py
Go to the documentation of this file.
1 from __future__ import print_function
2 import os
class Matrix(dict):
    """Mapping from workflow number (stored as ``float``) to its ``WF`` object.

    Assigning ``matrix[key] = steps`` wraps ``steps`` in ``WF(float(key), steps)``.
    A number that is already present is never overwritten: an error is printed
    and the existing entry is kept.
    """

    def __setitem__(self, key, value):
        """Register workflow ``key`` with the step list ``value``.

        The key is normalised to ``float`` *before* the duplicate check, so
        ``1``, ``1.0`` and ``'1.0'`` all refer to the same entry.
        """
        num = float(key)
        if num in self:
            print("ERROR in Matrix")
            print("overwriting",key,"not allowed")
        else:
            # dict.update bypasses this __setitem__, so there is no recursion
            self.update({num: WF(num, value)})

    def addOverride(self, key, override):
        """Forward ``override`` to the ``addOverride`` method of workflow ``key``."""
        self[key].addOverride(override)
13 
14 #the class to collect all possible steps
class Steps(dict):
    """Dictionary of step definitions in which a key can be set only once."""

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``; abort the process if ``key`` exists.

        A duplicate key prints an error and calls ``sys.exit(-9)``.
        """
        if key not in self:
            # dict.update does not go through this __setitem__
            self.update({key: value})
            return
        print("ERROR in Step")
        print("overwriting",key,"not allowed")
        import sys
        sys.exit(-9)

    def overwrite(self, keypair):
        """Make ``keypair[0]`` refer to the value currently stored at ``keypair[1]``.

        Unlike item assignment, this replaces an existing entry without error.
        """
        source_value = self[keypair[1]]
        target_key = keypair[0]
        self.update({target_key: source_value})
30 
class WF(list):
    """A workflow: a list of step names plus its number, resolved steps and overrides."""

    def __init__(self, n, l):
        """Create workflow number ``n`` whose items are the step names in ``l``."""
        self.extend(l)
        self.num = n
        # the actual steps of this WF, filled in by interpret()
        self.steps = []
        self.overrides = {}

    def addOverride(self, overrides):
        """Replace the stored overrides with ``overrides``."""
        self.overrides = overrides

    def interpret(self, stepsDict):
        """Look up every step name in ``stepsDict`` and append it to ``self.steps``.

        Raises ``KeyError`` if a step name is missing from ``stepsDict``.
        """
        for s in self:
            resolved = stepsDict[s]
            print('steps',s,resolved)
            # must be the instance attribute: there is no module-level `steps`
            self.steps.append(resolved)
45 
46 
47 
def expandLsInterval(lumis):
    """Return the inclusive range ``lumis[0] .. lumis[1]``."""
    first = lumis[0]
    last = lumis[1]
    # +1 because the upper bound of the interval is inclusive
    return range(first, last + 1)
50 
import json

# Resolve the two certification JSON files through findFileInPath, then parse
# each one at import time into a module-level object.
jsonFile2015 = findFileInPath("DPGAnalysis/Skims/data/Cert_13TeV_16Dec2015ReReco_Collisions15_25ns_50ns_JSON.txt")
jsonFile2016 = findFileInPath("DPGAnalysis/Skims/data/Cert_271036-274240_13TeV_PromptReco_Collisions16_JSON.txt")

with open(jsonFile2015) as data_file:
    data_json2015 = json.load(data_file)

with open(jsonFile2016) as data_file:
    data_json2016 = json.load(data_file)
61 
62 # return a portion of the 2015 golden json
63 # LS for a full run by default; otherwise a subset of which you determined the size
def selectedLS(list_runs=[],maxNum=-1,l_json=data_json2015):
    """Select luminosity-section segments for the given runs from ``l_json``.

    Parameters:
        list_runs: runs to look up, as integers (only the first entry is
            type-checked). The default list is never mutated here.
        maxNum: cap on the accumulated number of luminosity sections;
            ``-1`` means no cap.
        l_json: mapping ``str(run) -> list of segments``; the first and last
            element of a segment are used as its bounds.

    Returns:
        ``{run: [segment, ...]}`` for the runs that contributed at least one
        segment, or ``None`` (after printing a message) when nothing was
        selected, including when ``list_runs`` is empty.
    """
    # an empty list skips this check and reaches the "nothing selected" path below
    if list_runs and not isinstance(list_runs[0], int):
        print("ERROR: list_runs must be a list of integers")
        return None

    local_dict = {}
    ls_count = 0

    for run in list_runs:
        run_key = str(run)
        if run_key not in l_json:
            print("run %s is NOT present in json %s\n\n"%(run, l_json))
            continue
        for LSsegment in l_json[run_key]:
            ls_count += (LSsegment[-1] - LSsegment[0] + 1)
            # the counter is shared across runs; the segment that crosses the
            # cap is dropped and the rest of this run is skipped
            if maxNum != -1 and ls_count > maxNum:
                break
            local_dict.setdefault(run, []).append(LSsegment)

    if local_dict:
        return local_dict
    print("No luminosity section interval passed the json and your selection; returning None")
    return None
98 
99 # print "\n\n\n THIS IS WHAT I RETURN: %s \n\n"%( selectedLS([251244,251251]) )
100 
101 
102 
103 
# Default value of the `events` argument of the input-description constructor below.
InputInfoNDefault = 2000000
106  def __init__(self,dataSet,dataSetParent='',label='',run=[],ls={},files=1000,events=InputInfoNDefault,split=10,location='CAF',ib_blacklist=None,ib_block=None,skimEvents=False) :
107  self.run = run
108  self.ls = ls
109  self.files = files
110  self.events = events
111  self.location = location
112  self.label = label
113  self.dataSet = dataSet
114  self.split = split
115  self.ib_blacklist = ib_blacklist
116  self.ib_block = ib_block
117  self.dataSetParent = dataSetParent
118  self.skimEvents = skimEvents
119 
120  def das(self, das_options, dataset):
121  if not self.skimEvents and (len(self.run) != 0 or self.ls):
122  queries = self.queries(dataset)
123  if len(self.run) != 0:
124  command = ";".join(["dasgoclient %s --query '%s'" % (das_options, query) for query in queries])
125  else:
126  lumis = self.lumis()
127  commands = []
128  while queries:
129  commands.append("dasgoclient %s --query 'lumi,%s' --format json | das-selected-lumis.py %s " % (das_options, queries.pop(), lumis.pop()))
130  command = ";".join(commands)
131  command = "({0})".format(command)
132  elif not self.skimEvents:
133  command = "dasgoclient %s --query '%s'" % (das_options, self.queries(dataset)[0])
134  elif self.skimEvents:
135  from os import getenv
136  if getenv("JENKINS_PREFIX") is not None:
137  # to be assured that whatever happens the files are only those at CERN
138  command = "das-up-to-nevents.py -d %s -e %d -pc"%(dataset,self.events)
139  else:
140  command = "das-up-to-nevents.py -d %s -e %d"%(dataset,self.events)
141  # Run filter on DAS output
142  if self.ib_blacklist:
143  command += " | grep -E -v "
144  command += " ".join(["-e '{0}'".format(pattern) for pattern in self.ib_blacklist])
145  if not self.skimEvents:
146  from os import getenv
147  if getenv("CMSSW_USE_IBEOS","false")=="true": return command + " | ibeos-lfn-sort"
148  return command + " | sort -u"
149  else:
150  return command
151 
152  def lumiRanges(self):
153  if len(self.run) != 0:
154  return "echo '{\n"+",".join(('"%d":[[1,268435455]]\n'%(x,) for x in self.run))+"}'"
155  if self.ls :
156  return "echo '{\n"+",".join(('"%d" : %s\n'%( int(x),self.ls[x]) for x in self.ls.keys()))+"}'"
157  return None
158 
159  def lumis(self):
160  query_lumis = []
161  if self.ls:
162  for run in sorted(self.ls.keys()):
163  run_lumis = []
164  for rng in self.ls[run]:
165  if isinstance(rng, int):
166  run_lumis.append(str(rng))
167  else:
168  run_lumis.append(str(rng[0])+","+str(rng[1]))
169  query_lumis.append(":".join(run_lumis))
170  return query_lumis
171 
172  def queries(self, dataset):
173  query_by = "block" if self.ib_block else "dataset"
174  query_source = "{0}#{1}".format(dataset, self.ib_block) if self.ib_block else dataset
175 
176  if self.ls :
177  the_queries = []
178  #for query_run in self.ls.keys():
179  # print "run is %s"%(query_run)
180  # if you have a LS list specified, still query das for the full run (multiple ls queries take forever)
181  # and use step1_lumiRanges.log to run only on LS which respect your selection
182 
183  # DO WE WANT T2_CERN ?
184  return ["file {0}={1} run={2}".format(query_by, query_source, query_run) for query_run in sorted(self.ls.keys())]
185  #return ["file {0}={1} run={2} site=T2_CH_CERN".format(query_by, query_source, query_run) for query_run in self.ls.keys()]
186 
187 
188  #
189  #for a_range in self.ls[query_run]:
190  # # print "a_range is %s"%(a_range)
191  # the_queries += ["file {0}={1} run={2} lumi={3} ".format(query_by, query_source, query_run, query_ls) for query_ls in expandLsInterval(a_range) ]
192  #print the_queries
193  return the_queries
194 
195  site = " site=T2_CH_CERN"
196  if "CMSSW_DAS_QUERY_SITES" in os.environ:
197  if os.environ["CMSSW_DAS_QUERY_SITES"]:
198  site = " site=%s" % os.environ["CMSSW_DAS_QUERY_SITES"]
199  else:
200  site = ""
201  if len(self.run) != 0:
202  return ["file {0}={1} run={2}{3}".format(query_by, query_source, query_run, site) for query_run in self.run]
203  #return ["file {0}={1} run={2} ".format(query_by, query_source, query_run) for query_run in self.run]
204  else:
205  return ["file {0}={1}{2}".format(query_by, query_source, site)]
206  #return ["file {0}={1} ".format(query_by, query_source)]
207 
208  def __str__(self):
209  if self.ib_block:
210  return "input from: {0} with run {1}#{2}".format(self.dataSet, self.ib_block, self.run)
211  return "input from: {0} with run {1}".format(self.dataSet, self.run)
212 
213 
214 # merge dictionaries, with priority on the [0] index
def merge(dictlist,TELL=False):
    """Merge a list of dictionaries; earlier entries take priority over later ones.

    Parameters:
        dictlist: list of dictionaries; none of them is modified.
        TELL: when true, print the intermediate lists while merging.

    Returns:
        A shallow copy holding the union of all keys; for a key present in
        several dictionaries the value from the lowest index wins. An empty
        ``dictlist`` gives an empty dict.
    """
    import copy
    if not dictlist:
        return {}
    work = dictlist
    while True:
        last = len(work) - 1
        if TELL: print(last,work)
        if last == 0:
            # ONLY ONE ITEM LEFT
            return copy.copy(work[0])
        reducedlist = work[0:max(0,last-1)]
        if TELL: print(reducedlist)
        # copy the last item, then let the last-but-one override it
        d = copy.copy(work[last])
        d.update(work[last-1])
        reducedlist.append(d)
        work = reducedlist
232 
def remove(d,key,TELL=False):
    """Return a deep copy of ``d`` with ``key`` deleted; ``d`` itself is untouched.

    Raises ``KeyError`` if ``key`` is not in ``d``. With ``TELL`` true, the
    dictionary is printed before and after the removal.
    """
    import copy
    pruned = copy.deepcopy(d)
    if TELL:
        print("original dict, BEF: %s"%d)
    del pruned[key]
    if TELL:
        print("copy-removed dict, AFT: %s"%pruned)
    return pruned
240 
241 
242 
243 
# Option fragment selecting the 'auto:run1_mc' conditions.
stCond = {'--conditions': 'auto:run1_mc'}
def Kby(N,s):
    """Return a ``--relval`` option whose value is ``<N>000,<s>`` (``N`` in thousands)."""
    relval = '%s000,%s'%(N,s)
    return {'--relval': relval}
def Mby(N,s):
    """Return a ``--relval`` option whose value is ``<N>000000,<s>`` (``N`` in millions)."""
    relval = '%s000000,%s'%(N,s)
    return {'--relval': relval}
249 
def changeRefRelease(steps,listOfPairs):
    """Substitute reference strings in the inputs of every step, in place.

    For each ``(ref, newRef)`` pair in ``listOfPairs``:

    * a step with an ``'INPUT'`` entry gets ``INPUT.dataSet`` rewritten;
    * a step with a ``'--pileup_input'`` entry gets that string rewritten.
    """
    for name in steps:
        step = steps[name]
        if 'INPUT' in step:
            info = step['INPUT']
            original = info.dataSet
            for (ref,newRef) in listOfPairs:
                # NOTE(review): each replacement starts from the original
                # string, so when several pairs match only the last one is kept
                if ref in original:
                    info.dataSet = original.replace(ref,newRef)
        if '--pileup_input' in step:
            for (ref,newRef) in listOfPairs:
                # here replacements accumulate on the current value
                current = step['--pileup_input']
                if ref in current:
                    step['--pileup_input'] = current.replace(ref,newRef)
261 
def addForAll(steps,d):
    """Update every step of ``steps`` in place with the entries of ``d``."""
    for name in steps:
        options = steps[name]
        options.update(d)
265 
266 
def genvalid(fragment,d,suffix='all',fi='',dataSet=''):
    """Return a shallow copy of ``d`` customised for ``fragment``.

    * ``suffix`` (if non-empty): ``genvalid`` in the ``'-s'`` entry becomes
      ``genvalid_<suffix>``.
    * ``fi`` (if set): ``'--filein'`` becomes ``lhe:<fi>`` (formatted as an integer).
    * ``dataSet`` (if set): ``'--filein'`` becomes ``das:<dataSet>``; this is
      applied after ``fi`` and therefore wins when both are given.
    * ``'cfg'`` is always set to ``fragment``.
    """
    import copy
    customised = copy.copy(d)
    if suffix:
        sequence = customised['-s']
        customised['-s'] = sequence.replace('genvalid','genvalid_'+suffix)
    if fi:
        customised['--filein'] = 'lhe:%d'%(fi,)
    if dataSet:
        customised['--filein'] = 'das:%s'%(dataSet,)
    customised['cfg'] = fragment
    return customised
278 
def check_dups(input):
    """Return the set of items that occur more than once in ``input``."""
    seen = set()
    dups = set()
    for item in input:
        if item in seen:
            dups.add(item)
        else:
            seen.add(item)
    return dups
def queries(self, dataset)
Definition: MatrixUtil.py:172
Definition: merge.py:1
def interpret(self, stepsDict)
Definition: MatrixUtil.py:41
def __setitem__(self, key, value)
Definition: MatrixUtil.py:16
def genvalid(fragment, d, suffix='all', fi='', dataSet='')
Definition: MatrixUtil.py:267
def overwrite(self, keypair)
Definition: MatrixUtil.py:27
def replace(string, replacements)
def __init__(self, n, l)
Definition: MatrixUtil.py:32
def lumiRanges(self)
Definition: MatrixUtil.py:152
def findFileInPath(theFile)
def expandLsInterval(lumis)
Definition: MatrixUtil.py:48
def addForAll(steps, d)
Definition: MatrixUtil.py:262
def das(self, das_options, dataset)
Definition: MatrixUtil.py:120
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def __setitem__(self, key, value)
Definition: MatrixUtil.py:4
def check_dups(input)
Definition: MatrixUtil.py:279
static std::string join(char **cmd)
Definition: RemoteFile.cc:21
def selectedLS(list_runs=[], maxNum=-1, l_json=data_json2015)
Definition: MatrixUtil.py:64
def __init__(self, dataSet, dataSetParent='', label='', run=[], ls={}, files=1000, events=InputInfoNDefault, split=10, location='CAF', ib_blacklist=None, ib_block=None, skimEvents=False)
Definition: MatrixUtil.py:106
def remove(d, key, TELL=False)
Definition: MatrixUtil.py:233
def changeRefRelease(steps, listOfPairs)
Definition: MatrixUtil.py:250
#define update(a, b)
def Mby(N, s)
Definition: MatrixUtil.py:247
def addOverride(self, key, override)
Definition: MatrixUtil.py:11
def Kby(N, s)
Standard release validation samples ####.
Definition: MatrixUtil.py:245
def addOverride(self, overrides)
Definition: MatrixUtil.py:38
#define str(s)
def merge(dictlist, TELL=False)
Definition: MatrixUtil.py:215