CMS 3D CMS Logo

createIOVlist.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 import json
4 import argparse
5 import subprocess
6 import multiprocessing
7 from pprint import pprint
8 from dbs.apis.dbsClient import DbsApi
9 from random import shuffle
10 import time
11 import os
12 
def parser():
    """Build and parse the command-line options for the IOV config creation.

    Returns the argparse namespace. NOTE: the empty-string defaults are
    load-bearing: downstream code selects code paths by truthiness of
    ``args.json_input`` / ``args.iov_txt``.
    """
    parser = argparse.ArgumentParser(description='Create json config files for your defined IOV')

    # Defaults were `{}` / `[]` on str-typed options and a float on the
    # int-typed one; replaced with coherent, equally-falsy values.
    parser.add_argument("--json-input", type = str, help = "Input json file", default = "")
    parser.add_argument("--data-txt", type = str, help = "Txt file with data set names", required = True)
    parser.add_argument("--N-max-IOV", type = int, help = "Maximum number of events per IOV", default = int(1e20))
    parser.add_argument("--rm-bad-runs", type = str, help = "Remove bad runs from json config")
    parser.add_argument("--iov-txt", type = str, help = "Txt file with IOV boundaries", default = "")
    parser.add_argument("--out-data", type = str, help = "Name of skimmed file with list of data file names", default = "skimmed_dataset")
    parser.add_argument("--out-dir", type = str, help = "Output dir name", default = "configs_" + "_".join([str(time.localtime()[i]) for i in range(6)]))

    return parser.parse_args()
26 
27 
28 
def getFileInfo(filename):
    """Query one EDM file for its per-lumi event content.

    Runs ``edmFileUtil -f <filename> --eventsInLumis`` and parses the table
    into a list of ``(run, lumi, events)`` string tuples.  If the file is not
    reachable from this site (or edmFileUtil is unavailable/fails), the bare
    file name is returned instead so the caller can bookkeep it separately.

    Called in the fillJson function in parallel.
    """
    print("Processing: {}".format(filename))

    try:
        edmFileUtilArgs = ['edmFileUtil', '-f', filename, '--eventsInLumis']
        # Drop the 14-token header of the edmFileUtil table, then group the
        # remaining whitespace-separated tokens into (run, lumi, events) triples.
        fullRunInfo = subprocess.check_output(edmFileUtilArgs, universal_newlines=True).split()[14:]
        runInfo = [tuple(fullRunInfo[index:index+3]) for index in range(0, len(fullRunInfo), 3)]

    # was a bare `except:`; catch only what the subprocess call can raise
    except (subprocess.CalledProcessError, OSError):
        print("Not at CERN {}".format(filename))
        runInfo = filename

    return runInfo
44 
45 
def getFileList(dataset):
    """Collect the non-empty files of a dataset hosted at CERN.

    Uses dasgoclient to check that the dataset is available at T2_CH_CERN,
    then queries DBS for its files.  Files with zero events are gathered
    separately so they can be written to a bookkeeping file by the caller.

    Returns a tuple ``(filelist, emptyfiles, nEvents)``; all three are empty/0
    when the dataset is not at CERN.

    Called in parallel in the main function.
    """
    filelist = []
    emptyfiles = []
    nEvents = 0

    dbs = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')

    print("Processing: {}".format(dataset))
    # universal_newlines=True so the site list is str (not bytes) on python3
    sites = subprocess.check_output(
        ["dasgoclient", "--query", "site dataset={}".format(dataset)],
        universal_newlines=True).split()

    if "T2_CH_CERN" in sites:
        for f in dbs.listFileArray(dataset=dataset.replace("\n", ""), detail=1):
            filename = f['logical_file_name']
            nevents = f['event_count']

            if nevents != 0:
                filelist.append(filename)
                nEvents += f['event_count']
            else:
                emptyfiles.append(filename)

    else:
        print("Not at CERN {}".format(dataset))

    return filelist, emptyfiles, nEvents
74 
75 
def fillJson(runJson, listIOV, filelist, nMax, outDir):
    """Split the run/lumi content of *filelist* into per-IOV alignment and
    validation jsons.

    Arguments:
    runJson  -- run-registry style dict ``{run: [[lumiLow, lumiHigh], ...]}``;
                if falsy, no splitting is done and the plain file set is returned
    listIOV  -- list of IOV boundary runs (strings); if falsy a single
                catch-all IOV is used
    filelist -- list of EDM file names to inspect (via getFileInfo, in parallel)
    nMax     -- maximum number of events per IOV used for alignment; once
                reached, further lumis go to the validation json
    outDir   -- directory for the bookkeeping text files

    Returns ``(jsonAlign, jsonVali, filelist)`` where the jsons are lists (one
    entry per IOV) of ``{run: [[lumiStart, lumiEnd], ...]}`` dicts and
    filelist maps IOV index -> set of file names (or is a plain set when no
    runJson was given).
    """
    # True if run lies strictly inside the open interval (lower, upper)
    sort = lambda lower, run, upper: lower < int(run) < upper

    if listIOV:
        lowerBoundaries = [int(run) for run in listIOV[:-1]]
        upperBoundaries = [int(run)-1 for run in listIOV[1:]]

    else:
        # single catch-all IOV covering every run
        lowerBoundaries = [0.]
        upperBoundaries = [1e20]

    # Inspect all files in parallel; getFileInfo returns a list of
    # (run, lumi, events) tuples, or the bare file name on failure.
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    results = [pool.apply_async(getFileInfo, (filename,)) for filename in filelist]
    output = [result.get() for result in results]
    pool.close()  # the pool was leaked before: release the workers
    pool.join()

    fileInfo = [result for result in output if type(result) == list]
    notAtCern = [result for result in output if type(result) == str]

    with open("{}/filesNotAtCern.txt".format(outDir), "w") as filesNotCern:
        for filename in notAtCern:
            filesNotCern.write(filename)
            filesNotCern.write("\n")

    # runDic[run][lumi] = (events, filename)
    runDic = {}

    for (runInfo, filename) in zip(fileInfo, filelist):
        for (run, lumi, events) in runInfo:
            if events not in ["Events", "Lumi"]:  # skip edmFileUtil header tokens
                try:
                    runDic[int(run)][int(lumi)] = (int(events), filename)

                except KeyError:
                    runDic[int(run)] = {int(lumi): (int(events), filename)}

    jsonAlign = [{} for index in lowerBoundaries]
    jsonVali = [{} for index in lowerBoundaries]
    eventsInTotal = [0 for index in lowerBoundaries]
    eventsInAlign = [0 for index in lowerBoundaries]
    eventsInVali = [0 for index in lowerBoundaries]

    if runJson:
        # materialize to a list so it can be shuffled (dict item views are
        # not shuffleable on python3); shuffling randomizes which lumis end
        # up in the alignment vs validation sample
        runJson = list(runJson.items())
        shuffle(runJson)
        filelist = {}

    else:
        return jsonAlign, jsonVali, set(filelist)

    for (run, value) in runJson:
        try:
            # which IOV does this run fall into? (.index raises ValueError
            # when the run is outside all boundaries -> handled below)
            index = [sort(lower, run, upper) for (lower, upper) in zip(lowerBoundaries, upperBoundaries)].index(True)

            if int(run) in runDic:
                alignLumi = [[]]
                valiLumi = [[]]

                for (lumi, lumiInfo) in runDic[int(run)].items():
                    eventsInTotal[index] += lumiInfo[0]

                    if eventsInAlign[index] < nMax:
                        # lumi not in any certified range -> close current block
                        if not any(sort(lower, lumi, upper) for lower, upper in value):
                            if len(alignLumi[-1]) != 0:
                                alignLumi.append([])
                            continue

                        eventsInAlign[index] += lumiInfo[0]
                        filelist.setdefault(index, set()).add(lumiInfo[1])

                        if len(alignLumi[-1]) == 0:
                            alignLumi[-1] = [lumi, lumi]
                        else:
                            alignLumi[-1][1] = lumi

                    else:
                        # alignment quota reached: route lumis to validation
                        if not any(sort(lower, lumi, upper) for lower, upper in value):
                            if len(valiLumi[-1]) != 0:
                                valiLumi.append([])
                            continue

                        eventsInVali[index] += lumiInfo[0]
                        if len(valiLumi[-1]) == 0:
                            valiLumi[-1] = [lumi, lumi]
                        else:
                            valiLumi[-1][1] = lumi

                # drop empty trailing ranges left by the bookkeeping above
                alignLumi = [element for element in alignLumi if len(element) != 0]
                valiLumi = [element for element in valiLumi if len(element) != 0]

                if len(alignLumi) != 0:
                    jsonAlign[index][str(run)] = alignLumi

                if len(valiLumi) != 0:
                    jsonVali[index][str(run)] = valiLumi

        except ValueError:
            # run outside all IOV boundaries -> ignore it
            pass

    with open("{}/eventsUsed.txt".format(outDir), "w") as eventsUsed:
        for index in range(len(eventsInTotal)):
            eventsUsed.write("Events used in Total for IOV {}: {}".format(lowerBoundaries[index], eventsInTotal[index]) + "\n")
            eventsUsed.write("Events used for Alignment for IOV {}: {}".format(lowerBoundaries[index], eventsInAlign[index]) + "\n")
            eventsUsed.write("Events used for Validation for IOV {}: {}".format(lowerBoundaries[index], eventsInVali[index]) + "\n")

    return jsonAlign, jsonVali, filelist
198 
199 
def main():
    """Drive the IOV config creation: gather files, split into IOV jsons,
    and write the cff/json/bookkeeping outputs into ``args.out_dir``."""
    args = parser()

    # portable replacement for os.system("mkdir -p ...")
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)

    filelist = []
    emptyfiles = []
    nEvents = []
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())

    # query DBS/DAS for every dataset in parallel
    with open(args.data_txt, "r") as datasets:
        results = [pool.apply_async(getFileList, (dataset.replace("\n", ""),)) for dataset in datasets.readlines()]

    for result in results:
        files, empties, events = result.get()
        filelist.extend(files)
        emptyfiles.extend(empties)
        nEvents.append(events)

    with open("{}/emptyFiles.txt".format(args.out_dir), "w") as empty:
        for emptyFile in emptyfiles:
            empty.write(emptyFile + '\n')

    if args.iov_txt:
        with open(args.iov_txt) as fIOV:
            listIOV = [line.strip() for line in fIOV]
    else:
        listIOV = args.iov_txt

    if args.json_input:
        with open(args.json_input) as fJson:
            runJson = json.load(fJson)
    else:
        runJson = args.json_input

    jsonAlign, jsonVali, filelist = fillJson(runJson, listIOV, filelist, args.N_max_IOV, args.out_dir)

    if args.rm_bad_runs is not None:
        with open(args.rm_bad_runs, "r") as badRuns:
            for badRun in badRuns:
                # fillJson stores runs under str(run) keys, so pop the string
                # form; the old `dic.pop(int(badRun), None)` never matched and
                # bad-run removal was a silent no-op
                run = str(int(badRun))
                for dic in jsonAlign:
                    dic.pop(run, None)
                for dic in jsonVali:
                    dic.pop(run, None)

    pyTempl = """import FWCore.ParameterSet.Config as cms
import FWCore.PythonUtilities.LumiList as LumiList

lumiSecs = cms.untracked.VLuminosityBlockRange()
goodLumiSecs = LumiList.LumiList(filename = '{json}').getCMSSWString().split(',')
readFiles = cms.untracked.vstring()
source = cms.Source("PoolSource",
                    lumisToProcess = lumiSecs,
                    fileNames = readFiles)
readFiles.extend([
    {filenames}
])
lumiSecs.extend(goodLumiSecs)
maxEvents = cms.untracked.PSet(input = cms.untracked.int32(-1))
"""

    # without IOV boundaries fillJson returned a plain set of file names
    if not args.iov_txt:
        with open("{}/{}.txt".format(args.out_dir, args.out_data), "w") as outData:
            for filename in filelist:
                outData.write(filename + '\n')

    if args.iov_txt and args.json_input:
        for index, (jsonContent, runNumber) in enumerate(zip(jsonAlign, [int(run) for run in listIOV[:-1]])):
            with open("{}/IOV_Align_{}.json".format(args.out_dir, runNumber), "w") as fAlignJson:
                json.dump(jsonContent, fAlignJson, sort_keys=True, indent=4, separators=(',', ': '))

        for (jsonContent, runNumber) in zip(jsonVali, [int(run) for run in listIOV[:-1]]):
            with open("{}/IOV_Vali_{}.json".format(args.out_dir, runNumber), "w") as fValiJson:
                json.dump(jsonContent, fValiJson, sort_keys=True, indent=4, separators=(',', ': '))

            with open("{}/{}_since{}_cff.py".format(args.out_dir, args.out_data, runNumber), "w") as outData:
                outData.write(pyTempl.format(json=os.path.abspath("{}/IOV_Vali_{}.json".format(args.out_dir, runNumber)), filenames=",\n".join(["'{}'".format(filename) for filename in filelist[index]])))

    if args.json_input:
        mergeJsonAlign = {}
        [mergeJsonAlign.update(jsonDic) for jsonDic in jsonAlign]

        mergeJsonVali = {}
        [mergeJsonVali.update(jsonDic) for jsonDic in jsonVali]

        # the stray `runNumber` format argument was dropped: it raised a
        # NameError when --json-input was given without --iov-txt
        with open("{}/Align.json".format(args.out_dir), "w") as fAlignJson:
            json.dump(mergeJsonAlign, fAlignJson, sort_keys=True, indent=4, separators=(',', ': '))

        with open("{}/Vali.json".format(args.out_dir), "w") as fValiJson:
            json.dump(mergeJsonVali, fValiJson, sort_keys=True, indent=4, separators=(',', ': '))

    # fillJson only writes eventsUsed.txt when a runJson was given; fall back
    # to a summary based on the raw dataset event counts
    if not os.path.exists("{}/eventsUsed.txt".format(args.out_dir)):
        with open("{}/eventsUsed.txt".format(args.out_dir), "w") as eventsUsed:
            eventsUsed.write("Events used for Alignment: {}".format(sum(nEvents)) + "\n")
            eventsUsed.write("Events used for Validation: {}".format(0) + "\n")

if __name__ == "__main__":
    main()
310 
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float zip(ConstView const &tracks, int32_t i)
Definition: TracksSoA.h:90
def getFileInfo(filename)
Called in fillJson function in parallel.
def getFileList(dataset)
Called in parallel in the main function.
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
def fillJson(runJson, listIOV, filelist, nMax, outDir)
Definition: main.py:1
bidiiter shuffle(bidiiter begin, bidiiter end, size_t num_random)
Definition: Utilities.h:27
#define str(s)