CMS 3D CMS Logo

createIOVlist.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 import json
4 import argparse
5 import subprocess
6 import multiprocessing
7 from pprint import pprint
8 from dbs.apis.dbsClient import DbsApi
9 from random import shuffle
10 import time
11 import os
12 
def parser():
    """Build and parse the command line options of the script.

    Returns:
        argparse.Namespace with the parsed options.
    """
    parser = argparse.ArgumentParser(description='Create json config files for your defined IOV')

    # NOTE: defaults for --json-input/--iov-txt are falsy sentinels; the
    # original used a mutable {} / [] although type=str — downstream code
    # only ever tests these values for truthiness, so "" is equivalent.
    parser.add_argument("--json-input", type = str, help = "Input json file", default = "")
    parser.add_argument("--data-txt", type = str, help = "Txt file with data set names", required = True)
    parser.add_argument("--N-max-IOV", type = int, help = "Maximum number of events per IOV", default = 1e20)
    parser.add_argument("--rm-bad-runs", type = str, help = "Remove bad runs from json config")
    parser.add_argument("--iov-txt", type = str, help = "Txt file with IOV boundaries", default = "")
    parser.add_argument("--out-data", type = str, help = "Name of skimmed file with list of data file names", default = "skimmed_dataset")
    parser.add_argument("--out-dir", type = str, help = "Output dir name", default = "configs_" + "_".join([str(time.localtime()[i]) for i in range(6)]))

    return parser.parse_args()
26 
27 
28 
def getFileInfo(filename):
    """Query edmFileUtil for the run/lumi/event content of one file.

    Called in parallel from fillJson.

    Args:
        filename: logical file name of the data file to inspect.

    Returns:
        A list of (run, lumi, events) string tuples on success, or the
        bare filename (str) when the file could not be inspected — the
        caller distinguishes the two cases by type.
    """
    print("Processing: {}".format(filename))

    try:
        edmFileUtilArgs = ['edmFileUtil', '-f', filename, '--eventsInLumis']
        # universal_newlines so the tokens are str (not bytes) on python3;
        # skip the 14-token header edmFileUtil prints before the table.
        fullRunInfo = subprocess.check_output(edmFileUtilArgs, universal_newlines=True).split()[14:]
        # Group the flat token stream into (run, lumi, events) triplets.
        runInfo = [tuple(fullRunInfo[index:index+3]) for index in range(0, len(fullRunInfo), 3)]

    # Only failures of the external command (missing binary, non-zero exit);
    # the original bare `except:` also swallowed e.g. KeyboardInterrupt.
    except (subprocess.CalledProcessError, OSError):
        print("Not at CERN {}".format(filename))
        runInfo = filename

    return runInfo
44 
45 
def getFileList(dataset):
    """Collect the non-empty files of a dataset if it is hosted at CERN.

    Called in parallel from main. Uses dasgoclient to check the hosting
    sites and the DBS API to list the files.

    Args:
        dataset: dataset name as listed in the --data-txt file.

    Returns:
        Tuple (filelist, emptyfiles, nEvents): the logical file names with
        events, the file names with zero events, and the total event count.
    """
    filelist = []
    emptyfiles = []
    nEvents = 0

    dbs = DbsApi('https://cmsweb.cern.ch/dbs/prod/global/DBSReader')

    print("Processing: {}".format(dataset))
    # universal_newlines so the site names are str (not bytes) on python3,
    # otherwise the "T2_CH_CERN" membership test below can never match.
    sites = subprocess.check_output(
        ["dasgoclient", "--query", "site dataset={}".format(dataset)],
        universal_newlines=True).split()

    if "T2_CH_CERN" in sites:
        for fileInfo in dbs.listFileArray(dataset=dataset.replace("\n", ""), detail=1):
            filename = fileInfo['logical_file_name']
            nevents = fileInfo['event_count']

            if nevents != 0:
                filelist.append(filename)
                nEvents += nevents
            else:
                emptyfiles.append(filename)
    else:
        print("Not at CERN {}".format(dataset))

    return filelist, emptyfiles, nEvents
74 
75 
def fillJson(runJson, listIOV, filelist, nMax, outDir):
    """Split the good-lumi json into per-IOV alignment/validation jsons.

    Args:
        runJson: dict run -> list of good [lowerLumi, upperLumi] ranges
                 (loaded from a json file), or a falsy value.
        listIOV: list of IOV boundary runs (strings), or falsy for a
                 single catch-all IOV.
        filelist: logical file names to inspect with edmFileUtil.
        nMax: maximum number of events per IOV used for alignment; the
              surplus lumis go into the validation json.
        outDir: directory for filesNotAtCern.txt and eventsUsed.txt.

    Returns:
        (jsonAlign, jsonVali, filelist): lists of one json dict per IOV,
        and a dict IOV index -> set of file names used for alignment (or
        the plain set of input files when runJson is falsy).
    """
    # True if run lies strictly inside the open interval (lower, upper).
    sort = lambda lower, run, upper: lower < int(run) < upper

    if listIOV:
        lowerBoundaries = [int(run) for run in listIOV[:-1]]
        upperBoundaries = [int(run)-1 for run in listIOV[1:]]
    else:
        # Single catch-all IOV covering every run.
        lowerBoundaries = [0.]
        upperBoundaries = [1e20]

    # Inspect all files in parallel; close the pool once results are in
    # (the original leaked the worker processes).
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    results = [pool.apply_async(getFileInfo, (filename,)) for filename in filelist]
    output = [result.get() for result in results]
    pool.close()
    pool.join()

    # getFileInfo returns a list on success and the bare file name on
    # failure. Keep (info, filename) pairs together: zipping the filtered
    # info list against the full filelist (as the original did) shifts the
    # pairing as soon as one file fails.
    notAtCern = [result for result in output if type(result) == str]
    infoWithFile = [(result, filename) for (result, filename) in zip(output, filelist)
                    if type(result) == list]

    with open("{}/filesNotAtCern.txt".format(outDir), "w") as filesNotCern:
        for filename in notAtCern:
            filesNotCern.write(filename)
            filesNotCern.write("\n")

    # run -> {lumi: (events, filename)}
    runDic = {}

    for (runInfo, filename) in infoWithFile:
        for (run, lumi, events) in runInfo:
            # Skip edmFileUtil table-header tokens.
            if events not in ["Events", "Lumi"]:
                try:
                    runDic[int(run)][int(lumi)] = (int(events), filename)
                except KeyError:
                    runDic[int(run)] = {int(lumi): (int(events), filename)}

    jsonAlign = [{} for index in lowerBoundaries]
    jsonVali = [{} for index in lowerBoundaries]
    eventsInTotal = [0 for index in lowerBoundaries]
    eventsInAlign = [0 for index in lowerBoundaries]
    eventsInVali = [0 for index in lowerBoundaries]

    if runJson:
        # list() so shuffle also works on python3 dict views.
        runJson = list(runJson.items())
        shuffle(runJson)
        filelist = {}
    else:
        # No good-lumi json: nothing to split, return the unique files.
        return jsonAlign, jsonVali, set(filelist)

    for (run, value) in runJson:
        try:
            # Index of the IOV this run belongs to; raises ValueError
            # (caught below) when the run is outside all boundaries.
            index = [sort(lower, run, upper)
                     for (lower, upper) in zip(lowerBoundaries, upperBoundaries)].index(True)

            if int(run) in runDic:
                alignLumi = [[]]
                valiLumi = [[]]

                for (lumi, lumiInfo) in runDic[int(run)].items():
                    eventsInTotal[index] += lumiInfo[0]

                    # Fill alignment until the IOV event budget is used up,
                    # then divert the remaining lumis to validation.
                    if eventsInAlign[index] < nMax:
                        if not any(sort(lower, lumi, upper) for (lower, upper) in value):
                            # Bad lumi: close the current contiguous block.
                            if len(alignLumi[-1]) != 0:
                                alignLumi.append([])
                            continue

                        eventsInAlign[index] += lumiInfo[0]
                        filelist.setdefault(index, set()).add(lumiInfo[1])

                        if len(alignLumi[-1]) == 0:
                            alignLumi[-1] = [lumi, lumi]
                        else:
                            alignLumi[-1][1] = lumi

                    else:
                        if not any(sort(lower, lumi, upper) for (lower, upper) in value):
                            if len(valiLumi[-1]) != 0:
                                valiLumi.append([])
                            continue

                        eventsInVali[index] += lumiInfo[0]
                        if len(valiLumi[-1]) == 0:
                            valiLumi[-1] = [lumi, lumi]
                        else:
                            valiLumi[-1][1] = lumi

                # Drop empty trailing blocks before writing the json.
                alignLumi = [element for element in alignLumi if len(element) != 0]
                valiLumi = [element for element in valiLumi if len(element) != 0]

                if len(alignLumi) != 0:
                    jsonAlign[index][str(run)] = alignLumi

                if len(valiLumi) != 0:
                    jsonVali[index][str(run)] = valiLumi

        except ValueError:
            # Run outside all IOV boundaries: ignore it.
            pass

    with open("{}/eventsUsed.txt".format(outDir), "w") as eventsUsed:
        for index in range(len(eventsInTotal)):
            eventsUsed.write("Events used in Total for IOV {}: {}".format(lowerBoundaries[index], eventsInTotal[index]) + "\n")
            eventsUsed.write("Events used for Alignment for IOV {}: {}".format(lowerBoundaries[index], eventsInAlign[index]) + "\n")
            eventsUsed.write("Events used for Validation for IOV {}: {}".format(lowerBoundaries[index], eventsInVali[index]) + "\n")

    return jsonAlign, jsonVali, filelist
198 
199 
def main():
    """Create per-IOV json files and cff configs for the given datasets."""
    args = parser()

    # Create the output directory without shelling out.
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)

    filelist = []
    emptyfiles = []
    nEvents = []
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())

    # Query the file list of every dataset in parallel.
    with open(args.data_txt, "r") as datasets:
        results = [pool.apply_async(getFileList, (dataset.replace("\n", ""),))
                   for dataset in datasets.readlines()]

    for result in results:
        files, empties, events = result.get()
        filelist.extend(files)
        emptyfiles.extend(empties)
        nEvents.append(events)

    # Release the worker processes (the original leaked the pool).
    pool.close()
    pool.join()

    with open("{}/emptyFiles.txt".format(args.out_dir), "w") as empty:
        for emptyFile in emptyfiles:
            empty.write(emptyFile + '\n')

    if args.iov_txt:
        with open(args.iov_txt) as fIOV:
            listIOV = [line.strip() for line in fIOV]
    else:
        listIOV = args.iov_txt

    if args.json_input:
        with open(args.json_input) as fJson:
            runJson = json.load(fJson)
    else:
        runJson = args.json_input

    jsonAlign, jsonVali, filelist = fillJson(runJson, listIOV, filelist, args.N_max_IOV, args.out_dir)

    if args.rm_bad_runs is not None:
        with open(args.rm_bad_runs, "r") as badRuns:
            for badRun in badRuns:
                # The json run keys are strings, so pop the stripped string.
                # The original popped int(badRun), which never matched a key
                # and therefore silently removed nothing.
                run = badRun.strip()
                for dic in jsonAlign:
                    dic.pop(run, None)
                for dic in jsonVali:
                    dic.pop(run, None)

    pyTempl = """import FWCore.ParameterSet.Config as cms
import FWCore.PythonUtilities.LumiList as LumiList

lumiSecs = cms.untracked.VLuminosityBlockRange()
goodLumiSecs = LumiList.LumiList(filename = '{json}').getCMSSWString().split(',')
readFiles = cms.untracked.vstring()
source = cms.Source("PoolSource",
                    lumisToProcess = lumiSecs,
                    fileNames = readFiles)
readFiles.extend([
    {filenames}
])
lumiSecs.extend(goodLumiSecs)
maxEvents = cms.untracked.PSet(input = cms.untracked.int32(-1))
"""

    if not args.iov_txt:
        with open("{}/{}.txt".format(args.out_dir, args.out_data), "w") as outData:
            for filename in filelist:
                outData.write(filename + '\n')

    if args.iov_txt and args.json_input:
        for index, (jsonContent, runNumber) in enumerate(zip(jsonAlign, [int(run) for run in listIOV[:-1]])):
            with open("{}/IOV_Align_{}.json".format(args.out_dir, runNumber), "w") as fAlignJson:
                json.dump(jsonContent, fAlignJson, sort_keys=True, indent=4, separators=(',', ': '))

        # enumerate here as well: the original reused the stale `index` left
        # over from the alignment loop, so every _cff.py was written with the
        # file list of the LAST IOV instead of its own.
        for index, (jsonContent, runNumber) in enumerate(zip(jsonVali, [int(run) for run in listIOV[:-1]])):
            with open("{}/IOV_Vali_{}.json".format(args.out_dir, runNumber), "w") as fValiJson:
                json.dump(jsonContent, fValiJson, sort_keys=True, indent=4, separators=(',', ': '))

            with open("{}/{}_since{}_cff.py".format(args.out_dir, args.out_data, runNumber), "w") as outData:
                # .get: an IOV that received no alignment files gets an
                # empty file list instead of a KeyError.
                outData.write(pyTempl.format(
                    json=os.path.abspath("{}/IOV_Vali_{}.json".format(args.out_dir, runNumber)),
                    filenames=",\n".join(["'{}'".format(filename) for filename in filelist.get(index, set())])))

    if args.json_input:
        mergeJsonAlign = {}
        for jsonDic in jsonAlign:
            mergeJsonAlign.update(jsonDic)

        mergeJsonVali = {}
        for jsonDic in jsonVali:
            mergeJsonVali.update(jsonDic)

        # No stray format arguments here: the original passed runNumber,
        # which is undefined when --iov-txt was not given (NameError).
        with open("{}/Align.json".format(args.out_dir), "w") as fAlignJson:
            json.dump(mergeJsonAlign, fAlignJson, sort_keys=True, indent=4, separators=(',', ': '))

        with open("{}/Vali.json".format(args.out_dir), "w") as fValiJson:
            json.dump(mergeJsonVali, fValiJson, sort_keys=True, indent=4, separators=(',', ': '))

    # fillJson only writes eventsUsed.txt when a json input was given;
    # otherwise summarize the raw dataset counts here.
    if not os.path.exists("{}/eventsUsed.txt".format(args.out_dir)):
        with open("{}/eventsUsed.txt".format(args.out_dir), "w") as eventsUsed:
            eventsUsed.write("Events used for Alignment: {}".format(sum(nEvents)) + "\n")
            eventsUsed.write("Events used for Validation: {}".format(0) + "\n")


if __name__ == "__main__":
    main()
310 
def getFileInfo(filename)
Called in fillJson function in parallel.
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def getFileList(dataset)
Called in paralell in the main function.
static std::string join(char **cmd)
Definition: RemoteFile.cc:19
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
def fillJson(runJson, listIOV, filelist, nMax, outDir)
Definition: main.py:1
bidiiter shuffle(bidiiter begin, bidiiter end, size_t num_random)
Definition: Utilities.h:27
#define str(s)