7 from pprint
import pprint
8 from dbs.apis.dbsClient
import DbsApi
9 from random
import shuffle
14 parser = argparse.ArgumentParser(description=
'Create json config files for your defined IOV')
16 parser.add_argument(
"--json-input", type = str, help =
"Input json file", default = {})
17 parser.add_argument(
"--data-txt", type = str, help =
"Txt file with data set names", required =
True)
18 parser.add_argument(
"--N-max-IOV", type = int, help =
"Maximum number of events per IOV", default = 1e20)
19 parser.add_argument(
"--rm-bad-runs", type = str, help =
"Remove bad runs from json config")
20 parser.add_argument(
"--iov-txt", type = str, help =
"Txt file with IOV boundaries", default = [])
21 parser.add_argument(
"--out-data", type = str, help =
"Name of skimmed file with list of data file names", default =
"skimmed_dataset")
22 parser.add_argument(
"--out-dir", type = str, help =
"Output dir name", default =
"configs_" +
"_".
join([
str(time.localtime()[i])
for i
in range(6)]))
25 return parser.parse_args()
30 print "Processing: {}".
format(filename)
34 edmFileUtilArgs = [
'edmFileUtil',
'-f', filename,
'--eventsInLumis']
35 fullRunInfo = subprocess.check_output(edmFileUtilArgs).
split()[14:]
36 runInfo = [
tuple(fullRunInfo[index:index+3])
for index
in range(0, len(fullRunInfo), 3)]
40 print "Not at CERN {}".
format(filename)
53 dbs = DbsApi(
'https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
55 print "Processing: {}".
format(dataset)
56 sites = subprocess.check_output([
"dasgoclient",
"--query",
"site dataset={}".
format(dataset)]).
split()
58 if "T2_CH_CERN" in sites:
59 for f
in dbs.listFileArray(dataset=dataset.replace(
"\n",
""), detail=1):
60 filename = f[
'logical_file_name']
61 nevents = f[
'event_count']
64 filelist.append(filename)
65 nEvents += f[
'event_count']
68 emptyfiles.append(filename)
71 print "Not at CERN {}".
format(dataset)
73 return filelist, emptyfiles, nEvents
76 def fillJson(runJson, listIOV, filelist, nMax, outDir):
78 sort =
lambda lower, run, upper: lower <
int(run) < upper
82 lowerBoundaries = [
int(run)
for run
in listIOV[:-1]]
83 upperBoundaries = [
int(run)-1
for run
in listIOV[1:]]
86 lowerBoundaries = [0.]
87 upperBoundaries = [1e20]
90 pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
91 results = [pool.apply_async(getFileInfo, (filename,))
for filename
in filelist]
92 output = [result.get()
for result
in results]
94 fileInfo = [result
for result
in output
if type(result) == list]
95 notAtCern = [result
for result
in output
if type(result) == str]
98 with open(
"{}/filesNotAtCern.txt".
format(outDir),
"w")
as filesNotCern:
99 for filename
in notAtCern:
100 filesNotCern.write(filename)
101 filesNotCern.write(
"\n")
106 for (runInfo, filename)
in zip(fileInfo, filelist):
107 for (run, lumi, events)
in runInfo:
108 if events
not in [
"Events",
"Lumi"]:
110 runDic[
int(run)][
int(lumi)] = (
int(events), filename)
113 runDic[
int(run)] = {
int(lumi): (
int(events), filename)}
116 jsonAlign = [{}
for index
in lowerBoundaries]
117 jsonVali = [{}
for index
in lowerBoundaries]
118 eventsInTotal = [0
for index
in lowerBoundaries]
119 eventsInAlign = [0
for index
in lowerBoundaries]
120 eventsInVali = [0
for index
in lowerBoundaries]
124 runJson = runJson.items()
129 return jsonAlign, jsonVali, set(filelist)
132 for (run, value)
in runJson:
135 index = [
sort(lower, run, upper)
for (lower, upper)
in zip(lowerBoundaries, upperBoundaries)].
index(
True)
138 if int(run)
in runDic:
143 for (lumi, lumiInfo)
in runDic[
int(run)].iteritems():
144 eventsInTotal[index] += lumiInfo[0]
147 if eventsInAlign[index] < nMax:
148 if not True in [
sort(lower, lumi, upper)
for lower, upper
in value]:
149 if len(alignLumi[-1]) != 0:
153 eventsInAlign[index] += lumiInfo[0]
154 filelist.setdefault(index, set()).
add(lumiInfo[1])
156 if len(alignLumi[-1]) == 0:
157 alignLumi[-1] = [lumi, lumi]
160 alignLumi[-1][1] = lumi
163 if not True in [
sort(lower, lumi, upper)
for lower, upper
in value]:
164 if len(valiLumi[-1]) != 0:
168 eventsInVali[index] += lumiInfo[0]
169 if len(valiLumi[-1]) == 0:
170 valiLumi[-1] = [lumi, lumi]
173 valiLumi[-1][1] = lumi
175 alignLumi = [element
for element
in alignLumi
if len(element) != 0]
176 valiLumi = [element
for element
in valiLumi
if len(element) != 0]
178 if len(alignLumi) != 0:
179 jsonAlign[index][
str(run)] = alignLumi
181 if len(valiLumi) != 0:
182 jsonVali[index][
str(run)] = valiLumi
191 with open(
"{}/eventsUsed.txt".
format(outDir),
"w")
as eventsUsed:
192 for index
in range(len(eventsInTotal)):
193 eventsUsed.write(
"Events used in Total for IOV {}: {}".
format(lowerBoundaries[index], eventsInTotal[index]) +
"\n")
194 eventsUsed.write(
"Events used for Alignment for IOV {}: {}".
format(lowerBoundaries[index], eventsInAlign[index]) +
"\n")
195 eventsUsed.write(
"Events used for Validation for IOV {}: {}".
format(lowerBoundaries[index], eventsInVali[index]) +
"\n")
197 return jsonAlign, jsonVali, filelist
205 os.system(
"mkdir -p {}".
format(args.out_dir))
211 pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
213 with open(args.data_txt,
"r") as datasets: results = [pool.apply_async(getFileList, (dataset.replace("\n",
""),))
for dataset
in datasets.readlines()]
215 for result
in results:
216 files, empties, events = result.get()
217 filelist.extend(files)
218 emptyfiles.extend(empties)
219 nEvents.append(events)
221 with open(
"{}/emptyFiles.txt".
format(args.out_dir),
"w")
as empty:
222 for emptyFile
in emptyfiles:
223 empty.write(emptyFile +
'\n')
227 with open(args.iov_txt)
as fIOV:
228 listIOV = [line.strip()
for line
in fIOV]
231 listIOV = args.iov_txt
235 with open(args.json_input)
as fJson:
236 runJson = json.load(fJson)
239 runJson = args.json_input
242 jsonAlign, jsonVali, filelist =
fillJson(runJson, listIOV, filelist, args.N_max_IOV, args.out_dir)
245 if args.rm_bad_runs !=
None:
246 with open(args.rm_bad_runs,
"r") as badRuns: for badRun
in badRuns:
247 for dic
in jsonAlign:
248 dic.pop(
int(badRun),
None)
251 dic.pop(
int(badRun),
None)
255 pyTempl =
"""import FWCore.ParameterSet.Config as cms 256 import FWCore.PythonUtilities.LumiList as LumiList 258 lumiSecs = cms.untracked.VLuminosityBlockRange() 259 goodLumiSecs = LumiList.LumiList(filename = '{json}').getCMSSWString().split(',') 260 readFiles = cms.untracked.vstring() 261 source = cms.Source("PoolSource", 262 lumisToProcess = lumiSecs, 263 fileNames = readFiles) 267 lumiSecs.extend(goodLumiSecs) 268 maxEvents = cms.untracked.PSet(input = cms.untracked.int32(-1)) 273 with open(
"{}/{}.txt".
format(args.out_dir, args.out_data),
"w")
as outData:
274 for filename
in filelist:
275 outData.write(filename +
'\n')
278 if args.iov_txt
and args.json_input:
279 for index, (jsonContent, runNumber)
in enumerate(
zip(jsonAlign, [
int(run)
for run
in listIOV[:-1]])):
280 with open(
"{}/IOV_Align_{}.json".
format(args.out_dir, runNumber),
"w")
as fAlignJson:
281 json.dump(jsonContent, fAlignJson, sort_keys=
True, indent=4, separators=(
',',
': '))
283 for (jsonContent, runNumber)
in zip(jsonVali, [
int(run)
for run
in listIOV[:-1]]):
284 with open(
"{}/IOV_Vali_{}.json".
format(args.out_dir, runNumber),
"w")
as fValiJson:
285 json.dump(jsonContent, fValiJson, sort_keys=
True, indent=4, separators=(
',',
': '))
287 with open(
"{}/{}_since{}_cff.py".
format(args.out_dir, args.out_data, runNumber),
"w")
as outData:
288 outData.write(pyTempl.format(json=os.path.abspath(
"{}/IOV_Vali_{}.json".
format(args.out_dir, runNumber)), filenames=
",\n".
join([
"'{}'".
format(filename)
for filename
in filelist[index]])))
292 [mergeJsonAlign.update(jsonDic)
for jsonDic
in jsonAlign]
295 [mergeJsonVali.update(jsonDic)
for jsonDic
in jsonVali]
297 with open(
"{}/Align.json".
format(args.out_dir, runNumber),
"w")
as fAlignJson:
298 json.dump(mergeJsonAlign, fAlignJson, sort_keys=
True, indent=4, separators=(
',',
': '))
300 with open(
"{}/Vali.json".
format(args.out_dir, runNumber),
"w")
as fValiJson:
301 json.dump(mergeJsonVali, fValiJson, sort_keys=
True, indent=4, separators=(
',',
': '))
303 if not os.path.exists(
"{}/eventsUsed.txt".
format(args.out_dir)):
304 with open(
"{}/eventsUsed.txt".
format(args.out_dir),
"w")
as eventsUsed:
305 eventsUsed.write(
"Events used for Alignment: {}".
format(sum(nEvents)) +
"\n")
306 eventsUsed.write(
"Events used for Validation: {}".
format(0) +
"\n")
308 if __name__ ==
"__main__":
310
def getFileInfo(filename)
Called in fillJson function in parallel.
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def getFileList(dataset)
Called in parallel in the main function.
def split(sequence, size)
static std::string join(char **cmd)
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
def fillJson(runJson, listIOV, filelist, nMax, outDir)
bidiiter shuffle(bidiiter begin, bidiiter end, size_t num_random)