7 from pprint
import pprint
8 from dbs.apis.dbsClient
import DbsApi
9 from random
import shuffle
14 parser = argparse.ArgumentParser(description=
'Create json config files for your defined IOV')
16 parser.add_argument(
"--json-input", type = str, help =
"Input json file", default = {})
17 parser.add_argument(
"--data-txt", type = str, help =
"Txt file with data set names", required =
True)
18 parser.add_argument(
"--N-max-IOV", type = int, help =
"Maximum number of events per IOV", default = 1e20)
19 parser.add_argument(
"--rm-bad-runs", type = str, help =
"Remove bad runs from json config")
20 parser.add_argument(
"--iov-txt", type = str, help =
"Txt file with IOV boundaries", default = [])
21 parser.add_argument(
"--out-data", type = str, help =
"Name of skimmed file with list of data file names", default =
"skimmed_dataset")
22 parser.add_argument(
"--out-dir", type = str, help =
"Output dir name", default =
"configs_" +
"_".
join([
str(time.localtime()[i])
for i
in range(6)]))
25 return parser.parse_args()
30 print "Processing: {}".
format(filename)
34 edmFileUtilArgs = [
'edmFileUtil',
'-f', filename,
'--eventsInLumis']
35 fullRunInfo = subprocess.check_output(edmFileUtilArgs).
split()[14:]
36 runInfo = [
tuple(fullRunInfo[index:index+3])
for index
in range(0, len(fullRunInfo), 3)]
40 print "Not at CERN {}".
format(filename)
53 dbs = DbsApi(
'https://cmsweb.cern.ch/dbs/prod/global/DBSReader')
55 print "Processing: {}".
format(dataset)
56 sites = subprocess.check_output([
"dasgoclient",
"--query",
"site dataset={}".
format(dataset)]).
split()
58 if "T2_CH_CERN" in sites:
59 for f
in dbs.listFileArray(dataset=dataset.replace(
"\n",
""), detail=1):
60 filename = f[
'logical_file_name']
61 nevents = f[
'event_count']
64 filelist.append(filename)
65 nEvents += f[
'event_count']
68 emptyfiles.append(filename)
71 print "Not at CERN {}".
format(dataset)
73 return filelist, emptyfiles, nEvents
76 def fillJson(runJson, listIOV, filelist, nMax, outDir):
78 sort =
lambda lower, run, upper: lower <
int(run) < upper
82 lowerBoundaries = [
int(run)
for run
in listIOV[:-1]]
83 upperBoundaries = [
int(run)-1
for run
in listIOV[1:]]
86 lowerBoundaries = [0.]
87 upperBoundaries = [1e20]
90 pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
91 results = [pool.apply_async(getFileInfo, (filename,))
for filename
in filelist]
92 output = [result.get()
for result
in results]
94 fileInfo = [result
for result
in output
if type(result) == list]
95 notAtCern = [result
for result
in output
if type(result) == str]
98 with open(
"{}/filesNotAtCern.txt".
format(outDir),
"w")
as filesNotCern:
99 for filename
in notAtCern:
100 filesNotCern.write(filename)
101 filesNotCern.write(
"\n")
106 for (runInfo, filename)
in zip(fileInfo, filelist):
107 for (run, lumi, events)
in runInfo:
108 if events
not in [
"Events",
"Lumi"]:
110 runDic[
int(run)][
int(lumi)] = (
int(events), filename)
113 runDic[
int(run)] = {
int(lumi): (
int(events), filename)}
116 jsonAlign = [{}
for index
in lowerBoundaries]
117 jsonVali = [{}
for index
in lowerBoundaries]
118 eventsInTotal = [0
for index
in lowerBoundaries]
119 eventsInAlign = [0
for index
in lowerBoundaries]
120 eventsInVali = [0
for index
in lowerBoundaries]
124 runJson = runJson.items()
129 return jsonAlign, jsonVali, set(filelist)
132 for (run, value)
in runJson:
135 index = [
sort(lower, run, upper)
for (lower, upper)
in zip(lowerBoundaries, upperBoundaries)].
index(
True)
138 if int(run)
in runDic:
143 for (lumi, lumiInfo)
in runDic[
int(run)].iteritems():
144 eventsInTotal[index] += lumiInfo[0]
147 if eventsInAlign[index] < nMax:
148 if not True in [
sort(lower, lumi, upper)
for lower, upper
in value]:
149 if len(alignLumi[-1]) != 0:
153 eventsInAlign[index] += lumiInfo[0]
154 filelist.setdefault(index, set()).
add(lumiInfo[1])
156 if len(alignLumi[-1]) == 0:
157 alignLumi[-1] = [lumi, lumi]
160 alignLumi[-1][1] = lumi
163 if not True in [
sort(lower, lumi, upper)
for lower, upper
in value]:
164 if len(valiLumi[-1]) != 0:
168 eventsInVali[index] += lumiInfo[0]
169 if len(valiLumi[-1]) == 0:
170 valiLumi[-1] = [lumi, lumi]
173 valiLumi[-1][1] = lumi
175 alignLumi = [element
for element
in alignLumi
if len(element) != 0]
176 valiLumi = [element
for element
in valiLumi
if len(element) != 0]
178 if len(alignLumi) != 0:
179 jsonAlign[index][
str(run)] = alignLumi
181 if len(valiLumi) != 0:
182 jsonVali[index][
str(run)] = valiLumi
191 with open(
"{}/eventsUsed.txt".
format(outDir),
"w")
as eventsUsed:
192 for index
in range(len(eventsInTotal)):
193 eventsUsed.write(
"Events used in Total for IOV {}: {}".
format(lowerBoundaries[index], eventsInTotal[index]) +
"\n")
194 eventsUsed.write(
"Events used for Alignment for IOV {}: {}".
format(lowerBoundaries[index], eventsInAlign[index]) +
"\n")
195 eventsUsed.write(
"Events used for Validation for IOV {}: {}".
format(lowerBoundaries[index], eventsInVali[index]) +
"\n")
197 return jsonAlign, jsonVali, filelist
205 os.system(
"mkdir -p {}".
format(args.out_dir))
211 pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
213 with open(args.data_txt,
"r") as datasets: results = [pool.apply_async(getFileList, (dataset.replace("\n",
""),))
for dataset
in datasets.readlines()]
215 for result
in results:
216 files, empties, events = result.get()
217 filelist.extend(files)
218 emptyfiles.extend(empties)
219 nEvents.append(events)
221 with open(
"{}/emptyFiles.txt".
format(args.out_dir),
"w")
as empty:
222 for emptyFile
in emptyfiles:
223 empty.write(emptyFile +
'\n')
227 with open(args.iov_txt)
as fIOV:
228 listIOV = [line.strip()
for line
in fIOV]
231 listIOV = args.iov_txt
235 with open(args.json_input)
as fJson:
236 runJson = json.load(fJson)
239 runJson = args.json_input
242 jsonAlign, jsonVali, filelist =
fillJson(runJson, listIOV, filelist, args.N_max_IOV, args.out_dir)
245 if args.rm_bad_runs !=
None:
246 with open(args.rm_bad_runs,
"r") as badRuns: for badRun
in badRuns:
247 for dic
in jsonAlign:
248 dic.pop(
int(badRun),
None)
251 dic.pop(
int(badRun),
None)
255 pyTempl =
"""import FWCore.ParameterSet.Config as cms 256 import FWCore.PythonUtilities.LumiList as LumiList 258 lumiSecs = cms.untracked.VLuminosityBlockRange() 259 goodLumiSecs = LumiList.LumiList(filename = '{json}').getCMSSWString().split(',') 260 readFiles = cms.untracked.vstring() 261 source = cms.Source("PoolSource", 262 lumisToProcess = lumiSecs, 263 fileNames = readFiles) 267 lumiSecs.extend(goodLumiSecs) 268 maxEvents = cms.untracked.PSet(input = cms.untracked.int32(-1)) 273 with open(
"{}/{}.txt".
format(args.out_dir, args.out_data),
"w")
as outData:
274 for filename
in filelist:
275 outData.write(filename +
'\n')
278 if args.iov_txt
and args.json_input:
279 for index, (jsonContent, runNumber)
in enumerate(
zip(jsonAlign, [
int(run)
for run
in listIOV[:-1]])):
280 with open(
"{}/IOV_Align_{}.json".
format(args.out_dir, runNumber),
"w")
as fAlignJson:
281 json.dump(jsonContent, fAlignJson, sort_keys=
True, indent=4, separators=(
',',
': '))
283 for (jsonContent, runNumber)
in zip(jsonVali, [
int(run)
for run
in listIOV[:-1]]):
284 with open(
"{}/IOV_Vali_{}.json".
format(args.out_dir, runNumber),
"w")
as fValiJson:
285 json.dump(jsonContent, fValiJson, sort_keys=
True, indent=4, separators=(
',',
': '))
287 with open(
"{}/{}_since{}_cff.py".
format(args.out_dir, args.out_data, runNumber),
"w")
as outData:
288 outData.write(pyTempl.format(json=os.path.abspath(
"{}/IOV_Vali_{}.json".
format(args.out_dir, runNumber)), filenames=
",\n".
join([
"'{}'".
format(filename)
for filename
in filelist[index]])))
292 [mergeJsonAlign.update(jsonDic)
for jsonDic
in jsonAlign]
295 [mergeJsonVali.update(jsonDic)
for jsonDic
in jsonVali]
297 with open(
"{}/Align.json".
format(args.out_dir, runNumber),
"w")
as fAlignJson:
298 json.dump(mergeJsonAlign, fAlignJson, sort_keys=
True, indent=4, separators=(
',',
': '))
300 with open(
"{}/Vali.json".
format(args.out_dir, runNumber),
"w")
as fValiJson:
301 json.dump(mergeJsonVali, fValiJson, sort_keys=
True, indent=4, separators=(
',',
': '))
303 if not os.path.exists(
"{}/eventsUsed.txt".
format(args.out_dir)):
304 with open(
"{}/eventsUsed.txt".
format(args.out_dir),
"w")
as eventsUsed:
305 eventsUsed.write(
"Events used for Alignment: {}".
format(sum(nEvents)) +
"\n")
306 eventsUsed.write(
"Events used for Validation: {}".
format(0) +
"\n")
308 if __name__ ==
"__main__":
310
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float zip(ConstView const &tracks, int32_t i)
def getFileInfo(filename)
Called in fillJson function in parallel.
def getFileList(dataset)
Called in parallel in the main function.
def split(sequence, size)
static std::string join(char **cmd)
void add(std::map< std::string, TH1 *> &h, TH1 *hist)
def fillJson(runJson, listIOV, filelist, nMax, outDir)
bidiiter shuffle(bidiiter begin, bidiiter end, size_t num_random)