from __future__ import print_function
from __future__ import absolute_import

import abc
import csv
import json
import os
import re

import Utilities.General.cmssw_das_client as das_client

from .utilities import cache
defaultdasinstance = "prod/global"


class DatasetError(Exception):
    pass


class RunRange(object):
    def __init__(self, firstrun, lastrun, runs):
        self.firstrun = firstrun
        self.lastrun = lastrun
        self.runs = runs

    def __contains__(self, run):
        # if an explicit run list is given, the run has to be in it
        if self.runs and run not in self.runs:
            return False
        return self.firstrun <= run <= self.lastrun


def dasquery(dasQuery, dasLimit=0):
    dasData = das_client.get_data(dasQuery, dasLimit)
    if isinstance(dasData, str):
        jsondict = json.loads(dasData)
    else:
        jsondict = dasData

    # check whether the DAS query failed
    try:
        error = findinjson(jsondict, "data", "error")
    except KeyError:
        error = None
    if error or findinjson(jsondict, "status") != 'ok' or "data" not in jsondict:
        jsonstr = str(jsondict)
        if len(jsonstr) > 10000:
            # the error output is huge: dump it to a numbered file instead
            jsonfile = "das_query_output_%i.txt"
            i = 0
            while os.path.lexists(jsonfile % i):
                i += 1
            jsonfile = jsonfile % i
            with open(jsonfile, "w") as theFile:
                theFile.write(jsonstr)
            msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
        else:
            msg = "The DAS query returned an error. Here is the output\n" + jsonstr
        msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
        raise DatasetError(msg)
    return findinjson(jsondict, "data")


def getrunnumbersfromfile(filename, trydas=True, allowunknown=False, dasinstance=defaultdasinstance):
    parts = filename.split("/")
    error = None
    if parts[0] != "" or parts[1] != "store":
        error = "does not start with /store"
    elif parts[2] in ["mc", "relval"]:
        return [1]
    elif not parts[-1].endswith(".root"):
        error = "does not end with something.root"
    elif len(parts) != 12:
        error = "should be exactly 11 slashes counting the first one"
    else:
        runnumberparts = parts[-5:-2]
        if not all(len(part) == 3 for part in runnumberparts):
            error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
        try:
            return [int("".join(runnumberparts))]
        except ValueError:
            error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))

    if error and trydas:
        # fall back to asking DAS which run(s) the file belongs to
        try:
            query = "run file={} instance={}".format(filename, dasinstance)
            result = findinjson(dasquery(query), "run")
            return sum((findinjson(run, "run_number") for run in result), [])
        except Exception as e:
            error = str(e)

    if error and allowunknown:
        return [-1]
    else:
        error = "could not figure out which run number this file is from.\nMaybe try with allowunknown=True?\n  {}\n{}".format(filename, error)
        raise DatasetError(error)
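
# A minimal sketch of the path parsing above, assuming a typical 12-part
# /store data path (the file name below is made up):
#
#   getrunnumbersfromfile("/store/data/Run2018A/ZeroBias/ALCARECO/"
#                         "TkAlMinBias-v1/000/315/252/00000/file.root",
#                         trydas=False)
#   # the three directories 000/315/252 concatenate to the run number:
#   # returns [315252]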
def findinjson(jsondict, *strings):
    # recursively descend into nested dicts/lists following the given keys
    if len(strings) == 0:
        return jsondict
    if isinstance(jsondict, dict):
        if strings[0] in jsondict:
            try:
                return findinjson(jsondict[strings[0]], *strings[1:])
            except KeyError:
                pass
    else:
        for a in jsondict:
            if strings[0] in a:
                try:
                    return findinjson(a[strings[0]], *strings[1:])
                except (TypeError, KeyError):  # TypeError: a could be a string that happens to contain strings[0]
                    pass
    # not found anywhere
    raise KeyError("Can't find " + strings[0])
class DataFile(object):
    def __init__(self, filename, nevents, runs=None, trydas=True, allowunknown=False, dasinstance=defaultdasinstance):
        self.filename = filename
        self.nevents = int(nevents)
        if runs is None:
            runs = getrunnumbersfromfile(filename, trydas=trydas, allowunknown=allowunknown, dasinstance=dasinstance)
        if isinstance(runs, str):
            runs = runs.split()
        self.runs = [int(_) for _ in runs]

    def getdict(self):
        # inverse of __init__: a row for the csv cache written by Dataset.getfiles
        return {"filename": self.filename, "nevents": str(self.nevents), "runs": " ".join(str(_) for _ in self.runs)}
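
# Example construction (hypothetical values): runs may be given directly, as a
# space-separated string (as stored in the csv cache), or omitted, in which
# case they are deduced from the file path or from DAS:
#
#   DataFile("/store/.../000/315/252/00000/file.root", nevents=1000)
#   DataFile("file.root", nevents="1000", runs="315252 315253", trydas=False)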
class DatasetBase(object):
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def getfiles(self, usecache):
        pass

    @abc.abstractproperty
    def headercomment(self):
        pass

    def writefilelist_validation(self, firstrun, lastrun, runs, maxevents, outputfile=None, usecache=True):
        runrange = RunRange(firstrun=firstrun, lastrun=lastrun, runs=runs)
        if outputfile is None:
            outputfile = os.path.join(os.environ["CMSSW_BASE"], "src", "Alignment", "OfflineValidation", "python", self.filenamebase + "_cff.py")

        if maxevents < 0: maxevents = float("inf")
        totalevents = sum(datafile.nevents for datafile in self.getfiles(usecache) if all(run in runrange for run in datafile.runs))
        if totalevents == 0:
            raise ValueError("No events within the run range!")
        accepted = rejected = 0.  # floats, so that the fraction below is exact
        fractiontoaccept = 1. * maxevents / totalevents

        with open(outputfile, "w") as f:
            f.write("#" + self.headercomment + "\n")
            f.write(validationheader)
            for datafile in self.getfiles(usecache):
                if all(run in runrange for run in datafile.runs):
                    # accept files until the running acceptance fraction reaches the target
                    if accepted == 0 or accepted / (accepted + rejected) <= fractiontoaccept:
                        f.write('"' + datafile.filename + '",\n')
                        accepted += datafile.nevents
                    else:
                        rejected += datafile.nevents
                elif any(run in runrange for run in datafile.runs):
                    raise DatasetError("file {} has multiple runs {}, which straddle firstrun or lastrun".format(datafile.filename, datafile.runs))
            f.write("#total events in these files: {}".format(accepted))
            f.write(validationfooter)
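
    # The resulting _cff.py is meant to be loaded by a validation cmsRun
    # config; a sketch of a typical call (the numbers are arbitrary):
    #
    #   dataset.writefilelist_validation(firstrun=315000, lastrun=316000,
    #                                    runs=None, maxevents=100000)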
    def writefilelist_hippy(self, firstrun, lastrun, runs, eventsperjob, maxevents, outputfile, usecache=True):
        runrange = RunRange(firstrun=firstrun, lastrun=lastrun, runs=runs)
        if maxevents < 0: maxevents = float("inf")
        totalevents = sum(datafile.nevents for datafile in self.getfiles(usecache) if all(run in runrange for run in datafile.runs))
        if totalevents == 0:
            raise ValueError("No events within the run range!")
        accepted = rejected = inthisjob = 0.  # floats, so that the fraction below is exact
        fractiontoaccept = 1. * maxevents / totalevents
        writecomma = False

        with open(outputfile, "w") as f:
            for datafile in self.getfiles(usecache):
                if all(run in runrange for run in datafile.runs):
                    if accepted == 0 or accepted / (accepted + rejected) <= fractiontoaccept:
                        if writecomma: f.write(",")
                        f.write("'" + datafile.filename + "'")
                        accepted += datafile.nevents
                        inthisjob += datafile.nevents
                        if inthisjob >= eventsperjob:
                            # this job is full: start a new line for the next one
                            f.write("\n")
                            inthisjob = 0
                            writecomma = False
                        else:
                            writecomma = True
                    else:
                        rejected += datafile.nevents
                elif any(run in runrange for run in datafile.runs):
                    raise DatasetError("file {} has multiple runs {}, which straddle firstrun or lastrun".format(datafile.filename, datafile.runs))
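
    # writefilelist_hippy writes one line per job, with the files of each job
    # as a comma-separated list of single-quoted names, e.g. (made-up names):
    #
    #   'file1.root','file2.root'
    #   'file3.root'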
class Dataset(DatasetBase):
    def __init__(self, datasetname, dasinstance=defaultdasinstance):
        self.datasetname = datasetname
        if re.match(r'/.+/.+/.+', datasetname):
            # an official dataset name: flatten it into a name usable as a file name
            self.filenamebase = "Dataset" + datasetname.replace("/", "_")
        else:
            self.filenamebase = datasetname
        self.dasinstance = dasinstance

    @cache
    def getfiles(self, usecache):
        filename = os.path.join(os.environ["CMSSW_BASE"], "src", "Alignment", "CommonAlignment", "data", self.filenamebase + ".csv")
        if usecache and os.path.exists(filename):
            with open(filename) as f:
                return [DataFile(**row) for row in csv.DictReader(f)]

        # query DAS for the files in the dataset, with details to get the event counts
        query = "file dataset={} instance={} detail=true".format(self.datasetname, self.dasinstance)
        dasoutput = dasquery(query)
        if not dasoutput:
            raise DatasetError("No files are available for the dataset '{}'. This can be "
                               "due to a typo or due to a DAS problem. Please check the "
                               "spelling of the dataset and/or try again.".format(self.datasetname))
        result = [DataFile(findinjson(d, "file", "name"), findinjson(d, "file", "nevents")) for d in dasoutput]
        # cache the DAS result in the csv file for later use
        try:
            with open(filename, "w") as f:
                writer = csv.DictWriter(f, ("filename", "nevents", "runs"))
                writer.writeheader()
                for datafile in result:
                    writer.writerow(datafile.getdict())
        except Exception as e:
            print("Couldn't write the dataset csv file:\n\n{}".format(e))

        return result

    @property
    def headercomment(self):
        return self.datasetname
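
    # The csv cache has columns filename, nevents, runs (runs space-separated),
    # e.g. (made-up row):
    #
    #   filename,nevents,runs
    #   /store/data/Run2018A/ZeroBias/file.root,12345,315252
    #
    # Pass usecache=False to bypass it and query DAS afresh.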
class MultipleDatasets(DatasetBase):
    def __init__(self, *datasets, **kwargs):
        dasinstance = defaultdasinstance
        for kw, kwarg in kwargs.items():
            if kw == "dasinstance":
                dasinstance = kwarg
            else:
                raise TypeError("Unknown kwarg {}={}".format(kw, kwarg))
        self.datasets = [Dataset(dataset, dasinstance=dasinstance) for dataset in datasets]

    @cache
    def getfiles(self, usecache):
        # flat list of the files of all the component datasets
        return sum([d.getfiles(usecache=usecache) for d in self.datasets], [])

    @property
    def headercomment(self):
        return ", ".join(d.headercomment for d in self.datasets)
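
# Example (hypothetical dataset names): combine several datasets so that file
# lists are drawn from all of them:
#
#   datasets = MultipleDatasets("/A/B/ALCARECO", "/C/D/ALCARECO",
#                               dasinstance="prod/global")
#   files = datasets.getfiles(usecache=True)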
validationheader = """
import FWCore.ParameterSet.Config as cms

maxEvents = cms.untracked.PSet( input = cms.untracked.int32(-1) )
readFiles = cms.untracked.vstring()
secFiles = cms.untracked.vstring()
source = cms.Source ("PoolSource",fileNames = readFiles, secondaryFileNames = secFiles)
readFiles.extend( [
"""

validationfooter = """
] )
"""
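
# A minimal end-to-end sketch, assuming a CMSSW environment (CMSSW_BASE set)
# and working DAS access; the dataset name and run range below are purely
# illustrative:
if __name__ == "__main__":
    dataset = Dataset("/ZeroBias/Run2018A-TkAlMinBias-v1/ALCARECO")
    dataset.writefilelist_validation(firstrun=315000, lastrun=316000, runs=None,
                                     maxevents=100000,
                                     outputfile="validation_files_cff.py")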