from FWCore.PythonUtilities.LumiList import LumiList
from TkAlExceptions import AllInOneError
    def __init__( self, datasetName, dasLimit = 0, tryPredefinedFirst = True,
                  cmssw = os.environ["CMSSW_BASE"],
                  cmsswrelease = os.environ["CMSSW_RELEASE_BASE"] ):
        if re.match( r'/.+/.+/.+', self.__name ):
            fileName = "Dataset" + self.__name.replace("/", "_") + "_cff.py"
        else:
            fileName = self.__name + "_cff.py"
        searchPath1 = os.path.join( self.__cmssw, "python",
                                    "Alignment", "OfflineValidation",
                                    fileName )
        searchPath2 = os.path.join( self.__cmssw, "src",
                                    "Alignment", "OfflineValidation",
                                    "python", fileName )
        searchPath3 = os.path.join( self.__cmsswrelease,
                                    "python", "Alignment",
                                    "OfflineValidation", fileName )
        elif os.path.exists( searchPath1 ):
        elif os.path.exists( searchPath2 ):
            msg = ("The predefined dataset '%s' does exist in '%s', but "
                   "you need to run 'scram b' first."
                   %( self.__name, searchPath2 ))
            print "Getting the data from DAS again. To go faster next time, run scram b."
        elif os.path.exists( searchPath3 ):
        else:
            msg = ("The predefined dataset '%s' does not exist. Please "
                   "create it first or check for typos."%( self.__name ))
        self.__name = "Dataset" + self.__name.replace("/", "_")
80 """ Yield successive n-sized chunks from theList.
82 for i
in xrange( 0, len( theList ), n ):
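        # A minimal usage sketch (illustrative, not from the original file):
        # chunking range(7) with n = 3 yields [[0, 1, 2], [3, 4, 5], [6]].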
    __source_template = ("%(header)s"
                         "import FWCore.PythonUtilities.LumiList as LumiList\n\n"
                         "readFiles = cms.untracked.vstring()\n"
                         "secFiles = cms.untracked.vstring()\n"
                         "%(process)ssource = cms.Source(\"PoolSource\",\n"
                         "%(tab)s secondaryFileNames ="
                         "%(tab)s fileNames = readFiles\n"
                         "%(process)smaxEvents = cms.untracked.PSet( "
                         "input = cms.untracked.int32(%(nEvents)s) )\n"
                         "%(skipEventsString)s\n")
    __dummy_source_template = ("readFiles = cms.untracked.vstring()\n"
                               "secFiles = cms.untracked.vstring()\n"
                               "%(process)ssource = cms.Source(\"PoolSource\",\n"
                               "%(tab)s secondaryFileNames ="
                               "%(tab)s fileNames = readFiles\n"
                               "readFiles.extend(['dummy_File.root'])\n"
                               "%(process)smaxEvents = cms.untracked.PSet( "
                               "input = cms.untracked.int32(%(nEvents)s) )\n"
                               "%(skipEventsString)s\n")
                         firstRun = None, lastRun = None, repMap = None,
                         crab = False, parent = False ):
        firstRun = int( firstRun )
        lastRun = int( lastRun )
        if ( begin and firstRun ) or ( end and lastRun ):
            msg = ( "The usage of "
                    + "'begin' & 'firstRun' " * int( bool( begin and
                                                           firstRun ) )
                    + "and " * int( bool( ( begin and firstRun ) and
                                          ( end and lastRun ) ) )
                    + "'end' & 'lastRun' " * int( bool( end and
                                                        lastRun ) ) )
                begin = begin, end = end, firstRun = firstRun,
        if ( firstRun and lastRun ) and ( firstRun > lastRun ):
            msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
                    "chosen is greater than the upper time/runrange limit "
                    "('end'/'lastRun').")
        if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
            msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
                    "only work for official datasets, not predefined _cff.py files" )
        if firstRun or lastRun or jsonPath:
            goodLumiSecStr = ( "lumiSecs = cms.untracked."
                               "VLuminosityBlockRange()\n" )
            lumiStr = " lumisToProcess = lumiSecs,\n"
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") >= firstRun ]
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") <= lastRun ]
                lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                             + str( self.__findInJson(run, "run_number") ) + ":max" \
                             for run in selectedRunList ]
                theLumiList = LumiList ( filename = jsonPath )
                if theLumiList is not None:
                    allRuns = theLumiList.getRuns()
                    runsToRemove = []
                    for run in allRuns:
                        if firstRun and int( run ) < firstRun:
                            runsToRemove.append( run )
                        if lastRun and int( run ) > lastRun:
                            runsToRemove.append( run )
                    theLumiList.removeRuns( runsToRemove )
                    splitLumiList = list( self.__chunks(
                        theLumiList.getCMSSWString().split(','), 255 ) )
                with open(jsonPath) as f:
                    jsoncontents = f.read()
                    if "process.source.lumisToProcess" in jsoncontents:
                        msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
                        if firstRun or lastRun:
                            msg += ("\n (after applying firstRun and/or lastRun)")
                        msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                        msg += "\nCheck your config file to make sure that it worked properly."
                        if firstRun or lastRun:
                            jsoncontents = re.sub(r"\d+:(\d+|max)-\d+:(\d+|max)",
                                                  self.getForceRunRangeFunction(firstRun, lastRun),
                                                  jsoncontents)
                        lumiSecExtend = jsoncontents
            splitLumiList = [[""]]
            if splitLumiList and splitLumiList[0]:
                if splitLumiList[0][0]:
                    lumiSecStr = [ "',\n'".join( lumis ) \
                                   for lumis in splitLumiList ]
                    lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                                   for lumis in lumiSecStr ]
                    lumiSecExtend = "\n".join( lumiSecStr )
            msg = "You are trying to run a validation without any runs! Check that:"
            if firstRun or lastRun:
                msg += "\n - firstRun and lastRun are correct for this dataset, and there are runs in between containing data"
            msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
            if (firstRun or lastRun) and jsonPath:
                msg += "\n - firstRun and lastRun are consistent with your JSON file"
            msg = msg.replace("firstRun", "begin")
            msg = msg.replace("lastRun", "end")
        fileStr = [ "',\n'".join( files ) for files in splitFileList ]
        fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                    for files in fileStr ]
        files = "\n".join( fileStr )
            parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
            parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
                              for parentFiles in parentFileStr ]
            parentFiles = "\n".join( parentFileStr )
            files += "\n\n" + parentFiles
        theMap["files"] = files
        theMap["json"] = jsonPath
        theMap["lumiStr"] = lumiStr
        theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
        theMap["lumiSecExtend"] = lumiSecExtend
        dataset_snippet = self.__source_template%( theMap )
        return dataset_snippet
        'Find rightmost value less than x'
        i = bisect.bisect_left( a, x )

        'Find leftmost item greater than or equal to x'
        i = bisect.bisect_left( a, x )
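        # bisect.bisect_left returns the insertion point of x in the sorted list a.
        # For example, bisect.bisect_left([1, 3, 5], 4) == 2, so the rightmost value
        # less than 4 is a[1] == 3 and the leftmost value >= 4 is a[2] == 5.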
        if isinstance(strings, str):
            strings = [ strings ]
        if len(strings) == 0:
            return jsondict
        if isinstance(jsondict,dict):
            if strings[0] in jsondict:
                try:
                    return self.__findInJson(jsondict[strings[0]], strings[1:])
                except (TypeError, KeyError):
                    pass
        raise KeyError("Can't find " + strings[0])
301 """s must be in the format run1:lum1-run2:lum2"""
303 run1 = s.split(
"-")[0].
split(
":")[0]
304 lum1 = s.split(
"-")[0].
split(
":")[1]
305 run2 = s.split(
"-")[1].
split(
":")[0]
306 lum2 = s.split(
"-")[1].
split(
":")[1]
307 if int(run2) < firstRun
or int(run1) > lastRun:
309 if int(run1) < firstRun
or firstRun < 0:
312 if int(run2) > lastRun:
319 return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
        def forcerunrangefunction(s):
            return self.forcerunrange(firstRun, lastRun, s)
        return forcerunrangefunction
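        # Illustrative example (not from the original file): with firstRun = 2 and
        # lastRun = 4, the returned closure rewrites "1:1-5:max" to "2:1-4:max" and
        # maps a range lying entirely outside [2, 4] to an empty string.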
        dasData = das_client.get_data( 'https://cmsweb.cern.ch',
                                       dasQuery, 0, dasLimit, False )
        if isinstance(dasData, str):
            jsondict = json.loads( dasData )
        if error or self.__findInJson(jsondict, "status") != 'ok' or "data" not in jsondict:
            jsonstr = str(jsondict)
            if len(jsonstr) > 10000:
                jsonfile = "das_query_output_%i.txt"
                i = 0
                while os.path.lexists(jsonfile % i):
                    i += 1
                jsonfile = jsonfile % i
                theFile = open( jsonfile, "w" )
                theFile.write( jsonstr )
                msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
            else:
                msg = "The DAS query returned an error. Here is the output\n" + jsonstr
            msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
            for line in f.readlines():
                if line.startswith("#data type: "):
                    if datatype is not None:
                    datatype = line.replace("#data type: ", "").replace("\n", "")

        dasQuery_type = ( 'dataset dataset=%s | grep dataset.datatype,'
                          'dataset.name'%( self.__name ) )
            print ("Cannot find the datatype of the dataset '%s'\n"
                   "It may not be possible to automatically find the magnetic field,\n"
                   "and you will not be able to run in CRAB mode"
                   %( self.__name ))
        dasQuery = "parent dataset=" + self.__name

                            "Here is the DAS output:\n" + str(jsondict) +
                            "\nIt's possible that this was a server error. If so, it may work if you try again later")
        Bfieldlocation = os.path.join( self.__cmssw, "python", "Configuration", "StandardSequences" )
        if not os.path.isdir(Bfieldlocation):
            Bfieldlocation = os.path.join( self.__cmsswrelease, "python", "Configuration", "StandardSequences" )
        Bfieldlist = [ f.replace("_cff.py", '') \
                       for f in os.listdir(Bfieldlocation) \
                       if f.startswith("MagneticField_") and f.endswith("_cff.py") ]
        Bfieldlist.sort( key = lambda Bfield: -len(Bfield) )
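        # Sorting by descending name length makes the longer, more specific entries
        # (e.g. "MagneticField_38T_PostLS1") get matched before shorter ones
        # (e.g. "MagneticField_38T") in the checks that follow.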
                for line in f.readlines():
                    if line.startswith("#data type: "):
                        if datatype is not None:
                        datatype = line.replace("#data type: ", "").replace("\n", "")
                        datatype = datatype.split("#")[0].strip()
                    if line.startswith("#magnetic field: "):
                        if Bfield is not None:
                        Bfield = line.replace("#magnetic field: ", "").replace("\n", "")
                        Bfield = Bfield.split("#")[0].strip()
                if Bfield is not None:
                    Bfield = Bfield.split(",")[0]
                    if Bfield in Bfieldlist or Bfield == "unknown":
                        return Bfield
                    elif Bfield == "AutoFromDBCurrent":
                        return "MagneticField"
                    elif "MagneticField_" + Bfield in Bfieldlist:
                        return "MagneticField_" + Bfield
                    else:
                        print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
                        print "Using Bfield='unknown' - this will revert to the default"
                elif datatype == "data":
                    return "MagneticField"

        return "MagneticField"
        dasQuery_B = ( 'dataset dataset=%s'%( self.__name ) )
        Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
        if Bfield in Bfieldlist:
            return Bfield
        elif Bfield == "38T" or Bfield == "38T_PostLS1":
            return "MagneticField"
        elif "MagneticField_" + Bfield in Bfieldlist:
            return "MagneticField_" + Bfield
        else:
            print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
            print "Using Bfield='unknown' - this will revert to the default magnetic field"
        for possibleB in Bfieldlist:
            if (possibleB != "MagneticField"
                and possibleB.replace("MagneticField_", "") in self.__name.replace("TkAlCosmics0T", "")):
                if possibleB == "MagneticField_38T" or possibleB == "MagneticField_38T_PostLS1":
                    return "MagneticField"
                return possibleB
472 """For MC, this returns the same as the previous function.
473 For data, it gets the magnetic field from the runs. This is important for
474 deciding which template to use for offlinevalidation
479 Bfield = self.__magneticField.split(
"T")[0].
replace(
"MagneticField_",
"")
481 return float(Bfield) / 10.0
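        # For example (illustrative), self.__magneticField == "MagneticField_38T_PostLS1"
        # gives Bfield = "38" and a return value of 3.8 (tesla).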
                for line in f.readlines():
                    if line.startswith("#magnetic field: ") and "," in line:
                        if Bfield is not None:
                        return float(line.replace("#magnetic field: ", "").split(",")[1].split("#")[0].strip())
        dasQuery = ('run = %s'%run)
        return "unknown Can't get the magnetic field for run %s from DAS" % run
        return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
        if abs(firstrunB - lastrunB) <= tolerance:
            return .5*(firstrunB + lastrunB)
        print firstrunB, lastrunB, tolerance
        return ("unknown The beginning and end of your run range for %s\n"
                "have different magnetic fields (%s, %s)!\n"
                "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
                "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)
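        # For example (illustrative), with tolerance = 0.5 a pair firstrunB = 3.8 and
        # lastrunB = 3.8 averages to 3.8, while 0.0 vs 3.8 differ by more than the
        # tolerance and produce the "unknown ..." message above instead.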
        if "unknown" in firstrunB:
            if parent:
                extendstring = "secFiles.extend"
            else:
                extendstring = "readFiles.extend"
            with open(self.__fileName) as f:
                for line in f.readlines():
                    files.append({name: line.translate(None, "', " + '"')})
                    if extendstring in line and "[" in line and "]" not in line:
        searchdataset = self.__name
        dasQuery_files = ( 'file dataset=%s | grep file.name, file.nevents, '
                           'file.creation_time, '
                           'file.modification_time'%( searchdataset ) )
        print "Requesting file information for '%s' from DAS..."%( searchdataset ),
        data = self.__getData( dasQuery_files, dasLimit )
        data = [ self.__findInJson(entry, "file") for entry in data ]
        msg = ("No files are available for the dataset '%s'. This can be "
               "due to a typo or due to a DAS problem. Please check the "
               "spelling of the dataset and/or retry to run "
               "'validateAlignments.py'."%( self.name() ))
        fileInformationList = []
        fileCreationTime = self.__findInJson(file, "creation_time")
        print ("DAS query gives bad output for file '%s'. Skipping it.\n"
               "It may work if you try again later.") % fileName
        fileDict = { "name": fileName,
                     "creation_time": fileCreationTime,
                     "nevents": fileNEvents
                     }
        fileInformationList.append( fileDict )
        fileInformationList.sort( key=lambda info: self.__findInJson(info, "name") )
        return fileInformationList
        dasQuery_runs = ( 'run dataset=%s | grep run.run_number,'
                          'run.creation_time'%( self.__name ) )
        print "Requesting run information for '%s' from DAS..."%( self.__name ),
        data = [ self.__findInJson(entry, "run") for entry in data ]
        data.sort( key = lambda run: self.__findInJson(run, "run_number") )
        if len(stringForDas) != 8:
            raise AllInOneError(stringForDas + " is not a valid date string.\n"
                                + "DAS accepts dates in the form 'yyyymmdd'")
        year = stringForDas[:4]
        month = stringForDas[4:6]
        day = stringForDas[6:8]
        return datetime.date(int(year), int(month), int(day))

        return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)
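        # For example, "20120515" parses to datetime.date(2012, 5, 15); the string
        # conversion above turns date(2012, 5, 15) back into "20120515", with zfill
        # padding single-digit months and days.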
                              firstRun = None, lastRun = None,
        if ( begin and firstRun ) or ( end and lastRun ):
            msg = ( "The usage of "
                    + "'begin' & 'firstRun' " * int( bool( begin and
                                                           firstRun ) )
                    + "and " * int( bool( ( begin and firstRun ) and
                                          ( end and lastRun ) ) )
                    + "'end' & 'lastRun' " * int( bool( end and
                                                        lastRun ) ) )
        for delta in [ 1, 5, 10, 20, 30 ]:
            dasQuery_begin = "run date between[%s,%s]" % (firstdate, lastdate)
            begindata = self.__getData(dasQuery_begin)
            if len(begindata) > 0:
                begindata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
                msg = ( "Your 'begin' is after the creation time of the last "
                        "run in the dataset\n'%s'"%( self.__name ) )
                firstRun = runList[runIndex]

        raise AllInOneError("No runs within a reasonable time interval after your 'begin'. "
                            "Try using a 'begin' that has runs soon after it (within 2 months at most)")
        for delta in [ 1, 5, 10, 20, 30 ]:
            dasQuery_end = "run date between[%s,%s]" % (firstdate, lastdate)
            enddata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))
            msg = ( "Your 'end' is before the creation time of the first "
                    "run in the dataset\n'%s'"%( self.__name ) )
            lastRun = runList[runIndex]

        raise AllInOneError("No runs within a reasonable time interval before your 'end'. "
                            "Try using an 'end' that has runs soon before it (within 2 months at most)")
            return firstRun, lastRun
        return begin, end, firstRun, lastRun
    def datasetSnippet( self, jsonPath = None, begin = None, end = None,
                        firstRun = None, lastRun = None, crab = False, parent = False ):
705 if "secFiles.extend" not in f.read():
706 msg = (
"The predefined dataset '%s' does not contain secondary files, "
707 "which your validation requires!") % self.
__name
712 print (
"Retreiving the files from DAS. You will be asked if you want "
713 "to overwrite the old dataset.\n"
714 "It will still be compatible with validations that don't need secondary files.")
            snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
                       "process.maxEvents = cms.untracked.PSet(\n"
                       "    input = cms.untracked.int32(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.)\n"
                       ")\n"
                       "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)"
                       % self.__name)
            if "secFiles.extend" in f.read():
                snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"
        theMap = { "process": "process.",
                   "tab": " " * len( "process." ),
                   "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
                   "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)\n",
                   }
        if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
            try:
                self.dump_cff(parent = parent)
            except AllInOneError as e:
                print "Can't store the dataset as a cff:"
                print "This may be inconvenient in the future, but will not cause a problem for this validation."
        return datasetSnippet
    def dump_cff( self, outName = None, jsonPath = None, begin = None,
                  end = None, firstRun = None, lastRun = None, parent = False ):
        if outName is None:
            outName = "Dataset" + self.__name.replace("/", "_")
        packageName = os.path.join( "Alignment", "OfflineValidation" )
        if not os.path.exists( os.path.join(
                self.__cmssw, "src", packageName ) ):
            msg = ("You are trying to store the predefined dataset '%s'.\n"
                   "For that you need to check out the package '%s' to your "
                   "private release area in\n"%( outName, packageName )
        theMap = { "process": "",
                   "nEvents": str( -1 ),
                   "skipEventsString": "",
                   "importCms": "import FWCore.ParameterSet.Config as cms\n",
                   "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
                             "#data type: %(dataType)s\n"
                             "#magnetic field: .oO[magneticField]Oo.\n"
                   }
        if magneticField == "MagneticField":
            magneticField = "%s, %s #%s" % (magneticField,
                                            "Use MagneticField_cff.py; the number is for determining which track selection to use."
                                            )
        dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.", magneticField)
        filePath = os.path.join( self.__cmssw, "src", packageName,
                                 "python", outName + "_cff.py" )
        if os.path.exists( filePath ):
            existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
            askString = "Do you want to overwrite it? [y/n]\n"
            inputQuery = existMsg + askString
            while True:
                userInput = raw_input( inputQuery ).lower()
                if userInput == "y":
                    break
                elif userInput == "n":
                    return
                else:
                    inputQuery = askString
        print ( "The predefined dataset '%s' will be stored in the file\n"
                "\nFor future use you have to do 'scram b'." )
        theFile = open( filePath, "w" )
        theFile.write( dataset_cff )
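        # The freshly written <outName>_cff.py only becomes loadable through
        # process.load("Alignment.OfflineValidation.<outName>_cff") after 'scram b'
        # has been run, as the messages above point out.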
if __name__ == '__main__':
    print "Start testing..."
    datasetName = '/MinimumBias/Run2012D-TkAlMinBias-v1/ALCARECO'
    jsonFile = ( '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/'
                 'Collisions12/8TeV/Prompt/'
                 'Cert_190456-207898_8TeV_PromptReco_Collisions12_JSON.txt' )
    print dataset.datasetSnippet( jsonPath = jsonFile,
    dataset.dump_cff( outName = "Dataset_Test_TkAlMinBias_Run2012D",