from FWCore.PythonUtilities.LumiList import LumiList
from TkAlExceptions import AllInOneError
def __init__( self, datasetName, dasLimit = 0, tryPredefinedFirst = True,
              cmssw = os.environ["CMSSW_BASE"], cmsswrelease = os.environ["CMSSW_RELEASE_BASE"] ):
    if re.match( r'/.+/.+/.+', self.__name ):
        fileName = "Dataset" + self.__name.replace( "/", "_" ) + "_cff.py"
    else:
        fileName = self.__name + "_cff.py"
    searchPath1 = os.path.join( self.__cmssw, "python",
                                "Alignment", "OfflineValidation", fileName )
    searchPath2 = os.path.join( self.__cmssw, "src",
                                "Alignment", "OfflineValidation",
                                "python", fileName )
    searchPath3 = os.path.join( self.__cmsswrelease, "python",
                                "Alignment", "OfflineValidation", fileName )
    elif os.path.exists( searchPath1 ):

    elif os.path.exists( searchPath2 ):
        msg = ( "The predefined dataset '%s' does exist in '%s', but "
                "you need to run 'scram b' first."
                %( self.__name, searchPath2 ) )
        print "Getting the data from DAS again. To go faster next time, run scram b."

    elif os.path.exists( searchPath3 ):
        msg = ( "The predefined dataset '%s' does not exist. Please "
                "create it first or check for typos."%( self.__name ) )

    self.__name = "Dataset" + self.__name.replace( "/", "_" )
80 """ Yield successive n-sized chunks from theList.
82 for i
in xrange( 0, len( theList ), n ):
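The helper above is a plain generator; a minimal standalone sketch of it (hypothetical module-level name chunks, using the Python 2 xrange seen in this file):

def chunks(theList, n):
    """Yield successive n-sized chunks from theList."""
    for i in xrange(0, len(theList), n):
        yield theList[i:i + n]

# e.g. a long file list can be split into fixed-size blocks before building
# the readFiles.extend([ ... ]) statements used elsewhere in this file:
# list(chunks(["a.root", "b.root", "c.root"], 2)) -> [['a.root', 'b.root'], ['c.root']]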
__source_template = ("%(header)s"
                     "import FWCore.PythonUtilities.LumiList as LumiList\n\n"
                     "readFiles = cms.untracked.vstring()\n"
                     "secFiles = cms.untracked.vstring()\n"
                     "%(process)ssource = cms.Source(\"PoolSource\",\n"
                     "%(tab)s secondaryFileNames ="
                     "%(tab)s fileNames = readFiles\n"
                     "%(process)smaxEvents = cms.untracked.PSet( "
                     "input = cms.untracked.int32(%(nEvents)s) )\n"
                     "%(skipEventsString)s\n")
__dummy_source_template = ("readFiles = cms.untracked.vstring()\n"
                           "secFiles = cms.untracked.vstring()\n"
                           "%(process)ssource = cms.Source(\"PoolSource\",\n"
                           "%(tab)s secondaryFileNames ="
                           "%(tab)s fileNames = readFiles\n"
                           "readFiles.extend(['dummy_File.root'])\n"
                           "%(process)smaxEvents = cms.untracked.PSet( "
                           "input = cms.untracked.int32(%(nEvents)s) )\n"
                           "%(skipEventsString)s\n")
              firstRun = None, lastRun = None, repMap = None,
              crab = False, parent = False ):
    firstRun = int( firstRun )
    lastRun = int( lastRun )
    if ( begin and firstRun ) or ( end and lastRun ):
        msg = ( "The usage of "
                + "'begin' & 'firstRun' " * int( bool( begin and firstRun ) )
                + "and " * int( bool( ( begin and firstRun ) and
                                      ( end and lastRun ) ) )
                + "'end' & 'lastRun' " * int( bool( end and lastRun ) ) )
        begin = begin, end = end, firstRun = firstRun,

    if ( firstRun and lastRun ) and ( firstRun > lastRun ):
        msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
                "chosen is greater than the upper time/runrange limit "
                "('end'/'lastRun')." )
    if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
        msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
                "only work for official datasets, not predefined _cff.py files" )
    if firstRun or lastRun or jsonPath:
        goodLumiSecStr = ( "lumiSecs = cms.untracked."
                           "VLuminosityBlockRange()\n" )
        lumiStr = " lumisToProcess = lumiSecs,\n"
        selectedRunList = [ run for run in selectedRunList \

        selectedRunList = [ run for run in selectedRunList \

        lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                     + str( self.__findInJson(run, "run_number") ) + ":max" \
                     for run in selectedRunList ]
    theLumiList = LumiList( filename = jsonPath )
    if theLumiList is not None:
        allRuns = theLumiList.getRuns()
        runsToRemove = []
        for run in allRuns:
            if firstRun and int( run ) < firstRun:
                runsToRemove.append( run )
            if lastRun and int( run ) > lastRun:
                runsToRemove.append( run )
        theLumiList.removeRuns( runsToRemove )
        splitLumiList = list( self.__chunks(
            theLumiList.getCMSSWString().split(','), 255 ) )
    with open(jsonPath) as f:
        jsoncontents = f.read()
        if "process.source.lumisToProcess" in jsoncontents:
            msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
            if firstRun or lastRun:
                msg += ( "\n (after applying firstRun and/or lastRun)" )
            msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
            msg += "\nCheck your config file to make sure that it worked properly."
            if firstRun or lastRun:
                jsoncontents = re.sub("\d+:(\d+|max)-\d+:(\d+|max)",
                                      self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
            lumiSecExtend = jsoncontents

    splitLumiList = [[""]]
    if not len(splitLumiList[0][0]) == 0:
        lumiSecStr = [ "',\n'".join( lumis ) \
                       for lumis in splitLumiList ]
        lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                       for lumis in lumiSecStr ]
        lumiSecExtend = "\n".join( lumiSecStr )
    fileStr = [ "',\n'".join( files ) for files in splitFileList ]
    fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                for files in fileStr ]
    files = "\n".join( fileStr )

    parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
    parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
                      for parentFiles in parentFileStr ]
    parentFiles = "\n".join( parentFileStr )
    files += "\n\n" + parentFiles

    theMap["files"] = files
    theMap["json"] = jsonPath
    theMap["lumiStr"] = lumiStr
    theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
    theMap["lumiSecExtend"] = lumiSecExtend

    dataset_snippet = self.__source_template%( theMap )
    return dataset_snippet
'Find rightmost value less than x'
i = bisect.bisect_left( a, x )

'Find leftmost item greater than or equal to x'
i = bisect.bisect_left( a, x )
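These two fragments follow the standard library bisect recipes; for reference, complete versions look like this (a sketch with standalone names find_lt and find_ge):

import bisect

def find_lt(a, x):
    'Find rightmost value less than x'
    i = bisect.bisect_left(a, x)
    if i:
        return a[i-1]
    raise ValueError

def find_ge(a, x):
    'Find leftmost item greater than or equal to x'
    i = bisect.bisect_left(a, x)
    if i != len(a):
        return a[i]
    raise ValueError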
    if isinstance(strings, str):
        strings = [ strings ]
    if len(strings) == 0:

    if isinstance(jsondict, dict):
        if strings[0] in jsondict:
            return self.__findInJson(jsondict[strings[0]], strings[1:])

    except (TypeError, KeyError):

    raise KeyError("Can't find " + strings[0])
278 """s must be in the format run1:lum1-run2:lum2"""
281 run1 = s.split(
"-")[0].
split(
":")[0]
282 lum1 = s.split(
"-")[0].
split(
":")[1]
283 run2 = s.split(
"-")[1].
split(
":")[0]
284 lum2 = s.split(
"-")[1].
split(
":")[1]
285 if int(run2) < firstRun
or int(run1) > lastRun:
287 if int(run1) < firstRun
or firstRun < 0:
290 if int(run2) > lastRun:
297 return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
    def forcerunrangefunction(s):

    return forcerunrangefunction
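A sketch of how such a closure can be used with re.sub (standalone, hypothetical names; re.sub hands the replacement callable a match object, so the text is taken via .group()):

import re

def get_force_run_range_function(firstRun, lastRun):
    # build a replacement callable that clips every "run1:lum1-run2:lum2"
    # range in a lumi-selection snippet to the window [firstRun, lastRun]
    def forcerunrangefunction(match):
        s = match.group()
        run1, lum1 = s.split("-")[0].split(":")
        run2, lum2 = s.split("-")[1].split(":")
        if int(run2) < firstRun or int(run1) > lastRun:
            return ""  # range lies entirely outside the window
        if int(run1) < firstRun:
            run1, lum1 = str(firstRun), "1"
        if int(run2) > lastRun:
            run2, lum2 = str(lastRun), "max"
        return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
    return forcerunrangefunction

snippet = '"190000:1-210000:max"'
print re.sub(r"\d+:(\d+|max)-\d+:(\d+|max)",
             get_force_run_range_function(195000, 205000), snippet)
# -> "195000:1-205000:max"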
    dasData = das_client.get_data( 'https://cmsweb.cern.ch',
                                   dasQuery, 0, dasLimit, False )
    if isinstance(dasData, str):
        jsondict = json.loads( dasData )

    if error or self.__findInJson(jsondict, "status") != 'ok' or "data" not in jsondict:
        msg = ("The DAS query returned an error. Here is the output\n" + str(jsondict) +
               "\nIt's possible that this was a server error. If so, it may work if you try again later")
    for line in f.readlines():
        if line.startswith("#data type: "):
            if datatype is not None:

            datatype = line.replace("#data type: ", "").replace("\n", "")

    dasQuery_type = ( 'dataset dataset=%s | grep dataset.datatype,'
                      'dataset.name'%( self.__name ) )

    print ("Cannot find the datatype of the dataset '%s'\n"
           "It may not be possible to automatically find the magnetic field,\n"
           "and you will not be able to run in CRAB mode"
    dasQuery = "parent dataset=" + self.__name

                        "Here is the DAS output:\n" + str(jsondict) +
                        "\nIt's possible that this was a server error. If so, it may work if you try again later")
    Bfieldlocation = os.path.join( self.__cmsswrelease, "python",
                                   "Configuration", "StandardSequences" )
    Bfieldlist = [ f.replace("MagneticField_", '').replace("_cff.py", '') \
                   for f in os.listdir(Bfieldlocation) \
                   if f.startswith("MagneticField_") and f.endswith("_cff.py") and f != "MagneticField_cff.py" ]
    Bfieldlist.sort( key = lambda Bfield: -len(Bfield) )
    for line in f.readlines():
        if line.startswith("#data type: "):
            if datatype is not None:

            datatype = line.replace("#data type: ", "").replace("\n", "")
        if line.startswith("#magnetic field: "):
            if Bfield is not None:

            Bfield = line.replace("#magnetic field: ", "").replace("\n", "")

    if Bfield is not None:
        Bfield = Bfield.split(",")[0]
        if Bfield in Bfieldlist or Bfield == "unknown":

        print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
        print "Using Bfield='unknown' - this will revert to the default"

    elif datatype == "data":
        return "AutoFromDBCurrent"

    return "AutoFromDBCurrent"
    dasQuery_B = ( 'dataset dataset=%s'%( self.__name ) )

    Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
    if Bfield in Bfieldlist:

    print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
    print "Using Bfield='unknown' - this will revert to the default magnetic field"

    for possibleB in Bfieldlist:
        if possibleB in self.__name.replace("TkAlCosmics0T", ""):
415 """For MC, this returns the same as the previous function.
416 For data, it gets the magnetic field from the runs. This is important for
417 deciding which template to use for offlinevalidation
420 Bfield = self.__magneticField.split(
"T")[0]
421 return float(Bfield) / 10.0
    for line in f.readlines():
        if line.startswith("#magnetic field: ") and "," in line:
            if Bfield is not None:

            return float(line.replace("#magnetic field: ", "").split(",")[1])
    dasQuery = ( 'run = %s'%run )

    return "unknown Can't get the magnetic field for run %s from DAS" % run

    return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."

    if abs(firstrunB - lastrunB) <= tolerance:
        return .5*(firstrunB + lastrunB)
    print firstrunB, lastrunB, tolerance
    return ("unknown The beginning and end of your run range for %s\n"
            "have different magnetic fields (%s, %s)!\n"
            "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
            "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)

    if "unknown" in firstrunB:
    extendstring = "secFiles.extend"

    extendstring = "readFiles.extend"
    with open(self.__fileName) as f:
        for line in f.readlines():
            files.append({name: line.translate(None, "', " + '"')})
            if extendstring in line and "[" in line and "]" not in line:
    searchdataset = self.__name
    dasQuery_files = ( 'file dataset=%s | grep file.name, file.nevents, '
                       'file.creation_time, '
                       'file.modification_time'%( searchdataset ) )
    print "Requesting file information for '%s' from DAS..."%( searchdataset ),
    data = self.__getData( dasQuery_files, dasLimit )

    data = [ self.__findInJson(entry, "file") for entry in data ]

    msg = ("No files are available for the dataset '%s'. This can be "
           "due to a typo or due to a DAS problem. Please check the "
           "spelling of the dataset and/or retry to run "
           "'validateAlignments.py'."%( self.name() ))
    fileInformationList = []

    fileCreationTime = self.__findInJson(file, "creation_time")

    print ("DAS query gives bad output for file '%s'. Skipping it.\n"
           "It may work if you try again later.") % fileName

    fileDict = { "name": fileName,
                 "creation_time": fileCreationTime,
                 "nevents": fileNEvents
               }
    fileInformationList.append( fileDict )
    fileInformationList.sort( key = lambda info: self.__findInJson(info, "name") )

    return fileInformationList
    dasQuery_runs = ( 'run dataset=%s | grep run.run_number,'
                      'run.creation_time'%( self.__name ) )
    print "Requesting run information for '%s' from DAS..."%( self.__name ),

    data = [ self.__findInJson(entry, "run") for entry in data ]
    data.sort( key = lambda run: self.__findInJson(run, "run_number") )
    if len(stringForDas) != 8:
        raise AllInOneError(stringForDas + " is not a valid date string.\n"
                            + "DAS accepts dates in the form 'yyyymmdd'")
    year = stringForDas[:4]
    month = stringForDas[4:6]
    day = stringForDas[6:8]
    return datetime.date(int(year), int(month), int(day))

    return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)
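The two return statements above convert between DAS date strings and datetime.date objects; a standalone round-trip sketch (hypothetical names):

import datetime

def date_from_das_string(stringForDas):
    # 'yyyymmdd' -> datetime.date, mirroring the parsing above
    if len(stringForDas) != 8:
        raise ValueError("DAS accepts dates in the form 'yyyymmdd'")
    return datetime.date(int(stringForDas[:4]),
                         int(stringForDas[4:6]),
                         int(stringForDas[6:8]))

def das_string_from_date(date):
    # datetime.date -> 'yyyymmdd', zero-padding month and day
    return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)

assert das_string_from_date(date_from_das_string("20120705")) == "20120705"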
              firstRun = None, lastRun = None,

    if ( begin and firstRun ) or ( end and lastRun ):
        msg = ( "The usage of "
                + "'begin' & 'firstRun' " * int( bool( begin and firstRun ) )
                + "and " * int( bool( ( begin and firstRun ) and
                                      ( end and lastRun ) ) )
                + "'end' & 'lastRun' " * int( bool( end and lastRun ) ) )
    for delta in [ 1, 5, 10, 20, 30 ]:
        dasQuery_begin = "run date between[%s,%s]" % (firstdate, lastdate)
        begindata = self.__getData(dasQuery_begin)
        if len(begindata) > 0:
            begindata.sort( key = lambda run: self.__findInJson(run, ["run", "run_number"]) )

            msg = ( "Your 'begin' is after the creation time of the last "
                    "run in the dataset\n'%s'"%( self.__name ) )

            firstRun = runList[runIndex]

    raise AllInOneError("No runs within a reasonable time interval after your 'begin'. "
                        "Try using a 'begin' that has runs soon after it (within 2 months at most)")
    for delta in [ 1, 5, 10, 20, 30 ]:
        dasQuery_end = "run date between[%s,%s]" % (firstdate, lastdate)

        enddata.sort( key = lambda run: self.__findInJson(run, ["run", "run_number"]) )

        msg = ( "Your 'end' is before the creation time of the first "
                "run in the dataset\n'%s'"%( self.__name ) )

        lastRun = runList[runIndex]

    raise AllInOneError("No runs within a reasonable time interval before your 'end'. "
                        "Try using an 'end' that has runs soon before it (within 2 months at most)")

    return firstRun, lastRun

    return begin, end, firstRun, lastRun
def datasetSnippet( self, jsonPath = None, begin = None, end = None,
                    firstRun = None, lastRun = None, crab = False, parent = False ):

    if "secFiles.extend" not in f.read():
        msg = ("The predefined dataset '%s' does not contain secondary files, "
               "which your validation requires!") % self.__name

        print ("Retrieving the files from DAS. You will be asked if you want "
               "to overwrite the old dataset.\n"
               "It will still be compatible with validations that don't need secondary files.")
    snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
               "process.maxEvents = cms.untracked.PSet(\n"
               "    input = cms.untracked.int32(.oO[nEvents]Oo.)\n"
               "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)"

    if "secFiles.extend" in f.read():
        snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"

    theMap = { "process": "process.",
               "tab": " " * len( "process." ),
               "nEvents": ".oO[nEvents]Oo.",
               "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)\n",

    if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
    except AllInOneError, e:
        print "Can't store the dataset as a cff:"
        print "This may be inconvenient in the future, but will not cause a problem for this validation."
    return datasetSnippet
def dump_cff( self, outName = None, jsonPath = None, begin = None,
              end = None, firstRun = None, lastRun = None, parent = False ):

    outName = "Dataset" + self.__name.replace( "/", "_" )
    packageName = os.path.join( "Alignment", "OfflineValidation" )
    if not os.path.exists( os.path.join(
            self.__cmssw, "src", packageName ) ):
        msg = ("You are trying to store the predefined dataset '%s'.\n"
               "For that you need to check out the package '%s' to your "
               "private release area in\n"%( outName, packageName )
    theMap = { "process": "",
               "nEvents": str( -1 ),
               "skipEventsString": "",
               "importCms": "import FWCore.ParameterSet.Config as cms\n",
               "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
                         "#data type: %(dataType)s\n"
                         "#magnetic field: .oO[magneticField]Oo.\n"
    if magneticField == "AutoFromDBCurrent":

    dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.", magneticField)
    filePath = os.path.join( self.__cmssw, "src", packageName,
                             "python", outName + "_cff.py" )
    if os.path.exists( filePath ):
        existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
        askString = "Do you want to overwrite it? [y/n]\n"
        inputQuery = existMsg + askString

        userInput = raw_input( inputQuery ).lower()

        elif userInput == "n":

        inputQuery = askString

    print ("The predefined dataset '%s' will be stored in the file\n"
           "\nFor future use you have to do 'scram b'." )
    theFile = open( filePath, "w" )
    theFile.write( dataset_cff )
if __name__ == '__main__':
    print "Start testing..."
    datasetName = '/MinimumBias/Run2012D-TkAlMinBias-v1/ALCARECO'
    jsonFile = ( '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/'
                 'Collisions12/8TeV/Prompt/'
                 'Cert_190456-207898_8TeV_PromptReco_Collisions12_JSON.txt' )

    print dataset.datasetSnippet( jsonPath = jsonFile,

    dataset.dump_cff( outName = "Dataset_Test_TkAlMinBias_Run2012D",