from FWCore.PythonUtilities.LumiList import LumiList
from TkAlExceptions import AllInOneError

def __init__( self, datasetName, dasLimit = 0, tryPredefinedFirst = True,
              cmssw = os.environ["CMSSW_BASE"],
              cmsswrelease = os.environ["CMSSW_RELEASE_BASE"] ):
    if re.match( r'/.+/.+/.+', self.__name ):
        fileName = "Dataset" + self.__name.replace("/", "_") + "_cff.py"
    else:
        fileName = self.__name + "_cff.py"
    searchPath1 = os.path.join( self.__cmssw, "python",
                                "Alignment", "OfflineValidation",
                                fileName )
    searchPath2 = os.path.join( self.__cmssw, "src",
                                "Alignment", "OfflineValidation",
                                "python", fileName )
    searchPath3 = os.path.join( self.__cmsswrelease,
                                "python", "Alignment",
                                "OfflineValidation", fileName )
    elif os.path.exists( searchPath1 ):
        self.__fileName = searchPath1
    elif os.path.exists( searchPath2 ):
        msg = ("The predefined dataset '%s' does exist in '%s', but "
               "you need to run 'scram b' first."
               %( self.__name, searchPath2 ))
        print msg
        print "Getting the data from DAS again. To go faster next time, run scram b."
    elif os.path.exists( searchPath3 ):
        self.__fileName = searchPath3
    else:
        msg = ("The predefined dataset '%s' does not exist. Please "
               "create it first or check for typos."%( self.__name ))
        raise AllInOneError( msg )

    self.__name = "Dataset" + self.__name.replace("/", "_")
80 """ Yield successive n-sized chunks from theList.
82 for i
in xrange( 0, len( theList ), n ):
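# A minimal standalone sketch of the chunking generator above (the name
# "chunks" is illustrative; in this file it is a private helper method).
def chunks(theList, n):
    """Yield successive n-sized chunks from theList."""
    for i in xrange(0, len(theList), n):
        yield theList[i:i + n]

# Example: list(chunks(range(10), 4)) -> [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]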
    __source_template = (
        "%(header)s"
        "import FWCore.PythonUtilities.LumiList as LumiList\n\n"
        "readFiles = cms.untracked.vstring()\n"
        "secFiles = cms.untracked.vstring()\n"
        "%(process)ssource = cms.Source(\"PoolSource\",\n"
        "%(tab)s secondaryFileNames = secFiles,\n"
        "%(tab)s fileNames = readFiles\n"
        ")\n"
        "%(process)smaxEvents = cms.untracked.PSet( "
        "input = cms.untracked.int32(%(nEvents)s) )\n"
        "%(skipEventsString)s\n")
    __dummy_source_template = (
        "readFiles = cms.untracked.vstring()\n"
        "secFiles = cms.untracked.vstring()\n"
        "%(process)ssource = cms.Source(\"PoolSource\",\n"
        "%(tab)s secondaryFileNames = secFiles,\n"
        "%(tab)s fileNames = readFiles\n"
        ")\n"
        "readFiles.extend(['dummy_File.root'])\n"
        "%(process)smaxEvents = cms.untracked.PSet( "
        "input = cms.untracked.int32(%(nEvents)s) )\n"
        "%(skipEventsString)s\n")
def __createSnippet( self, jsonPath = None, begin = None, end = None,
                     firstRun = None, lastRun = None, repMap = None,
                     crab = False, parent = False ):
    firstRun = int( firstRun )
    lastRun = int( lastRun )
    if ( begin and firstRun ) or ( end and lastRun ):
        msg = ( "The usage of "
                + "'begin' & 'firstRun' " * int( bool( begin and firstRun ) )
                + "and " * int( bool( ( begin and firstRun ) and
                                      ( end and lastRun ) ) )
                + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
                + "is ambiguous." )
        raise AllInOneError( msg )
        begin = begin, end = end, firstRun = firstRun,

    if ( firstRun and lastRun ) and ( firstRun > lastRun ):
        msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
                "chosen is greater than the upper time/runrange limit "
                "('end'/'lastRun')." )
        raise AllInOneError( msg )
    if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
        msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
                "only work for official datasets, not predefined _cff.py files" )
        raise AllInOneError( msg )
    if firstRun or lastRun or jsonPath:
        goodLumiSecStr = ( "lumiSecs = cms.untracked."
                           "VLuminosityBlockRange()\n" )
        lumiStr = " lumisToProcess = lumiSecs,\n"
        selectedRunList = [ run for run in selectedRunList \
                            if self.__findInJson(run, "run_number") >= firstRun ]
        selectedRunList = [ run for run in selectedRunList \
                            if self.__findInJson(run, "run_number") <= lastRun ]
        lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                     + str( self.__findInJson(run, "run_number") ) + ":max" \
                     for run in selectedRunList ]
        theLumiList = LumiList ( filename = jsonPath )

        if theLumiList is not None:
            allRuns = theLumiList.getRuns()
            runsToRemove = []
            for run in allRuns:
                if firstRun and int( run ) < firstRun:
                    runsToRemove.append( run )
                if lastRun and int( run ) > lastRun:
                    runsToRemove.append( run )
            theLumiList.removeRuns( runsToRemove )
            splitLumiList = list( self.__chunks(
                theLumiList.getCMSSWString().split(','), 255 ) )
        with open(jsonPath) as f:
            jsoncontents = f.read()
            if "process.source.lumisToProcess" in jsoncontents:
                msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
                if firstRun or lastRun:
                    msg += ("\n (after applying firstRun and/or lastRun)")
                msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                msg += "\nCheck your config file to make sure that it worked properly."
                print msg
            if firstRun or lastRun:
                jsoncontents = re.sub(r"\d+:(\d+|max)-\d+:(\d+|max)",
                                      self.getForceRunRangeFunction(firstRun, lastRun),
                                      jsoncontents)
            lumiSecExtend = jsoncontents
        splitLumiList = [[""]]

        if not len(splitLumiList[0][0]) == 0:
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join( lumiSecStr )
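        # Worked example (illustrative values): two lumi-range strings become
        # one lumiSecs.extend(...) statement in the generated cff text.
        #   lumis = ["190456:1-190456:max", "190457:1-190457:50"]
        #   print "lumiSecs.extend( [\n'" + "',\n'".join(lumis) + "'\n] )"
        # prints:
        #   lumiSecs.extend( [
        #   '190456:1-190456:max',
        #   '190457:1-190457:50'
        #   ] )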
        fileStr = [ "',\n'".join( files ) for files in splitFileList ]
        fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                    for files in fileStr ]
        files = "\n".join( fileStr )
        parentFileStr = [ "',\n'".join( parentFiles )
                          for parentFiles in splitParentFileList ]
        parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
                          for parentFiles in parentFileStr ]
        parentFiles = "\n".join( parentFileStr )
        files += "\n\n" + parentFiles
    theMap["files"] = files
    theMap["json"] = jsonPath
    theMap["lumiStr"] = lumiStr
    theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
    theMap["lumiSecExtend"] = lumiSecExtend

    dataset_snippet = self.__source_template%( theMap )
    return dataset_snippet
    'Find rightmost value less than x'
    i = bisect.bisect_left( a, x )

    'Find leftmost item greater than or equal to x'
    i = bisect.bisect_left( a, x )
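# The two docstrings above match the standard bisect recipes; a standalone
# sketch of such helpers (plain function names for illustration only):
import bisect

def find_lt(a, x):
    'Find rightmost value less than x'
    i = bisect.bisect_left(a, x)
    if i:
        return a[i - 1]
    raise ValueError

def find_ge(a, x):
    'Find leftmost item greater than or equal to x'
    i = bisect.bisect_left(a, x)
    if i != len(a):
        return a[i]
    raise ValueError

# e.g. find_lt([1, 4, 9], 5) -> 4 ;  find_ge([1, 4, 9], 5) -> 9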
def __findInJson(self, jsondict, strings):
    if isinstance(strings, str):
        strings = [ strings ]

    if len(strings) == 0:
        return jsondict
    if isinstance(jsondict, dict):
        if strings[0] in jsondict:
            try:
                return self.__findInJson(jsondict[strings[0]], strings[1:])
            except (TypeError, KeyError):
                pass
    raise KeyError("Can't find " + strings[0])
286 """s must be in the format run1:lum1-run2:lum2"""
288 run1 = s.split(
"-")[0].
split(
":")[0]
289 lum1 = s.split(
"-")[0].
split(
":")[1]
290 run2 = s.split(
"-")[1].
split(
":")[0]
291 lum2 = s.split(
"-")[1].
split(
":")[1]
292 if int(run2) < firstRun
or int(run1) > lastRun:
294 if int(run1) < firstRun
or firstRun < 0:
297 if int(run2) > lastRun:
304 return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
    def forcerunrangefunction(s):
        return self.__getForceRunRange(firstRun, lastRun, s)
    return forcerunrangefunction
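# Illustration only: a closure like the one above can be handed to re.sub()
# as the replacement function, clipping every run range in the text to
# [firstRun, lastRun] (the function names here are illustrative).
import re

def clipRangesTo(firstRun, lastRun):
    def replaceMatch(match):
        s = match.group()
        run1, lum1 = s.split("-")[0].split(":")
        run2, lum2 = s.split("-")[1].split(":")
        if int(run2) < firstRun or int(run1) > lastRun:
            return ""                    # range lies entirely outside the window
        if int(run1) < firstRun:
            run1, lum1 = firstRun, 1
        if int(run2) > lastRun:
            run2, lum2 = lastRun, "max"
        return "%s:%s-%s:%s" % (run1, lum1, run2, lum2)
    return replaceMatch

text = "140000:1-140000:max 155000:1-155000:max"
print re.sub(r"\d+:(\d+|max)-\d+:(\d+|max)", clipRangesTo(150000, 160000), text)
# -> " 155000:1-155000:max"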
    dasData = das_client.get_data( 'https://cmsweb.cern.ch',
                                   dasQuery, 0, dasLimit, False )
    if isinstance(dasData, str):
        jsondict = json.loads( dasData )
    if error or self.__findInJson(jsondict, "status") != 'ok' or "data" not in jsondict:
        jsonstr = str(jsondict)
        if len(jsonstr) > 10000:
            jsonfile = "das_query_output_%i.txt"
            i = 0
            while os.path.lexists(jsonfile % i):
                i += 1
            jsonfile = jsonfile % i
            theFile = open( jsonfile, "w" )
            theFile.write( jsonstr )
            theFile.close()
            msg = "The DAS query returned an error. The output is very long, and has been stored in:\n" + jsonfile
        else:
            msg = "The DAS query returned an error. Here is the output\n" + jsonstr
        msg += "\nIt's possible that this was a server error. If so, it may work if you try again later"
        raise AllInOneError(msg)
        for line in f.readlines():
            if line.startswith("#data type: "):
                if datatype is not None:
                    raise AllInOneError(self.__fileName + " has multiple '#data type:' lines.")
                datatype = line.replace("#data type: ", "").replace("\n", "")
    dasQuery_type = ( 'dataset dataset=%s | grep dataset.datatype,'
                      'dataset.name'%( self.__name ) )

    print ("Cannot find the datatype of the dataset '%s'\n"
           "It may not be possible to automatically find the magnetic field,\n"
           "and you will not be able to run in CRAB mode"%( self.__name ))
    dasQuery = "parent dataset=" + self.__name

    raise AllInOneError("Here is the DAS output:\n" + str(jsondict) +
                        "\nIt's possible that this was a server error. If so, it may work if you try again later")
    Bfieldlocation = os.path.join( self.__cmssw, "python",
                                   "Configuration", "StandardSequences" )
    if not os.path.isdir(Bfieldlocation):
        Bfieldlocation = os.path.join( self.__cmsswrelease, "python",
                                       "Configuration", "StandardSequences" )
    Bfieldlist = [ f.replace("_cff.py", '') \
                   for f in os.listdir(Bfieldlocation) \
                   if f.startswith("MagneticField_") and f.endswith("_cff.py") ]
    Bfieldlist.sort( key = lambda Bfield: -len(Bfield) )  # longest (most specific) names first
        for line in f.readlines():
            if line.startswith("#data type: "):
                if datatype is not None:
                    raise AllInOneError(self.__fileName + " has multiple '#data type:' lines.")
                datatype = line.replace("#data type: ", "").replace("\n", "")
                datatype = datatype.split("#")[0].strip()
            if line.startswith("#magnetic field: "):
                if Bfield is not None:
                    raise AllInOneError(self.__fileName + " has multiple '#magnetic field:' lines.")
                Bfield = line.replace("#magnetic field: ", "").replace("\n", "")
                Bfield = Bfield.split("#")[0].strip()
        if Bfield is not None:
            Bfield = Bfield.split(",")[0]
            if Bfield in Bfieldlist or Bfield == "unknown":
                return Bfield
            elif Bfield == "AutoFromDBCurrent":
                return "MagneticField"
            elif "MagneticField_" + Bfield in Bfieldlist:
                return "MagneticField_" + Bfield
            else:
                print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
                print "Using Bfield='unknown' - this will revert to the default"
                return "unknown"
        elif datatype == "data":
            return "MagneticField"

        return "MagneticField"
    dasQuery_B = ( 'dataset dataset=%s'%( self.__name ) )

    Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
    if Bfield in Bfieldlist:
        return Bfield
    elif Bfield == "38T" or Bfield == "38T_PostLS1":
        return "MagneticField"
    elif "MagneticField_" + Bfield in Bfieldlist:
        return "MagneticField_" + Bfield
    else:
        print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
        print "Using Bfield='unknown' - this will revert to the default magnetic field"
        return "unknown"
    # last resort: try to guess the field from the dataset name
    for possibleB in Bfieldlist:
        if (possibleB != "MagneticField"
            and possibleB.replace("MagneticField_", "") in self.__name.replace("TkAlCosmics0T", "")):
            if possibleB == "MagneticField_38T" or possibleB == "MagneticField_38T_PostLS1":
                return "MagneticField"
            return possibleB
457 """For MC, this returns the same as the previous function.
458 For data, it gets the magnetic field from the runs. This is important for
459 deciding which template to use for offlinevalidation
464 Bfield = self.__magneticField.split(
"T")[0].
replace(
"MagneticField_",
"")
466 return float(Bfield) / 10.0
        for line in f.readlines():
            if line.startswith("#magnetic field: ") and "," in line:
                if Bfield is not None:
                    raise AllInOneError(self.__fileName + " has multiple '#magnetic field:' lines.")
                return float(line.replace("#magnetic field: ", "").split(",")[1].split("#")[0].strip())
    dasQuery = ( 'run = %s'%run )

    return "unknown Can't get the magnetic field for run %s from DAS" % run

    return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
    if abs(firstrunB - lastrunB) <= tolerance:
        return .5 * (firstrunB + lastrunB)
    print firstrunB, lastrunB, tolerance
    return ("unknown The beginning and end of your run range for %s\n"
            "have different magnetic fields (%s, %s)!\n"
            "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
            "or increasing the tolerance (in dataset.py) from %s.") % (self.__name, firstrunB, lastrunB, tolerance)

    if "unknown" in firstrunB:
    if parent:
        extendstring = "secFiles.extend"
    else:
        extendstring = "readFiles.extend"
    with open(self.__fileName) as f:
        for line in f.readlines():
            if copy:
                files.append({name: line.translate(None, "', " + '"')})
            if extendstring in line and "[" in line and "]" not in line:
                copy = True   # start copying file names from the next line
    searchdataset = self.__name
    dasQuery_files = ( 'file dataset=%s | grep file.name, file.nevents, '
                       'file.creation_time, '
                       'file.modification_time'%( searchdataset ) )
    print "Requesting file information for '%s' from DAS..."%( searchdataset ),
    data = self.__getData( dasQuery_files, dasLimit )

    data = [ self.__findInJson(entry, "file") for entry in data ]
    if len( data ) == 0:
        msg = ("No files are available for the dataset '%s'. This can be "
               "due to a typo or due to a DAS problem. Please check the "
               "spelling of the dataset and/or retry to run "
               "'validateAlignments.py'."%( self.name() ))
        raise AllInOneError( msg )
    fileInformationList = []
    for file in data:
        fileName = self.__findInJson(file, "name")
        fileCreationTime = self.__findInJson(file, "creation_time")
        fileNEvents = self.__findInJson(file, "nevents")

        print ("DAS query gives bad output for file '%s'. Skipping it.\n"
               "It may work if you try again later.") % fileName

        fileDict = { "name": fileName,
                     "creation_time": fileCreationTime,
                     "nevents": fileNEvents
                   }
        fileInformationList.append( fileDict )
    fileInformationList.sort( key = lambda info: self.__findInJson(info, "name") )
    return fileInformationList
    dasQuery_runs = ( 'run dataset=%s | grep run.run_number,'
                      'run.creation_time'%( self.__name ) )
    print "Requesting run information for '%s' from DAS..."%( self.__name ),

    data = [ self.__findInJson(entry, "run") for entry in data ]
    data.sort( key = lambda run: self.__findInJson(run, "run_number") )
    if len(stringForDas) != 8:
        raise AllInOneError(stringForDas + " is not a valid date string.\n"
                            + "DAS accepts dates in the form 'yyyymmdd'")
    year = stringForDas[:4]
    month = stringForDas[4:6]
    day = stringForDas[6:8]
    return datetime.date(int(year), int(month), int(day))

    return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)
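# Round-trip sketch of the two date helpers above ('yyyymmdd' <-> datetime.date);
# standalone, with plain function names for illustration only.
import datetime

def dasStringToDate(stringForDas):
    year, month, day = stringForDas[:4], stringForDas[4:6], stringForDas[6:8]
    return datetime.date(int(year), int(month), int(day))

def dateToDasString(date):
    return str(date.year) + str(date.month).zfill(2) + str(date.day).zfill(2)

print dasStringToDate("20120701")                  # -> 2012-07-01
print dateToDasString(datetime.date(2012, 7, 1))   # -> 20120701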
def convertTimeToRun( self, begin = None, end = None,
                      firstRun = None, lastRun = None ):
    if ( begin and firstRun ) or ( end and lastRun ):
        msg = ( "The usage of "
                + "'begin' & 'firstRun' " * int( bool( begin and firstRun ) )
                + "and " * int( bool( ( begin and firstRun ) and
                                      ( end and lastRun ) ) )
                + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
                + "is ambiguous." )
        raise AllInOneError( msg )
    for delta in [ 1, 5, 10, 20, 30 ]:   # search a window of up to about two months
        dasQuery_begin = "run date between[%s,%s]" % (firstdate, lastdate)
        begindata = self.__getData(dasQuery_begin)
        if len(begindata) > 0:
            begindata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))

    msg = ( "Your 'begin' is after the creation time of the last "
            "run in the dataset\n'%s'"%( self.__name ) )
    raise AllInOneError( msg )

    firstRun = runList[runIndex]

    raise AllInOneError("No runs within a reasonable time interval after your 'begin'. "
                        "Try using a 'begin' that has runs soon after it (within 2 months at most)")
    for delta in [ 1, 5, 10, 20, 30 ]:
        dasQuery_end = "run date between[%s,%s]" % (firstdate, lastdate)
        enddata = self.__getData(dasQuery_end)
        if len(enddata) > 0:
            enddata.sort(key = lambda run: self.__findInJson(run, ["run", "run_number"]))

    msg = ( "Your 'end' is before the creation time of the first "
            "run in the dataset\n'%s'"%( self.__name ) )
    raise AllInOneError( msg )

    lastRun = runList[runIndex]

    raise AllInOneError("No runs within a reasonable time interval before your 'end'. "
                        "Try using an 'end' that has runs soon before it (within 2 months at most)")

    return firstRun, lastRun

    return begin, end, firstRun, lastRun
def datasetSnippet( self, jsonPath = None, begin = None, end = None,
                    firstRun = None, lastRun = None, crab = False, parent = False ):
    with open(self.__fileName) as f:
        if "secFiles.extend" not in f.read():
            msg = ("The predefined dataset '%s' does not contain secondary files, "
                   "which your validation requires!") % self.__name
            print msg
            print ("Retrieving the files from DAS. You will be asked if you want "
                   "to overwrite the old dataset.\n"
                   "It will still be compatible with validations that don't need secondary files.")
        snippet = ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
                   "process.maxEvents = cms.untracked.PSet(\n"
                   " input = cms.untracked.int32(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.)\n"
                   ")\n"
                   "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)"
                   %( self.__name ))
        with open(self.__fileName) as f:
            if "secFiles.extend" in f.read():
                snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"

    theMap = { "process": "process.",
               "tab": " " * len( "process." ),
               "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
               "skipEventsString": "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)\n",
             }
    if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
        try:
            self.dump_cff(parent = parent)
        except AllInOneError, e:
            print "Can't store the dataset as a cff:"
            print e
            print "This may be inconvenient in the future, but will not cause a problem for this validation."
    return datasetSnippet
def dump_cff( self, outName = None, jsonPath = None, begin = None,
              end = None, firstRun = None, lastRun = None, parent = False ):
    if outName is None:
        outName = "Dataset" + self.__name.replace("/", "_")
    packageName = os.path.join( "Alignment", "OfflineValidation" )
    if not os.path.exists( os.path.join(
        self.__cmssw, "src", packageName ) ):
        msg = ("You are trying to store the predefined dataset '%s'.\n"
               "For that you need to check out the package '%s' to your "
               "private release area in\n"%( outName, packageName )
               + self.__cmssw )
        raise AllInOneError( msg )
    theMap = { "process": "",
               "nEvents": str( -1 ),
               "skipEventsString": "",
               "importCms": "import FWCore.ParameterSet.Config as cms\n",
               "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
                         "#data type: %(dataType)s\n"
                         "#magnetic field: .oO[magneticField]Oo.\n"
             }
    if magneticField == "MagneticField":
        magneticField = "%s, %s  #%s" % (magneticField,
                                         str(self.__getMagneticFieldForRun()).split("#")[0].strip(),
                                         "Use MagneticField_cff.py; the number is for determining which track selection to use.")

    dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.", magneticField)
    filePath = os.path.join( self.__cmssw, "src", packageName,
                             "python", outName + "_cff.py" )
    if os.path.exists( filePath ):
        existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
        askString = "Do you want to overwrite it? [y/n]\n"
        inputQuery = existMsg + askString
        while True:
            userInput = raw_input( inputQuery ).lower()
            if userInput == "y":
                break
            elif userInput == "n":
                return
            else:
                inputQuery = askString
    print ( "The predefined dataset '%s' will be stored in the file\n"%( outName )
            + filePath +
            "\nFor future use you have to do 'scram b'." )
    theFile = open( filePath, "w" )
    theFile.write( dataset_cff )
    theFile.close()
if __name__ == '__main__':
    print "Start testing..."
    datasetName = '/MinimumBias/Run2012D-TkAlMinBias-v1/ALCARECO'
    jsonFile = ( '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/'
                 'Collisions12/8TeV/Prompt/'
                 'Cert_190456-207898_8TeV_PromptReco_Collisions12_JSON.txt' )
    dataset = Dataset( datasetName )
    print dataset.datasetSnippet( jsonPath = jsonFile )
    dataset.dump_cff( outName = "Dataset_Test_TkAlMinBias_Run2012D",
                      jsonPath = jsonFile )