Public Member Functions

    def __init__
    def convertTimeToRun
    def datasetSnippet
    def dataType
    def dump_cff
    def fileInfoList
    def fileList
    def name
    def predefined
    def runList

Private Member Functions

    def __chunks
    def __createSnippet
    def __find_ge
    def __find_lt
    def __getData
    def __getDataType
    def __getFileInfoList
    def __getRunList

Private Attributes

    __dasLimit
    __dataType
    __fileInfoList
    __fileList
    __name
    __predefined
    __runList

Static Private Attributes

    tuple __dummy_source_template
Definition at line 13 of file dataset.py.
def dataset::Dataset::__init__ ( self, datasetName, dasLimit = 0 )
Definition at line 14 of file dataset.py.
def __init__( self, datasetName, dasLimit = 0 ):
    self.__name = datasetName
    # check, if dataset name matches CMS dataset naming scheme
    if re.match( r'/.+/.+/.+', self.__name ):
        self.__dataType = self.__getDataType()
        self.__predefined = False
    else:
        fileName = self.__name + "_cff.py"
        searchPath1 = os.path.join( os.environ["CMSSW_BASE"], "python",
                                    "Alignment", "OfflineValidation",
                                    fileName )
        searchPath2 = os.path.join( os.environ["CMSSW_BASE"], "src",
                                    "Alignment", "OfflineValidation",
                                    "python", fileName )
        searchPath3 = os.path.join( os.environ["CMSSW_RELEASE_BASE"],
                                    "python", "Alignment",
                                    "OfflineValidation", fileName )
        if os.path.exists( searchPath1 ):
            pass
        elif os.path.exists( searchPath2 ):
            msg = ("The predefined dataset '%s' does exist in '%s', but "
                   "you need to run 'scram b' first."
                   %( self.__name, searchPath2 ))
            raise AllInOneError( msg )
        elif os.path.exists( searchPath3 ):
            pass
        else:
            msg = ("The predefined dataset '%s' does not exist. Please "
                   "create it first or check for typos."%( self.__name ))
            raise AllInOneError( msg )
        self.__dataType = "unknown"
        self.__predefined = True
    self.__dasLimit = dasLimit
    self.__fileList = None
    self.__fileInfoList = None
    self.__runList = None
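A minimal usage sketch, assuming the module is importable as dataset in a CMSSW environment; the dataset name below is hypothetical:

from dataset import Dataset

data = Dataset( "/MinimumBias/Run2011A-TkAlMinBias-v1/ALCARECO" )
print data.name()        # the dataset name as given
print data.predefined()  # False for a DAS-style name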
def dataset::Dataset::__chunks ( self, theList, n ) [private]
Yield successive n-sized chunks from theList.
Definition at line 51 of file dataset.py.
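The body is not reproduced in this page; a minimal sketch of such a generator, assuming the standard slicing idiom:

def __chunks( self, theList, n ):
    """Yield successive n-sized chunks from theList."""
    for i in xrange( 0, len( theList ), n ):
        yield theList[i:i+n]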
def dataset::Dataset::__createSnippet ( self, jsonPath = None, begin = None, end = None, firstRun = None, lastRun = None, repMap = None, crab = False ) [private]
Definition at line 57 of file dataset.py.
def __createSnippet( self, jsonPath = None, begin = None, end = None,
                     firstRun = None, lastRun = None, repMap = None,
                     crab = False ):
    if firstRun:
        firstRun = int( firstRun )
    if lastRun:
        lastRun = int( lastRun )
    if ( begin and firstRun ) or ( end and lastRun ):
        msg = ( "The usage of "
                + "'begin' & 'firstRun' " * int( bool( begin and
                                                       firstRun ) )
                + "and " * int( bool( ( begin and firstRun ) and
                                      ( end and lastRun ) ) )
                + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
                + "is ambiguous." )
        raise AllInOneError( msg )
    if begin or end:
        ( firstRun, lastRun ) = self.convertTimeToRun(
            begin = begin, end = end, firstRun = firstRun,
            lastRun = lastRun )
    if ( firstRun and lastRun ) and ( firstRun > lastRun ):
        msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
                "chosen is greater than the upper time/runrange limit "
                "('end'/'lastRun')." )
        raise AllInOneError( msg )
    goodLumiSecStr = ""
    lumiStr = ""
    lumiSecExtend = ""
    if firstRun or lastRun:
        goodLumiSecStr = ( "lumiSecs = cms.untracked."
                           "VLuminosityBlockRange()\n" )
        lumiStr = " lumisToProcess = lumiSecs,\n"
        if not jsonPath:
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if run["run_number"] >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if run["run_number"] <= lastRun ]
            lumiList = [ str( run["run_number"] ) + ":1-" \
                         + str( run["run_number"] ) + ":max" \
                         for run in selectedRunList ]
            splitLumiList = list( self.__chunks( lumiList, 255 ) )
        else:
            theLumiList = LumiList( filename = jsonPath )
            allRuns = theLumiList.getRuns()
            runsToRemove = []
            for run in allRuns:
                if firstRun and int( run ) < firstRun:
                    runsToRemove.append( run )
                if lastRun and int( run ) > lastRun:
                    runsToRemove.append( run )
            theLumiList.removeRuns( runsToRemove )
            splitLumiList = list( self.__chunks(
                theLumiList.getCMSSWString().split(','), 255 ) )
        if not len(splitLumiList[0][0]) == 0:
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join( lumiSecStr )
    elif jsonPath:
        goodLumiSecStr = ( "goodLumiSecs = LumiList.LumiList(filename"
                           "= '%(json)s').getCMSSWString().split(',')\n"
                           "lumiSecs = cms.untracked"
                           ".VLuminosityBlockRange()\n" )
        lumiStr = " lumisToProcess = lumiSecs,\n"
        lumiSecExtend = "lumiSecs.extend(goodLumiSecs)\n"
    if crab:
        files = ""
    else:
        splitFileList = list( self.__chunks( self.fileList(), 255 ) )
        fileStr = [ "',\n'".join( files ) for files in splitFileList ]
        fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                    for files in fileStr ]
        files = "\n".join( fileStr )
    theMap = repMap
    theMap["files"] = files
    theMap["json"] = jsonPath
    theMap["lumiStr"] = lumiStr
    theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
    theMap["lumiSecExtend"] = lumiSecExtend
    if crab:
        dataset_snippet = self.__dummy_source_template%( theMap )
    else:
        dataset_snippet = self.__source_template%( theMap )
    return dataset_snippet
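For orientation, the run-based branch above builds CMSSW lumi strings of the form 'run:1-run:max' and splits them into chunks of 255 entries. A standalone illustration with made-up run numbers:

selectedRunList = [ {"run_number": 165121}, {"run_number": 165205} ]
lumiList = [ str( run["run_number"] ) + ":1-"
             + str( run["run_number"] ) + ":max"
             for run in selectedRunList ]
print lumiList   # ['165121:1-165121:max', '165205:1-165205:max']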
def dataset::Dataset::__find_ge ( self, a, x ) [private]
Definition at line 165 of file dataset.py.
def dataset::Dataset::__find_lt ( self, a, x ) [private]
Definition at line 158 of file dataset.py.
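Neither body is reproduced in this page. Given how convertTimeToRun() uses them (index lookup in a sorted list, ValueError on miss), both helpers match the standard bisect recipes; a sketch under that assumption:

import bisect

def __find_ge( self, a, x ):
    # index of the leftmost item in the sorted list a that is >= x
    i = bisect.bisect_left( a, x )
    if i != len( a ):
        return i
    raise ValueError

def __find_lt( self, a, x ):
    # index of the rightmost item in the sorted list a that is < x
    i = bisect.bisect_left( a, x )
    if i:
        return i - 1
    raise ValueError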
def dataset::Dataset::__getData ( self, dasQuery, dasLimit = 0 ) [private]
Definition at line 172 of file dataset.py.
def __getData( self, dasQuery, dasLimit = 0 ):
    dasData = das_client.get_data( 'https://cmsweb.cern.ch',
                                   dasQuery, 0, dasLimit, False )
    jsondict = json.loads( dasData )
    # Check, if the DAS query fails
    if jsondict["status"] != 'ok':
        # use string formatting here; the original comma made msg a tuple
        msg = "Status not 'ok', but: %s"%( jsondict["status"] )
        raise AllInOneError( msg )
    return jsondict["data"]
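A standalone sketch of the same DAS call, assuming das_client is importable as it is in a CMSSW environment; the query string is hypothetical:

import json
import das_client

dasQuery = 'run dataset=/MinimumBias/Run2011A-TkAlMinBias-v1/ALCARECO'
dasData = das_client.get_data( 'https://cmsweb.cern.ch', dasQuery, 0, 0, False )
jsondict = json.loads( dasData )
if jsondict["status"] == 'ok':
    print "DAS returned %d entries"%( len( jsondict["data"] ) )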
def dataset::Dataset::__getDataType ( self ) [private]
Definition at line 182 of file dataset.py.
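The body is not reproduced in this page. Judging from its use in __init__() and the query style of __getData(), a plausible reconstruction is a single DAS datatype query; the grep fields below are an assumption, not the confirmed implementation:

def __getDataType( self ):
    # hypothetical sketch: ask DAS for the datatype of this dataset
    dasQuery_type = ( 'dataset dataset=%s | grep dataset.datatype'
                      %( self.__name ) )
    data = self.__getData( dasQuery_type )
    return data[0]["dataset"][0]["datatype"]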
def dataset::Dataset::__getFileInfoList ( self, dasLimit ) [private]
Definition at line 188 of file dataset.py.
def __getFileInfoList( self, dasLimit ):
    if self.__fileInfoList:
        return self.__fileInfoList
    dasQuery_files = ( 'file dataset=%s | grep file.name, file.nevents, '
                       'file.creation_time, '
                       'file.modification_time'%( self.__name ) )
    print "Requesting file information for '%s' from DAS..."%( self.__name ),
    data = self.__getData( dasQuery_files, dasLimit )
    print "Done."
    data = [ entry["file"] for entry in data ]
    if len( data ) == 0:
        msg = ("No files are available for the dataset '%s'. This can be "
               "due to a typo or due to a DAS problem. Please check the "
               "spelling of the dataset and/or retry to run "
               "'validateAlignments.py'."%( self.name() ))
        raise AllInOneError( msg )
    fileInformationList = []
    for file in data:
        fileName = file[0]["name"]
        fileCreationTime = file[0]["creation_time"]
        for ii in range(3):
            try:
                fileNEvents = file[ii]["nevents"]
            except KeyError:
                continue
            break
        # select only non-empty files
        if fileNEvents == 0:
            continue
        fileDict = { "name": fileName,
                     "creation_time": fileCreationTime,
                     "nevents": fileNEvents
                   }
        fileInformationList.append( fileDict )
    fileInformationList.sort( key=lambda info: info["name"] )
    return fileInformationList
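For orientation, the returned list has entries of the following shape; all values below are made up:

fileInfoList = [ { "name": "/store/data/Run2011A/ExampleFile.root",  # hypothetical LFN
                   "creation_time": 1306262400,
                   "nevents": 11624 } ]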
def dataset::Dataset::__getRunList ( self ) [private]
Definition at line 225 of file dataset.py.
def __getRunList( self ):
    if self.__runList:
        return self.__runList
    dasQuery_runs = ( 'run dataset=%s | grep run.run_number,'
                      'run.creation_time'%( self.__name ) )
    print "Requesting run information for '%s' from DAS..."%( self.__name ),
    data = self.__getData( dasQuery_runs )
    print "Done."
    data = [ entry["run"][0] for entry in data ]
    data.sort( key = lambda run: run["creation_time"] )
    self.__runList = data
    return data
def dataset::Dataset::convertTimeToRun ( self, begin = None, end = None, firstRun = None, lastRun = None, shortTuple = True )
Definition at line 254 of file dataset.py.
def convertTimeToRun( self, begin = None, end = None, firstRun = None,
                      lastRun = None, shortTuple = True ):
    if ( begin and firstRun ) or ( end and lastRun ):
        msg = ( "The usage of "
                + "'begin' & 'firstRun' " * int( bool( begin and
                                                       firstRun ) )
                + "and " * int( bool( ( begin and firstRun ) and
                                      ( end and lastRun ) ) )
                + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
                + "is ambiguous." )
        raise AllInOneError( msg )

    runList = [ run["run_number"] for run in self.__getRunList() ]
    runTimeList = [ run["creation_time"] for run in self.__getRunList() ]
    if begin:
        try:
            runIndex = self.__find_ge( runTimeList, begin )
        except ValueError:
            msg = ( "Your 'begin' is after the creation time of the last "
                    "run in the dataset\n'%s'"%( self.__name ) )
            raise AllInOneError( msg )
        firstRun = runList[runIndex]
        begin = None
    if end:
        try:
            runIndex = self.__find_lt( runTimeList, end )
        except ValueError:
            msg = ( "Your 'end' is before the creation time of the first "
                    "run in the dataset\n'%s'"%( self.__name ) )
            raise AllInOneError( msg )
        lastRun = runList[runIndex]
        end = None
    if shortTuple:
        return firstRun, lastRun
    else:
        return begin, end, firstRun, lastRun
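A hedged usage sketch: begin/end must use the same representation as the DAS 'creation_time' values the method compares them against; the values below are placeholders only:

firstRun, lastRun = data.convertTimeToRun( begin = 1306879200,
                                           end = 1307484000 )
print firstRun, lastRun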
def dataset::Dataset::datasetSnippet ( self, jsonPath = None, begin = None, end = None, firstRun = None, lastRun = None, nEvents = None, crab = False )
Definition at line 295 of file dataset.py.
def datasetSnippet( self, jsonPath = None, begin = None, end = None,
                    firstRun = None, lastRun = None, nEvents = None,
                    crab = False ):
    if self.__predefined:
        return ("process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
                "process.maxEvents = cms.untracked.PSet(\n"
                " input = cms.untracked.int32(%s)\n"
                ")"
                %( self.__name, nEvents ))
    theMap = { "process": "process.",
               "tab": " " * len( "process." ),
               "nEvents": str( nEvents ),
               "importCms": ""
             }
    datasetSnippet = self.__createSnippet( jsonPath = jsonPath,
                                           begin = begin,
                                           end = end,
                                           firstRun = firstRun,
                                           lastRun = lastRun,
                                           repMap = theMap,
                                           crab = crab )
    return datasetSnippet
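A usage sketch for generating a source/maxEvents fragment to embed in a cms config; the JSON path and numbers below are hypothetical:

snippet = data.datasetSnippet( jsonPath = "myGoodLumis.json",
                               firstRun = 165121,
                               lastRun = 165205,
                               nEvents = 10000 )
print snippet   # paste or exec into the process configuration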
def dataset::Dataset::dataType ( self )
Definition at line 292 of file dataset.py.
def dataset::Dataset::dump_cff ( self, outName = None, jsonPath = None, begin = None, end = None, firstRun = None, lastRun = None )
Definition at line 318 of file dataset.py.
def dump_cff( self, outName = None, jsonPath = None, begin = None,
              end = None, firstRun = None, lastRun = None ):
    if outName == None:
        outName = "Dataset"
    packageName = os.path.join( "Alignment", "OfflineValidation" )
    if not os.path.exists( os.path.join(
        os.environ["CMSSW_BASE"], "src", packageName ) ):
        msg = ("You try to store the predefined dataset '%s'.\n"
               "For that you need to check out the package '%s' to your "
               "private release area in\n"%( outName, packageName )
               + os.environ["CMSSW_BASE"] )
        raise AllInOneError( msg )
    theMap = { "process": "",
               "tab": "",
               "nEvents": str( -1 ),
               "importCms": "import FWCore.ParameterSet.Config as cms\n" }
    dataset_cff = self.__createSnippet( jsonPath = jsonPath,
                                        begin = begin,
                                        end = end,
                                        firstRun = firstRun,
                                        lastRun = lastRun,
                                        repMap = theMap)
    filePath = os.path.join( os.environ["CMSSW_BASE"], "src", packageName,
                             "python", outName + "_cff.py" )
    if os.path.exists( filePath ):
        existMsg = "The predefined dataset '%s' already exists.\n"%( outName )
        askString = "Do you want to overwrite it? [y/n]\n"
        inputQuery = existMsg + askString
        while True:
            userInput = raw_input( inputQuery ).lower()
            if userInput == "y":
                break
            elif userInput == "n":
                return
            else:
                inputQuery = askString
    print ( "The predefined dataset '%s' will be stored in the file\n"
            %( outName )
            + filePath +
            "\nFor future use you have to do 'scram b'." )
    print
    theFile = open( filePath, "w" )
    theFile.write( dataset_cff )
    theFile.close()
    return
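A usage sketch, assuming Alignment/OfflineValidation is checked out in $CMSSW_BASE; the output name below is hypothetical:

data.dump_cff( outName = "MyMinBias2011A",
               firstRun = 165121,
               lastRun = 165205 )
# after 'scram b', Dataset( "MyMinBias2011A" ) loads the stored snippet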
def dataset::Dataset::fileInfoList ( self )

Definition at line 372 of file dataset.py.

def dataset::Dataset::fileList ( self )

Definition at line 364 of file dataset.py.

def dataset::Dataset::name ( self )

Definition at line 375 of file dataset.py.

def dataset::Dataset::predefined ( self )

Definition at line 378 of file dataset.py.

def dataset::Dataset::runList ( self )

Definition at line 381 of file dataset.py.
dataset::Dataset::__dasLimit [private]
Definition at line 14 of file dataset.py.
dataset::Dataset::__dataType [private]
Definition at line 14 of file dataset.py.
tuple dataset::Dataset::__dummy_source_template [static, private]

__dummy_source_template = ( "%(process)smaxEvents = cms.untracked.PSet( "
                            "input = cms.untracked.int32(%(nEvents)s) )\n"
                            "readFiles = cms.untracked.vstring()\n"
                            "secFiles = cms.untracked.vstring()\n"
                            "%(process)ssource = cms.Source(\"PoolSource\",\n"
                            "%(tab)s secondaryFileNames ="
                            "secFiles,\n"
                            "%(tab)s fileNames = readFiles\n"
                            ")\n"
                            "readFiles.extend(['dummy_File.root'])\n" )
Definition at line 147 of file dataset.py.
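For orientation, with the repMap built by datasetSnippet() (process = "process.", nEvents = 10000) the template renders roughly as follows; exact whitespace was lost in this page's formatting:

process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(10000) )
readFiles = cms.untracked.vstring()
secFiles = cms.untracked.vstring()
process.source = cms.Source("PoolSource",
         secondaryFileNames =secFiles,
         fileNames = readFiles
)
readFiles.extend(['dummy_File.root'])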
dataset::Dataset::__fileInfoList [private]

Definition at line 14 of file dataset.py.

dataset::Dataset::__fileList [private]

Definition at line 14 of file dataset.py.

dataset::Dataset::__name [private]

Definition at line 14 of file dataset.py.

dataset::Dataset::__predefined [private]

Definition at line 14 of file dataset.py.

dataset::Dataset::__runList [private]

Definition at line 14 of file dataset.py.