------------------------------------------------------ Automatic pick-up of RelVal input files ------------------------------------------------------ More...
Inherits FWCore::GuiBrowsers::ConfigToolBase::ConfigToolBase.
Public Member Functions | |
def | __call__ |
def | __init__ |
def | apply |
def | getDefaultParameters |
def | messageEmptyList |
Private Attributes | |
_comment | |
_parameters | |
Static Private Attributes | |
tuple | _defaultParameters = dicttypes.SortedKeysDict() |
string | _label = 'pickRelValInputFiles' |
------------------------------------------------------ Automatic pick-up of RelVal input files ------------------------------------------------------
Picks up RelVal input files automatically and returns a vector of strings with the paths to be used in [PoolSource].fileNames
PickRelValInputFiles( useDAS, cmsswVersion, formerVersion, relVal, dataTier, condition, globalTag, maxVersions, skipFiles, numberOfFiles, debug )
- useDAS : switch to perform the query in DAS rather than in DBS; optional; default: False
- cmsswVersion : CMSSW release to pick up the RelVal files from; optional; default: the current release (determined automatically from the environment)
- formerVersion: use the valid CMSSW release before the last valid one ("last to last") to pick up the RelVal files from; this also applies if 'cmsswVersion' is set explicitly; optional; default: False
- relVal : RelVal sample to be used; optional; default: 'RelValTTbar'
- dataTier : data tier to be used; optional; default: 'GEN-SIM-RECO'
- condition : identifier of the GlobalTag as defined in Configurations/PyReleaseValidation/python/autoCond.py; possibly overridden if 'globalTag' is set explicitly; optional; default: 'startup'
- globalTag : name of the GlobalTag as it is used in the data path of the RelVals; optional; default: determined automatically as defined by 'condition' in Configurations/PyReleaseValidation/python/autoCond.py
  !!! The determination is done for the release one runs in, not for the release the RelVals have been produced in. !!!
  Example of deviation: data RelVals (CMSSW_4_1_X) might not only have the pure name of the GlobalTag 'GR_R_311_V2' in the full path, but also an extension identifying the data: 'GR_R_311_V2_RelVal_wzMu2010B'
- maxVersions : maximum version number of the RelVal to check; optional; default: 3
- skipFiles : number of files to skip for a found RelVal sample; optional; default: 0
- numberOfFiles: number of files to pick up; setting it to a negative value returns all files found ('skipFiles' remains active, though); optional; default: -1
- debug : switch to enable enhanced messages in 'stdout'; optional; default: False
Definition at line 55 of file cmsswVersionTools.py.
def cmsswVersionTools::PickRelValInputFiles::__init__ | ( | self | ) |
Definition at line 95 of file cmsswVersionTools.py.
def __init__( self ):
    """
    Initialise the base class, register the default value of every tool
    parameter and start out with a deep copy of these defaults as the
    current parameter set.
    """
    ConfigToolBase.__init__( self )

    # Defaults registered before 'globalTag': ( name, default value, comment )
    leadingDefaults = ( ( 'useDAS'       , False                      , ''                      )
                      , ( 'cmsswVersion' , os.getenv( "CMSSW_VERSION" ), 'auto from environment' )
                      , ( 'formerVersion', False                      , ''                      )
                      , ( 'relVal'       , 'RelValTTbar'              , ''                      )
                      , ( 'dataTier'     , 'GEN-SIM-RECO'             , ''                      )
                      , ( 'condition'    , 'startup'                  , ''                      )
                      )
    for name, value, comment in leadingDefaults:
        self.addParameter( self._defaultParameters, name, value, comment )

    # The default GlobalTag is looked up in 'autoCond' through the default
    # 'condition' registered just above; the last five characters of the
    # 'autoCond' entry are cut off.
    defaultCondition = self.getDefaultParameters()[ 'condition' ].value
    self.addParameter( self._defaultParameters, 'globalTag', autoCond[ defaultCondition ][ : -5 ], 'auto from \'condition\'' )

    # Defaults registered after 'globalTag': ( name, default value, comment )
    trailingDefaults = ( ( 'maxVersions'  , 3    , ''    )
                       , ( 'skipFiles'    , 0    , ''    )
                       , ( 'numberOfFiles', -1   , 'all' )
                       , ( 'debug'        , False, ''    )
                       )
    for name, value, comment in trailingDefaults:
        self.addParameter( self._defaultParameters, name, value, comment )

    self._parameters = copy.deepcopy( self._defaultParameters )
    self._comment = ""
def cmsswVersionTools::PickRelValInputFiles::__call__ | ( | self, | |
useDAS = None , |
|||
cmsswVersion = None , |
|||
formerVersion = None , |
|||
relVal = None , |
|||
dataTier = None , |
|||
condition = None , |
|||
globalTag = None , |
|||
maxVersions = None , |
|||
skipFiles = None , |
|||
numberOfFiles = None , |
|||
debug = None |
|||
) |
Definition at line 111 of file cmsswVersionTools.py.
def __call__( self
            , useDAS = None, cmsswVersion = None, formerVersion = None
            , relVal = None, dataTier = None, condition = None
            , globalTag = None, maxVersions = None, skipFiles = None
            , numberOfFiles = None, debug = None
            ):
    """
    Run the tool with the given arguments.

    Every argument left at None is replaced by its registered default;
    a missing 'globalTag' is instead derived from 'condition' via
    'autoCond'. All values are stored with 'setParameter' and the result
    of 'apply()' is returned.
    """
    def orDefault( name, given ):
        # Fall back to the registered default only if nothing was passed
        if given is None:
            return self.getDefaultParameters()[ name ].value
        return given

    useDAS        = orDefault( 'useDAS'       , useDAS        )
    cmsswVersion  = orDefault( 'cmsswVersion' , cmsswVersion  )
    formerVersion = orDefault( 'formerVersion', formerVersion )
    relVal        = orDefault( 'relVal'       , relVal        )
    dataTier      = orDefault( 'dataTier'     , dataTier      )
    condition     = orDefault( 'condition'    , condition     )
    if globalTag is None:
        globalTag = autoCond[ condition ][ : -5 ] # auto from 'condition'
    maxVersions   = orDefault( 'maxVersions'  , maxVersions   )
    skipFiles     = orDefault( 'skipFiles'    , skipFiles     )
    numberOfFiles = orDefault( 'numberOfFiles', numberOfFiles )
    debug         = orDefault( 'debug'        , debug         )

    # Store the resolved values in the same order as the parameters are declared
    resolved = ( ( 'useDAS'       , useDAS        )
               , ( 'cmsswVersion' , cmsswVersion  )
               , ( 'formerVersion', formerVersion )
               , ( 'relVal'       , relVal        )
               , ( 'dataTier'     , dataTier      )
               , ( 'condition'    , condition     )
               , ( 'globalTag'    , globalTag     )
               , ( 'maxVersions'  , maxVersions   )
               , ( 'skipFiles'    , skipFiles     )
               , ( 'numberOfFiles', numberOfFiles )
               , ( 'debug'        , debug         )
               )
    for name, value in resolved:
        self.setParameter( name, value )
    return self.apply()
def cmsswVersionTools::PickRelValInputFiles::apply | ( | self | ) |
Definition at line 163 of file cmsswVersionTools.py.
00164 : 00165 useDAS = self._parameters[ 'useDAS' ].value 00166 cmsswVersion = self._parameters[ 'cmsswVersion' ].value 00167 formerVersion = self._parameters[ 'formerVersion' ].value 00168 relVal = self._parameters[ 'relVal' ].value 00169 dataTier = self._parameters[ 'dataTier' ].value 00170 condition = self._parameters[ 'condition' ].value # only used for GT determination in initialization, if GT not explicitly given 00171 globalTag = self._parameters[ 'globalTag' ].value 00172 maxVersions = self._parameters[ 'maxVersions' ].value 00173 skipFiles = self._parameters[ 'skipFiles' ].value 00174 numberOfFiles = self._parameters[ 'numberOfFiles' ].value 00175 debug = self._parameters[ 'debug' ].value 00176 00177 filePaths = [] 00178 00179 # Determine corresponding CMSSW version for RelVals 00180 preId = '_pre' 00181 patchId = '_patch' # patch releases 00182 hltPatchId = '_hltpatch' # HLT patch releases 00183 dqmPatchId = '_dqmpatch' # DQM patch releases 00184 slhcId = '_SLHC' # SLHC releases 00185 rootId = '_root' # ROOT test releases 00186 ibId = '_X_' # IBs 00187 if patchId in cmsswVersion: 00188 cmsswVersion = cmsswVersion.split( patchId )[ 0 ] 00189 elif hltPatchId in cmsswVersion: 00190 cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ] 00191 elif dqmPatchId in cmsswVersion: 00192 cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ] 00193 elif rootId in cmsswVersion: 00194 cmsswVersion = cmsswVersion.split( rootId )[ 0 ] 00195 elif slhcId in cmsswVersion: 00196 cmsswVersion = cmsswVersion.split( slhcId )[ 0 ] 00197 elif ibId in cmsswVersion or formerVersion: 00198 outputTuple = Popen( [ 'scram', 'l -c CMSSW' ], stdout = PIPE, stderr = PIPE ).communicate() 00199 if len( outputTuple[ 1 ] ) != 0: 00200 print '%s INFO : SCRAM error'%( self._label ) 00201 if debug: 00202 print ' from trying to determine last valid releases before \'%s\''%( cmsswVersion ) 00203 print 00204 print outputTuple[ 1 ] 00205 print 00206 self.messageEmptyList() 00207 return filePaths 00208 
versions = { 'last' :'' 00209 , 'lastToLast':'' 00210 } 00211 for line in outputTuple[ 0 ].splitlines(): 00212 version = line.split()[ 1 ] 00213 if cmsswVersion.split( ibId )[ 0 ] in version or cmsswVersion.rpartition( '_' )[ 0 ] in version: 00214 if not ( patchId in version or hltPatchId in version or dqmPatchId in version or slhcId in version or ibId in version or rootId in version ): 00215 versions[ 'lastToLast' ] = versions[ 'last' ] 00216 versions[ 'last' ] = version 00217 if version == cmsswVersion: 00218 break 00219 # FIXME: ordering of output problematic ('XYZ_pre10' before 'XYZ_pre2', no "formerVersion" for 'XYZ_pre1') 00220 if formerVersion: 00221 # Don't use pre-releases as "former version" for other releases than CMSSW_X_Y_0 00222 if preId in versions[ 'lastToLast' ] and not preId in versions[ 'last' ] and not versions[ 'last' ].endswith( '_0' ): 00223 versions[ 'lastToLast' ] = versions[ 'lastToLast' ].split( preId )[ 0 ] # works only, if 'CMSSW_X_Y_0' esists ;-) 00224 # Use pre-release as "former version" for CMSSW_X_Y_0 00225 elif versions[ 'last' ].endswith( '_0' ) and not ( preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].startswith( versions[ 'last' ] ) ): 00226 versions[ 'lastToLast' ] = '' 00227 for line in outputTuple[ 0 ].splitlines(): 00228 version = line.split()[ 1 ] 00229 versionParts = version.partition( preId ) 00230 if versionParts[ 0 ] == versions[ 'last' ] and versionParts[ 1 ] == preId: 00231 versions[ 'lastToLast' ] = version 00232 elif versions[ 'lastToLast' ] != '': 00233 break 00234 # Don't use CMSSW_X_Y_0 as "former version" for pre-releases 00235 elif preId in versions[ 'last' ] and not preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].endswith( '_0' ): 00236 versions[ 'lastToLast' ] = '' # no alternative :-( 00237 cmsswVersion = versions[ 'lastToLast' ] 00238 else: 00239 cmsswVersion = versions[ 'last' ] 00240 00241 # Debugging output 00242 if debug: 00243 print '%s DEBUG: Called with...'%( 
self._label ) 00244 for key in self._parameters.keys(): 00245 print ' %s:\t'%( key ), 00246 print self._parameters[ key ].value, 00247 if self._parameters[ key ].value is self.getDefaultParameters()[ key ].value: 00248 print ' (default)' 00249 else: 00250 print 00251 if key == 'cmsswVersion' and cmsswVersion != self._parameters[ key ].value: 00252 if formerVersion: 00253 print ' ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion ) 00254 else: 00255 print ' ==> modified to last valid release %s'%( cmsswVersion ) 00256 00257 # Check domain 00258 domain = socket.getfqdn().split( '.' ) 00259 domainSE = '' 00260 if len( domain ) == 0: 00261 print '%s INFO : Cannot determine domain of this computer'%( self._label ) 00262 if debug: 00263 self.messageEmptyList() 00264 return filePaths 00265 elif os.uname()[0] == "Darwin": 00266 print '%s INFO : Running on MacOSX without direct access to RelVal files.'%( self._label ) 00267 if debug: 00268 self.messageEmptyList() 00269 return filePaths 00270 elif len( domain ) == 1: 00271 print '%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self._label, domain[ 0 ] ) 00272 if debug: 00273 self.messageEmptyList() 00274 return filePaths 00275 if not ( ( domain[ -2 ] == 'cern' and domain[ -1 ] == 'ch' ) or ( domain[ -2 ] == 'fnal' and domain[ -1 ] == 'gov' ) ): 00276 print '%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self._label, domain[ -2 ], domain[ -1 ] ) 00277 if debug: 00278 self.messageEmptyList() 00279 return filePaths 00280 if domain[ -2 ] == 'cern': 00281 domainSE = 'T2_CH_CERN' 00282 elif domain[ -2 ] == 'fnal': 00283 domainSE = 'T1_US_FNAL_MSS' 00284 if debug: 00285 print '%s DEBUG: Running at site \'%s.%s\''%( self._label, domain[ -2 ], domain[ -1 ] ) 00286 print '%s DEBUG: Looking for SE \'%s\''%( self._label, domainSE ) 00287 00288 # Find files 00289 validVersion = 0 00290 dataset = '' 00291 datasetAll = 
'/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier ) 00292 if useDAS: 00293 if debug: 00294 print '%s DEBUG: Using DAS query'%( self._label ) 00295 dasLimit = numberOfFiles 00296 if dasLimit <= 0: 00297 dasLimit += 1 00298 for version in range( maxVersions, 0, -1 ): 00299 filePaths = [] 00300 filePathsTmp = [] 00301 fileCount = 0 00302 dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier ) 00303 dasQuery = 'file dataset=%s | grep file.name'%( dataset ) 00304 if debug: 00305 print '%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset ) 00306 print ' \'%s\''%( dasQuery ) 00307 # partially stolen from das_client.py for option '--format=plain', needs filter ("grep") in the query 00308 dasData = das_client.get_data( 'https://cmsweb.cern.ch', dasQuery, 0, dasLimit, False ) 00309 jsondict = json.loads( dasData ) 00310 if debug: 00311 print '%s DEBUG: Received DAS data:'%( self._label ) 00312 print ' \'%s\''%( dasData ) 00313 print '%s DEBUG: Determined JSON dictionary:'%( self._label ) 00314 print ' \'%s\''%( jsondict ) 00315 if jsondict[ 'status' ] != 'ok': 00316 print 'There was a problem while querying DAS with query \'%s\'. 
Server reply was:\n %s' % (dasQuery, dasData) 00317 exit( 1 ) 00318 mongo_query = jsondict[ 'mongo_query' ] 00319 filters = mongo_query[ 'filters' ] 00320 data = jsondict[ 'data' ] 00321 if debug: 00322 print '%s DEBUG: Query in JSON dictionary:'%( self._label ) 00323 print ' \'%s\''%( mongo_query ) 00324 print '%s DEBUG: Filters in query:'%( self._label ) 00325 print ' \'%s\''%( filters ) 00326 print '%s DEBUG: Data in JSON dictionary:'%( self._label ) 00327 print ' \'%s\''%( data ) 00328 for row in data: 00329 filePath = [ r for r in das_client.get_value( row, filters ) ][ 0 ] 00330 if debug: 00331 print '%s DEBUG: Testing file entry \'%s\''%( self._label, filePath ) 00332 if len( filePath ) > 0: 00333 if validVersion != version: 00334 dasTest = das_client.get_data( 'https://cmsweb.cern.ch', 'site dataset=%s | grep site.name'%( dataset ), 0, 999, False ) 00335 jsontestdict = json.loads( dasTest ) 00336 mongo_testquery = jsontestdict[ 'mongo_query' ] 00337 testfilters = mongo_testquery[ 'filters' ] 00338 testdata = jsontestdict[ 'data' ] 00339 if debug: 00340 print '%s DEBUG: Received DAS data (site test):'%( self._label ) 00341 print ' \'%s\''%( dasTest ) 00342 print '%s DEBUG: Determined JSON dictionary (site test):'%( self._label ) 00343 print ' \'%s\''%( jsontestdict ) 00344 print '%s DEBUG: Query in JSON dictionary (site test):'%( self._label ) 00345 print ' \'%s\''%( mongo_testquery ) 00346 print '%s DEBUG: Filters in query (site test):'%( self._label ) 00347 print ' \'%s\''%( testfilters ) 00348 print '%s DEBUG: Data in JSON dictionary (site test):'%( self._label ) 00349 print ' \'%s\''%( testdata ) 00350 foundSE = False 00351 for testrow in testdata: 00352 siteName = [ tr for tr in das_client.get_value( testrow, testfilters ) ][ 0 ] 00353 if siteName == domainSE: 00354 foundSE = True 00355 break 00356 if not foundSE: 00357 if debug: 00358 print '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE ) 00359 break 
00360 validVersion = version 00361 if debug: 00362 print '%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion ) 00363 if numberOfFiles == 0: 00364 break 00365 # protect from double entries ( 'unique' flag in query does not work here) 00366 if not filePath in filePathsTmp: 00367 filePathsTmp.append( filePath ) 00368 if debug: 00369 print '%s DEBUG: File \'%s\' found'%( self._label, filePath ) 00370 fileCount += 1 00371 # needed, since and "limit" overrides "idx" in 'get_data' (==> "idx" set to '0' rather than "skipFiles") 00372 if fileCount > skipFiles: 00373 filePaths.append( filePath ) 00374 elif debug: 00375 print '%s DEBUG: File \'%s\' found again'%( self._label, filePath ) 00376 if validVersion > 0: 00377 if numberOfFiles == 0 and debug: 00378 print '%s DEBUG: No files requested'%( self._label ) 00379 break 00380 else: 00381 if debug: 00382 print '%s DEBUG: Using DBS query'%( self._label ) 00383 for version in range( maxVersions, 0, -1 ): 00384 filePaths = [] 00385 fileCount = 0 00386 dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier ) 00387 dbsQuery = 'find file where dataset = %s'%( dataset ) 00388 if debug: 00389 print '%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset ) 00390 print ' \'%s\''%( dbsQuery ) 00391 foundSE = False 00392 for line in os.popen( 'dbs search --query="%s"'%( dbsQuery ) ): 00393 if line.find( '.root' ) != -1: 00394 if validVersion != version: 00395 if not foundSE: 00396 dbsSiteQuery = 'find dataset where dataset = %s and site = %s'%( dataset, domainSE ) 00397 if debug: 00398 print '%s DEBUG: Querying site \'%s\' with'%( self._label, domainSE ) 00399 print ' \'%s\''%( dbsSiteQuery ) 00400 for lineSite in os.popen( 'dbs search --query="%s"'%( dbsSiteQuery ) ): 00401 if lineSite.find( dataset ) != -1: 00402 foundSE = True 00403 break 00404 if not foundSE: 00405 if debug: 00406 print '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE 
) 00407 break 00408 validVersion = version 00409 if debug: 00410 print '%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion ) 00411 if numberOfFiles == 0: 00412 break 00413 filePath = line.replace( '\n', '' ) 00414 if debug: 00415 print '%s DEBUG: File \'%s\' found'%( self._label, filePath ) 00416 fileCount += 1 00417 if fileCount > skipFiles: 00418 filePaths.append( filePath ) 00419 if not numberOfFiles < 0: 00420 if numberOfFiles <= len( filePaths ): 00421 break 00422 if validVersion > 0: 00423 if numberOfFiles == 0 and debug: 00424 print '%s DEBUG: No files requested'%( self._label ) 00425 break 00426 00427 # Check output and return 00428 if validVersion == 0: 00429 print '%s INFO : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self._label, datasetAll, domainSE ) 00430 if debug: 00431 self.messageEmptyList() 00432 elif len( filePaths ) == 0: 00433 print '%s INFO : No RelVal file(s) picked up in dataset \'%s\''%( self._label, dataset ) 00434 if debug: 00435 self.messageEmptyList() 00436 elif len( filePaths ) < numberOfFiles: 00437 print '%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self._label, len( filePaths ), numberOfFiles, dataset ) 00438 00439 if debug: 00440 print '%s DEBUG: returning %i file(s):\n%s'%( self._label, len( filePaths ), filePaths ) 00441 return filePaths
def cmsswVersionTools::PickRelValInputFiles::getDefaultParameters | ( | self | ) |
Definition at line 92 of file cmsswVersionTools.py.
def cmsswVersionTools::PickRelValInputFiles::messageEmptyList | ( | self | ) |
Definition at line 159 of file cmsswVersionTools.py.
Definition at line 95 of file cmsswVersionTools.py.
tuple cmsswVersionTools::PickRelValInputFiles::_defaultParameters = dicttypes.SortedKeysDict() [static, private] |
Definition at line 90 of file cmsswVersionTools.py.
string cmsswVersionTools::PickRelValInputFiles::_label = 'pickRelValInputFiles' [static, private] |
Definition at line 89 of file cmsswVersionTools.py.
Definition at line 95 of file cmsswVersionTools.py.