------------------------------------------------------ Automatic pick-up of RelVal input files ------------------------------------------------------
Inherits FWCore::GuiBrowsers::ConfigToolBase::ConfigToolBase.
Public Member Functions | |
def | __call__ |
def | __init__ |
def | apply |
def | getDefaultParameters |
def | messageEmptyList |
Private Attributes | |
_comment | |
_parameters | |
Static Private Attributes | |
tuple | _defaultParameters = dicttypes.SortedKeysDict() |
string | _label = 'pickRelValInputFiles' |
------------------------------------------------------ Automatic pick-up of RelVal input files ------------------------------------------------------
Picks up RelVal input files automatically and returns a list of strings with the paths to be used in [PoolSource].fileNames.
PickRelValInputFiles( useDAS, cmsswVersion, formerVersion, relVal, dataTier, condition, globalTag, maxVersions, skipFiles, numberOfFiles, debug )
- useDAS : switch to perform the query in DAS rather than in DBS; optional; default: False
- cmsswVersion : CMSSW release to pick up the RelVal files from; optional; default: the current release (determined automatically from the environment)
- formerVersion: use the second-to-last valid CMSSW release to pick up the RelVal files from; also applies if 'cmsswVersion' is set explicitly; optional; default: False
- relVal : RelVal sample to be used; optional; default: 'RelValTTbar'
- dataTier : data tier to be used; optional; default: 'GEN-SIM-RECO'
- condition : identifier of the GlobalTag as defined in Configurations/PyReleaseValidation/python/autoCond.py; possibly overridden if 'globalTag' is set explicitly; optional; default: 'startup'
- globalTag : name of the GlobalTag as it is used in the data path of the RelVals; optional; default: determined automatically as defined by 'condition' in Configurations/PyReleaseValidation/python/autoCond.py
  !!! The determination is done for the release one runs in, not for the release the RelVals have been produced in. !!!
  Example of deviation: data RelVals (CMSSW_4_1_X) might not only have the pure name of the GlobalTag 'GR_R_311_V2' in the full path, but also an extension identifying the data: 'GR_R_311_V2_RelVal_wzMu2010B'
- maxVersions : maximum version number of the RelVal to check; optional; default: 3
- skipFiles : number of files to skip for a found RelVal sample; optional; default: 0
- numberOfFiles: number of files to pick up; setting it to a negative value returns all files found ('skipFiles' remains active, though); optional; default: -1
- debug : switch to enable enhanced messages in 'stdout'; optional; default: False
Definition at line 19 of file cmsswVersionTools.py.
def cmsswVersionTools::PickRelValInputFiles::__init__ | ( | self | ) |
Definition at line 59 of file cmsswVersionTools.py.
00060 : 00061 ConfigToolBase.__init__( self ) 00062 self.addParameter( self._defaultParameters, 'useDAS' , False , '' ) 00063 self.addParameter( self._defaultParameters, 'cmsswVersion' , os.getenv( "CMSSW_VERSION" ) , 'auto from environment' ) 00064 self.addParameter( self._defaultParameters, 'formerVersion', False , '' ) 00065 self.addParameter( self._defaultParameters, 'relVal' , 'RelValTTbar' , '' ) 00066 self.addParameter( self._defaultParameters, 'dataTier' , 'GEN-SIM-RECO' , '' ) 00067 self.addParameter( self._defaultParameters, 'condition' , 'startup' , '' ) 00068 self.addParameter( self._defaultParameters, 'globalTag' , autoCond[ self.getDefaultParameters()[ 'condition' ].value ][ : -5 ], 'auto from \'condition\'' ) 00069 self.addParameter( self._defaultParameters, 'maxVersions' , 3 , '' ) 00070 self.addParameter( self._defaultParameters, 'skipFiles' , 0 , '' ) 00071 self.addParameter( self._defaultParameters, 'numberOfFiles', -1 , 'all' ) 00072 self.addParameter( self._defaultParameters, 'debug' , False , '' ) 00073 self._parameters = copy.deepcopy( self._defaultParameters ) 00074 self._comment = ""
def cmsswVersionTools::PickRelValInputFiles::__call__ | ( | self, useDAS = None, cmsswVersion = None, formerVersion = None, relVal = None, dataTier = None, condition = None, globalTag = None, maxVersions = None, skipFiles = None, numberOfFiles = None, debug = None | ) |
Definition at line 75 of file cmsswVersionTools.py.
00088 : 00089 if useDAS is None: 00090 useDAS = self.getDefaultParameters()[ 'useDAS' ].value 00091 if cmsswVersion is None: 00092 cmsswVersion = self.getDefaultParameters()[ 'cmsswVersion' ].value 00093 if formerVersion is None: 00094 formerVersion = self.getDefaultParameters()[ 'formerVersion' ].value 00095 if relVal is None: 00096 relVal = self.getDefaultParameters()[ 'relVal' ].value 00097 if dataTier is None: 00098 dataTier = self.getDefaultParameters()[ 'dataTier' ].value 00099 if condition is None: 00100 condition = self.getDefaultParameters()[ 'condition' ].value 00101 if globalTag is None: 00102 globalTag = autoCond[ condition ][ : -5 ] # auto from 'condition' 00103 if maxVersions is None: 00104 maxVersions = self.getDefaultParameters()[ 'maxVersions' ].value 00105 if skipFiles is None: 00106 skipFiles = self.getDefaultParameters()[ 'skipFiles' ].value 00107 if numberOfFiles is None: 00108 numberOfFiles = self.getDefaultParameters()[ 'numberOfFiles' ].value 00109 if debug is None: 00110 debug = self.getDefaultParameters()[ 'debug' ].value 00111 self.setParameter( 'useDAS' , useDAS ) 00112 self.setParameter( 'cmsswVersion' , cmsswVersion ) 00113 self.setParameter( 'formerVersion', formerVersion ) 00114 self.setParameter( 'relVal' , relVal ) 00115 self.setParameter( 'dataTier' , dataTier ) 00116 self.setParameter( 'condition' , condition ) 00117 self.setParameter( 'globalTag' , globalTag ) 00118 self.setParameter( 'maxVersions' , maxVersions ) 00119 self.setParameter( 'skipFiles' , skipFiles ) 00120 self.setParameter( 'numberOfFiles', numberOfFiles ) 00121 self.setParameter( 'debug' , debug ) 00122 return self.apply()
def cmsswVersionTools::PickRelValInputFiles::apply | ( | self | ) |
Definition at line 127 of file cmsswVersionTools.py.
00128 : 00129 useDAS = self._parameters[ 'useDAS' ].value 00130 cmsswVersion = self._parameters[ 'cmsswVersion' ].value 00131 formerVersion = self._parameters[ 'formerVersion' ].value 00132 relVal = self._parameters[ 'relVal' ].value 00133 dataTier = self._parameters[ 'dataTier' ].value 00134 condition = self._parameters[ 'condition' ].value # only used for GT determination in initialization, if GT not explicitly given 00135 globalTag = self._parameters[ 'globalTag' ].value 00136 maxVersions = self._parameters[ 'maxVersions' ].value 00137 skipFiles = self._parameters[ 'skipFiles' ].value 00138 numberOfFiles = self._parameters[ 'numberOfFiles' ].value 00139 debug = self._parameters[ 'debug' ].value 00140 00141 filePaths = [] 00142 00143 # Determine corresponding CMSSW version for RelVals 00144 preId = '_pre' 00145 patchId = '_patch' # patch releases 00146 hltPatchId = '_hltpatch' # HLT patch releases 00147 dqmPatchId = '_dqmpatch' # DQM patch releases 00148 slhcId = '_SLHC' # SLHC releases 00149 rootId = '_root' # ROOT test releases 00150 ibId = '_X_' # IBs 00151 if patchId in cmsswVersion: 00152 cmsswVersion = cmsswVersion.split( patchId )[ 0 ] 00153 elif hltPatchId in cmsswVersion: 00154 cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ] 00155 elif dqmPatchId in cmsswVersion: 00156 cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ] 00157 elif rootId in cmsswVersion: 00158 cmsswVersion = cmsswVersion.split( rootId )[ 0 ] 00159 elif slhcId in cmsswVersion: 00160 cmsswVersion = cmsswVersion.split( slhcId )[ 0 ] 00161 elif ibId in cmsswVersion or formerVersion: 00162 outputTuple = Popen( [ 'scram', 'l -c CMSSW' ], stdout = PIPE, stderr = PIPE ).communicate() 00163 if len( outputTuple[ 1 ] ) != 0: 00164 print '%s INFO : SCRAM error'%( self._label ) 00165 if debug: 00166 print ' from trying to determine last valid releases before \'%s\''%( cmsswVersion ) 00167 print 00168 print outputTuple[ 1 ] 00169 print 00170 self.messageEmptyList() 00171 return filePaths 00172 
versions = { 'last' :'' 00173 , 'lastToLast':'' 00174 } 00175 for line in outputTuple[ 0 ].splitlines(): 00176 version = line.split()[ 1 ] 00177 if cmsswVersion.split( ibId )[ 0 ] in version or cmsswVersion.rpartition( '_' )[ 0 ] in version: 00178 if not ( patchId in version or hltPatchId in version or dqmPatchId in version or slhcId in version or ibId in version or rootId in version ): 00179 versions[ 'lastToLast' ] = versions[ 'last' ] 00180 versions[ 'last' ] = version 00181 if version == cmsswVersion: 00182 break 00183 # FIXME: ordering of output problematic ('XYZ_pre10' before 'XYZ_pre2', no "formerVersion" for 'XYZ_pre1') 00184 if formerVersion: 00185 # Don't use pre-releases as "former version" for other releases than CMSSW_X_Y_0 00186 if preId in versions[ 'lastToLast' ] and not preId in versions[ 'last' ] and not versions[ 'last' ].endswith( '_0' ): 00187 versions[ 'lastToLast' ] = versions[ 'lastToLast' ].split( preId )[ 0 ] # works only, if 'CMSSW_X_Y_0' esists ;-) 00188 # Use pre-release as "former version" for CMSSW_X_Y_0 00189 elif versions[ 'last' ].endswith( '_0' ) and not ( preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].startswith( versions[ 'last' ] ) ): 00190 versions[ 'lastToLast' ] = '' 00191 for line in outputTuple[ 0 ].splitlines(): 00192 version = line.split()[ 1 ] 00193 versionParts = version.partition( preId ) 00194 if versionParts[ 0 ] == versions[ 'last' ] and versionParts[ 1 ] == preId: 00195 versions[ 'lastToLast' ] = version 00196 elif versions[ 'lastToLast' ] != '': 00197 break 00198 # Don't use CMSSW_X_Y_0 as "former version" for pre-releases 00199 elif preId in versions[ 'last' ] and not preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].endswith( '_0' ): 00200 versions[ 'lastToLast' ] = '' # no alternative :-( 00201 cmsswVersion = versions[ 'lastToLast' ] 00202 else: 00203 cmsswVersion = versions[ 'last' ] 00204 00205 # Debugging output 00206 if debug: 00207 print '%s DEBUG: Called with...'%( 
self._label ) 00208 for key in self._parameters.keys(): 00209 print ' %s:\t'%( key ), 00210 print self._parameters[ key ].value, 00211 if self._parameters[ key ].value is self.getDefaultParameters()[ key ].value: 00212 print ' (default)' 00213 else: 00214 print 00215 if key == 'cmsswVersion' and cmsswVersion != self._parameters[ key ].value: 00216 if formerVersion: 00217 print ' ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion ) 00218 else: 00219 print ' ==> modified to last valid release %s'%( cmsswVersion ) 00220 00221 # Check domain 00222 domain = socket.getfqdn().split( '.' ) 00223 domainSE = '' 00224 if len( domain ) == 0: 00225 print '%s INFO : Cannot determine domain of this computer'%( self._label ) 00226 if debug: 00227 self.messageEmptyList() 00228 return filePaths 00229 elif os.uname()[0] == "Darwin": 00230 print '%s INFO : Running on MacOSX without direct access to RelVal files.'%( self._label ) 00231 if debug: 00232 self.messageEmptyList() 00233 return filePaths 00234 elif len( domain ) == 1: 00235 print '%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self._label, domain[ 0 ] ) 00236 if debug: 00237 self.messageEmptyList() 00238 return filePaths 00239 if not ( ( domain[ -2 ] == 'cern' and domain[ -1 ] == 'ch' ) or ( domain[ -2 ] == 'fnal' and domain[ -1 ] == 'gov' ) ): 00240 print '%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self._label, domain[ -2 ], domain[ -1 ] ) 00241 if debug: 00242 self.messageEmptyList() 00243 return filePaths 00244 if domain[ -2 ] == 'cern': 00245 domainSE = 'T2_CH_CERN' 00246 elif domain[ -2 ] == 'fnal': 00247 domainSE = 'T1_US_FNAL_MSS' 00248 if debug: 00249 print '%s DEBUG: Running at site \'%s.%s\''%( self._label, domain[ -2 ], domain[ -1 ] ) 00250 print '%s DEBUG: Looking for SE \'%s\''%( self._label, domainSE ) 00251 00252 # Find files 00253 validVersion = 0 00254 dataset = '' 00255 datasetAll = 
'/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier ) 00256 if useDAS: 00257 if debug: 00258 print '%s DEBUG: Using DAS query'%( self._label ) 00259 dasLimit = numberOfFiles 00260 if dasLimit <= 0: 00261 dasLimit += 1 00262 for version in range( maxVersions, 0, -1 ): 00263 filePaths = [] 00264 filePathsTmp = [] 00265 fileCount = 0 00266 dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier ) 00267 dasQuery = 'file dataset=%s | grep file.name'%( dataset ) 00268 if debug: 00269 print '%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset ) 00270 print ' \'%s\''%( dasQuery ) 00271 # partially stolen from das_client.py for option '--format=plain', needs filter ("grep") in the query 00272 dasData = das_client.get_data( 'https://cmsweb.cern.ch', dasQuery, 0, dasLimit, False ) 00273 jsondict = json.loads( dasData ) 00274 if debug: 00275 print '%s DEBUG: Received DAS data:'%( self._label ) 00276 print ' \'%s\''%( dasData ) 00277 print '%s DEBUG: Determined JSON dictionary:'%( self._label ) 00278 print ' \'%s\''%( jsondict ) 00279 if jsondict[ 'status' ] != 'ok': 00280 print 'There was a problem while querying DAS with query \'%s\'. 
Server reply was:\n %s' % (dasQuery, dasData) 00281 exit( 1 ) 00282 mongo_query = jsondict[ 'mongo_query' ] 00283 filters = mongo_query[ 'filters' ] 00284 data = jsondict[ 'data' ] 00285 if debug: 00286 print '%s DEBUG: Query in JSON dictionary:'%( self._label ) 00287 print ' \'%s\''%( mongo_query ) 00288 print '%s DEBUG: Filters in query:'%( self._label ) 00289 print ' \'%s\''%( filters ) 00290 print '%s DEBUG: Data in JSON dictionary:'%( self._label ) 00291 print ' \'%s\''%( data ) 00292 for row in data: 00293 filePath = [ r for r in das_client.get_value( row, filters ) ][ 0 ] 00294 if debug: 00295 print '%s DEBUG: Testing file entry \'%s\''%( self._label, filePath ) 00296 if len( filePath ) > 0: 00297 if validVersion != version: 00298 dasTest = das_client.get_data( 'https://cmsweb.cern.ch', 'site dataset=%s | grep site.name'%( dataset ), 0, 999, False ) 00299 jsontestdict = json.loads( dasTest ) 00300 mongo_testquery = jsontestdict[ 'mongo_query' ] 00301 testfilters = mongo_testquery[ 'filters' ] 00302 testdata = jsontestdict[ 'data' ] 00303 if debug: 00304 print '%s DEBUG: Received DAS data (site test):'%( self._label ) 00305 print ' \'%s\''%( dasTest ) 00306 print '%s DEBUG: Determined JSON dictionary (site test):'%( self._label ) 00307 print ' \'%s\''%( jsontestdict ) 00308 print '%s DEBUG: Query in JSON dictionary (site test):'%( self._label ) 00309 print ' \'%s\''%( mongo_testquery ) 00310 print '%s DEBUG: Filters in query (site test):'%( self._label ) 00311 print ' \'%s\''%( testfilters ) 00312 print '%s DEBUG: Data in JSON dictionary (site test):'%( self._label ) 00313 print ' \'%s\''%( testdata ) 00314 foundSE = False 00315 for testrow in testdata: 00316 siteName = [ tr for tr in das_client.get_value( testrow, testfilters ) ][ 0 ] 00317 if siteName == domainSE: 00318 foundSE = True 00319 break 00320 if not foundSE: 00321 if debug: 00322 print '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE ) 00323 break 
00324 validVersion = version 00325 if debug: 00326 print '%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion ) 00327 if numberOfFiles == 0: 00328 break 00329 # protect from double entries ( 'unique' flag in query does not work here) 00330 if not filePath in filePathsTmp: 00331 filePathsTmp.append( filePath ) 00332 if debug: 00333 print '%s DEBUG: File \'%s\' found'%( self._label, filePath ) 00334 fileCount += 1 00335 # needed, since and "limit" overrides "idx" in 'get_data' (==> "idx" set to '0' rather than "skipFiles") 00336 if fileCount > skipFiles: 00337 filePaths.append( filePath ) 00338 elif debug: 00339 print '%s DEBUG: File \'%s\' found again'%( self._label, filePath ) 00340 if validVersion > 0: 00341 if numberOfFiles == 0 and debug: 00342 print '%s DEBUG: No files requested'%( self._label ) 00343 break 00344 else: 00345 if debug: 00346 print '%s DEBUG: Using DBS query'%( self._label ) 00347 for version in range( maxVersions, 0, -1 ): 00348 filePaths = [] 00349 fileCount = 0 00350 dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier ) 00351 dbsQuery = 'find file where dataset = %s'%( dataset ) 00352 if debug: 00353 print '%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset ) 00354 print ' \'%s\''%( dbsQuery ) 00355 foundSE = False 00356 for line in os.popen( 'dbs search --query="%s"'%( dbsQuery ) ): 00357 if line.find( '.root' ) != -1: 00358 if validVersion != version: 00359 if not foundSE: 00360 dbsSiteQuery = 'find dataset where dataset = %s and site = %s'%( dataset, domainSE ) 00361 if debug: 00362 print '%s DEBUG: Querying site \'%s\' with'%( self._label, domainSE ) 00363 print ' \'%s\''%( dbsSiteQuery ) 00364 for lineSite in os.popen( 'dbs search --query="%s"'%( dbsSiteQuery ) ): 00365 if lineSite.find( dataset ) != -1: 00366 foundSE = True 00367 break 00368 if not foundSE: 00369 if debug: 00370 print '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE 
) 00371 break 00372 validVersion = version 00373 if debug: 00374 print '%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion ) 00375 if numberOfFiles == 0: 00376 break 00377 filePath = line.replace( '\n', '' ) 00378 if debug: 00379 print '%s DEBUG: File \'%s\' found'%( self._label, filePath ) 00380 fileCount += 1 00381 if fileCount > skipFiles: 00382 filePaths.append( filePath ) 00383 if not numberOfFiles < 0: 00384 if numberOfFiles <= len( filePaths ): 00385 break 00386 if validVersion > 0: 00387 if numberOfFiles == 0 and debug: 00388 print '%s DEBUG: No files requested'%( self._label ) 00389 break 00390 00391 # Check output and return 00392 if validVersion == 0: 00393 print '%s INFO : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self._label, datasetAll, domainSE ) 00394 if debug: 00395 self.messageEmptyList() 00396 elif len( filePaths ) == 0: 00397 print '%s INFO : No RelVal file(s) picked up in dataset \'%s\''%( self._label, dataset ) 00398 if debug: 00399 self.messageEmptyList() 00400 elif len( filePaths ) < numberOfFiles: 00401 print '%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self._label, len( filePaths ), numberOfFiles, dataset ) 00402 00403 if debug: 00404 print '%s DEBUG: returning %i file(s):\n%s'%( self._label, len( filePaths ), filePaths ) 00405 return filePaths
def cmsswVersionTools::PickRelValInputFiles::getDefaultParameters | ( | self | ) |
Definition at line 56 of file cmsswVersionTools.py.
def cmsswVersionTools::PickRelValInputFiles::messageEmptyList | ( | self | ) |
Definition at line 123 of file cmsswVersionTools.py.
cmsswVersionTools::PickRelValInputFiles::_comment [private] |
Definition at line 59 of file cmsswVersionTools.py.
tuple cmsswVersionTools::PickRelValInputFiles::_defaultParameters = dicttypes.SortedKeysDict() [static, private] |
Definition at line 54 of file cmsswVersionTools.py.
string cmsswVersionTools::PickRelValInputFiles::_label = 'pickRelValInputFiles' [static, private] |
Definition at line 53 of file cmsswVersionTools.py.
cmsswVersionTools::PickRelValInputFiles::_parameters [private] |
Definition at line 59 of file cmsswVersionTools.py.