00001 import FWCore.ParameterSet.Config as cms
00002
00003 from FWCore.GuiBrowsers.ConfigToolBase import *
00004 from PhysicsTools.PatAlgos.tools.helpers import *
00005 from PhysicsTools.PatAlgos.tools.jetTools import *
00006 from Configuration.AlCa.autoCond import autoCond
00007
00008 import os
00009 import socket
00010 from subprocess import *
00011 import json
00012 import das_client
00013
00014
00015
00016
00017
00018
class PickRelValInputFiles( ConfigToolBase ):
    """ Picks up RelVal input files automatically and
    returns a vector of strings with the paths to be used in [PoolSource].fileNames
      PickRelValInputFiles( useDAS, cmsswVersion, formerVersion, relVal, dataTier, condition, globalTag, maxVersions, skipFiles, numberOfFiles, debug )
      - useDAS       : switch to perform query in DAS rather than in DBS
                       optional; default: False
      - cmsswVersion : CMSSW release to pick up the RelVal files from
                       optional; default: the current release (determined automatically from environment)
      - formerVersion: use the last before the last valid CMSSW release to pick up the RelVal files from
                       applies also, if 'cmsswVersion' is set explicitly
                       optional; default: False
      - relVal       : RelVal sample to be used
                       optional; default: 'RelValTTbar'
      - dataTier     : data tier to be used
                       optional; default: 'GEN-SIM-RECO'
      - condition    : identifier of GlobalTag as defined in Configuration.AlCa.autoCond
                       possibly overwritten, if 'globalTag' is set explicitly
                       optional; default: 'startup'
      - globalTag    : name of GlobalTag as it is used in the data path of the RelVals
                       optional; default: determined automatically as defined by 'condition' in Configuration.AlCa.autoCond
        !!!            Determination is done for the release one runs in, not for the release the RelVals have been produced in.
        !!!            Example of deviation: data RelVals (CMSSW_4_1_X) might not only have the pure name of the GlobalTag 'GR_R_311_V2' in the full path,
                       but also an extension identifying the data: 'GR_R_311_V2_RelVal_wzMu2010B'
      - maxVersions  : max. versioning number of RelVal to check
                       optional; default: 3
      - skipFiles    : number of files to skip for a found RelVal sample
                       optional; default: 0
      - numberOfFiles: number of files to pick up
                       setting it to negative values, returns all found ('skipFiles' remains active though)
                       optional; default: -1
      - debug        : switch to enable enhanced messages in 'stdout'
                       optional; default: False
    """

    _label = 'pickRelValInputFiles'
    # Class-level container; it is (re-)filled by every '__init__' call.
    _defaultParameters = dicttypes.SortedKeysDict()

    def getDefaultParameters( self ):
        """ Returns the container holding the default parameters of this tool. """
        return self._defaultParameters

    def __init__( self ):
        """ Registers all parameters with their defaults and initialises the current parameter set from them. """
        ConfigToolBase.__init__( self )
        self.addParameter( self._defaultParameters, 'useDAS'       , False                                                               , '' )
        self.addParameter( self._defaultParameters, 'cmsswVersion' , os.getenv( "CMSSW_VERSION" )                                        , 'auto from environment' )
        self.addParameter( self._defaultParameters, 'formerVersion', False                                                               , '' )
        self.addParameter( self._defaultParameters, 'relVal'       , 'RelValTTbar'                                                       , '' )
        self.addParameter( self._defaultParameters, 'dataTier'     , 'GEN-SIM-RECO'                                                      , '' )
        self.addParameter( self._defaultParameters, 'condition'    , 'startup'                                                           , '' )
        # The last 5 characters of the autoCond entry are dropped (presumably a '::All' suffix -- TODO confirm against autoCond).
        # This line relies on 'condition' having been registered just above.
        self.addParameter( self._defaultParameters, 'globalTag'    , autoCond[ self.getDefaultParameters()[ 'condition' ].value ][ : -5 ], 'auto from \'condition\'' )
        self.addParameter( self._defaultParameters, 'maxVersions'  , 3                                                                   , '' )
        self.addParameter( self._defaultParameters, 'skipFiles'    , 0                                                                   , '' )
        self.addParameter( self._defaultParameters, 'numberOfFiles', -1                                                                  , 'all' )
        self.addParameter( self._defaultParameters, 'debug'        , False                                                               , '' )
        self._parameters = copy.deepcopy( self._defaultParameters )
        self._comment = ""

    def __call__( self
                , useDAS        = None
                , cmsswVersion  = None
                , formerVersion = None
                , relVal        = None
                , dataTier      = None
                , condition     = None
                , globalTag     = None
                , maxVersions   = None
                , skipFiles     = None
                , numberOfFiles = None
                , debug         = None
                ):
        """ Stores the given parameters (falling back to the defaults for every 'None')
        and returns the result of 'apply()', i.e. the list of picked file paths.
        """
        if useDAS is None:
            useDAS = self.getDefaultParameters()[ 'useDAS' ].value
        if cmsswVersion is None:
            cmsswVersion = self.getDefaultParameters()[ 'cmsswVersion' ].value
        if formerVersion is None:
            formerVersion = self.getDefaultParameters()[ 'formerVersion' ].value
        if relVal is None:
            relVal = self.getDefaultParameters()[ 'relVal' ].value
        if dataTier is None:
            dataTier = self.getDefaultParameters()[ 'dataTier' ].value
        if condition is None:
            condition = self.getDefaultParameters()[ 'condition' ].value
        if globalTag is None:
            # Derived from the (possibly user-given) 'condition', not from the stored default.
            globalTag = autoCond[ condition ][ : -5 ]
        if maxVersions is None:
            maxVersions = self.getDefaultParameters()[ 'maxVersions' ].value
        if skipFiles is None:
            skipFiles = self.getDefaultParameters()[ 'skipFiles' ].value
        if numberOfFiles is None:
            numberOfFiles = self.getDefaultParameters()[ 'numberOfFiles' ].value
        if debug is None:
            debug = self.getDefaultParameters()[ 'debug' ].value
        self.setParameter( 'useDAS'       , useDAS )
        self.setParameter( 'cmsswVersion' , cmsswVersion )
        self.setParameter( 'formerVersion', formerVersion )
        self.setParameter( 'relVal'       , relVal )
        self.setParameter( 'dataTier'     , dataTier )
        self.setParameter( 'condition'    , condition )
        self.setParameter( 'globalTag'    , globalTag )
        self.setParameter( 'maxVersions'  , maxVersions )
        self.setParameter( 'skipFiles'    , skipFiles )
        self.setParameter( 'numberOfFiles', numberOfFiles )
        self.setParameter( 'debug'        , debug )
        return self.apply()

    def messageEmptyList( self ):
        """ Prints the notice that an empty file list is going to be returned. """
        print( '%s DEBUG: Empty file list returned'%( self._label ) )
        print( ' This might be overwritten by providing input files explicitly to the source module in the main configuration file.' )

    def apply( self ):
        """ Determines the RelVal release to use, checks the local site and queries DAS or DBS for the files.
        Returns the list of found file paths; the list is empty, if the release cannot be determined via SCRAM,
        the site is neither CERN nor FNAL, or no matching dataset is available on the site's SE.
        Raises 'SystemExit', if DAS does not reply with status 'ok'.
        """
        useDAS        = self._parameters[ 'useDAS'        ].value
        cmsswVersion  = self._parameters[ 'cmsswVersion'  ].value
        formerVersion = self._parameters[ 'formerVersion' ].value
        relVal        = self._parameters[ 'relVal'        ].value
        dataTier      = self._parameters[ 'dataTier'      ].value
        condition     = self._parameters[ 'condition'     ].value # not used below, 'globalTag' already carries its effect
        globalTag     = self._parameters[ 'globalTag'     ].value
        maxVersions   = self._parameters[ 'maxVersions'   ].value
        skipFiles     = self._parameters[ 'skipFiles'     ].value
        numberOfFiles = self._parameters[ 'numberOfFiles' ].value
        debug         = self._parameters[ 'debug'         ].value

        filePaths = []

        # Determine corresponding CMSSW version for RelVals:
        # release name suffixes are cut off; for '_X_' names or with 'formerVersion' the SCRAM release list is consulted.
        preId      = '_pre'
        patchId    = '_patch'
        hltPatchId = '_hltpatch'
        dqmPatchId = '_dqmpatch'
        slhcId     = '_SLHC'
        rootId     = '_root'
        ibId       = '_X_'
        if patchId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( patchId )[ 0 ]
        elif hltPatchId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ]
        elif dqmPatchId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ]
        elif rootId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( rootId )[ 0 ]
        elif slhcId in cmsswVersion:
            cmsswVersion = cmsswVersion.split( slhcId )[ 0 ]
        elif ibId in cmsswVersion or formerVersion:
            # Every command line word is its own list element, so that no re-splitting by the called program is relied on.
            outputTuple = Popen( [ 'scram', 'l', '-c', 'CMSSW' ], stdout = PIPE, stderr = PIPE ).communicate()
            if len( outputTuple[ 1 ] ) != 0:
                print( '%s INFO : SCRAM error'%( self._label ) )
                if debug:
                    print( ' from trying to determine last valid releases before \'%s\''%( cmsswVersion ) )
                    print( '' )
                    print( outputTuple[ 1 ] )
                    print( '' )
                    self.messageEmptyList()
                return filePaths
            versions = { 'last'      :''
                       , 'lastToLast':''
                       }
            for line in outputTuple[ 0 ].splitlines():
                fields = line.split()
                if len( fields ) < 2:
                    continue # blank or unexpected line without a release name in the 2nd column
                version = fields[ 1 ]
                if cmsswVersion.split( ibId )[ 0 ] in version or cmsswVersion.rpartition( '_' )[ 0 ] in version:
                    if not ( patchId in version or hltPatchId in version or dqmPatchId in version or slhcId in version or ibId in version or rootId in version ):
                        versions[ 'lastToLast' ] = versions[ 'last' ]
                        versions[ 'last' ]       = version
                        if version == cmsswVersion:
                            break
            if formerVersion:
                # Don't use a pre-release as "former version" for releases other than those ending with '_0'
                if preId in versions[ 'lastToLast' ] and not preId in versions[ 'last' ] and not versions[ 'last' ].endswith( '_0' ):
                    versions[ 'lastToLast' ] = versions[ 'lastToLast' ].split( preId )[ 0 ]
                # For a release ending with '_0', look up its own pre-releases as "former version"
                elif versions[ 'last' ].endswith( '_0' ) and not ( preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].startswith( versions[ 'last' ] ) ):
                    versions[ 'lastToLast' ] = ''
                    for line in outputTuple[ 0 ].splitlines():
                        fields = line.split()
                        if len( fields ) < 2:
                            continue
                        version      = fields[ 1 ]
                        versionParts = version.partition( preId )
                        if versionParts[ 0 ] == versions[ 'last' ] and versionParts[ 1 ] == preId:
                            versions[ 'lastToLast' ] = version
                        elif versions[ 'lastToLast' ] != '':
                            break
                # Don't use a release ending with '_0' as "former version" for pre-releases
                elif preId in versions[ 'last' ] and not preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].endswith( '_0' ):
                    versions[ 'lastToLast' ] = ''
                cmsswVersion = versions[ 'lastToLast' ]
            else:
                cmsswVersion = versions[ 'last' ]

        # Debugging output
        if debug:
            print( '%s DEBUG: Called with...'%( self._label ) )
            for key in self._parameters.keys():
                value = self._parameters[ key ].value
                line  = ' %s:\t%s'%( key, value )
                # Compare by value: an explicitly given value equal to the default is still the default.
                if value == self.getDefaultParameters()[ key ].value:
                    line += '  (default)'
                print( line )
                if key == 'cmsswVersion' and cmsswVersion != value:
                    if formerVersion:
                        print( ' ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion ) )
                    else:
                        print( ' ==> modified to last valid release %s'%( cmsswVersion ) )

        # Check domain: only 'cern.ch' and 'fnal.gov' are accepted below.
        domain   = socket.getfqdn().split( '.' )
        domainSE = ''
        if len( domain ) == 0:
            # Purely defensive: 'str.split' returns at least one element.
            print( '%s INFO : Cannot determine domain of this computer'%( self._label ) )
            if debug:
                self.messageEmptyList()
            return filePaths
        elif os.uname()[0] == "Darwin":
            print( '%s INFO : Running on MacOSX without direct access to RelVal files.'%( self._label ) )
            if debug:
                self.messageEmptyList()
            return filePaths
        elif len( domain ) == 1:
            print( '%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self._label, domain[ 0 ] ) )
            if debug:
                self.messageEmptyList()
            return filePaths
        if not ( ( domain[ -2 ] == 'cern' and domain[ -1 ] == 'ch' ) or ( domain[ -2 ] == 'fnal' and domain[ -1 ] == 'gov' ) ):
            print( '%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self._label, domain[ -2 ], domain[ -1 ] ) )
            if debug:
                self.messageEmptyList()
            return filePaths
        if domain[ -2 ] == 'cern':
            domainSE = 'T2_CH_CERN'
        elif domain[ -2 ] == 'fnal':
            domainSE = 'T1_US_FNAL_MSS'
        if debug:
            print( '%s DEBUG: Running at site \'%s.%s\''%( self._label, domain[ -2 ], domain[ -1 ] ) )
            print( '%s DEBUG: Looking for SE \'%s\''%( self._label, domainSE ) )

        # Find files: dataset versions are tried from 'maxVersions' downwards, the first one available on the SE wins.
        validVersion = 0
        dataset      = ''
        datasetAll   = '/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier )
        if useDAS:
            if debug:
                print( '%s DEBUG: Using DAS query'%( self._label ) )
            # Result limit handed to DAS:
            # - files are skipped only after the query (s. below), so the skipped ones have to be requested, too;
            # - one row is sufficient to validate a dataset version, if no files are requested;
            # - 0 is passed for "all files", as it was for the default of -1 before.
            if numberOfFiles > 0:
                dasLimit = numberOfFiles + max( skipFiles, 0 )
            elif numberOfFiles == 0:
                dasLimit = 1
            else:
                dasLimit = 0
            for version in range( maxVersions, 0, -1 ):
                filePaths    = []
                filePathsTmp = []
                fileCount    = 0
                dataset  = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
                dasQuery = 'file dataset=%s | grep file.name'%( dataset )
                if debug:
                    print( '%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset ) )
                    print( ' \'%s\''%( dasQuery ) )
                dasData  = das_client.get_data( 'https://cmsweb.cern.ch', dasQuery, 0, dasLimit, False )
                jsondict = json.loads( dasData )
                if debug:
                    print( '%s DEBUG: Received DAS data:'%( self._label ) )
                    print( ' \'%s\''%( dasData ) )
                    print( '%s DEBUG: Determined JSON dictionary:'%( self._label ) )
                    print( ' \'%s\''%( jsondict ) )
                if jsondict[ 'status' ] != 'ok':
                    print( 'There was a problem while querying DAS with query \'%s\'. Server reply was:\n %s' % (dasQuery, dasData) )
                    # Same exception as raised by 'exit( 1 )', but independent of the 'site' module.
                    raise SystemExit( 1 )
                mongo_query = jsondict[ 'mongo_query' ]
                filters     = mongo_query[ 'filters' ]
                data        = jsondict[ 'data' ]
                if debug:
                    print( '%s DEBUG: Query in JSON dictionary:'%( self._label ) )
                    print( ' \'%s\''%( mongo_query ) )
                    print( '%s DEBUG: Filters in query:'%( self._label ) )
                    print( ' \'%s\''%( filters ) )
                    print( '%s DEBUG: Data in JSON dictionary:'%( self._label ) )
                    print( ' \'%s\''%( data ) )
                for row in data:
                    rowValues = [ r for r in das_client.get_value( row, filters ) ]
                    if not rowValues:
                        continue # row without any value for the filter
                    filePath = rowValues[ 0 ]
                    if debug:
                        print( '%s DEBUG: Testing file entry \'%s\''%( self._label, filePath ) )
                    if len( filePath ) > 0:
                        if validVersion != version:
                            # First file of this dataset version: check that the dataset is hosted on the local SE.
                            dasTest         = das_client.get_data( 'https://cmsweb.cern.ch', 'site dataset=%s | grep site.name'%( dataset ), 0, 999, False )
                            jsontestdict    = json.loads( dasTest )
                            mongo_testquery = jsontestdict[ 'mongo_query' ]
                            testfilters     = mongo_testquery[ 'filters' ]
                            testdata        = jsontestdict[ 'data' ]
                            if debug:
                                print( '%s DEBUG: Received DAS data (site test):'%( self._label ) )
                                print( ' \'%s\''%( dasTest ) )
                                print( '%s DEBUG: Determined JSON dictionary (site test):'%( self._label ) )
                                print( ' \'%s\''%( jsontestdict ) )
                                print( '%s DEBUG: Query in JSON dictionary (site test):'%( self._label ) )
                                print( ' \'%s\''%( mongo_testquery ) )
                                print( '%s DEBUG: Filters in query (site test):'%( self._label ) )
                                print( ' \'%s\''%( testfilters ) )
                                print( '%s DEBUG: Data in JSON dictionary (site test):'%( self._label ) )
                                print( ' \'%s\''%( testdata ) )
                            foundSE = False
                            for testrow in testdata:
                                siteNames = [ tr for tr in das_client.get_value( testrow, testfilters ) ]
                                if siteNames and siteNames[ 0 ] == domainSE:
                                    foundSE = True
                                    break
                            if not foundSE:
                                if debug:
                                    print( '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE ) )
                                break
                            validVersion = version
                            if debug:
                                print( '%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion ) )
                        if numberOfFiles == 0:
                            break
                        # Protect from double entries
                        if not filePath in filePathsTmp:
                            filePathsTmp.append( filePath )
                            if debug:
                                print( '%s DEBUG: File \'%s\' found'%( self._label, filePath ) )
                            fileCount += 1
                            # Skipping is done here, the query itself always starts at index 0.
                            if fileCount > skipFiles:
                                filePaths.append( filePath )
                            # Stop as soon as the requested number is reached (same as for the DBS query).
                            if numberOfFiles > 0 and numberOfFiles <= len( filePaths ):
                                break
                        elif debug:
                            print( '%s DEBUG: File \'%s\' found again'%( self._label, filePath ) )
                if validVersion > 0:
                    if numberOfFiles == 0 and debug:
                        print( '%s DEBUG: No files requested'%( self._label ) )
                    break
        else:
            if debug:
                print( '%s DEBUG: Using DBS query'%( self._label ) )
            for version in range( maxVersions, 0, -1 ):
                filePaths = []
                fileCount = 0
                dataset  = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
                dbsQuery = 'find file where dataset = %s'%( dataset )
                if debug:
                    print( '%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset ) )
                    print( ' \'%s\''%( dbsQuery ) )
                foundSE = False
                for line in os.popen( 'dbs search --query="%s"'%( dbsQuery ) ):
                    if line.find( '.root' ) != -1:
                        if validVersion != version:
                            # First file of this dataset version: check that the dataset is hosted on the local SE.
                            if not foundSE:
                                dbsSiteQuery = 'find dataset where dataset = %s and site = %s'%( dataset, domainSE )
                                if debug:
                                    print( '%s DEBUG: Querying site \'%s\' with'%( self._label, domainSE ) )
                                    print( ' \'%s\''%( dbsSiteQuery ) )
                                for lineSite in os.popen( 'dbs search --query="%s"'%( dbsSiteQuery ) ):
                                    if lineSite.find( dataset ) != -1:
                                        foundSE = True
                                        break
                            if not foundSE:
                                if debug:
                                    print( '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE ) )
                                break
                            validVersion = version
                            if debug:
                                print( '%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion ) )
                        if numberOfFiles == 0:
                            break
                        filePath = line.replace( '\n', '' )
                        if debug:
                            print( '%s DEBUG: File \'%s\' found'%( self._label, filePath ) )
                        fileCount += 1
                        if fileCount > skipFiles:
                            filePaths.append( filePath )
                        if not numberOfFiles < 0:
                            if numberOfFiles <= len( filePaths ):
                                break
                if validVersion > 0:
                    if numberOfFiles == 0 and debug:
                        print( '%s DEBUG: No files requested'%( self._label ) )
                    break

        # Check output and return
        if validVersion == 0:
            print( '%s INFO : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self._label, datasetAll, domainSE ) )
            if debug:
                self.messageEmptyList()
        elif len( filePaths ) == 0:
            print( '%s INFO : No RelVal file(s) picked up in dataset \'%s\''%( self._label, dataset ) )
            if debug:
                self.messageEmptyList()
        elif len( filePaths ) < numberOfFiles:
            print( '%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self._label, len( filePaths ), numberOfFiles, dataset ) )

        if debug:
            print( '%s DEBUG: returning %i file(s):\n%s'%( self._label, len( filePaths ), filePaths ) )
        return filePaths
00405
# Module-level instance of the tool; calling it goes through PickRelValInputFiles.__call__.
pickRelValInputFiles = PickRelValInputFiles()