1 from __future__
import print_function
2 import FWCore.ParameterSet.Config
as cms
8 import Utilities.General.cmssw_das_client
as das_client
18 """ Picks up RelVal input files automatically and
    returns a vector of strings with the paths to be used in [PoolSource].fileNames
      PickRelValInputFiles( useDAS, cmsswVersion, formerVersion, relVal, dataTier, condition, globalTag, maxVersions, skipFiles, numberOfFiles, debug )
      - useDAS       : switch to perform query in DAS rather than in DBS
                       optional; default: False
      - cmsswVersion : CMSSW release to pick up the RelVal files from
                       optional; default: the current release (determined automatically from environment)
      - formerVersion: use the valid CMSSW release before the last valid one to pick up the RelVal files from
                       also applies if 'cmsswVersion' is set explicitly
                       optional; default: False
      - relVal       : RelVal sample to be used
                       optional; default: 'RelValTTbar'
      - dataTier     : data tier to be used
                       optional; default: 'GEN-SIM-RECO'
      - condition    : identifier of GlobalTag as defined in Configurations/PyReleaseValidation/python/autoCond.py
                       possibly overridden if 'globalTag' is set explicitly
                       optional; default: 'startup'
      - globalTag    : name of GlobalTag as it is used in the data path of the RelVals
                       optional; default: determined automatically as defined by 'condition' in Configurations/PyReleaseValidation/python/autoCond.py
        !!!            Determination is done for the release one runs in, not for the release the RelVals have been produced in.
        !!!            Example of deviation: data RelVals (CMSSW_4_1_X) might not only have the pure name of the GlobalTag 'GR_R_311_V2' in the full path,
                       but also an extension identifying the data: 'GR_R_311_V2_RelVal_wzMu2010B'
      - maxVersions  : max. versioning number of RelVal to check
      - skipFiles    : number of files to skip for a found RelVal sample
      - numberOfFiles: number of files to pick up
                       a negative value returns all files found ('skipFiles' remains active though)
      - debug        : switch to enable enhanced messages in 'stdout'
                       optional; default: False
    """
51 _label =
'pickRelValInputFiles' 52 _defaultParameters = dicttypes.SortedKeysDict()
58 ConfigToolBase.__init__( self )
60 self.addParameter( self.
_defaultParameters,
'cmsswVersion' , os.getenv(
"CMSSW_VERSION" ) ,
'auto from environment' )
66 if isinstance(gt,tuple)
or isinstance(gt,list):
68 self.addParameter( self.
_defaultParameters,
'globalTag' , gt[ : -5 ] ,
'auto from \'condition\'' )
79 , formerVersion =
None 86 , numberOfFiles =
None 91 if cmsswVersion
is None:
93 if formerVersion
is None:
101 if globalTag
is None:
102 globalTag = autoCond[ condition ][ : -5 ]
103 if maxVersions
is None:
105 if skipFiles
is None:
107 if numberOfFiles
is None:
111 self.setParameter(
'useDAS' , useDAS )
112 self.setParameter(
'cmsswVersion' , cmsswVersion )
113 self.setParameter(
'formerVersion', formerVersion )
114 self.setParameter(
'relVal' , relVal )
115 self.setParameter(
'dataTier' , dataTier )
116 self.setParameter(
'condition' , condition )
117 self.setParameter(
'globalTag' , globalTag )
118 self.setParameter(
'maxVersions' , maxVersions )
119 self.setParameter(
'skipFiles' , skipFiles )
120 self.setParameter(
'numberOfFiles', numberOfFiles )
121 self.setParameter(
'debug' , debug )
125 print(
'%s DEBUG: Empty file list returned'%( self.
_label ))
126 print(
' This might be overwritten by providing input files explicitly to the source module in the main configuration file.')
130 cmsswVersion = self.
_parameters[
'cmsswVersion' ].value
131 formerVersion = self.
_parameters[
'formerVersion' ].value
136 maxVersions = self.
_parameters[
'maxVersions' ].value
138 numberOfFiles = self.
_parameters[
'numberOfFiles' ].value
146 hltPatchId =
'_hltpatch' 147 dqmPatchId =
'_dqmpatch' 151 if patchId
in cmsswVersion:
152 cmsswVersion = cmsswVersion.split( patchId )[ 0 ]
153 elif hltPatchId
in cmsswVersion:
154 cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ]
155 elif dqmPatchId
in cmsswVersion:
156 cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ]
157 elif rootId
in cmsswVersion:
158 cmsswVersion = cmsswVersion.split( rootId )[ 0 ]
159 elif slhcId
in cmsswVersion:
160 cmsswVersion = cmsswVersion.split( slhcId )[ 0 ]
161 elif ibId
in cmsswVersion
or formerVersion:
162 outputTuple = Popen( [
'scram',
'l -c CMSSW' ], stdout = PIPE, stderr = PIPE ).
communicate()
163 if len( outputTuple[ 1 ] ) != 0:
166 print(
' from trying to determine last valid releases before \'%s\''%( cmsswVersion ))
168 print(outputTuple[ 1 ])
172 versions = {
'last' :
'' 175 for line
in outputTuple[ 0 ].splitlines():
176 version = line.split()[ 1 ]
177 if cmsswVersion.split( ibId )[ 0 ]
in version
or cmsswVersion.rpartition(
'_' )[ 0 ]
in version:
178 if not ( patchId
in version
or hltPatchId
in version
or dqmPatchId
in version
or slhcId
in version
or ibId
in version
or rootId
in version ):
179 versions[
'lastToLast' ] = versions[
'last' ]
180 versions[
'last' ] = version
181 if version == cmsswVersion:
186 if preId
in versions[
'lastToLast' ]
and not preId
in versions[
'last' ]
and not versions[
'last' ].endswith(
'_0' ):
187 versions[
'lastToLast' ] = versions[
'lastToLast' ].
split( preId )[ 0 ]
189 elif versions[
'last' ].endswith(
'_0' )
and not ( preId
in versions[
'lastToLast' ]
and versions[
'lastToLast' ].startswith( versions[
'last' ] ) ):
190 versions[
'lastToLast' ] =
'' 191 for line
in outputTuple[ 0 ].splitlines():
192 version = line.split()[ 1 ]
193 versionParts = version.partition( preId )
194 if versionParts[ 0 ] == versions[
'last' ]
and versionParts[ 1 ] == preId:
195 versions[
'lastToLast' ] = version
196 elif versions[
'lastToLast' ] !=
'':
199 elif preId
in versions[
'last' ]
and not preId
in versions[
'lastToLast' ]
and versions[
'lastToLast' ].endswith(
'_0' ):
200 versions[
'lastToLast' ] =
'' 201 cmsswVersion = versions[
'lastToLast' ]
203 cmsswVersion = versions[
'last' ]
208 for key
in self._parameters.keys():
209 print(
' %s:\t'%( key ), end=
' ')
215 if key ==
'cmsswVersion' and cmsswVersion != self.
_parameters[ key ].value:
217 print(
' ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion ))
219 print(
' ==> modified to last valid release %s'%( cmsswVersion ))
222 domain = socket.getfqdn().
split(
'.' )
224 if len( domain ) == 0:
225 print(
'%s INFO : Cannot determine domain of this computer'%( self.
_label ))
229 elif os.uname()[0] ==
"Darwin":
230 print(
'%s INFO : Running on MacOSX without direct access to RelVal files.'%( self.
_label ))
234 elif len( domain ) == 1:
235 print(
'%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self.
_label, domain[ 0 ] ))
239 if not ( ( domain[ -2 ] ==
'cern' and domain[ -1 ] ==
'ch' )
or ( domain[ -2 ] ==
'fnal' and domain[ -1 ] ==
'gov' ) ):
240 print(
'%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self.
_label, domain[ -2 ], domain[ -1 ] ))
244 if domain[ -2 ] ==
'cern':
245 domainSE =
'T2_CH_CERN' 246 elif domain[ -2 ] ==
'fnal':
247 domainSE =
'T1_US_FNAL_MSS' 249 print(
'%s DEBUG: Running at site \'%s.%s\''%( self.
_label, domain[ -2 ], domain[ -1 ] ))
250 print(
'%s DEBUG: Looking for SE \'%s\''%( self.
_label, domainSE ))
255 datasetAll =
'/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier )
259 dasLimit = numberOfFiles
262 for version
in range( maxVersions, 0, -1 ):
266 dataset =
'/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
267 dasQuery =
'file dataset=%s | grep file.name'%( dataset )
269 print(
'%s DEBUG: Querying dataset \'%s\' with'%( self.
_label, dataset ))
270 print(
' \'%s\''%( dasQuery ))
271 jsondict = das_client.get_data(dasQuery,dasLimit)
273 print(
'%s DEBUG: Received DAS JSON dictionary:'%( self.
_label ))
274 print(
' \'%s\''%( jsondict ))
275 if jsondict[
'status' ] !=
'ok':
276 print(
'There was a problem while querying DAS with query \'%s\'. Server reply was:\n %s' % (dasQuery, jsondict))
278 mongo_query = jsondict[
'mongo_query' ]
279 filters = mongo_query[
'filters' ]
280 data = jsondict[
'data' ]
282 print(
'%s DEBUG: Query in JSON dictionary:'%( self.
_label ))
283 print(
' \'%s\''%( mongo_query ))
284 print(
'%s DEBUG: Filters in query:'%( self.
_label ))
285 print(
' \'%s\''%( filters ))
286 print(
'%s DEBUG: Data in JSON dictionary:'%( self.
_label ))
287 print(
' \'%s\''%( data ))
289 filePath = [ r
for r
in das_client.get_value( row, filters[
'grep' ] ) ][ 0 ]
291 print(
'%s DEBUG: Testing file entry \'%s\''%( self.
_label, filePath ))
292 if len( filePath ) > 0:
293 if validVersion != version:
294 jsontestdict = das_client.get_data(
'site dataset=%s | grep site.name' % ( dataset ), 999)
295 mongo_testquery = jsontestdict[
'mongo_query' ]
296 testfilters = mongo_testquery[
'filters' ]
297 testdata = jsontestdict[
'data' ]
299 print(
'%s DEBUG: Received DAS JSON dictionary (site test):'%( self.
_label ))
300 print(
' \'%s\''%( jsontestdict ))
301 print(
'%s DEBUG: Query in JSON dictionary (site test):'%( self.
_label ))
302 print(
' \'%s\''%( mongo_testquery ))
303 print(
'%s DEBUG: Filters in query (site test):'%( self.
_label ))
304 print(
' \'%s\''%( testfilters ))
305 print(
'%s DEBUG: Data in JSON dictionary (site test):'%( self.
_label ))
306 print(
' \'%s\''%( testdata ))
308 for testrow
in testdata:
309 siteName = [ tr
for tr
in das_client.get_value( testrow, testfilters[
'grep' ] ) ][ 0 ]
310 if siteName == domainSE:
315 print(
'%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self.
_label, version, domainSE ))
317 validVersion = version
319 print(
'%s DEBUG: Valid version set to \'v%i\''%( self.
_label, validVersion ))
320 if numberOfFiles == 0:
323 if not filePath
in filePathsTmp:
324 filePathsTmp.append( filePath )
326 print(
'%s DEBUG: File \'%s\' found'%( self.
_label, filePath ))
329 if fileCount > skipFiles:
330 filePaths.append( filePath )
332 print(
'%s DEBUG: File \'%s\' found again'%( self.
_label, filePath ))
334 if numberOfFiles == 0
and debug:
335 print(
'%s DEBUG: No files requested'%( self.
_label ))
340 print(
'%s WARNING: DBS query disabled for DBS3 transition to new API'%( self.
_label ))
386 if validVersion == 0:
387 print(
'%s WARNING : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self.
_label, datasetAll, domainSE ))
390 elif len( filePaths ) == 0:
391 print(
'%s WARNING : No RelVal file(s) picked up in dataset \'%s\''%( self.
_label, dataset ))
394 elif len( filePaths ) < numberOfFiles:
395 print(
'%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self.
_label, len( filePaths ), numberOfFiles, dataset ))
398 print(
'%s DEBUG: returning %i file(s):\n%s'%( self.
_label, len( filePaths ), filePaths ))
S & print(S &os, JobReport::InputFile const &f)
static void * communicate(void *obj)