1 from __future__
import print_function
2 from builtins
import range
3 import FWCore.ParameterSet.Config
as cms
8 from Configuration.AlCa.autoCond
import autoCond
9 import Utilities.General.cmssw_das_client
as das_client
19 """ Picks up RelVal input files automatically and
20 returns a vector of strings with the paths to be used in [PoolSource].fileNames
21 PickRelValInputFiles( useDAS, cmsswVersion, formerVersion, relVal, dataTier, condition, globalTag, maxVersions, skipFiles, numberOfFiles, debug )
22 - useDAS : switch to perform query in DAS rather than in DBS
23 optional; default: False
24 - cmsswVersion : CMSSW release to pick up the RelVal files from
25 optional; default: the current release (determined automatically from environment)
26 - formerVersion: use the second-to-last valid CMSSW release to pick up the RelVal files from
27 also applies if 'cmsswVersion' is set explicitly
28 optional; default: False
29 - relVal : RelVal sample to be used
30 optional; default: 'RelValTTbar'
31 - dataTier : data tier to be used
32 optional; default: 'GEN-SIM-RECO'
33 - condition : identifier of GlobalTag as defined in Configuration/AlCa/python/autoCond.py
34 possibly overwritten, if 'globalTag' is set explicitly
35 optional; default: 'startup'
36 - globalTag : name of GlobalTag as it is used in the data path of the RelVals
37 optional; default: determined automatically as defined by 'condition' in Configuration/AlCa/python/autoCond.py
38 !!! Determination is done for the release one runs in, not for the release the RelVals have been produced in.
39 !!! Example of deviation: data RelVals (CMSSW_4_1_X) might not only have the pure name of the GlobalTag 'GR_R_311_V2' in the full path,
40 but also an extension identifying the data: 'GR_R_311_V2_RelVal_wzMu2010B'
41 - maxVersions : max. versioning number of RelVal to check
43 - skipFiles : number of files to skip for a found RelVal sample
45 - numberOfFiles: number of files to pick up
46 setting it to a negative value returns all files found ('skipFiles' remains active though)
48 - debug : switch to enable enhanced messages in 'stdout'
49 optional; default: False
52 _label =
'pickRelValInputFiles'
53 _defaultParameters = dicttypes.SortedKeysDict()
59 ConfigToolBase.__init__( self )
61 self.addParameter( self.
_defaultParameters,
'cmsswVersion' , os.getenv(
"CMSSW_VERSION" ) ,
'auto from environment' )
67 if isinstance(gt,tuple)
or isinstance(gt,list):
69 self.addParameter( self.
_defaultParameters,
'globalTag' , gt[ : -5 ] ,
'auto from \'condition\'' )
80 , formerVersion = None
87 , numberOfFiles = None
92 if cmsswVersion
is None:
94 if formerVersion
is None:
100 if condition
is None:
102 if globalTag
is None:
103 globalTag = autoCond[ condition ][ : -5 ]
104 if maxVersions
is None:
106 if skipFiles
is None:
108 if numberOfFiles
is None:
112 self.setParameter(
'useDAS' , useDAS )
113 self.setParameter(
'cmsswVersion' , cmsswVersion )
114 self.setParameter(
'formerVersion', formerVersion )
115 self.setParameter(
'relVal' , relVal )
116 self.setParameter(
'dataTier' , dataTier )
117 self.setParameter(
'condition' , condition )
118 self.setParameter(
'globalTag' , globalTag )
119 self.setParameter(
'maxVersions' , maxVersions )
120 self.setParameter(
'skipFiles' , skipFiles )
121 self.setParameter(
'numberOfFiles', numberOfFiles )
122 self.setParameter(
'debug' , debug )
126 print(
'%s DEBUG: Empty file list returned'%( self.
_label ))
127 print(
' This might be overwritten by providing input files explicitly to the source module in the main configuration file.')
131 cmsswVersion = self.
_parameters[
'cmsswVersion' ].value
132 formerVersion = self.
_parameters[
'formerVersion' ].value
137 maxVersions = self.
_parameters[
'maxVersions' ].value
139 numberOfFiles = self.
_parameters[
'numberOfFiles' ].value
147 hltPatchId =
'_hltpatch'
148 dqmPatchId =
'_dqmpatch'
152 if patchId
in cmsswVersion:
153 cmsswVersion = cmsswVersion.split( patchId )[ 0 ]
154 elif hltPatchId
in cmsswVersion:
155 cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ]
156 elif dqmPatchId
in cmsswVersion:
157 cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ]
158 elif rootId
in cmsswVersion:
159 cmsswVersion = cmsswVersion.split( rootId )[ 0 ]
160 elif slhcId
in cmsswVersion:
161 cmsswVersion = cmsswVersion.split( slhcId )[ 0 ]
162 elif ibId
in cmsswVersion
or formerVersion:
163 outputTuple = Popen( [
'scram',
'l -c CMSSW' ], stdout = PIPE, stderr = PIPE ).
communicate()
164 if len( outputTuple[ 1 ] ) != 0:
167 print(
' from trying to determine last valid releases before \'%s\''%( cmsswVersion ))
169 print(outputTuple[ 1 ])
173 versions = {
'last' :
''
176 for line
in outputTuple[ 0 ].splitlines():
177 version = line.split()[ 1 ]
178 if cmsswVersion.split( ibId )[ 0 ]
in version
or cmsswVersion.rpartition(
'_' )[ 0 ]
in version:
179 if not ( patchId
in version
or hltPatchId
in version
or dqmPatchId
in version
or slhcId
in version
or ibId
in version
or rootId
in version ):
180 versions[
'lastToLast' ] = versions[
'last' ]
181 versions[
'last' ] = version
182 if version == cmsswVersion:
187 if preId
in versions[
'lastToLast' ]
and not preId
in versions[
'last' ]
and not versions[
'last' ].endswith(
'_0' ):
188 versions[
'lastToLast' ] = versions[
'lastToLast' ].
split( preId )[ 0 ]
190 elif versions[
'last' ].endswith(
'_0' )
and not ( preId
in versions[
'lastToLast' ]
and versions[
'lastToLast' ].startswith( versions[
'last' ] ) ):
191 versions[
'lastToLast' ] =
''
192 for line
in outputTuple[ 0 ].splitlines():
193 version = line.split()[ 1 ]
194 versionParts = version.partition( preId )
195 if versionParts[ 0 ] == versions[
'last' ]
and versionParts[ 1 ] == preId:
196 versions[
'lastToLast' ] = version
197 elif versions[
'lastToLast' ] !=
'':
200 elif preId
in versions[
'last' ]
and not preId
in versions[
'lastToLast' ]
and versions[
'lastToLast' ].endswith(
'_0' ):
201 versions[
'lastToLast' ] =
''
202 cmsswVersion = versions[
'lastToLast' ]
204 cmsswVersion = versions[
'last' ]
210 print(
' %s:\t'%( key ), end=
' ')
216 if key ==
'cmsswVersion' and cmsswVersion != self.
_parameters[ key ].value:
218 print(
' ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion ))
220 print(
' ==> modified to last valid release %s'%( cmsswVersion ))
223 domain = socket.getfqdn().
split(
'.' )
225 if len( domain ) == 0:
226 print(
'%s INFO : Cannot determine domain of this computer'%( self.
_label ))
230 elif os.uname()[0] ==
"Darwin":
231 print(
'%s INFO : Running on MacOSX without direct access to RelVal files.'%( self.
_label ))
235 elif len( domain ) == 1:
236 print(
'%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self.
_label, domain[ 0 ] ))
240 if not ( ( domain[ -2 ] ==
'cern' and domain[ -1 ] ==
'ch' )
or ( domain[ -2 ] ==
'fnal' and domain[ -1 ] ==
'gov' ) ):
241 print(
'%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self.
_label, domain[ -2 ], domain[ -1 ] ))
245 if domain[ -2 ] ==
'cern':
246 domainSE =
'T2_CH_CERN'
247 elif domain[ -2 ] ==
'fnal':
248 domainSE =
'T1_US_FNAL_MSS'
250 print(
'%s DEBUG: Running at site \'%s.%s\''%( self.
_label, domain[ -2 ], domain[ -1 ] ))
251 print(
'%s DEBUG: Looking for SE \'%s\''%( self.
_label, domainSE ))
256 datasetAll =
'/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier )
260 dasLimit = numberOfFiles
263 for version
in range( maxVersions, 0, -1 ):
267 dataset =
'/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
268 dasQuery =
'file dataset=%s | grep file.name'%( dataset )
270 print(
'%s DEBUG: Querying dataset \'%s\' with'%( self.
_label, dataset ))
271 print(
' \'%s\''%( dasQuery ))
274 print(
'%s DEBUG: Received DAS JSON dictionary:'%( self.
_label ))
275 print(
' \'%s\''%( jsondict ))
276 if jsondict[
'status' ] !=
'ok':
277 print(
'There was a problem while querying DAS with query \'%s\'. Server reply was:\n %s' % (dasQuery, jsondict))
279 mongo_query = jsondict[
'mongo_query' ]
280 filters = mongo_query[
'filters' ]
281 data = jsondict[
'data' ]
283 print(
'%s DEBUG: Query in JSON dictionary:'%( self.
_label ))
284 print(
' \'%s\''%( mongo_query ))
285 print(
'%s DEBUG: Filters in query:'%( self.
_label ))
286 print(
' \'%s\''%( filters ))
287 print(
'%s DEBUG: Data in JSON dictionary:'%( self.
_label ))
288 print(
' \'%s\''%( data ))
292 print(
'%s DEBUG: Testing file entry \'%s\''%( self.
_label, filePath ))
293 if len( filePath ) > 0:
294 if validVersion != version:
296 mongo_testquery = jsontestdict[
'mongo_query' ]
297 testfilters = mongo_testquery[
'filters' ]
298 testdata = jsontestdict[
'data' ]
300 print(
'%s DEBUG: Received DAS JSON dictionary (site test):'%( self.
_label ))
301 print(
' \'%s\''%( jsontestdict ))
302 print(
'%s DEBUG: Query in JSON dictionary (site test):'%( self.
_label ))
303 print(
' \'%s\''%( mongo_testquery ))
304 print(
'%s DEBUG: Filters in query (site test):'%( self.
_label ))
305 print(
' \'%s\''%( testfilters ))
306 print(
'%s DEBUG: Data in JSON dictionary (site test):'%( self.
_label ))
307 print(
' \'%s\''%( testdata ))
309 for testrow
in testdata:
311 if siteName == domainSE:
316 print(
'%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self.
_label, version, domainSE ))
318 validVersion = version
320 print(
'%s DEBUG: Valid version set to \'v%i\''%( self.
_label, validVersion ))
321 if numberOfFiles == 0:
324 if not filePath
in filePathsTmp:
325 filePathsTmp.append( filePath )
327 print(
'%s DEBUG: File \'%s\' found'%( self.
_label, filePath ))
330 if fileCount > skipFiles:
331 filePaths.append( filePath )
333 print(
'%s DEBUG: File \'%s\' found again'%( self.
_label, filePath ))
335 if numberOfFiles == 0
and debug:
336 print(
'%s DEBUG: No files requested'%( self.
_label ))
341 print(
'%s WARNING: DBS query disabled for DBS3 transition to new API'%( self.
_label ))
387 if validVersion == 0:
388 print(
'%s WARNING : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self.
_label, datasetAll, domainSE ))
391 elif len( filePaths ) == 0:
392 print(
'%s WARNING : No RelVal file(s) picked up in dataset \'%s\''%( self.
_label, dataset ))
395 elif len( filePaths ) < numberOfFiles:
396 print(
'%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self.
_label, len( filePaths ), numberOfFiles, dataset ))
399 print(
'%s DEBUG: returning %i file(s):\n%s'%( self.
_label, len( filePaths ), filePaths ))