6 from Configuration.AlCa.autoCond
import autoCond
10 from subprocess
import *
20 """ Picks up RelVal input files automatically and
21 returns a vector of strings with the paths to be used in [PoolSource].fileNames
22 PickRelValInputFiles( useDAS, cmsswVersion, formerVersion, relVal, dataTier, condition, globalTag, maxVersions, skipFiles, numberOfFiles, debug )
23 - useDAS : switch to perform query in DAS rather than in DBS
24 optional; default: False
25 - cmsswVersion : CMSSW release to pick up the RelVal files from
26 optional; default: the current release (determined automatically from environment)
27 - formerVersion: use the second-to-last valid CMSSW release to pick up the RelVal files from
28 applies also, if 'cmsswVersion' is set explicitly
29 optional; default: False
30 - relVal : RelVal sample to be used
31 optional; default: 'RelValTTbar'
32 - dataTier : data tier to be used
33 optional; default: 'GEN-SIM-RECO'
34 - condition : identifier of GlobalTag as defined in Configuration/AlCa/python/autoCond.py
35 possibly overwritten, if 'globalTag' is set explicitly
36 optional; default: 'startup'
37 - globalTag : name of GlobalTag as it is used in the data path of the RelVals
38 optional; default: determined automatically as defined by 'condition' in Configuration/AlCa/python/autoCond.py
39 !!! Determination is done for the release one runs in, not for the release the RelVals have been produced in.
40 !!! Example of deviation: data RelVals (CMSSW_4_1_X) might not only have the pure name of the GlobalTag 'GR_R_311_V2' in the full path,
41 but also an extension identifying the data: 'GR_R_311_V2_RelVal_wzMu2010B'
42 - maxVersions : max. versioning number of RelVal to check
44 - skipFiles : number of files to skip for a found RelVal sample
46 - numberOfFiles: number of files to pick up
47 setting it to a negative value returns all files found ('skipFiles' remains active though)
49 - debug : switch to enable enhanced messages in 'stdout'
50 optional; default: False
53 _label =
'pickRelValInputFiles'
54 _defaultParameters = dicttypes.SortedKeysDict()
60 ConfigToolBase.__init__( self )
62 self.addParameter( self.
_defaultParameters,
'cmsswVersion' , os.getenv(
"CMSSW_VERSION" ) ,
'auto from environment' )
78 , formerVersion =
None
85 , numberOfFiles =
None
90 if cmsswVersion
is None:
92 if formerVersion
is None:
100 if globalTag
is None:
101 globalTag = autoCond[ condition ][ : -5 ]
102 if maxVersions
is None:
104 if skipFiles
is None:
106 if numberOfFiles
is None:
110 self.setParameter(
'useDAS' , useDAS )
111 self.setParameter(
'cmsswVersion' , cmsswVersion )
112 self.setParameter(
'formerVersion', formerVersion )
113 self.setParameter(
'relVal' , relVal )
114 self.setParameter(
'dataTier' , dataTier )
115 self.setParameter(
'condition' , condition )
116 self.setParameter(
'globalTag' , globalTag )
117 self.setParameter(
'maxVersions' , maxVersions )
118 self.setParameter(
'skipFiles' , skipFiles )
119 self.setParameter(
'numberOfFiles', numberOfFiles )
120 self.setParameter(
'debug' , debug )
124 print '%s DEBUG: Empty file list returned'%( self.
_label )
125 print ' This might be overwritten by providing input files explicitly to the source module in the main configuration file.'
129 cmsswVersion = self.
_parameters[
'cmsswVersion' ].value
130 formerVersion = self.
_parameters[
'formerVersion' ].value
135 maxVersions = self.
_parameters[
'maxVersions' ].value
137 numberOfFiles = self.
_parameters[
'numberOfFiles' ].value
145 hltPatchId =
'_hltpatch'
146 dqmPatchId =
'_dqmpatch'
150 if patchId
in cmsswVersion:
151 cmsswVersion = cmsswVersion.split( patchId )[ 0 ]
152 elif hltPatchId
in cmsswVersion:
153 cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ]
154 elif dqmPatchId
in cmsswVersion:
155 cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ]
156 elif rootId
in cmsswVersion:
157 cmsswVersion = cmsswVersion.split( rootId )[ 0 ]
158 elif slhcId
in cmsswVersion:
159 cmsswVersion = cmsswVersion.split( slhcId )[ 0 ]
160 elif ibId
in cmsswVersion
or formerVersion:
161 outputTuple = Popen( [
'scram',
'l -c CMSSW' ], stdout = PIPE, stderr = PIPE ).
communicate()
162 if len( outputTuple[ 1 ] ) != 0:
163 print '%s INFO : SCRAM error'%( self.
_label )
165 print ' from trying to determine last valid releases before \'%s\''%( cmsswVersion )
167 print outputTuple[ 1 ]
171 versions = {
'last' :
''
174 for line
in outputTuple[ 0 ].splitlines():
175 version = line.split()[ 1 ]
176 if cmsswVersion.split( ibId )[ 0 ]
in version
or cmsswVersion.rpartition(
'_' )[ 0 ]
in version:
177 if not ( patchId
in version
or hltPatchId
in version
or dqmPatchId
in version
or slhcId
in version
or ibId
in version
or rootId
in version ):
178 versions[
'lastToLast' ] = versions[
'last' ]
179 versions[
'last' ] = version
180 if version == cmsswVersion:
185 if preId
in versions[
'lastToLast' ]
and not preId
in versions[
'last' ]
and not versions[
'last' ].endswith(
'_0' ):
186 versions[
'lastToLast' ] = versions[
'lastToLast' ].
split( preId )[ 0 ]
188 elif versions[
'last' ].endswith(
'_0' )
and not ( preId
in versions[
'lastToLast' ]
and versions[
'lastToLast' ].startswith( versions[
'last' ] ) ):
189 versions[
'lastToLast' ] =
''
190 for line
in outputTuple[ 0 ].splitlines():
191 version = line.split()[ 1 ]
192 versionParts = version.partition( preId )
193 if versionParts[ 0 ] == versions[
'last' ]
and versionParts[ 1 ] == preId:
194 versions[
'lastToLast' ] = version
195 elif versions[
'lastToLast' ] !=
'':
198 elif preId
in versions[
'last' ]
and not preId
in versions[
'lastToLast' ]
and versions[
'lastToLast' ].endswith(
'_0' ):
199 versions[
'lastToLast' ] =
''
200 cmsswVersion = versions[
'lastToLast' ]
202 cmsswVersion = versions[
'last' ]
206 print '%s DEBUG: Called with...'%( self.
_label )
207 for key
in self._parameters.keys():
208 print ' %s:\t'%( key ),
214 if key ==
'cmsswVersion' and cmsswVersion != self.
_parameters[ key ].value:
216 print ' ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion )
218 print ' ==> modified to last valid release %s'%( cmsswVersion )
221 domain = socket.getfqdn().
split(
'.' )
223 if len( domain ) == 0:
224 print '%s INFO : Cannot determine domain of this computer'%( self.
_label )
228 elif os.uname()[0] ==
"Darwin":
229 print '%s INFO : Running on MacOSX without direct access to RelVal files.'%( self.
_label )
233 elif len( domain ) == 1:
234 print '%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self.
_label, domain[ 0 ] )
238 if not ( ( domain[ -2 ] ==
'cern' and domain[ -1 ] ==
'ch' )
or ( domain[ -2 ] ==
'fnal' and domain[ -1 ] ==
'gov' ) ):
239 print '%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self.
_label, domain[ -2 ], domain[ -1 ] )
243 if domain[ -2 ] ==
'cern':
244 domainSE =
'T2_CH_CERN'
245 elif domain[ -2 ] ==
'fnal':
246 domainSE =
'T1_US_FNAL_MSS'
248 print '%s DEBUG: Running at site \'%s.%s\''%( self.
_label, domain[ -2 ], domain[ -1 ] )
249 print '%s DEBUG: Looking for SE \'%s\''%( self.
_label, domainSE )
254 datasetAll =
'/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier )
257 print '%s DEBUG: Using DAS query'%( self.
_label )
258 dasLimit = numberOfFiles
261 for version
in range( maxVersions, 0, -1 ):
265 dataset =
'/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
266 dasQuery =
'file dataset=%s | grep file.name'%( dataset )
268 print '%s DEBUG: Querying dataset \'%s\' with'%( self.
_label, dataset )
269 print ' \'%s\''%( dasQuery )
271 dasData = das_client.get_data(
'https://cmsweb.cern.ch', dasQuery, 0, dasLimit,
False )
272 jsondict = json.loads( dasData )
274 print '%s DEBUG: Received DAS data:'%( self.
_label )
275 print ' \'%s\''%( dasData )
276 print '%s DEBUG: Determined JSON dictionary:'%( self.
_label )
277 print ' \'%s\''%( jsondict )
278 if jsondict[
'status' ] !=
'ok':
279 print 'There was a problem while querying DAS with query \'%s\'. Server reply was:\n %s' % (dasQuery, dasData)
281 mongo_query = jsondict[
'mongo_query' ]
282 filters = mongo_query[
'filters' ]
283 data = jsondict[
'data' ]
285 print '%s DEBUG: Query in JSON dictionary:'%( self.
_label )
286 print ' \'%s\''%( mongo_query )
287 print '%s DEBUG: Filters in query:'%( self.
_label )
288 print ' \'%s\''%( filters )
289 print '%s DEBUG: Data in JSON dictionary:'%( self.
_label )
290 print ' \'%s\''%( data )
292 filePath = [ r
for r
in das_client.get_value( row, filters ) ][ 0 ]
294 print '%s DEBUG: Testing file entry \'%s\''%( self.
_label, filePath )
295 if len( filePath ) > 0:
296 if validVersion != version:
297 dasTest = das_client.get_data(
'https://cmsweb.cern.ch',
'site dataset=%s | grep site.name'%( dataset ), 0, 999,
False )
298 jsontestdict = json.loads( dasTest )
299 mongo_testquery = jsontestdict[
'mongo_query' ]
300 testfilters = mongo_testquery[
'filters' ]
301 testdata = jsontestdict[
'data' ]
303 print '%s DEBUG: Received DAS data (site test):'%( self.
_label )
304 print ' \'%s\''%( dasTest )
305 print '%s DEBUG: Determined JSON dictionary (site test):'%( self.
_label )
306 print ' \'%s\''%( jsontestdict )
307 print '%s DEBUG: Query in JSON dictionary (site test):'%( self.
_label )
308 print ' \'%s\''%( mongo_testquery )
309 print '%s DEBUG: Filters in query (site test):'%( self.
_label )
310 print ' \'%s\''%( testfilters )
311 print '%s DEBUG: Data in JSON dictionary (site test):'%( self.
_label )
312 print ' \'%s\''%( testdata )
314 for testrow
in testdata:
315 siteName = [ tr
for tr
in das_client.get_value( testrow, testfilters ) ][ 0 ]
316 if siteName == domainSE:
321 print '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self.
_label, version, domainSE )
323 validVersion = version
325 print '%s DEBUG: Valid version set to \'v%i\''%( self.
_label, validVersion )
326 if numberOfFiles == 0:
329 if not filePath
in filePathsTmp:
330 filePathsTmp.append( filePath )
332 print '%s DEBUG: File \'%s\' found'%( self.
_label, filePath )
335 if fileCount > skipFiles:
336 filePaths.append( filePath )
338 print '%s DEBUG: File \'%s\' found again'%( self.
_label, filePath )
340 if numberOfFiles == 0
and debug:
341 print '%s DEBUG: No files requested'%( self.
_label )
345 print '%s DEBUG: Using DBS query'%( self.
_label )
346 for version
in range( maxVersions, 0, -1 ):
349 dataset =
'/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
350 dbsQuery =
'find file where dataset = %s'%( dataset )
352 print '%s DEBUG: Querying dataset \'%s\' with'%( self.
_label, dataset )
353 print ' \'%s\''%( dbsQuery )
355 for line
in os.popen(
'dbs search --query="%s"'%( dbsQuery ) ):
356 if line.find(
'.root' ) != -1:
357 if validVersion != version:
359 dbsSiteQuery =
'find dataset where dataset = %s and site = %s'%( dataset, domainSE )
361 print '%s DEBUG: Querying site \'%s\' with'%( self.
_label, domainSE )
362 print ' \'%s\''%( dbsSiteQuery )
363 for lineSite
in os.popen(
'dbs search --query="%s"'%( dbsSiteQuery ) ):
364 if lineSite.find( dataset ) != -1:
369 print '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self.
_label, version, domainSE )
371 validVersion = version
373 print '%s DEBUG: Valid version set to \'v%i\''%( self.
_label, validVersion )
374 if numberOfFiles == 0:
376 filePath = line.replace(
'\n',
'' )
378 print '%s DEBUG: File \'%s\' found'%( self.
_label, filePath )
380 if fileCount > skipFiles:
381 filePaths.append( filePath )
382 if not numberOfFiles < 0:
383 if numberOfFiles <= len( filePaths ):
386 if numberOfFiles == 0
and debug:
387 print '%s DEBUG: No files requested'%( self.
_label )
391 if validVersion == 0:
392 print '%s INFO : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self.
_label, datasetAll, domainSE )
395 elif len( filePaths ) == 0:
396 print '%s INFO : No RelVal file(s) picked up in dataset \'%s\''%( self.
_label, dataset )
399 elif len( filePaths ) < numberOfFiles:
400 print '%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self.
_label, len( filePaths ), numberOfFiles, dataset )
403 print '%s DEBUG: returning %i file(s):\n%s'%( self.
_label, len( filePaths ), filePaths )
static void * communicate(void *obj)