CMS 3D CMS Logo

cmsswVersionTools.py
Go to the documentation of this file.
1 from __future__ import print_function
2 from builtins import range
3 import FWCore.ParameterSet.Config as cms
4 
8 from Configuration.AlCa.autoCond import autoCond
9 import Utilities.General.cmssw_das_client as das_client
10 import os
11 import socket
12 
13 
14 ## ------------------------------------------------------
15 ## Automatic pick-up of RelVal input files
16 ## ------------------------------------------------------
17 
19  """ Picks up RelVal input files automatically and
20  returns a vector of strings with the paths to be used in [PoolSource].fileNames
21  PickRelValInputFiles( useDAS, cmsswVersion, formerVersion, relVal, dataTier, condition, globalTag, maxVersions, skipFiles, numberOfFiles, debug )
22  - useDAS : switch to perform query in DAS rather than in DBS
23  optional; default: False
24  - cmsswVersion : CMSSW release to pick up the RelVal files from
25  optional; default: the current release (determined automatically from environment)
26  - formerVersion: use the last before the last valid CMSSW release to pick up the RelVal files from
27  applies also, if 'cmsswVersion' is set explicitly
28  optional; default: False
29  - relVal : RelVal sample to be used
30  optional; default: 'RelValTTbar'
31  - dataTier : data tier to be used
32  optional; default: 'GEN-SIM-RECO'
33  - condition : identifier of GlobalTag as defined in the Configuration.AlCa.autoCond module
34  possibly overwritten, if 'globalTag' is set explicitly
35  optional; default: 'startup'
36  - globalTag : name of GlobalTag as it is used in the data path of the RelVals
37  optional; default: determined automatically as defined by 'condition' in the Configuration.AlCa.autoCond module
38  !!! Determination is done for the release one runs in, not for the release the RelVals have been produced in.
39  !!! Example of deviation: data RelVals (CMSSW_4_1_X) might not only have the pure name of the GlobalTag 'GR_R_311_V2' in the full path,
40  but also an extension identifying the data: 'GR_R_311_V2_RelVal_wzMu2010B'
41  - maxVersions : max. versioning number of RelVal to check
42  optional; default: 3
43  - skipFiles : number of files to skip for a found RelVal sample
44  optional; default: 0
45  - numberOfFiles: number of files to pick up
46  setting it to negative values, returns all found ('skipFiles' remains active though)
47  optional; default: -1
48  - debug : switch to enable enhanced messages in 'stdout'
49  optional; default: False
50  """
51 
52  _label = 'pickRelValInputFiles'
53  _defaultParameters = dicttypes.SortedKeysDict()
54 
def getDefaultParameters(self):
    """Return the dictionary holding this tool's default parameters."""
    defaults = self._defaultParameters
    return defaults
57 
def __init__(self):
    """Register the default parameters and initialise the working copy.

    The parameters are added in a fixed order. 'globalTag' has to come
    after 'condition' because its default is looked up in ``autoCond``
    using the default condition.
    """
    ConfigToolBase.__init__(self)

    # (name, default value, comment) for everything up to 'condition'
    beforeGlobalTag = (
        ('useDAS', False, ''),
        ('cmsswVersion', os.getenv("CMSSW_VERSION"), 'auto from environment'),
        ('formerVersion', False, ''),
        ('relVal', 'RelValTTbar', ''),
        ('dataTier', 'GEN-SIM-RECO', ''),
        ('condition', 'startup', ''),
    )
    for name, value, comment in beforeGlobalTag:
        self.addParameter(self._defaultParameters, name, value, comment)

    # autoCond entries may be a plain value or a tuple/list; in the latter
    # case only the first element is used.
    gt = autoCond[self.getDefaultParameters()['condition'].value]
    if isinstance(gt, (tuple, list)):
        gt = gt[0]

    # The last five characters of the tag are dropped
    # (presumably a '::All'-style suffix -- TODO confirm).
    afterCondition = (
        ('globalTag', gt[:-5], 'auto from \'condition\''),
        ('maxVersions', 3, ''),
        ('skipFiles', 0, ''),
        ('numberOfFiles', -1, 'all'),
        ('debug', False, ''),
    )
    for name, value, comment in afterCondition:
        self.addParameter(self._defaultParameters, name, value, comment)

    # Working copy that later calls modify; the defaults stay untouched.
    self._parameters = copy.deepcopy(self._defaultParameters)
    self._comment = ""
76 
def __call__(self,
             useDAS=None,
             cmsswVersion=None,
             formerVersion=None,
             relVal=None,
             dataTier=None,
             condition=None,
             globalTag=None,
             maxVersions=None,
             skipFiles=None,
             numberOfFiles=None,
             debug=None):
    """Store the given parameters and run the file look-up.

    Every argument left at ``None`` falls back to the value registered in
    the default parameters. The exception is 'globalTag', which is then
    derived from ``autoCond[condition]``.

    Returns:
        Whatever ``self.apply()`` returns.
    """
    defaults = self.getDefaultParameters()

    if condition is None:
        condition = defaults['condition'].value
    if globalTag is None:
        # auto from 'condition'; a tuple/list entry is unwrapped to its
        # first element, the same way __init__ handles it, before the
        # last five characters are dropped
        # (presumably a '::All'-style suffix -- TODO confirm).
        gt = autoCond[condition]
        if isinstance(gt, (tuple, list)):
            gt = gt[0]
        globalTag = gt[:-5]

    # Same order in which the parameters have always been stored.
    requested = (
        ('useDAS', useDAS),
        ('cmsswVersion', cmsswVersion),
        ('formerVersion', formerVersion),
        ('relVal', relVal),
        ('dataTier', dataTier),
        ('condition', condition),
        ('globalTag', globalTag),
        ('maxVersions', maxVersions),
        ('skipFiles', skipFiles),
        ('numberOfFiles', numberOfFiles),
        ('debug', debug),
    )
    for name, value in requested:
        if value is None:
            value = defaults[name].value
        self.setParameter(name, value)
    return self.apply()
124 
def messageEmptyList(self):
    """Print the two-line debug notice that an empty file list is returned."""
    notice = (
        '%s DEBUG: Empty file list returned' % (self._label),
        ' This might be overwritten by providing input files explicitly to the source module in the main configuration file.',
    )
    for text in notice:
        print(text)
128 
def apply(self):
    """Determine the RelVal input files for the currently stored parameters.

    Steps:
      1. Resolve the CMSSW release to look in: patch-like suffixes are
         stripped; for IBs, or when 'formerVersion' is set, ``scram`` is
         asked for the list of releases and the last (or last-to-last)
         valid one is used.
      2. Check that the host is in the cern.ch or fnal.gov domain and
         choose the matching storage element name.
      3. With 'useDAS' set, query DAS for dataset versions from
         'maxVersions' down to 1 and take the first version that has
         files and is listed at that storage element.

    Returns:
        list: the file paths picked up. Empty if the release cannot be
        determined, ``scram`` reports an error, the host has no direct
        access, the (disabled) DBS path is requested, or nothing is found.

    Raises:
        SystemExit: if DAS replies with a status other than 'ok'.
    """
    params = self._parameters
    useDAS = params['useDAS'].value
    cmsswVersion = params['cmsswVersion'].value
    formerVersion = params['formerVersion'].value
    relVal = params['relVal'].value
    dataTier = params['dataTier'].value
    # 'condition' is not read here: it only serves to derive the global
    # tag when that is not given explicitly.
    globalTag = params['globalTag'].value
    maxVersions = params['maxVersions'].value
    skipFiles = params['skipFiles'].value
    numberOfFiles = params['numberOfFiles'].value
    debug = params['debug'].value

    filePaths = []

    # The default release is read from the environment and is None when
    # CMSSW_VERSION is unset; the substring tests below would then fail.
    if not cmsswVersion:
        print('%s INFO : Cannot determine CMSSW release to pick up RelVal files from' % (self._label))
        if debug:
            self.messageEmptyList()
        return filePaths

    # Determine corresponding CMSSW version for RelVals
    preId = '_pre'
    patchId = '_patch'        # patch releases
    hltPatchId = '_hltpatch'  # HLT patch releases
    dqmPatchId = '_dqmpatch'  # DQM patch releases
    slhcId = '_SLHC'          # SLHC releases
    rootId = '_root'          # ROOT test releases
    ibId = '_X_'              # IBs

    # Only the first matching suffix (in this order) is stripped.
    strippableIds = (patchId, hltPatchId, dqmPatchId, rootId, slhcId)
    matchedId = next((marker for marker in strippableIds if marker in cmsswVersion), None)
    if matchedId is not None:
        cmsswVersion = cmsswVersion.split(matchedId)[0]
    elif ibId in cmsswVersion or formerVersion:
        # Text mode, so that the release names are 'str' on Python 3 as
        # well and can be compared with 'cmsswVersion'.
        scramOut, scramErr = Popen(['scram', 'l -c CMSSW'],
                                   stdout=PIPE, stderr=PIPE,
                                   universal_newlines=True).communicate()
        if len(scramErr) != 0:
            print('%s INFO : SCRAM error' % (self._label))
            if debug:
                print(" from trying to determine last valid releases before '%s'" % (cmsswVersion))
                print()
                print(scramErr)
                print()
                self.messageEmptyList()
            return filePaths

        # The release name is the second field of each line; blank or
        # shorter lines carry no release and are skipped.
        releases = [fields[1]
                    for fields in (line.split() for line in scramOut.splitlines())
                    if len(fields) > 1]

        notValidIds = (patchId, hltPatchId, dqmPatchId, slhcId, ibId, rootId)
        ibBase = cmsswVersion.split(ibId)[0]
        cycleBase = cmsswVersion.rpartition('_')[0]
        last = ''
        lastToLast = ''
        for version in releases:
            if ibBase not in version and cycleBase not in version:
                continue
            if any(marker in version for marker in notValidIds):
                continue
            lastToLast, last = last, version
            if version == cmsswVersion:
                break
        # FIXME: ordering of output problematic ('XYZ_pre10' before 'XYZ_pre2', no "formerVersion" for 'XYZ_pre1')
        if formerVersion:
            # Don't use pre-releases as "former version" for other releases than CMSSW_X_Y_0
            if preId in lastToLast and preId not in last and not last.endswith('_0'):
                lastToLast = lastToLast.split(preId)[0]  # works only, if 'CMSSW_X_Y_0' exists ;-)
            # Use pre-release as "former version" for CMSSW_X_Y_0
            elif last.endswith('_0') and not (preId in lastToLast and lastToLast.startswith(last)):
                lastToLast = ''
                for version in releases:
                    base, separator, _ = version.partition(preId)
                    if base == last and separator == preId:
                        lastToLast = version
                    elif lastToLast != '':
                        break
            # Don't use CMSSW_X_Y_0 as "former version" for pre-releases
            elif preId in last and preId not in lastToLast and lastToLast.endswith('_0'):
                lastToLast = ''  # no alternative :-(
            cmsswVersion = lastToLast
        else:
            cmsswVersion = last

    # Debugging output
    if debug:
        print('%s DEBUG: Called with...' % (self._label))
        defaults = self.getDefaultParameters()
        for key in self._parameters.keys():
            value = self._parameters[key].value
            print(' %s:\t' % (key), end=' ')
            print(value, end=' ')
            # Compared by value: an explicitly passed value equal to the
            # default is a default, too.
            if value == defaults[key].value:
                print(' (default)')
            else:
                print()
            if key == 'cmsswVersion' and cmsswVersion != value:
                if formerVersion:
                    print(" ==> modified to last to last valid release %s (s. 'formerVersion' parameter)" % (cmsswVersion))
                else:
                    print(' ==> modified to last valid release %s' % (cmsswVersion))

    # Check domain
    # (str.split always yields at least one element, so there is no
    # "empty domain" case to handle.)
    domain = socket.getfqdn().split('.')
    if os.uname()[0] == "Darwin":
        print('%s INFO : Running on MacOSX without direct access to RelVal files.' % (self._label))
        if debug:
            self.messageEmptyList()
        return filePaths
    if len(domain) == 1:
        print("%s INFO : Running on local host '%s' without direct access to RelVal files" % (self._label, domain[0]))
        if debug:
            self.messageEmptyList()
        return filePaths
    storageElements = {('cern', 'ch'): 'T2_CH_CERN',
                       ('fnal', 'gov'): 'T1_US_FNAL_MSS'}
    domainSE = storageElements.get((domain[-2], domain[-1]))
    if domainSE is None:
        print("%s INFO : Running on site '%s.%s' without direct access to RelVal files" % (self._label, domain[-2], domain[-1]))
        if debug:
            self.messageEmptyList()
        return filePaths
    if debug:
        print("%s DEBUG: Running at site '%s.%s'" % (self._label, domain[-2], domain[-1]))
        print("%s DEBUG: Looking for SE '%s'" % (self._label, domainSE))

    # Find files
    validVersion = 0
    dataset = ''
    datasetAll = '/%s/%s-%s-v*/%s' % (relVal, cmsswVersion, globalTag, dataTier)
    if useDAS:
        if debug:
            print('%s DEBUG: Using DAS query' % (self._label))
        dasLimit = numberOfFiles if numberOfFiles > 0 else 1
        for version in range(maxVersions, 0, -1):
            filePaths = []
            filePathsTmp = []
            fileCount = 0
            dataset = '/%s/%s-%s-v%i/%s' % (relVal, cmsswVersion, globalTag, version, dataTier)
            dasQuery = 'file dataset=%s | grep file.name' % (dataset)
            if debug:
                print("%s DEBUG: Querying dataset '%s' with" % (self._label, dataset))
                print(" '%s'" % (dasQuery))
            jsondict = das_client.get_data(dasQuery, dasLimit)
            if debug:
                print('%s DEBUG: Received DAS JSON dictionary:' % (self._label))
                print(" '%s'" % (jsondict))
            if jsondict['status'] != 'ok':
                print("There was a problem while querying DAS with query '%s'. Server reply was:\n %s" % (dasQuery, jsondict))
                # Same effect as exit( 1 ), without relying on the helper
                # that the 'site' module injects.
                raise SystemExit(1)
            mongo_query = jsondict['mongo_query']
            filters = mongo_query['filters']
            data = jsondict['data']
            if debug:
                print('%s DEBUG: Query in JSON dictionary:' % (self._label))
                print(" '%s'" % (mongo_query))
                print('%s DEBUG: Filters in query:' % (self._label))
                print(" '%s'" % (filters))
                print('%s DEBUG: Data in JSON dictionary:' % (self._label))
                print(" '%s'" % (data))
            for row in data:
                filePath = list(das_client.get_value(row, filters['grep']))[0]
                if debug:
                    print("%s DEBUG: Testing file entry '%s'" % (self._label, filePath))
                if len(filePath) == 0:
                    continue
                if validVersion != version:
                    # First usable file of this dataset version: check
                    # once that the dataset is listed at our SE.
                    jsontestdict = das_client.get_data('site dataset=%s | grep site.name' % (dataset), 999)
                    mongo_testquery = jsontestdict['mongo_query']
                    testfilters = mongo_testquery['filters']
                    testdata = jsontestdict['data']
                    if debug:
                        print('%s DEBUG: Received DAS JSON dictionary (site test):' % (self._label))
                        print(" '%s'" % (jsontestdict))
                        print('%s DEBUG: Query in JSON dictionary (site test):' % (self._label))
                        print(" '%s'" % (mongo_testquery))
                        print('%s DEBUG: Filters in query (site test):' % (self._label))
                        print(" '%s'" % (testfilters))
                        print('%s DEBUG: Data in JSON dictionary (site test):' % (self._label))
                        print(" '%s'" % (testdata))
                    foundSE = any(
                        list(das_client.get_value(testrow, testfilters['grep']))[0] == domainSE
                        for testrow in testdata
                    )
                    if not foundSE:
                        if debug:
                            print("%s DEBUG: Possible version 'v%s' not available on SE '%s'" % (self._label, version, domainSE))
                        break
                    validVersion = version
                    if debug:
                        print("%s DEBUG: Valid version set to 'v%i'" % (self._label, validVersion))
                if numberOfFiles == 0:
                    break
                # protect from double entries ( 'unique' flag in query does not work here)
                if filePath not in filePathsTmp:
                    filePathsTmp.append(filePath)
                    if debug:
                        print("%s DEBUG: File '%s' found" % (self._label, filePath))
                    fileCount += 1
                    # needed, since "limit" overrides "idx" in 'get_data' (==> "idx" set to '0' rather than "skipFiles")
                    if fileCount > skipFiles:
                        filePaths.append(filePath)
                elif debug:
                    print("%s DEBUG: File '%s' found again" % (self._label, filePath))
            if validVersion > 0:
                if numberOfFiles == 0 and debug:
                    print('%s DEBUG: No files requested' % (self._label))
                break
    else:
        if debug:
            print('%s DEBUG: Using DBS query' % (self._label))
        print('%s WARNING: DBS query disabled for DBS3 transition to new API' % (self._label))

    # Check output and return
    if validVersion == 0:
        print("%s WARNING : No RelVal file(s) found at all in datasets '%s*' on SE '%s'" % (self._label, datasetAll, domainSE))
        if debug:
            self.messageEmptyList()
    elif len(filePaths) == 0:
        print("%s WARNING : No RelVal file(s) picked up in dataset '%s'" % (self._label, dataset))
        if debug:
            self.messageEmptyList()
    elif len(filePaths) < numberOfFiles:
        print("%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset '%s'" % (self._label, len(filePaths), numberOfFiles, dataset))

    if debug:
        print('%s DEBUG: returning %i file(s):\n%s' % (self._label, len(filePaths), filePaths))
    return filePaths
401 
402 pickRelValInputFiles = PickRelValInputFiles()
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
static void * communicate(void *obj)
Definition: DQMNet.cc:1251
def __call__(self, useDAS=None, cmsswVersion=None, formerVersion=None, relVal=None, dataTier=None, condition=None, globalTag=None, maxVersions=None, skipFiles=None, numberOfFiles=None, debug=None)
Automatic pick-up of RelVal input files
double split
Definition: MVATrainer.cc:139