test
CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
cmsswVersionTools.py
Go to the documentation of this file.
1 import FWCore.ParameterSet.Config as cms
2 
6 from Configuration.AlCa.autoCond import autoCond
7 
8 import os
9 import socket
10 from subprocess import *
11 import json
12 import das_client
13 
14 
15 ## ------------------------------------------------------
16 ## Automatic pick-up of RelVal input files
17 ## ------------------------------------------------------
18 
20  """ Picks up RelVal input files automatically and
21  returns a vector of strings with the paths to be used in [PoolSource].fileNames
22  PickRelValInputFiles( useDAS, cmsswVersion, formerVersion, relVal, dataTier, condition, globalTag, maxVersions, skipFiles, numberOfFiles, debug )
23  - useDAS : switch to perform query in DAS rather than in DBS
24  optional; default: False
25  - cmsswVersion : CMSSW release to pick up the RelVal files from
26  optional; default: the current release (determined automatically from environment)
27  - formerVersion: use the second-to-last valid CMSSW release to pick up the RelVal files from
28  applies also, if 'cmsswVersion' is set explicitly
29  optional; default: False
30  - relVal : RelVal sample to be used
31  optional; default: 'RelValTTbar'
32  - dataTier : data tier to be used
33  optional; default: 'GEN-SIM-RECO'
34  - condition : identifier of GlobalTag as defined in Configuration/AlCa/python/autoCond.py
35  possibly overwritten, if 'globalTag' is set explicitly
36  optional; default: 'startup'
37  - globalTag : name of GlobalTag as it is used in the data path of the RelVals
38  optional; default: determined automatically as defined by 'condition' in Configuration/AlCa/python/autoCond.py
39  !!! Determination is done for the release one runs in, not for the release the RelVals have been produced in.
40  !!! Example of deviation: data RelVals (CMSSW_4_1_X) might not only have the pure name of the GlobalTag 'GR_R_311_V2' in the full path,
41  but also an extension identifying the data: 'GR_R_311_V2_RelVal_wzMu2010B'
42  - maxVersions : max. versioning number of RelVal to check
43  optional; default: 3
44  - skipFiles : number of files to skip for a found RelVal sample
45  optional; default: 0
46  - numberOfFiles: number of files to pick up
47  setting it to negative values, returns all found ('skipFiles' remains active though)
48  optional; default: -1
49  - debug : switch to enable enhanced messages in 'stdout'
50  optional; default: False
51  """
52 
# Prefix used in every message this tool prints.
_label = 'pickRelValInputFiles'
# Container for the default parameters; it is filled in __init__ via addParameter().
_defaultParameters = dicttypes.SortedKeysDict()
55 
def getDefaultParameters( self ):
    """Return the dictionary holding this tool's default parameters."""
    defaults = self._defaultParameters
    return defaults
58 
def __init__( self ):
    """Register the default parameters and initialise the working copy.

    The defaults are added one by one with addParameter(); the default
    'globalTag' is derived from the autoCond entry of the default
    'condition'.  Afterwards self._parameters is set to a deep copy of
    the defaults and self._comment to an empty string.
    """
    ConfigToolBase.__init__( self )

    # Parameters registered before the GlobalTag can be determined.
    leading = ( ( 'useDAS'       , False                        , ''                      )
              , ( 'cmsswVersion' , os.getenv( "CMSSW_VERSION" ) , 'auto from environment' )
              , ( 'formerVersion', False                        , ''                      )
              , ( 'relVal'       , 'RelValTTbar'                , ''                      )
              , ( 'dataTier'     , 'GEN-SIM-RECO'               , ''                      )
              , ( 'condition'    , 'startup'                    , ''                      )
              )
    for name, value, comment in leading:
        self.addParameter( self._defaultParameters, name, value, comment )

    # The autoCond entry may be a tuple or list; its first element is used then.
    tag = autoCond[ self.getDefaultParameters()[ 'condition' ].value ]
    if isinstance( tag, ( tuple, list ) ):
        tag = tag[ 0 ]

    # The last five characters of the autoCond entry are dropped
    # (presumably a '::All'-like suffix -- TODO confirm).
    trailing = ( ( 'globalTag'    , tag[ : -5 ], 'auto from \'condition\'' )
               , ( 'maxVersions'  , 3          , ''                        )
               , ( 'skipFiles'    , 0          , ''                        )
               , ( 'numberOfFiles', -1         , 'all'                     )
               , ( 'debug'        , False      , ''                        )
               )
    for name, value, comment in trailing:
        self.addParameter( self._defaultParameters, name, value, comment )

    self._parameters = copy.deepcopy( self._defaultParameters )
    self._comment = ""
77 
def __call__( self
            , useDAS        = None
            , cmsswVersion  = None
            , formerVersion = None
            , relVal        = None
            , dataTier      = None
            , condition     = None
            , globalTag     = None
            , maxVersions   = None
            , skipFiles     = None
            , numberOfFiles = None
            , debug         = None
            ):
    """Set the tool parameters and run the file look-up.

    Every argument left at None is replaced by the value stored in the
    default parameters.  The exception is 'globalTag': if it is None, it
    is derived from autoCond[ condition ] for the (possibly defaulted)
    'condition'.  All values are stored with setParameter() before
    apply() is called.

    Returns whatever apply() returns.
    """
    defaults = self.getDefaultParameters()

    def orDefault( value, name ):
        # Only fall back to the stored default if the caller gave nothing.
        if value is None:
            return defaults[ name ].value
        return value

    useDAS        = orDefault( useDAS       , 'useDAS'        )
    cmsswVersion  = orDefault( cmsswVersion , 'cmsswVersion'  )
    formerVersion = orDefault( formerVersion, 'formerVersion' )
    relVal        = orDefault( relVal       , 'relVal'        )
    dataTier      = orDefault( dataTier     , 'dataTier'      )
    condition     = orDefault( condition    , 'condition'     )
    if globalTag is None:
        # auto from 'condition'
        tag = autoCond[ condition ]
        # An autoCond entry may be a tuple or list (__init__ treats it the
        # same way); its first element is used in that case.  Without this,
        # the slice below would yield a tuple/list instead of a string.
        if isinstance( tag, ( tuple, list ) ):
            tag = tag[ 0 ]
        # Drop the last five characters (presumably a '::All'-like suffix
        # -- TODO confirm).
        globalTag = tag[ : -5 ]
    maxVersions   = orDefault( maxVersions  , 'maxVersions'   )
    skipFiles     = orDefault( skipFiles    , 'skipFiles'     )
    numberOfFiles = orDefault( numberOfFiles, 'numberOfFiles' )
    debug         = orDefault( debug        , 'debug'         )

    resolved = ( ( 'useDAS'       , useDAS        )
               , ( 'cmsswVersion' , cmsswVersion  )
               , ( 'formerVersion', formerVersion )
               , ( 'relVal'       , relVal        )
               , ( 'dataTier'     , dataTier      )
               , ( 'condition'    , condition     )
               , ( 'globalTag'    , globalTag     )
               , ( 'maxVersions'  , maxVersions   )
               , ( 'skipFiles'    , skipFiles     )
               , ( 'numberOfFiles', numberOfFiles )
               , ( 'debug'        , debug         )
               )
    for name, value in resolved:
        self.setParameter( name, value )
    return self.apply()
125 
def messageEmptyList( self ):
    """Print the two-line notice that an empty file list is returned."""
    notice = ( '%s DEBUG: Empty file list returned'%( self._label )
             , ' This might be overwritten by providing input files explicitly to the source module in the main configuration file.'
             )
    for text in notice:
        print( text )
129 
def apply( self ):
    """Look up the RelVal input files for the currently set parameters.

    Steps, in order:
      1. Read all parameters from self._parameters.
      2. Map 'cmsswVersion' onto the release to search RelVals for:
         patch/HLT-patch/DQM-patch/ROOT/SLHC suffixes are cut off; for IBs
         or if 'formerVersion' is set, the output of 'scram' is scanned for
         the last (or last-to-last) matching release.
      3. Return early with an empty list if the host is not in the
         'cern.ch' or 'fnal.gov' domain (or runs on Darwin).
      4. If 'useDAS' is set, query DAS for dataset versions from
         'maxVersions' down to 1 and take the first version that has files
         and is present on the local storage element.  Otherwise only a
         warning is printed (DBS look-up is disabled).
      5. Print warnings/info about the outcome.

    Returns the list of picked-up file paths (possibly empty).
    Calls exit( 1 ) if DAS replies with a status other than 'ok'.
    """
    useDAS = self._parameters[ 'useDAS' ].value
    cmsswVersion = self._parameters[ 'cmsswVersion' ].value
    formerVersion = self._parameters[ 'formerVersion' ].value
    relVal = self._parameters[ 'relVal' ].value
    dataTier = self._parameters[ 'dataTier' ].value
    condition = self._parameters[ 'condition' ].value # only used for GT determination in initialization, if GT not explicitly given
    globalTag = self._parameters[ 'globalTag' ].value
    maxVersions = self._parameters[ 'maxVersions' ].value
    skipFiles = self._parameters[ 'skipFiles' ].value
    numberOfFiles = self._parameters[ 'numberOfFiles' ].value
    debug = self._parameters[ 'debug' ].value

    # Result list; returned empty on every early exit below.
    filePaths = []

    # Determine corresponding CMSSW version for RelVals
    preId = '_pre'
    patchId = '_patch' # patch releases
    hltPatchId = '_hltpatch' # HLT patch releases
    dqmPatchId = '_dqmpatch' # DQM patch releases
    slhcId = '_SLHC' # SLHC releases
    rootId = '_root' # ROOT test releases
    ibId = '_X_' # IBs
    # For the suffixed release types, everything from the suffix on is cut off.
    if patchId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( patchId )[ 0 ]
    elif hltPatchId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ]
    elif dqmPatchId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ]
    elif rootId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( rootId )[ 0 ]
    elif slhcId in cmsswVersion:
        cmsswVersion = cmsswVersion.split( slhcId )[ 0 ]
    elif ibId in cmsswVersion or formerVersion:
        # Ask scram for the list of releases.
        # NOTE(review): 'l -c CMSSW' is handed to scram as one single argument -- confirm this is intended.
        outputTuple = Popen( [ 'scram', 'l -c CMSSW' ], stdout = PIPE, stderr = PIPE ).communicate()
        # Any output on stderr is treated as a failure.
        if len( outputTuple[ 1 ] ) != 0:
            print '%s INFO : SCRAM error'%( self._label )
            if debug:
                print ' from trying to determine last valid releases before \'%s\''%( cmsswVersion )
                print
                print outputTuple[ 1 ]
                print
                self.messageEmptyList()
            return filePaths
        # 'last' is the most recent matching plain release seen so far,
        # 'lastToLast' the one seen before it.
        versions = { 'last' :''
                   , 'lastToLast':''
                   }
        for line in outputTuple[ 0 ].splitlines():
            # The release name is taken from the second whitespace-separated field of each line.
            version = line.split()[ 1 ]
            if cmsswVersion.split( ibId )[ 0 ] in version or cmsswVersion.rpartition( '_' )[ 0 ] in version:
                # Only plain releases and pre-releases are considered.
                if not ( patchId in version or hltPatchId in version or dqmPatchId in version or slhcId in version or ibId in version or rootId in version ):
                    versions[ 'lastToLast' ] = versions[ 'last' ]
                    versions[ 'last' ] = version
                    # Stop as soon as the requested release itself is reached.
                    if version == cmsswVersion:
                        break
        # FIXME: ordering of output problematic ('XYZ_pre10' before 'XYZ_pre2', no "formerVersion" for 'XYZ_pre1')
        if formerVersion:
            # Don't use pre-releases as "former version" for other releases than CMSSW_X_Y_0
            if preId in versions[ 'lastToLast' ] and not preId in versions[ 'last' ] and not versions[ 'last' ].endswith( '_0' ):
                versions[ 'lastToLast' ] = versions[ 'lastToLast' ].split( preId )[ 0 ] # works only, if 'CMSSW_X_Y_0' exists ;-)
            # Use pre-release as "former version" for CMSSW_X_Y_0
            elif versions[ 'last' ].endswith( '_0' ) and not ( preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].startswith( versions[ 'last' ] ) ):
                versions[ 'lastToLast' ] = ''
                # Re-scan the scram output for pre-releases of 'last'; keep the
                # last one of the first contiguous run of matches.
                for line in outputTuple[ 0 ].splitlines():
                    version = line.split()[ 1 ]
                    versionParts = version.partition( preId )
                    if versionParts[ 0 ] == versions[ 'last' ] and versionParts[ 1 ] == preId:
                        versions[ 'lastToLast' ] = version
                    elif versions[ 'lastToLast' ] != '':
                        break
            # Don't use CMSSW_X_Y_0 as "former version" for pre-releases
            elif preId in versions[ 'last' ] and not preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].endswith( '_0' ):
                versions[ 'lastToLast' ] = '' # no alternative :-(
            cmsswVersion = versions[ 'lastToLast' ]
        else:
            cmsswVersion = versions[ 'last' ]

    # Debugging output
    if debug:
        print '%s DEBUG: Called with...'%( self._label )
        for key in self._parameters.keys():
            # The trailing commas keep key, value and the '(default)' marker on one output line.
            print ' %s:\t'%( key ),
            print self._parameters[ key ].value,
            # NOTE(review): identity ('is') rather than equality is used to
            # detect defaults -- confirm this is intended for string values.
            if self._parameters[ key ].value is self.getDefaultParameters()[ key ].value:
                print ' (default)'
            else:
                print
            if key == 'cmsswVersion' and cmsswVersion != self._parameters[ key ].value:
                if formerVersion:
                    print ' ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion )
                else:
                    print ' ==> modified to last valid release %s'%( cmsswVersion )

    # Check domain
    domain = socket.getfqdn().split( '.' )
    domainSE = ''
    # NOTE(review): str.split() always returns at least one element, so this
    # first branch looks unreachable -- confirm.
    if len( domain ) == 0:
        print '%s INFO : Cannot determine domain of this computer'%( self._label )
        if debug:
            self.messageEmptyList()
        return filePaths
    elif os.uname()[0] == "Darwin":
        print '%s INFO : Running on MacOSX without direct access to RelVal files.'%( self._label )
        if debug:
            self.messageEmptyList()
        return filePaths
    elif len( domain ) == 1:
        print '%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self._label, domain[ 0 ] )
        if debug:
            self.messageEmptyList()
        return filePaths
    # Only hosts in 'cern.ch' and 'fnal.gov' are accepted.
    if not ( ( domain[ -2 ] == 'cern' and domain[ -1 ] == 'ch' ) or ( domain[ -2 ] == 'fnal' and domain[ -1 ] == 'gov' ) ):
        print '%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self._label, domain[ -2 ], domain[ -1 ] )
        if debug:
            self.messageEmptyList()
        return filePaths
    # Site name the dataset has to be available at.
    if domain[ -2 ] == 'cern':
        domainSE = 'T2_CH_CERN'
    elif domain[ -2 ] == 'fnal':
        domainSE = 'T1_US_FNAL_MSS'
    if debug:
        print '%s DEBUG: Running at site \'%s.%s\''%( self._label, domain[ -2 ], domain[ -1 ] )
        print '%s DEBUG: Looking for SE \'%s\''%( self._label, domainSE )

    # Find files
    validVersion = 0 # dataset version 'v<N>' found to be usable; 0 means none found
    dataset = ''
    datasetAll = '/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier ) # only used in the warning message below
    if useDAS:
        if debug:
            print '%s DEBUG: Using DAS query'%( self._label )
        # Result limit passed to DAS: -1 becomes 0 and 0 becomes 1
        # (presumably a limit of 0 means "no limit" -- confirm against das_client.get_data).
        # NOTE(review): the limit does not account for 'skipFiles', although
        # skipped files are dropped from the result below -- confirm.
        dasLimit = numberOfFiles
        if dasLimit <= 0:
            dasLimit += 1
        # Try the highest dataset version first.
        for version in range( maxVersions, 0, -1 ):
            filePaths = []
            filePathsTmp = [] # all distinct files seen for this version, including skipped ones
            fileCount = 0
            dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
            dasQuery = 'file dataset=%s | grep file.name'%( dataset )
            if debug:
                print '%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset )
                print ' \'%s\''%( dasQuery )
            # partially stolen from das_client.py for option '--format=plain', needs filter ("grep") in the query
            jsondict = das_client.get_data( 'https://cmsweb.cern.ch', dasQuery, 0, dasLimit, False )
            if debug:
                print '%s DEBUG: Received DAS JSON dictionary:'%( self._label )
                print ' \'%s\''%( jsondict )
            # A failed DAS query terminates the whole process.
            if jsondict[ 'status' ] != 'ok':
                print 'There was a problem while querying DAS with query \'%s\'. Server reply was:\n %s' % (dasQuery, jsondict)
                exit( 1 )
            mongo_query = jsondict[ 'mongo_query' ]
            filters = mongo_query[ 'filters' ]
            data = jsondict[ 'data' ]
            if debug:
                print '%s DEBUG: Query in JSON dictionary:'%( self._label )
                print ' \'%s\''%( mongo_query )
                print '%s DEBUG: Filters in query:'%( self._label )
                print ' \'%s\''%( filters )
                print '%s DEBUG: Data in JSON dictionary:'%( self._label )
                print ' \'%s\''%( data )
            for row in data:
                # First value extracted from the row by the query's 'grep' filter.
                filePath = [ r for r in das_client.get_value( row, filters[ 'grep' ] ) ][ 0 ]
                if debug:
                    print '%s DEBUG: Testing file entry \'%s\''%( self._label, filePath )
                if len( filePath ) > 0:
                    # Done once per version, on its first non-empty file entry:
                    # check that the dataset is available at the local site.
                    if validVersion != version:
                        jsontestdict = das_client.get_data( 'https://cmsweb.cern.ch', 'site dataset=%s | grep site.name'%( dataset ), 0, 999, False )
                        mongo_testquery = jsontestdict[ 'mongo_query' ]
                        testfilters = mongo_testquery[ 'filters' ]
                        testdata = jsontestdict[ 'data' ]
                        if debug:
                            print '%s DEBUG: Received DAS JSON dictionary (site test):'%( self._label )
                            print ' \'%s\''%( jsontestdict )
                            print '%s DEBUG: Query in JSON dictionary (site test):'%( self._label )
                            print ' \'%s\''%( mongo_testquery )
                            print '%s DEBUG: Filters in query (site test):'%( self._label )
                            print ' \'%s\''%( testfilters )
                            print '%s DEBUG: Data in JSON dictionary (site test):'%( self._label )
                            print ' \'%s\''%( testdata )
                        foundSE = False
                        for testrow in testdata:
                            siteName = [ tr for tr in das_client.get_value( testrow, testfilters[ 'grep' ] ) ][ 0 ]
                            if siteName == domainSE:
                                foundSE = True
                                break
                        # Not at the local site: give up on this version and try the next lower one.
                        if not foundSE:
                            if debug:
                                print '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE )
                            break
                        validVersion = version
                        if debug:
                            print '%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion )
                    # No files requested: finding a valid version is all that was needed.
                    if numberOfFiles == 0:
                        break
                    # protect from double entries ( 'unique' flag in query does not work here)
                    if not filePath in filePathsTmp:
                        filePathsTmp.append( filePath )
                        if debug:
                            print '%s DEBUG: File \'%s\' found'%( self._label, filePath )
                        fileCount += 1
                        # needed, since "limit" overrides "idx" in 'get_data' (==> "idx" set to '0' rather than "skipFiles")
                        if fileCount > skipFiles:
                            filePaths.append( filePath )
                    elif debug:
                        print '%s DEBUG: File \'%s\' found again'%( self._label, filePath )
            # The first (i.e. highest) valid version ends the search.
            if validVersion > 0:
                if numberOfFiles == 0 and debug:
                    print '%s DEBUG: No files requested'%( self._label )
                break
    else:
        if debug:
            print '%s DEBUG: Using DBS query'%( self._label )
        print '%s WARNING: DBS query disabled for DBS3 transition to new API'%( self._label )
        # Former DBS implementation, kept commented out:
        #for version in range( maxVersions, 0, -1 ):
            #filePaths = []
            #fileCount = 0
            #dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
            #dbsQuery = 'find file where dataset = %s'%( dataset )
            #if debug:
                #print '%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset )
                #print ' \'%s\''%( dbsQuery )
            #foundSE = False
            #for line in os.popen( 'dbs search --query="%s"'%( dbsQuery ) ).readlines():
                #if line.find( '.root' ) != -1:
                    #if validVersion != version:
                        #if not foundSE:
                            #dbsSiteQuery = 'find dataset where dataset = %s and site = %s'%( dataset, domainSE )
                            #if debug:
                                #print '%s DEBUG: Querying site \'%s\' with'%( self._label, domainSE )
                                #print ' \'%s\''%( dbsSiteQuery )
                            #for lineSite in os.popen( 'dbs search --query="%s"'%( dbsSiteQuery ) ).readlines():
                                #if lineSite.find( dataset ) != -1:
                                    #foundSE = True
                                    #break
                        #if not foundSE:
                            #if debug:
                                #print '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE )
                            #break
                        #validVersion = version
                        #if debug:
                            #print '%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion )
                    #if numberOfFiles == 0:
                        #break
                    #filePath = line.replace( '\n', '' )
                    #if debug:
                        #print '%s DEBUG: File \'%s\' found'%( self._label, filePath )
                    #fileCount += 1
                    #if fileCount > skipFiles:
                        #filePaths.append( filePath )
                    #if not numberOfFiles < 0:
                        #if numberOfFiles <= len( filePaths ):
                            #break
            #if validVersion > 0:
                #if numberOfFiles == 0 and debug:
                    #print '%s DEBUG: No files requested'%( self._label )
                #break

    # Check output and return
    if validVersion == 0:
        print '%s WARNING : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self._label, datasetAll, domainSE )
        if debug:
            self.messageEmptyList()
    elif len( filePaths ) == 0:
        print '%s WARNING : No RelVal file(s) picked up in dataset \'%s\''%( self._label, dataset )
        if debug:
            self.messageEmptyList()
    elif len( filePaths ) < numberOfFiles:
        print '%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self._label, len( filePaths ), numberOfFiles, dataset )

    if debug:
        print '%s DEBUG: returning %i file(s):\n%s'%( self._label, len( filePaths ), filePaths )
    return filePaths
403 
# Ready-made instance of the tool; it is callable like a function through PickRelValInputFiles.__call__.
pickRelValInputFiles = PickRelValInputFiles()
static void * communicate(void *obj)
Definition: DQMNet.cc:1246
Automatic pick-up of RelVal input files
double split
Definition: MVATrainer.cc:139