CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_4/src/PhysicsTools/PatAlgos/python/tools/cmsswVersionTools.py

Go to the documentation of this file.
00001 import FWCore.ParameterSet.Config as cms
00002 
00003 from FWCore.GuiBrowsers.ConfigToolBase import *
00004 from PhysicsTools.PatAlgos.tools.helpers import *
00005 from PhysicsTools.PatAlgos.tools.jetTools import *
00006 from Configuration.AlCa.autoCond import autoCond
00007 
00008 import os
00009 import socket
00010 from subprocess import *
00011 import json
00012 import das_client
00013 
00014 
00015 ## ---------------------------------------------
00016 ## Adjust trigger content in AOD for CMSSW_5_2_X
00017 ## ---------------------------------------------
00018 
class Run52xOn51xTrigger( ConfigToolBase ):
    """ Config tool to adjust the trigger content in AOD for CMSSW_5_2_X:
    a clone of 'convertObjectMapRecord' is attached to the process as 'l1L1GtObjectMap'
    and inserted at the front of the chosen sequence.
    Run52xOn51xTrigger( process, sequence )
    - process : the process to be modified
    - sequence: name of the sequence (attribute of 'process') to prepend the module to
                optional; default: 'patDefaultSequence'
    """
    _label             = 'run52xOn51xTrigger'
    # Class-level dictionary, filled by 'addParameter' in the constructor
    _defaultParameters = dicttypes.SortedKeysDict()

    def __init__( self ):
        ConfigToolBase.__init__( self )
        self.addParameter( self._defaultParameters, 'sequence', 'patDefaultSequence', "Name of sequence to use, default: 'patDefaultSequence'" )
        # Work on a copy, so that the defaults stay untouched by 'setParameter'
        self._parameters = copy.deepcopy( self._defaultParameters )

    def getDefaultParameters( self ):
        """ Returns the dictionary of default parameters
        """
        return self._defaultParameters

    def __call__( self, process, sequence = None ):
        """ Stores the sequence name (default, if not given) and applies the tool to 'process'
        """
        chosenSequence = sequence
        if chosenSequence is None:
            chosenSequence = self._defaultParameters[ 'sequence' ].value
        self.setParameter( 'sequence', chosenSequence )
        return self.apply( process )

    def apply( self, process ):
        """ Adds 'l1L1GtObjectMap' to 'process' and puts it at position 0 of the stored sequence
        """
        sequenceName = self._parameters[ 'sequence' ].value

        from L1Trigger.GlobalTrigger.convertObjectMapRecord_cfi import convertObjectMapRecord
        setattr( process, 'l1L1GtObjectMap', convertObjectMapRecord.clone() )
        targetSequence = getattr( process, sequenceName )
        targetSequence.insert( 0, process.l1L1GtObjectMap )
00047 
# Module-level instance of the tool; to be called as 'run52xOn51xTrigger( process, sequence )'
run52xOn51xTrigger = Run52xOn51xTrigger()
00049 
00050 
00051 ## ------------------------------------------------------
00052 ## Automatic pick-up of RelVal input files
00053 ## ------------------------------------------------------
00054 
00055 class PickRelValInputFiles( ConfigToolBase ):
00056     """  Picks up RelVal input files automatically and
00057   returns a vector of strings with the paths to be used in [PoolSource].fileNames
00058     PickRelValInputFiles( cmsswVersion, relVal, dataTier, condition, globalTag, maxVersions, skipFiles, numberOfFiles, debug )
00059     - useDAS       : switch to perform query in DAS rather than in DBS
00060                      optional; default: False
00061     - cmsswVersion : CMSSW release to pick up the RelVal files from
00062                      optional; default: the current release (determined automatically from environment)
00063     - formerVersion: use the last before the last valid CMSSW release to pick up the RelVal files from
00064                      applies also, if 'cmsswVersion' is set explicitly
00065                      optional; default: False
00066     - relVal       : RelVal sample to be used
00067                      optional; default: 'RelValTTbar'
00068     - dataTier     : data tier to be used
00069                      optional; default: 'GEN-SIM-RECO'
00070     - condition    : identifier of GlobalTag as defined in Configurations/PyReleaseValidation/python/autoCond.py
00071                      possibly overwritten, if 'globalTag' is set explicitly
00072                      optional; default: 'startup'
00073     - globalTag    : name of GlobalTag as it is used in the data path of the RelVals
00074                      optional; default: determined automatically as defined by 'condition' in Configurations/PyReleaseValidation/python/autoCond.py
00075       !!!            Determination is done for the release one runs in, not for the release the RelVals have been produced in.
00076       !!!            Example of deviation: data RelVals (CMSSW_4_1_X) might not only have the pure name of the GlobalTag 'GR_R_311_V2' in the full path,
00077                      but also an extension identifying the data: 'GR_R_311_V2_RelVal_wzMu2010B'
00078     - maxVersions  : max. versioning number of RelVal to check
00079                      optional; default: 9
00080     - skipFiles    : number of files to skip for a found RelVal sample
00081                      optional; default: 0
00082     - numberOfFiles: number of files to pick up
00083                      setting it to negative values, returns all found ('skipFiles' remains active though)
00084                      optional; default: -1
00085     - debug        : switch to enable enhanced messages in 'stdout'
00086                      optional; default: False
00087     """
00088 
00089     _label             = 'pickRelValInputFiles'
00090     _defaultParameters = dicttypes.SortedKeysDict()
00091 
00092     def getDefaultParameters( self ):
00093         return self._defaultParameters
00094 
00095     def __init__( self ):
00096         ConfigToolBase.__init__( self )
00097         self.addParameter( self._defaultParameters, 'useDAS'       , False                                                               , '' )
00098         self.addParameter( self._defaultParameters, 'cmsswVersion' , os.getenv( "CMSSW_VERSION" )                                        , 'auto from environment' )
00099         self.addParameter( self._defaultParameters, 'formerVersion', False                                                               , '' )
00100         self.addParameter( self._defaultParameters, 'relVal'       , 'RelValTTbar'                                                       , '' )
00101         self.addParameter( self._defaultParameters, 'dataTier'     , 'GEN-SIM-RECO'                                                      , '' )
00102         self.addParameter( self._defaultParameters, 'condition'    , 'startup'                                                           , '' )
00103         self.addParameter( self._defaultParameters, 'globalTag'    , autoCond[ self.getDefaultParameters()[ 'condition' ].value ][ : -5 ], 'auto from \'condition\'' )
00104         self.addParameter( self._defaultParameters, 'maxVersions'  , 3                                                                   , '' )
00105         self.addParameter( self._defaultParameters, 'skipFiles'    , 0                                                                   , '' )
00106         self.addParameter( self._defaultParameters, 'numberOfFiles', -1                                                                  , 'all' )
00107         self.addParameter( self._defaultParameters, 'debug'        , False                                                               , '' )
00108         self._parameters = copy.deepcopy( self._defaultParameters )
00109         self._comment = ""
00110 
00111     def __call__( self
00112                 , useDAS        = None
00113                 , cmsswVersion  = None
00114                 , formerVersion = None
00115                 , relVal        = None
00116                 , dataTier      = None
00117                 , condition     = None
00118                 , globalTag     = None
00119                 , maxVersions   = None
00120                 , skipFiles     = None
00121                 , numberOfFiles = None
00122                 , debug         = None
00123                 ):
00124         if useDAS is None:
00125             useDAS = self.getDefaultParameters()[ 'useDAS' ].value
00126         if cmsswVersion is None:
00127             cmsswVersion = self.getDefaultParameters()[ 'cmsswVersion' ].value
00128         if formerVersion is None:
00129             formerVersion = self.getDefaultParameters()[ 'formerVersion' ].value
00130         if relVal is None:
00131             relVal = self.getDefaultParameters()[ 'relVal' ].value
00132         if dataTier is None:
00133             dataTier = self.getDefaultParameters()[ 'dataTier' ].value
00134         if condition is None:
00135             condition = self.getDefaultParameters()[ 'condition' ].value
00136         if globalTag is None:
00137             globalTag = autoCond[ condition ][ : -5 ] # auto from 'condition'
00138         if maxVersions is None:
00139             maxVersions = self.getDefaultParameters()[ 'maxVersions' ].value
00140         if skipFiles is None:
00141             skipFiles = self.getDefaultParameters()[ 'skipFiles' ].value
00142         if numberOfFiles is None:
00143             numberOfFiles = self.getDefaultParameters()[ 'numberOfFiles' ].value
00144         if debug is None:
00145             debug = self.getDefaultParameters()[ 'debug' ].value
00146         self.setParameter( 'useDAS'       , useDAS )
00147         self.setParameter( 'cmsswVersion' , cmsswVersion )
00148         self.setParameter( 'formerVersion', formerVersion )
00149         self.setParameter( 'relVal'       , relVal )
00150         self.setParameter( 'dataTier'     , dataTier )
00151         self.setParameter( 'condition'    , condition )
00152         self.setParameter( 'globalTag'    , globalTag )
00153         self.setParameter( 'maxVersions'  , maxVersions )
00154         self.setParameter( 'skipFiles'    , skipFiles )
00155         self.setParameter( 'numberOfFiles', numberOfFiles )
00156         self.setParameter( 'debug'        , debug )
00157         return self.apply()
00158 
00159     def messageEmptyList( self ):
00160         print '%s DEBUG: Empty file list returned'%( self._label )
00161         print '    This might be overwritten by providing input files explicitly to the source module in the main configuration file.'
00162 
00163     def apply( self ):
00164         useDAS        = self._parameters[ 'useDAS'        ].value
00165         cmsswVersion  = self._parameters[ 'cmsswVersion'  ].value
00166         formerVersion = self._parameters[ 'formerVersion' ].value
00167         relVal        = self._parameters[ 'relVal'        ].value
00168         dataTier      = self._parameters[ 'dataTier'      ].value
00169         condition     = self._parameters[ 'condition'     ].value # only used for GT determination in initialization, if GT not explicitly given
00170         globalTag     = self._parameters[ 'globalTag'     ].value
00171         maxVersions   = self._parameters[ 'maxVersions'   ].value
00172         skipFiles     = self._parameters[ 'skipFiles'     ].value
00173         numberOfFiles = self._parameters[ 'numberOfFiles' ].value
00174         debug         = self._parameters[ 'debug'         ].value
00175 
00176         filePaths = []
00177 
00178         # Determine corresponding CMSSW version for RelVals
00179         preId      = '_pre'
00180         patchId    = '_patch'    # patch releases
00181         hltPatchId = '_hltpatch' # HLT patch releases
00182         dqmPatchId = '_dqmpatch' # DQM patch releases
00183         slhcId     = '_SLHC'     # SLHC releases
00184         rootId     = '_root'     # ROOT test releases
00185         ibId       = '_X_'       # IBs
00186         if patchId in cmsswVersion:
00187             cmsswVersion = cmsswVersion.split( patchId )[ 0 ]
00188         elif hltPatchId in cmsswVersion:
00189             cmsswVersion = cmsswVersion.split( hltPatchId )[ 0 ]
00190         elif dqmPatchId in cmsswVersion:
00191             cmsswVersion = cmsswVersion.split( dqmPatchId )[ 0 ]
00192         elif rootId in cmsswVersion:
00193             cmsswVersion = cmsswVersion.split( rootId )[ 0 ]
00194         elif slhcId in cmsswVersion:
00195             cmsswVersion = cmsswVersion.split( slhcId )[ 0 ]
00196         elif ibId in cmsswVersion or formerVersion:
00197             outputTuple = Popen( [ 'scram', 'l -c CMSSW' ], stdout = PIPE, stderr = PIPE ).communicate()
00198             if len( outputTuple[ 1 ] ) != 0:
00199                 print '%s INFO : SCRAM error'%( self._label )
00200                 if debug:
00201                     print '    from trying to determine last valid releases before \'%s\''%( cmsswVersion )
00202                     print
00203                     print outputTuple[ 1 ]
00204                     print
00205                     self.messageEmptyList()
00206                 return filePaths
00207             versions = { 'last'      :''
00208                        , 'lastToLast':''
00209                        }
00210             for line in outputTuple[ 0 ].splitlines():
00211                 version = line.split()[ 1 ]
00212                 if cmsswVersion.split( ibId )[ 0 ] in version or cmsswVersion.rpartition( '_' )[ 0 ] in version:
00213                     if not ( patchId in version or hltPatchId in version or dqmPatchId in version or slhcId in version or ibId in version or rootId in version ):
00214                         versions[ 'lastToLast' ] = versions[ 'last' ]
00215                         versions[ 'last' ]       = version
00216                         if version == cmsswVersion:
00217                             break
00218             # FIXME: ordering of output problematic ('XYZ_pre10' before 'XYZ_pre2', no "formerVersion" for 'XYZ_pre1')
00219             if formerVersion:
00220                 # Don't use pre-releases as "former version" for other releases than CMSSW_X_Y_0
00221                 if preId in versions[ 'lastToLast' ] and not preId in versions[ 'last' ] and not versions[ 'last' ].endswith( '_0' ):
00222                     versions[ 'lastToLast' ] = versions[ 'lastToLast' ].split( preId )[ 0 ] # works only, if 'CMSSW_X_Y_0' esists ;-)
00223                 # Use pre-release as "former version" for CMSSW_X_Y_0
00224                 elif versions[ 'last' ].endswith( '_0' ) and not ( preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].startswith( versions[ 'last' ] ) ):
00225                     versions[ 'lastToLast' ] = ''
00226                     for line in outputTuple[ 0 ].splitlines():
00227                         version      = line.split()[ 1 ]
00228                         versionParts = version.partition( preId )
00229                         if versionParts[ 0 ] == versions[ 'last' ] and versionParts[ 1 ] == preId:
00230                             versions[ 'lastToLast' ] = version
00231                         elif versions[ 'lastToLast' ] != '':
00232                             break
00233                 # Don't use CMSSW_X_Y_0 as "former version" for pre-releases
00234                 elif preId in versions[ 'last' ] and not preId in versions[ 'lastToLast' ] and versions[ 'lastToLast' ].endswith( '_0' ):
00235                     versions[ 'lastToLast' ] = '' # no alternative :-(
00236                 cmsswVersion = versions[ 'lastToLast' ]
00237             else:
00238                 cmsswVersion = versions[ 'last' ]
00239 
00240         # Debugging output
00241         if debug:
00242             print '%s DEBUG: Called with...'%( self._label )
00243             for key in self._parameters.keys():
00244                print '    %s:\t'%( key ),
00245                print self._parameters[ key ].value,
00246                if self._parameters[ key ].value is self.getDefaultParameters()[ key ].value:
00247                    print ' (default)'
00248                else:
00249                    print
00250                if key == 'cmsswVersion' and cmsswVersion != self._parameters[ key ].value:
00251                    if formerVersion:
00252                        print '    ==> modified to last to last valid release %s (s. \'formerVersion\' parameter)'%( cmsswVersion )
00253                    else:
00254                        print '    ==> modified to last valid release %s'%( cmsswVersion )
00255 
00256         # Check domain
00257         domain = socket.getfqdn().split( '.' )
00258         domainSE = ''
00259         if len( domain ) == 0:
00260             print '%s INFO : Cannot determine domain of this computer'%( self._label )
00261             if debug:
00262                 self.messageEmptyList()
00263             return filePaths
00264         elif os.uname()[0] == "Darwin":
00265             print '%s INFO : Running on MacOSX without direct access to RelVal files.'%( self._label )
00266             if debug:
00267                 self.messageEmptyList()
00268             return filePaths
00269         elif len( domain ) == 1:
00270             print '%s INFO : Running on local host \'%s\' without direct access to RelVal files'%( self._label, domain[ 0 ] )
00271             if debug:
00272                 self.messageEmptyList()
00273             return filePaths
00274         if not ( ( domain[ -2 ] == 'cern' and domain[ -1 ] == 'ch' ) or ( domain[ -2 ] == 'fnal' and domain[ -1 ] == 'gov' ) ):
00275             print '%s INFO : Running on site \'%s.%s\' without direct access to RelVal files'%( self._label, domain[ -2 ], domain[ -1 ] )
00276             if debug:
00277                 self.messageEmptyList()
00278             return filePaths
00279         if domain[ -2 ] == 'cern':
00280             domainSE = 'T2_CH_CERN'
00281         elif domain[ -2 ] == 'fnal':
00282             domainSE = 'T1_US_FNAL_MSS'
00283         if debug:
00284             print '%s DEBUG: Running at site \'%s.%s\''%( self._label, domain[ -2 ], domain[ -1 ] )
00285             print '%s DEBUG: Looking for SE \'%s\''%( self._label, domainSE )
00286 
00287         # Find files
00288         validVersion = 0
00289         dataset    = ''
00290         datasetAll = '/%s/%s-%s-v*/%s'%( relVal, cmsswVersion, globalTag, dataTier )
00291         if useDAS:
00292             if debug:
00293                 print '%s DEBUG: Using DAS query'%( self._label )
00294             dasLimit = numberOfFiles
00295             if dasLimit <= 0:
00296                 dasLimit += 1
00297             for version in range( maxVersions, 0, -1 ):
00298                 filePaths    = []
00299                 filePathsTmp = []
00300                 fileCount    = 0
00301                 dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
00302                 dasQuery = 'file dataset=%s | grep file.name'%( dataset )
00303                 if debug:
00304                     print '%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset )
00305                     print '    \'%s\''%( dasQuery )
00306                 # partially stolen from das_client.py for option '--format=plain', needs filter ("grep") in the query
00307                 dasData     = das_client.get_data( 'https://cmsweb.cern.ch', dasQuery, 0, dasLimit, False )
00308                 jsondict    = json.loads( dasData )
00309                 if debug:
00310                     print '%s DEBUG: Received DAS data:'%( self._label )
00311                     print '    \'%s\''%( dasData )
00312                     print '%s DEBUG: Determined JSON dictionary:'%( self._label )
00313                     print '    \'%s\''%( jsondict )
00314                 if jsondict[ 'status' ] != 'ok':
00315                     print 'There was a problem while querying DAS with query \'%s\'. Server reply was:\n %s' % (dasQuery, dasData)
00316                     exit( 1 )
00317                 mongo_query = jsondict[ 'mongo_query' ]
00318                 filters     = mongo_query[ 'filters' ]
00319                 data        = jsondict[ 'data' ]
00320                 if debug:
00321                     print '%s DEBUG: Query in JSON dictionary:'%( self._label )
00322                     print '    \'%s\''%( mongo_query )
00323                     print '%s DEBUG: Filters in query:'%( self._label )
00324                     print '    \'%s\''%( filters )
00325                     print '%s DEBUG: Data in JSON dictionary:'%( self._label )
00326                     print '    \'%s\''%( data )
00327                 for row in data:
00328                     filePath = [ r for r in das_client.get_value( row, filters ) ][ 0 ]
00329                     if debug:
00330                         print '%s DEBUG: Testing file entry \'%s\''%( self._label, filePath )
00331                     if len( filePath ) > 0:
00332                         if validVersion != version:
00333                             dasTest         = das_client.get_data( 'https://cmsweb.cern.ch', 'site dataset=%s | grep site.name'%( dataset ), 0, 999, False )
00334                             jsontestdict    = json.loads( dasTest )
00335                             mongo_testquery = jsontestdict[ 'mongo_query' ]
00336                             testfilters = mongo_testquery[ 'filters' ]
00337                             testdata    = jsontestdict[ 'data' ]
00338                             if debug:
00339                                 print '%s DEBUG: Received DAS data (site test):'%( self._label )
00340                                 print '    \'%s\''%( dasTest )
00341                                 print '%s DEBUG: Determined JSON dictionary (site test):'%( self._label )
00342                                 print '    \'%s\''%( jsontestdict )
00343                                 print '%s DEBUG: Query in JSON dictionary (site test):'%( self._label )
00344                                 print '    \'%s\''%( mongo_testquery )
00345                                 print '%s DEBUG: Filters in query (site test):'%( self._label )
00346                                 print '    \'%s\''%( testfilters )
00347                                 print '%s DEBUG: Data in JSON dictionary (site test):'%( self._label )
00348                                 print '    \'%s\''%( testdata )
00349                             foundSE = False
00350                             for testrow in testdata:
00351                                 siteName = [ tr for tr in das_client.get_value( testrow, testfilters ) ][ 0 ]
00352                                 if siteName == domainSE:
00353                                     foundSE = True
00354                                     break
00355                             if not foundSE:
00356                                 if debug:
00357                                     print '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE )
00358                                 break
00359                             validVersion = version
00360                             if debug:
00361                                 print '%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion )
00362                         if numberOfFiles == 0:
00363                             break
00364                         # protect from double entries ( 'unique' flag in query does not work here)
00365                         if not filePath in filePathsTmp:
00366                             filePathsTmp.append( filePath )
00367                             if debug:
00368                                 print '%s DEBUG: File \'%s\' found'%( self._label, filePath )
00369                             fileCount += 1
00370                             # needed, since and "limit" overrides "idx" in 'get_data' (==> "idx" set to '0' rather than "skipFiles")
00371                             if fileCount > skipFiles:
00372                                 filePaths.append( filePath )
00373                         elif debug:
00374                             print '%s DEBUG: File \'%s\' found again'%( self._label, filePath )
00375                 if validVersion > 0:
00376                     if numberOfFiles == 0 and debug:
00377                         print '%s DEBUG: No files requested'%( self._label )
00378                     break
00379         else:
00380             if debug:
00381                 print '%s DEBUG: Using DBS query'%( self._label )
00382             for version in range( maxVersions, 0, -1 ):
00383                 filePaths = []
00384                 fileCount = 0
00385                 dataset = '/%s/%s-%s-v%i/%s'%( relVal, cmsswVersion, globalTag, version, dataTier )
00386                 dbsQuery = 'find file where dataset = %s'%( dataset )
00387                 if debug:
00388                     print '%s DEBUG: Querying dataset \'%s\' with'%( self._label, dataset )
00389                     print '    \'%s\''%( dbsQuery )
00390                 foundSE = False
00391                 for line in os.popen( 'dbs search --query="%s"'%( dbsQuery ) ):
00392                     if line.find( '.root' ) != -1:
00393                         if validVersion != version:
00394                             if not foundSE:
00395                                 dbsSiteQuery = 'find dataset where dataset = %s and site = %s'%( dataset, domainSE )
00396                                 if debug:
00397                                     print '%s DEBUG: Querying site \'%s\' with'%( self._label, domainSE )
00398                                     print '    \'%s\''%( dbsSiteQuery )
00399                                 for lineSite in os.popen( 'dbs search --query="%s"'%( dbsSiteQuery ) ):
00400                                     if lineSite.find( dataset ) != -1:
00401                                         foundSE = True
00402                                         break
00403                             if not foundSE:
00404                                 if debug:
00405                                     print '%s DEBUG: Possible version \'v%s\' not available on SE \'%s\''%( self._label, version, domainSE )
00406                                 break
00407                             validVersion = version
00408                             if debug:
00409                                 print '%s DEBUG: Valid version set to \'v%i\''%( self._label, validVersion )
00410                         if numberOfFiles == 0:
00411                             break
00412                         filePath = line.replace( '\n', '' )
00413                         if debug:
00414                             print '%s DEBUG: File \'%s\' found'%( self._label, filePath )
00415                         fileCount += 1
00416                         if fileCount > skipFiles:
00417                             filePaths.append( filePath )
00418                         if not numberOfFiles < 0:
00419                             if numberOfFiles <= len( filePaths ):
00420                                 break
00421                 if validVersion > 0:
00422                     if numberOfFiles == 0 and debug:
00423                         print '%s DEBUG: No files requested'%( self._label )
00424                     break
00425 
00426         # Check output and return
00427         if validVersion == 0:
00428             print '%s INFO : No RelVal file(s) found at all in datasets \'%s*\' on SE \'%s\''%( self._label, datasetAll, domainSE )
00429             if debug:
00430                 self.messageEmptyList()
00431         elif len( filePaths ) == 0:
00432             print '%s INFO : No RelVal file(s) picked up in dataset \'%s\''%( self._label, dataset )
00433             if debug:
00434                 self.messageEmptyList()
00435         elif len( filePaths ) < numberOfFiles:
00436             print '%s INFO : Only %i RelVal file(s) instead of %i picked up in dataset \'%s\''%( self._label, len( filePaths ), numberOfFiles, dataset )
00437 
00438         if debug:
00439             print '%s DEBUG: returning %i file(s):\n%s'%( self._label, len( filePaths ), filePaths )
00440         return filePaths
00441 
# Module-level instance of the tool; calling it returns the list of found RelVal file paths
pickRelValInputFiles = PickRelValInputFiles()