CMS 3D CMS Logo

submitPVResolutionJobs.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 '''
3 Submits per run Primary Vertex Resolution Alignment validation using the split vertex method,
4 usage:
5 
6 submitPVResolutionJobs.py -i PVResolutionExample.ini -D /JetHT/Run2018C-TkAlMinBias-12Nov2019_UL2018-v2/ALCARECO
7 '''
8 
9 from __future__ import print_function
10 
11 __author__ = 'Marco Musich'
12 __copyright__ = 'Copyright 2020, CERN CMS'
13 __credits__ = ['Ernesto Migliore', 'Salvatore Di Guida']
14 __license__ = 'Unknown'
15 __maintainer__ = 'Marco Musich'
16 __email__ = 'marco.musich@cern.ch'
17 __version__ = 1
18 
19 import os,sys
20 import getopt
21 import time
22 import json
23 import ROOT
24 import urllib
25 import string
26 import subprocess
27 import pprint
28 import warnings
29 from subprocess import Popen, PIPE
30 import multiprocessing
31 from optparse import OptionParser
32 import os, shlex, shutil, getpass
33 import configparser as ConfigParser
34 
35 CopyRights = '##################################\n'
36 CopyRights += '# submitPVVResolutioJobs.py #\n'
37 CopyRights += '# marco.musich@cern.ch #\n'
38 CopyRights += '# October 2020 #\n'
39 CopyRights += '##################################\n'
40 
41 
def get_status_output(*args, **kwargs):
    """Run a command through subprocess.Popen and return (returncode, stdout, stderr)."""
    process = subprocess.Popen(*args, **kwargs)
    out, err = process.communicate()
    # communicate() waits for termination, so returncode is valid here
    return process.returncode, out, err
48 
def check_proxy():
    """Check if a GRID proxy has been initialized.

    Returns True when 'voms-proxy-info --exists' succeeds, False otherwise.
    The 'def' line was missing from the rendered source and is restored here;
    the function name is fixed by the call site in forward_proxy().
    """
    try:
        # Only the exit code matters; discard all command output.
        with open(os.devnull, "w") as dump:
            subprocess.check_call(["voms-proxy-info", "--exists"],
                                  stdout = dump, stderr = dump)
    except subprocess.CalledProcessError:
        return False
    return True
60 
61 
def forward_proxy(rundir):
    """Forward proxy to location visible from the batch system.

    Arguments:
    - `rundir`: directory for storing the forwarded proxy
    """
    # Abort immediately when no valid proxy is available.
    if not check_proxy():
        print("Please create proxy via 'voms-proxy-init -voms cms -rfc'.")
        sys.exit(1)

    # Copy the local proxy file next to the job area as '.user_proxy'.
    proxy_location = subprocess.check_output(["voms-proxy-info", "--path"]).strip()
    shutil.copyfile(proxy_location, os.path.join(rundir, ".user_proxy"))
76 
def getFilesForRun(blob):
    """
    returns the list of list files associated with a given dataset for a certain run
    """
    run, dataset = blob[0], blob[1]
    # Query DAS for the files belonging to this (run, dataset) pair.
    query = " dasgoclient -limit=0 -query 'file run=" + run + " dataset=" + dataset + "'"
    proc = Popen(query, shell=True, stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate()
    files = out.decode().split('\n')
    # Drop the empty element produced by the trailing newline.
    files.pop()
    return files
88 
89 
def write_HTCondor_submit_file(path, name, nruns, proxy_path=None):
    """Writes 'job.submit' file in `path`.

    Arguments:
    - `path`: job directory
    - `name`: base name of the scripts to submit
    - `nruns`: number of queued jobs
    - `proxy_path`: path to proxy (only used in case of requested proxy forward)

    Returns the full path of the written submit file.
    """
    template = """\
universe = vanilla
executable = {script:s}
output = {jobm:s}/{out:s}.out
error = {jobm:s}/{out:s}.err
log = {jobm:s}/{out:s}.log
transfer_output_files = ""
+JobFlavour = "{flavour:s}"
queue {njobs:s}
"""
    # Only forward the proxy when explicitly requested.
    if proxy_path is not None:
        template += """\
+x509userproxy = "{proxy:s}"
"""

    submit_path = os.path.join(path, "job_" + name + ".submit")
    rendered = template.format(script = os.path.join(path, name + "_$(ProcId).sh"),
                               out = name + "_$(ProcId)",
                               jobm = os.path.abspath(path),
                               flavour = "tomorrow",
                               njobs = str(nruns),
                               proxy = proxy_path)
    with open(submit_path, "w") as handle:
        handle.write(rendered)

    return submit_path
124 
125 
def getLuminosity(homedir, minRun, maxRun, isRunBased, verbose):
    """Query brilcalc and return a {run: recorded luminosity} dict.

    Expects CSV output like
    +-------+------+--------+--------+-------------------+------------------+
    | nfill | nrun | nls    | ncms   | totdelivered(/fb) | totrecorded(/fb) |
    +-------+------+--------+--------+-------------------+------------------+
    |    73 |  327 | 142418 | 138935 |            19.562 |           18.036 |
    +-------+------+--------+--------+-------------------+------------------+
    and extracts the per-run recorded luminosity (/pb).

    Returns an empty dict when `isRunBased` is False or the BRIL query fails.
    """
    myCachedLumi = {}
    if not isRunBased:
        return myCachedLumi

    try:
        output = subprocess.check_output(
            [homedir + "/.local/bin/brilcalc", "lumi", "-b", "STABLE BEAMS",
             "-u", "/pb", "--begin", str(minRun), "--end", str(maxRun),
             "--output-style", "csv"])
    except Exception:
        # was a bare 'except:'; Exception keeps KeyboardInterrupt/SystemExit alive
        warnings.warn('ATTENTION! Impossible to query the BRIL DB!')
        return myCachedLumi

    if verbose:
        print("INSIDE GET LUMINOSITY")
        print(output)

    for line in output.decode().split("\n"):
        # skip comment/header lines AND empty lines (the latter previously
        # inserted a bogus '' -> '' entry into the cache)
        if line and "#" not in line:
            runToCache = line.split(",")[0].split(":")[0]
            lumiToCache = line.split(",")[-1].replace("\r", "")
            myCachedLumi[runToCache] = lumiToCache

    if verbose:
        print(myCachedLumi)
    return myCachedLumi
165 
166 
def isInJSON(run, jsonfile):
    """Return True when `run` is a key of the JSON lumi mask in `jsonfile`.

    Falls back to True (i.e. use every run) with a warning when the file
    cannot be opened or parsed.
    """
    try:
        with open(jsonfile, 'r') as myJSON:
            jsonDATA = json.load(myJSON)
        return (run in jsonDATA)
    except Exception:
        # was a bare 'except:'; Exception keeps KeyboardInterrupt/SystemExit alive
        warnings.warn('ATTENTION! Impossible to find lumi mask! All runs will be used.')
        return True
176 
def as_dict(config):
    """Convert a ConfigParser object into a plain nested {section: {option: value}} dict."""
    return {section: {option: config.get(section, option)
                      for option in config.options(section)}
            for section in config.sections()}
186 
187 
def batchScriptCERN(theCMSSW_BASE, cfgdir, runindex, eosdir, lumiToRun, key, config, tkCollection, isUnitTest=False):
    '''prepare the batch script, to run on HTCondor'''
    # Optional cmsRun arguments; empty strings when not configured.
    rec_arg = "records=" + config['records'] if 'records' in config else ""
    ext_arg = "external=" + config['external'] if 'external' in config else ""

    script = """#!/bin/bash
CMSSW_DIR={CMSSW_BASE_DIR}/src/Alignment/OfflineValidation/test
echo "The mother directory is $CMSSW_DIR"
export X509_USER_PROXY=$CMSSW_DIR/.user_proxy
#OUT_DIR=$CMSSW_DIR/harvest ## for local storage
OUT_DIR={MYDIR}
LOG_DIR=$CMSSW_DIR/out
LXBATCH_DIR=$PWD
# Check if CMSSW environment is set by checking CMSSW_BASE or other variables
if [[ -z "$CMSSW_BASE" || -z "$CMSSW_VERSION" || -z "$SCRAM_ARCH" ]]; then
    echo "CMSSW environment not detected. Sourcing scramv1 runtime..."
    cd $CMSSW_DIR
    # Assuming you have a valid CMSSW release environment to source
    source /cvmfs/cms.cern.ch/cmsset_default.sh
    eval $(scramv1 runtime -sh)  # This sets the CMSSW environment
else
    echo "CMSSW environment is already set. Continuing..."
fi
cd $LXBATCH_DIR
cp -pr {CFGDIR}/PrimaryVertexResolution_{KEY}_{runindex}_cfg.py .
cmsRun PrimaryVertexResolution_{KEY}_{runindex}_cfg.py TrackCollection={TRKS} GlobalTag={GT} lumi={LUMITORUN} {REC} {EXT} >& log_{KEY}_run{runindex}.out
# Print the contents of the current directory using $PWD and echo
echo "Contents of the current directory ($PWD):"
echo "$(ls -lh "$PWD")"
""".format(CMSSW_BASE_DIR=theCMSSW_BASE,
           CFGDIR=cfgdir,
           runindex=runindex,
           MYDIR=eosdir,
           KEY=key,
           LUMITORUN=lumiToRun,
           TRKS=tkCollection,
           GT=config['globaltag'],
           EXT=ext_arg,
           REC=rec_arg)

    # When not a unit test, also copy the payload to EOS and stash the logs.
    if not isUnitTest:
        script += """for payloadOutput in $(ls *root ); do xrdcp -f $payloadOutput root://eoscms/$OUT_DIR/pvresolution_{KEY}_{runindex}.root ; done
tar czf log_{KEY}_run{runindex}.tgz log_{KEY}_run{runindex}.out
for logOutput in $(ls *tgz ); do cp $logOutput $LOG_DIR/ ; done
""".format(KEY=key, runindex=runindex)

    return script
233 
234 
def mkdir_eos(out_path):
    """Create `out_path` on EOS recursively, one directory level at a time."""
    print("creating",out_path)
    newpath = '/'
    for level in out_path.split('/'):
        newpath = os.path.join(newpath, level)
        # do not issue mkdir from very top of the tree
        if newpath.find('test_out') > 0:
            proc = subprocess.Popen("eos mkdir " + newpath, shell=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            (out, err) = proc.communicate()
            #print(out,err)
            proc.wait()

    # now check that the directory exists
    proc = subprocess.Popen("eos ls " + out_path, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (out, err) = proc.communicate()
    proc.wait()
    if proc.returncode != 0:
        print(out)
258 
def main():
    """Generate per-run PV-resolution cmsRun configs and HTCondor scripts, optionally submitting them.

    Workflow: parse CLI options and the .ini config, forward the GRID proxy,
    query DAS for the runs of the dataset, fetch per-run luminosities from BRIL,
    write one cfg + batch script per (run, config-key), then one submit file per key.
    """
    desc="""This is a description of %prog."""
    parser = OptionParser(description=desc,version='%prog version 0.1')
    parser.add_option('-s','--submit', help='job submitted', dest='submit', action='store_true', default=False)
    parser.add_option('-j','--jobname', help='task name', dest='taskname', action='store', default='myTask')
    parser.add_option('-i','--init', help='ini file', dest='iniPathName', action='store', default="default.ini")
    parser.add_option('-b','--begin', help='starting point', dest='start', action='store', default='1')
    parser.add_option('-e','--end', help='ending point', dest='end', action='store', default='999999')
    parser.add_option('-D','--Dataset', help='dataset to run upon', dest='DATASET', action='store', default='/StreamExpressAlignment/Run2017F-TkAlMinBias-Express-v1/ALCARECO')
    parser.add_option('-v','--verbose', help='verbose output', dest='verbose', action='store_true', default=False)
    parser.add_option('-u','--unitTest',help='unit tests?', dest='isUnitTest', action='store_true', default=False)
    (opts, args) = parser.parse_args()

    global CopyRights
    print('\n'+CopyRights)

    input_CMSSW_BASE = os.environ.get('CMSSW_BASE')

    USER = os.environ.get('USER')
    HOME = os.environ.get('HOME')
    # Output area on EOS; only created when actually submitting.
    eosdir=os.path.join("/store/group/alca_trackeralign",USER,"test_out",opts.taskname)
    if opts.submit:
        mkdir_eos(eosdir)
    else:
        print("Not going to create EOS folder. -s option has not been chosen")

    try:
        config = ConfigParser.ConfigParser()
        config.read(opts.iniPathName)
    except ConfigParser.MissingSectionHeaderError as e:
        # NOTE(review): WrongIniFormatError is not defined in this file —
        # presumably provided elsewhere; verify, otherwise this raises NameError.
        raise WrongIniFormatError(e)

    print("Parsed the following configuration \n\n")
    inputDict = as_dict(config)
    pprint.pprint(inputDict)

    if(not bool(inputDict)):
        raise SystemExit("\n\n ERROR! Could not parse any input file, perhaps you are submitting this from the wrong folder? \n\n")

    # Copy the GRID proxy into the working directory so batch jobs can use it.
    forward_proxy(".")

    #runs = commands.getstatusoutput("dasgoclient -query='run dataset="+opts.DATASET+"'")[1].split("\n")
    runs = get_status_output("dasgoclient -query='run dataset="+opts.DATASET+"'",shell=True, stdout=PIPE, stderr=PIPE)[1].decode().split("\n")
    runs.pop()  # drop the empty string after the trailing newline
    runs.sort()
    print("\n\n Will run on the following runs: \n",runs)

    # List of directories to create
    directories = ["cfg", "BASH", "harvest", "out"]

    for directory in directories:
        os.makedirs(directory, exist_ok=True)

    cwd = os.getcwd()
    bashdir = os.path.join(cwd,"BASH")
    cfgdir = os.path.join(cwd,"cfg")

    runs.sort()

    if(len(runs)==0):
        if(opts.isUnitTest):
            # An empty DAS answer is acceptable in unit-test mode: exit cleanly.
            print('\n')
            print('=' * 70)
            print("|| WARNING: won't run on any run, probably DAS returned an empty query,\n|| but that's fine because this is a unit test!")
            print('=' * 70)
            print('\n')
            sys.exit(0)
        else:
            raise Exception('Will not run on any run.... please check again the configuration')
    else:
        # get from the DB the int luminosities
        myLumiDB = getLuminosity(HOME,runs[0],runs[-1],True,opts.verbose)

        if(opts.verbose):
            pprint.pprint(myLumiDB)

        lumimask = inputDict["Input"]["lumimask"]
        print("\n\n Using JSON file:",lumimask)

        tkCollection = inputDict["Input"]["trackcollection"]
        print("\n\n Using trackCollection:", tkCollection)

        mytuple=[]
        print("\n\n First run:",opts.start,"last run:",opts.end)

        # First pass: select the runs inside [start, end] and the lumi mask.
        for run in runs:
            if (int(run)<int(opts.start) or int(run)>int(opts.end)):
                print("excluding run",run)
                continue

            if not isInJSON(run,lumimask):
                continue

            else:
                print("'======> taking run",run)
                mytuple.append((run,opts.DATASET))

        #print mytuple

        # Query DAS for the file lists of all selected runs in parallel.
        pool = multiprocessing.Pool(processes=20)  # start 20 worker processes
        count = pool.map(getFilesForRun,mytuple)

        if(opts.verbose):
            print("printing count")
            pprint.pprint(count)

        # limit the runs in the dictionary to the filtered ones
        file_info = dict(zip([run for run, _ in mytuple], count))

        if(opts.verbose):
            print("printing file_info")
            pprint.pprint(file_info)

        # Second pass: write a cmsRun cfg and a batch script per (run, key).
        count=0
        for run in runs:
            #if(count>10):
            #    continue
            #run = run.strip("[").strip("]")

            if (int(run)<int(opts.start) or int(run)>int(opts.end)):
                print("excluding",run)
                continue

            if not isInJSON(run,lumimask):
                print("=====> excluding run:",run)
                continue

            count=count+1
            files = file_info[run]
            if(opts.verbose):
                print(run, files)

            # Build a Python-list literal of input files for the cfg template.
            listOfFiles='['
            for ffile in files:
                listOfFiles=listOfFiles+"\""+str(ffile)+"\","
            listOfFiles+="]"

            #print(listOfFiles)

            theLumi='1'
            if (run) in myLumiDB:
                theLumi = myLumiDB[run]
                print("run",run," int. lumi:",theLumi,"/pb")
            else:
                # Fall back to 1/pb when BRIL had no entry for this run.
                print("=====> COULD NOT FIND LUMI, setting default = 1/pb")
                theLumi='1'
                print("run",run," int. lumi:",theLumi,"/pb")

            # loop on the dictionary
            for key, value in inputDict.items():
                #print(key,value)
                if "Input" in key:
                    continue
                else:
                    # Section names look like 'prefix:key'; keep the part after ':'.
                    key = key.split(":", 1)[1]
                print("dealing with",key)

                # Paths and variables
                template_file = os.path.join(input_CMSSW_BASE, "src/Alignment/OfflineValidation/test/PrimaryVertexResolution_templ_cfg.py")
                output_file = f"./cfg/PrimaryVertexResolution_{key}_{run}_cfg.py"

                # Copy the template file to the destination
                shutil.copy(template_file, output_file)

                # Read and replace placeholders in the copied file
                with open(output_file, 'r') as file:
                    content = file.read()

                # Replace placeholders with actual values
                content = content.replace("XXX_FILES_XXX", listOfFiles)
                content = content.replace("XXX_RUN_XXX", run)
                content = content.replace("YYY_KEY_YYY", key)

                # Write the modified content back to the file
                with open(output_file, 'w') as file:
                    file.write(content)

                scriptFileName = os.path.join(bashdir,"batchHarvester_"+key+"_"+str(count-1)+".sh")
                scriptFile = open(scriptFileName,'w')
                scriptFile.write(batchScriptCERN(input_CMSSW_BASE,cfgdir,run,eosdir,theLumi,key,value,tkCollection,opts.isUnitTest))
                scriptFile.close()
                #os.system('chmod +x %s' % scriptFileName)

        # One HTCondor submit file per configuration key; submit only with -s.
        for key, value in inputDict.items():
            if "Input" in key:
                continue
            else:
                key = key.split(":", 1)[1]

            job_submit_file = write_HTCondor_submit_file(bashdir,"batchHarvester_"+key,count,None)
            os.system("chmod u+x "+bashdir+"/*.sh")

            if opts.submit:
                submissionCommand = "condor_submit "+job_submit_file
                print(submissionCommand)
                os.system(submissionCommand)


if __name__ == "__main__":
    main()
467 
def get_status_output(args, kwargs)
def getLuminosity(homedir, minRun, maxRun, isRunBased, verbose)
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float zip(ConstView const &tracks, int32_t i)
Definition: TracksSoA.h:90
def replace(string, replacements)
def mkdir_eos(out_path)
method to create recursively directories on EOS
def batchScriptCERN(theCMSSW_BASE, cfgdir, runindex, eosdir, lumiToRun, key, config, tkCollection, isUnitTest=False)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
Definition: main.py:1
bool decode(bool &, std::string_view)
Definition: types.cc:72
#define str(s)
if(threadIdxLocalY==0 &&threadIdxLocalX==0)
def write_HTCondor_submit_file(path, name, nruns, proxy_path=None)