CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_4_1_8_patch9/src/RecoVertex/BeamSpotProducer/scripts/BeamSpotWorkflow.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 #____________________________________________________________
00003 #
00004 #  BeamSpotWorkflow
00005 #
00006 # A very complicated way to automate the beam spot workflow
00007 #
00008 # Francisco Yumiceva, Lorenzo Uplegger
00009 # yumiceva@fnal.gov, uplegger@fnal.gov
00010 #
00011 # Fermilab, 2010
00012 #
00013 #____________________________________________________________
00014 
00015 """
00016    BeamSpotWorkflow.py
00017 
00018    A very complicate script to upload the results into the DB
00019 
00020    usage: %prog -d <data file/directory> -t <tag name>
00021    -c, --cfg = CFGFILE : Use a different configuration file than the default
00022    -l, --lock = LOCK   : Create a lock file to have just one script running 
00023    -o, --overwrite     : Overwrite results files when copying.
00024    -T, --Test          : Upload files to Test dropbox for data validation.   
00025    -u, --upload        : Upload files to offline drop box via scp.
00026    -z, --zlarge        : Enlarge sigmaZ to 10 +/- 0.005 cm.
00027 
00028    Francisco Yumiceva (yumiceva@fnal.gov)
00029    Lorenzo Uplegger   (send an email to Francisco)
00030    Fermilab 2010
00031    
00032 """
00033 
00034 
00035 import sys,os
00036 import commands, re, time
00037 import datetime
00038 import ConfigParser
00039 import xmlrpclib
00040 from BeamSpotObj import BeamSpot
00041 from IOVObj import IOV
00042 from CommonMethods import *
00043 
00044 try: # FUTURE: Python 2.6, prior to 2.6 requires simplejson
00045     import json
00046 except:
00047     try:
00048         import simplejson as json
00049     except:
00050         error = "Please set a crab environment in order to get the proper JSON lib"
00051         exit(error)
00052 
00053 #####################################################################################
00054 # General functions
00055 #####################################################################################
def getLastUploadedIOV(tagName,destDB="oracle://cms_orcoff_prod/CMS_COND_31X_BEAMSPOT"):
    """Return the last IOV listed for tagName in destDB.

    Runs cmscond_list_iov through the shell.  When the command fails
    because the tag's metadata entry does not exist, 1 is returned so a
    new tag gets created; any other failure terminates the script.
    The script also terminates when the tag exists but no IOV value
    can be extracted from the listing.
    """
    baseCommand = "cmscond_list_iov -c " + destDB + " -P /afs/cern.ch/cms/DB/conddb -t " + tagName
    status, message = commands.getstatusoutput(baseCommand)
    if status != 0:
        if ("metadata entry \"" + tagName + "\" does not exist") not in message:
            exit("ERROR: Can\'t connect to db because:\n" + message)
        print("Creating a new tag because I got the following error contacting the DB")
        print(message)
        return 1

    # Second pass over the same listing: keep only the first column of
    # the last line that contains "DB=".
    lastIOV = commands.getstatusoutput(baseCommand + " | grep DB= | tail -1 | awk \'{print $1}\'")[1]

    # WARNING when we pass to lumi IOV this should be long long
    if lastIOV == '':
        exit("ERROR: The tag " + tagName + " exists but I can't get the value of the last IOV")

    return long(lastIOV)
00078 
00079 ########################################################################
def getListOfFilesToProcess(dataSet,lastRun=-1):
    """Return the lines of the DBS file listing for dataSet that contain .root.

    When lastRun is not -1 the query is restricted to runs greater than
    lastRun.  The exit status of the command is not checked; the output
    is simply split on newlines.
    """
    pieces = ["dbs --search --query \"find file where dataset=" + dataSet]
    if lastRun != -1:
        pieces.append(" and run > " + str(lastRun))
    pieces.append("\" | grep .root")
    listing = commands.getstatusoutput("".join(pieces))[1]
    return listing.split('\n')
00088 
00089 ########################################################################
def getNumberOfFilesToProcessForRun(dataSet,run):
    """Return how many .root lines DBS lists for the given dataSet and run.

    A non-zero exit status of the shell pipeline is reported as 0 files.
    """
    status, listing = commands.getstatusoutput(
        "dbs --search --query \"find file where dataset=" + dataSet + " and run = " + str(run) + "\" | grep .root")
    if status == 0:
        return len(listing.split('\n'))
    return 0
00098 
00099 ########################################################################
def getListOfRunsAndLumiFromDBS(dataSet,lastRun=-1):
    """Query DBS for the (run, lumi) pairs of one or more datasets.

    dataSet is a comma separated list of dataset names; each one is
    queried separately (restricted to runs > lastRun unless lastRun is
    -1) and the results are merged.  Each query is attempted up to
    three times.  Returns a dictionary mapping run number to the list
    of lumi sections found for it.  Terminates the script when the
    last attempt of a query returns a non-zero status.
    """
    rawLines = []
    for oneDataset in dataSet.split(','):
        queryCommand = "dbs --search --query \"find run,lumi where dataset=" + oneDataset
        if lastRun != -1:
            queryCommand += " and run > " + str(lastRun)
        queryCommand += "\""
        print(" >> " + queryCommand)

        attempt = 0
        while attempt < 3:
            output = commands.getstatusoutput(queryCommand)
            if output[0] == 0 and "ERROR" not in output[1] and "Error" not in output[1]:
                break
            attempt += 1
        # NOTE(review): only the exit status is checked here, so output
        # that still contains "ERROR"/"Error" after the third attempt is
        # parsed anyway -- confirm this is intended.
        if output[0] != 0:
            exit("ERROR: I can't contact DBS for the following reason:\n" + output[1])
        rawLines.extend(output[1].split('\n'))

    # Every line holding two whitespace separated integers is taken as
    # "run lumi"; everything else is ignored.
    pairPattern = re.compile(r'(\d+)\s+(\d+)')
    runsAndLumis = {}
    for rawLine in rawLines:
        found = pairPattern.search(rawLine)
        if found:
            runsAndLumis.setdefault(long(found.group(1)), []).append(long(found.group(2)))

    return runsAndLumis
00133 
00134 ########################################################################
def getListOfRunsAndLumiFromRR(lastRun=-1):
    """Fetch from the Run Registry the runs after lastRun and their lumi data.

    Two exports are requested over XML-RPC for the "Collisions10" group:
    the run table (CSV) and the run/lumi-section table (JSON) restricted
    to the DCS flags listed in the selection below.  The pair of
    requests is attempted up to 10 times, 5 seconds apart; after the
    10th failure the script terminates.

    Returns a dictionary keyed by run number (long).  The value is
    whatever the JSON export holds for that run, or [[]] when the run
    is in the run table but absent from the JSON export.
    """
    RunReg  ="https://pccmsdqm04.cern.ch/runregistry"
    #RunReg  = "https://localhost:40010/runregistry"
    #Dataset=%Online%
    Group   = "Collisions10"

    # get handler to RR XML-RPC server
    FULLADDRESS=RunReg + "/xmlrpc"
    server = xmlrpclib.ServerProxy(FULLADDRESS)
    sel_runtable="{groupName} ='" + Group + "' and {runNumber} > " + str(lastRun) 
    sel_dcstable="{groupName} ='" + Group + "' and {runNumber} > " + str(lastRun) + " and {parDcsBpix} = 1 and {parDcsFpix} = 1 and {parDcsTibtid} = 1 and {parDcsTecM} = 1 and {parDcsTecP} = 1 and {parDcsTob} = 1 and {parDcsEbminus} = 1 and {parDcsEbplus} = 1 and {parDcsEeMinus} = 1 and {parDcsEePlus} = 1 and {parDcsEsMinus} = 1 and {parDcsEsPlus} = 1 and {parDcsHbheA} = 1 and {parDcsHbheB} = 1 and {parDcsHbheC} = 1 and {parDcsH0} = 1 and {parDcsHf} = 1"

    tries = 0
    while tries<10:
        try:
            run_data = server.DataExporter.export('RUN'           , 'GLOBAL', 'csv_runs', sel_runtable)
            dcs_data = server.DataExporter.export('RUNLUMISECTION', 'GLOBAL', 'json'    , sel_dcstable)
            break
        except Exception:
            # Only ordinary errors are retried; a bare "except:" would
            # also swallow KeyboardInterrupt/SystemExit and keep looping.
            print("Something wrong in accessing runregistry, retrying in 5s....")
            tries += 1
            time.sleep(5)
        if tries==10:
            error = "Run registry unaccessible.....exiting now"
            exit(error)

    # First CSV column of every line; non-numeric entries (e.g. a header
    # line) are skipped.
    listOfRuns=[]
    for line in run_data.split("\n"):
        run=line.split(',')[0]
        if run.isdigit():
            listOfRuns.append(run)

    selected_dcs={}
    jsonList=json.loads(dcs_data)

    # The run table drives the result: runs missing from the JSON export
    # are kept with an empty placeholder instead of being dropped.
    for element in listOfRuns:
        if element in jsonList:
            selected_dcs[long(element)]=jsonList[element]
        else:
            print("WARNING: Run " + element + " is a collision10 run with 0 lumis in Run Registry!")
            selected_dcs[long(element)]= [[]] 
    return selected_dcs
00184 
00185 ########################################################################
def getLastClosedRun(DBSListOfFiles):
    """Return the second highest run number found in DBSListOfFiles.

    The highest run is presumably treated as possibly still open, so
    the one before it is reported.  Returns -1 when fewer than two
    distinct run numbers are present.
    """
    distinctRuns = []
    for dbsName in DBSListOfFiles:
        number = getRunNumberFromDBSName(dbsName)
        if number not in distinctRuns:
            distinctRuns.append(number)

    if len(distinctRuns) > 1:
        distinctRuns.sort()
        return long(distinctRuns[-2])
    return -1  # No closed run
00198     
00199 ########################################################################
def getRunNumberFromFileName(fileName):
    """Extract the run number from a result file name.

    The name is searched for <non-digits>_<digits>_<digits>_ and the
    second group of digits is returned as a long; -1 is returned when
    the pattern is not found.
    """
    found = re.search(r'(\D+)_(\d+)_(\d+)_', fileName)
    if found:
        return long(found.group(3))
    return -1
00206 
00207 ########################################################################
def getRunNumberFromDBSName(fileName):
    """Extract the run number from a DBS file path.

    The path is searched for <non-digits>/<d1>/<d2>/<d3>/<non-digits>;
    the run number is the concatenation of d2 and d3, returned as a
    long.  Returns -1 when the pattern is not found.
    """
    found = re.search(r'(\D+)/(\d+)/(\d+)/(\d+)/(\D+)', fileName)
    if found:
        return long(found.group(3) + found.group(4))
    return -1
00213     
00214 ########################################################################
def getNewRunList(fromDir,lastUploadedIOV):
    """Return the .txt files in fromDir whose run number is above lastUploadedIOV.

    The listing comes from ls(fromDir,".txt") and the run number of
    each entry from getRunNumberFromFileName; the listing order is kept.
    """
    return [candidate for candidate in ls(fromDir,".txt")
            if getRunNumberFromFileName(candidate) > lastUploadedIOV]
00224 
00225 ########################################################################
def selectFilesToProcess(listOfRunsAndLumiFromDBS,listOfRunsAndLumiFromRR,newRunList,runListDir,dataSet,mailList,dbsTolerance,dbsTolerancePercent,rrTolerance,missingFilesTolerance,missingLumisTimeout):
    """Select the result files whose runs are complete enough to be uploaded.

    The result files in newRunList (read from runListDir) are scanned for
    their "Runnumber" and "LumiRange" lines.  Then, for every Run Registry
    run in ascending order, the processed lumis are checked against DBS
    and, if DBS lumis are missing, against the Run Registry.  The scan
    stops at the first run that cannot be accepted, so only files of the
    runs before it are returned.

    Parameters:
      listOfRunsAndLumiFromDBS -- dict run -> list of lumis known to DBS
      listOfRunsAndLumiFromRR  -- dict run -> list of lumi ranges; each
                                  range is indexed [0]..[1] inclusive, an
                                  empty list marks "no lumis"
      newRunList               -- names of the result files to consider
      runListDir               -- directory prefix prepended to each name
      dataSet                  -- comma separated dataset names, used to
                                  ask DBS for the number of files per run
      mailList                 -- passed to sendEmail on errors
      dbsTolerance             -- number of unprocessed lumis tolerated
      dbsTolerancePercent      -- percentage of unprocessed lumis tolerated
                                  (relative to the Run Registry lumis)
      rrTolerance              -- a missing run with at most this many
                                  lumis is ignored once its timeout expired
      missingFilesTolerance    -- number of missing files that selects the
                                  DBS_MISMATCH timeout rather than the
                                  DBS_VERY_BIG_MISMATCH one
      missingLumisTimeout      -- timeout passed to timeoutManager
                                  (divided by 3600 to print hours)

    Returns the list of file names to process.  Calls exit() on
    inconsistent input (a lumi range spanning more than one lumi, or a
    processed Run Registry run that is not in DBS).
    """
    runsAndLumisProcessed = {}
    runsAndFiles = {}
    # Pass 1: collect, per run, the lumis and the files that were processed.
    for fileName in newRunList:
        file = open(runListDir+fileName)
        for line in file:
            if line.find("Runnumber") != -1:
                run = long(line.replace('\n','').split(' ')[1])
            elif line.find("LumiRange") != -1:
                lumiLine = line.replace('\n','').split(' ')
                begLumi = long(lumiLine[1])
                endLumi = long(lumiLine[3])
                if begLumi != endLumi:
                    # NOTE(review): exit() here leaves the file open.
                    error = "The lumi range is greater than 1 for run " + str(run) + " " + line + " in file: " + runListDir + fileName
                    exit(error)
                else:
                    if not run in runsAndLumisProcessed:
                        runsAndLumisProcessed[run] = []
                    if begLumi in runsAndLumisProcessed[run]:
                        print "Lumi " + str(begLumi) + " in event " + str(run) + " already exist. This MUST not happen but right now I will ignore this lumi!"
                    else:    
                        runsAndLumisProcessed[run].append(begLumi)
        # NOTE(review): 'run' is whatever the last "Runnumber" line set; it is
        # unbound (or stale from the previous file) if a file has no such line.
        if not run in runsAndFiles:
            runsAndFiles[run] = []
        runsAndFiles[run].append(fileName)    
        file.close()

    rrKeys = listOfRunsAndLumiFromRR.keys()
    rrKeys.sort()
    dbsKeys = listOfRunsAndLumiFromDBS.keys()
    dbsKeys.sort()
    #I remove the last entry from DBS since I am not sure it is an already closed run!
    # (pop() raises IndexError on an empty DBS dictionary.)
    lastUnclosedRun = dbsKeys.pop()
    #print "Last unclosed run: " + str(lastUnclosedRun)
    procKeys = runsAndLumisProcessed.keys()
    procKeys.sort()
    #print "Run Registry:"    
    #print rrKeys
    #print "DBS:"    
    #print dbsKeys
    #print "List:"    
    #print procKeys
    #print lastUnclosedRun
    filesToProcess = []
    # Pass 2: walk the Run Registry runs in ascending order and stop at the
    # first one that cannot be accepted.
    for run in rrKeys:
        # Expand the inclusive [first,last] ranges into a flat list of lumis.
        RRList = []
        for lumiRange in listOfRunsAndLumiFromRR[run]:
            if lumiRange != []: 
                for l in range(lumiRange[0],lumiRange[1]+1):
                    RRList.append(long(l))
        if run in procKeys and run < lastUnclosedRun:
            #print "run " + str(run) + " is in procKeys"
            # 'run != lastUnclosedRun' always holds here because of the
            # 'run < lastUnclosedRun' test above.
            if not run in dbsKeys and run != lastUnclosedRun:
                error = "Impossible but run " + str(run) + " has been processed and it is also in the run registry but it is not in DBS!" 
                exit(error)
            print "Working on run " + str(run)
            # Use the file count of the first dataset that has files for this run.
            nFiles = 0
            for data in dataSet.split(','):
                nFiles = getNumberOfFilesToProcessForRun(data,run)
                if nFiles != 0:
                    break
            # Timeout bookkeeping.  Judging by the messages printed below,
            # timeoutManager(name,timeout) returns 1 when a previously set
            # timeout has expired, -1 when it has just been set, and another
            # value while it is still running -- TODO confirm against
            # CommonMethods.
            if len(runsAndFiles[run]) < nFiles:
                print "I haven't processed all files yet : " + str(len(runsAndFiles[run])) + " out of " + str(nFiles) + " for run: " + str(run) 
                if nFiles - len(runsAndFiles[run]) <= missingFilesTolerance:
                    timeoutManager("DBS_VERY_BIG_MISMATCH_Run"+str(run)) # resetting this timeout
                    timeoutType = timeoutManager("DBS_MISMATCH_Run"+str(run),missingLumisTimeout)
                    if timeoutType == 1:
                        # Few files missing and waited long enough: carry on
                        # with the lumi checks below.
                        print "WARNING: I previously set a timeout that expired...I'll continue with the script even if I didn't process all the lumis!"
                    else:
                        if timeoutType == -1:
                            print "WARNING: Setting the DBS_MISMATCH_Run" + str(run) + " timeout because I haven't processed all files!"
                        else:
                            print "WARNING: Timeout DBS_MISMATCH_Run" + str(run) + " is in progress."
                        return filesToProcess
                else:
                    # Too many files missing: every branch below returns.
                    timeoutType = timeoutManager("DBS_VERY_BIG_MISMATCH_Run"+str(run),missingLumisTimeout)
                    if timeoutType == 1:
                        error = "ERROR: I previously set a timeout that expired...I can't continue with the script because there are too many (" + str(nFiles - len(runsAndFiles[run])) + " files missing) and for too long " + str(missingLumisTimeout/3600) + " hours! I will process anyway the runs before this one (" + str(run) + ")"
                        sendEmail(mailList,error)
                        return filesToProcess
                        #exit(error)
                    else:
                        if timeoutType == -1:
                            print "WARNING: Setting the DBS_VERY_BIG_MISMATCH_Run" + str(run) + " timeout because I haven't processed all files!"
                        else:
                            print "WARNING: Timeout DBS_VERY_BIG_MISMATCH_Run" + str(run) + " is in progress."
                        return filesToProcess
                    
            else:
                # All files are there: same single-argument calls as the one
                # marked "resetting this timeout" above.
                timeoutManager("DBS_VERY_BIG_MISMATCH_Run"+str(run))
                timeoutManager("DBS_MISMATCH_Run"+str(run))
                print "I have processed " + str(len(runsAndFiles[run])) + " out of " + str(nFiles) + " files that are in DBS. So I should have all the lumis!" 
            errors          = []
            badProcessed    = []
            badDBSProcessed = []
            badDBS          = []
            badRRProcessed  = []
            badRR           = []
            #It is important for runsAndLumisProcessed[run] to be the first because the comparision is not ==
            # compareLumiLists returns (lumis only in the 2nd list, lumis only
            # in the 1st list): badDBSProcessed = DBS lumis not processed,
            # badDBS = processed lumis unknown to DBS.
            badDBSProcessed,badDBS = compareLumiLists(runsAndLumisProcessed[run],listOfRunsAndLumiFromDBS[run],errors)
            for i in range(0,len(errors)):
                errors[i] = errors[i].replace("listA","the processed lumis")
                errors[i] = errors[i].replace("listB","DBS")
            #print errors
            #print badProcessed
            #print badDBS
            #exit("ciao")
            if len(badDBS) != 0:
                print "This is weird because I processed more lumis than the ones that are in DBS!"
            # 'run in rrKeys' is always true inside this loop over rrKeys.
            if len(badDBSProcessed) != 0 and run in rrKeys:
                lastError = len(errors)
                #print RRList            
                #It is important for runsAndLumisProcessed[run] to be the first because the comparision is not ==
                # DBS lumis are missing: fall back to the Run Registry list.
                badRRProcessed,badRR = compareLumiLists(runsAndLumisProcessed[run],RRList,errors)
                for i in range(0,len(errors)):
                    errors[i] = errors[i].replace("listA","the processed lumis")
                    errors[i] = errors[i].replace("listB","Run Registry")
                #print errors
                #print badProcessed
                #print badRunRegistry
                    
                if len(badRRProcessed) != 0:    
                    print "I have not processed some of the lumis that are in the run registry for run: " + str(run)
                    # A lumi counts as really missing only if it is missing
                    # with respect to both DBS and the Run Registry.
                    for lumi in badDBSProcessed:
                        if lumi in badRRProcessed:
                            badProcessed.append(lumi)
                    lenA = len(badProcessed)
                    lenB = len(RRList)
                    # Accept the run if the loss is within either the
                    # percentage or the absolute tolerance.
                    if 100.*lenA/lenB <= dbsTolerancePercent:
                        print "WARNING: I didn't process " + str(100.*lenA/lenB) + "% of the lumis but I am within the " + str(dbsTolerancePercent) + "% set in the configuration. Which corrispond to " + str(lenA) + " out of " + str(lenB) + " lumis"
                        #print errors
                        badProcessed = []
                    elif lenA <= dbsTolerance:
                        print "WARNING: I didn't process " + str(lenA) + " lumis but I am within the " + str(dbsTolerance) + " lumis set in the configuration. Which corrispond to " + str(lenA) + " out of " + str(lenB) + " lumis"
                        #print errors
                        badProcessed = []
                    else:    
                        error = "ERROR: For run " + str(run) + " I didn't process " + str(100.*lenA/lenB) + "% of the lumis and I am not within the " + str(dbsTolerancePercent) + "% set in the configuration. The number of lumis that I didn't process (" + str(lenA) + " out of " + str(lenB) + ") is greater also than the " + str(dbsTolerance) + " lumis that I can tolerate. I can't process runs >= " + str(run) + " but I'll process the runs before!"
                        sendEmail(mailList,error)
                        print error
                        return filesToProcess
                        #exit(errors)
                    #return filesToProcess
                elif len(errors) != 0:
                    print "The number of lumi sections processed didn't match the one in DBS but they cover all the ones in the Run Registry, so it is ok!"
                    #print errors

            #If I get here it means that I passed or the DBS or the RR test            
            if len(badProcessed) == 0:
                for file in runsAndFiles[run]:
                    filesToProcess.append(file)
            else:
                #print errors
                print "This should never happen because if I have errors I return or exit! Run: " + str(run)
        else:
            # The run is in the Run Registry but either has no result files
            # or is not below the last (possibly unclosed) DBS run.
            error = "Run " + str(run) + " is in the run registry but it has not been processed yet!"
            print error
            timeoutType = timeoutManager("MISSING_RUNREGRUN_Run"+str(run),missingLumisTimeout)
            if timeoutType == 1:
                if len(RRList) <= rrTolerance:
                    # Small run, waited long enough: skip it and go on with
                    # the next Run Registry run.
                    error = "WARNING: I previously set the MISSING_RUNREGRUN_Run" + str(run) + " timeout that expired...I am missing run " + str(run) + " but it only had " + str(len(RRList)) + " <= " + str(rrTolerance) + " lumis. So I will continue and ignore it... "
                    #print listOfRunsAndLumiFromRR[run]
                    print error
                    #sendEmail(mailList,error)
                else:
                    error = "ERROR: I previously set the MISSING_RUNREGRUN_Run" + str(run) + " timeout that expired...I am missing run " + str(run) + " which has " + str(len(RRList)) + " > " + str(rrTolerance) + " lumis. I can't continue but I'll process the runs before this one"
                    sendEmail(mailList,error)
                    return filesToProcess
                    #exit(error)
            else:
                if timeoutType == -1:
                    print "WARNING: Setting the MISSING_RUNREGRUN_Run" + str(run) + " timeout because I haven't processed a run!"
                else:
                    print "WARNING: Timeout MISSING_RUNREGRUN_Run" + str(run) + " is in progress."
                return filesToProcess
                    
    return filesToProcess
00403 ########################################################################
def compareLumiLists(listA,listB,errors=None,tolerance=0):
    """Compare two lists of lumi sections and report the differences.

    Parameters:
      listA, listB -- lists of lumi numbers; both are sorted in place
      errors       -- optional list that receives one message per
                      problem found; a fresh list is used when omitted
      tolerance    -- percentage of listB's length by which listA may be
                      shorter before a length error is reported

    Returns the tuple (badA, badB): badA holds the lumis of listB that
    are missing from listA, badB the lumis of listA that are missing
    from listB, each in ascending order.
    """
    # A list default would be shared by every call that omits 'errors'
    # and would keep growing for the lifetime of the process.
    if errors is None:
        errors = []

    lenA = len(listA)
    lenB = len(listB)
    # The comparison is deliberately one sided: only a too short listA
    # is reported.
    if lenA < lenB-(lenB*float(tolerance)/100):
        errors.append("ERROR: The number of lumi sections is different: listA(" + str(lenA) + ")!=(" + str(lenB) + ")listB")

    # Callers receive their lists sorted, as before.
    listA.sort()
    listB.sort()

    # Sets make each membership test constant time instead of a scan of
    # the other list.
    lumisInA = set(listA)
    lumisInB = set(listB)

    badB = []
    for lumi in listA:
        if lumi not in lumisInB:
            errors.append("Lumi (" + str(lumi) + ") is in listA but not in listB")
            badB.append(lumi)

    badA = []
    for lumi in listB:
        if lumi not in lumisInA:
            errors.append("Lumi (" + str(lumi) + ") is in listB but not in listA")
            badA.append(lumi)

    return badA,badB
00441 
00442 ########################################################################
def removeUncompleteRuns(newRunList,dataSet):
    """Print, for every run in newRunList, whether all its DBS files were processed.

    The number of result files per run is compared with the number of
    files DBS reports for that run in dataSet.  Nothing is removed and
    nothing is returned; the function only reports.
    """
    fileCounts = {}
    for resultName in newRunList:
        runNumber = getRunNumberFromFileName(resultName)
        fileCounts[runNumber] = fileCounts.get(runNumber, 0) + 1

    for runNumber in fileCounts.keys():
        expected = getNumberOfFilesToProcessForRun(dataSet,runNumber)
        done = fileCounts[runNumber]
        if done < expected:
            print("I haven't processed all files yet : " + str(done) + " out of " + str(expected) + " for run: " + str(runNumber))
        else:
            print("All files have been processed for run: " + str(runNumber) + " (" + str(done) + " out of " + str(expected) + ")")
00457             
00458 ########################################################################
def aselectFilesToProcess(listOfFilesToProcess,newRunList):
    """Select the result files of closed runs whose file count matches DBS.

    Parameters:
      listOfFilesToProcess -- DBS file names; the run number of each is
                              taken with getRunNumberFromDBSName
      newRunList           -- result file names; the run number of each
                              is taken with getRunNumberFromFileName

    For every processed run not above getLastClosedRun(listOfFilesToProcess)
    the number of result files must equal the number of DBS files.
    Returns the result files of those runs, grouped by ascending run.
    Terminates the script when a processed run is unknown to DBS or when
    the two file counts differ.
    """
    runsToProcess = {}
    for dbsFile in listOfFilesToProcess:
        run = getRunNumberFromDBSName(dbsFile)
        runsToProcess[run] = runsToProcess.get(run, 0) + 1

    processedRuns = {}
    for resultFile in newRunList:
        run = getRunNumberFromFileName(resultFile)
        processedRuns[run] = processedRuns.get(run, 0) + 1

    #WARNING: getLastClosedRun MUST also have a timeout otherwise the last run will not be considered
    lastClosedRun = getLastClosedRun(listOfFilesToProcess)

    selectedFiles = []
    for run in sorted(processedRuns):
        if run > lastClosedRun:
            continue
        # Check DBS membership before touching runsToProcess[run]:
        # reading it first raised KeyError and hid the message below.
        if run not in runsToProcess:
            exit("ERROR: I have a result file for run " + str(run) + " but it doesn't exist in DBS. Impossible but it happened!")
        print("For run " + str(run) + " I have processed " + str(processedRuns[run]) + " files and in DBS there are " + str(runsToProcess[run]) + " files!")
        # NOTE(review): the result of this call was never used and the
        # function is not defined in the visible part of the file; the
        # call is kept in case it has side effects -- confirm.
        getDBSLumiListForRun(run)
        if processedRuns[run] != runsToProcess[run]:
            exit("ERROR: For run " + str(run) + " I have processed " + str(processedRuns[run]) + " files but in DBS there are " + str(runsToProcess[run]) + " files!")
        for resultFile in newRunList:
            if run == getRunNumberFromFileName(resultFile):
                selectedFiles.append(resultFile)
    return selectedFiles
00499 
00500 ########################################################################
00501 def main():
00502     ######### COMMAND LINE OPTIONS ##############
00503     option,args = parse(__doc__)
00504 
00505     ######### Check if there is already a megascript running ########
00506     if option.lock:
00507         setLockName('.' + option.lock)
00508         if checkLock():
00509             print "There is already a megascript runnning...exiting"
00510             return
00511         else:
00512             lock()
00513             
00514 
00515     destDB = 'oracle://cms_orcon_prod/CMS_COND_31X_BEAMSPOT'
00516     if option.Test:
00517         destDB = 'oracle://cms_orcoff_prep/CMS_COND_BEAMSPOT'
00518 
00519     ######### CONFIGURATION FILE ################
00520     cfgFile = "BeamSpotWorkflow.cfg"    
00521     if option.cfg:
00522         cfgFile = option.cfg
00523     configurationFile = os.getenv("CMSSW_BASE") + "/src/RecoVertex/BeamSpotProducer/scripts/" + cfgFile
00524     configuration     = ConfigParser.ConfigParser()
00525     print 'Reading configuration from ', configurationFile
00526     configuration.read(configurationFile)
00527 
00528     sourceDir           = configuration.get('Common','SOURCE_DIR')
00529     archiveDir            = configuration.get('Common','ARCHIVE_DIR')
00530     workingDir            = configuration.get('Common','WORKING_DIR')
00531     databaseTag           = configuration.get('Common','DBTAG')
00532     dataSet               = configuration.get('Common','DATASET')
00533     fileIOVBase           = configuration.get('Common','FILE_IOV_BASE')
00534     dbIOVBase             = configuration.get('Common','DB_IOV_BASE')
00535     dbsTolerance          = float(configuration.get('Common','DBS_TOLERANCE'))
00536     dbsTolerancePercent   = float(configuration.get('Common','DBS_TOLERANCE_PERCENT'))
00537     rrTolerance           = float(configuration.get('Common','RR_TOLERANCE'))
00538     missingFilesTolerance = float(configuration.get('Common','MISSING_FILES_TOLERANCE'))
00539     missingLumisTimeout   = float(configuration.get('Common','MISSING_LUMIS_TIMEOUT'))
00540     mailList              = configuration.get('Common','EMAIL')
00541 
00542     ######### DIRECTORIES SETUP #################
00543     if sourceDir[len(sourceDir)-1] != '/':
00544         sourceDir = sourceDir + '/'
00545     if not dirExists(sourceDir):
00546         error = "ERROR: The source directory " + sourceDir + " doesn't exist!"
00547         sendEmail(mailList,error)
00548         exit(error)
00549 
00550     if archiveDir[len(archiveDir)-1] != '/':
00551         archiveDir = archiveDir + '/'
00552     if not os.path.isdir(archiveDir):
00553         os.mkdir(archiveDir)
00554 
00555     if workingDir[len(workingDir)-1] != '/':
00556         workingDir = workingDir + '/'
00557     if not os.path.isdir(workingDir):
00558         os.mkdir(workingDir)
00559     else:
00560         os.system("rm -f "+ workingDir + "*") 
00561 
00562 
00563     print "Getting last IOV for tag: " + databaseTag
00564     lastUploadedIOV = 1
00565     if destDB == "oracle://cms_orcon_prod/CMS_COND_31X_BEAMSPOT": 
00566         lastUploadedIOV = getLastUploadedIOV(databaseTag)
00567     else:
00568         lastUploadedIOV = getLastUploadedIOV(databaseTag,destDB)
00569         
00570     #lastUploadedIOV = 133885
00571     #lastUploadedIOV = 575216380019329
00572     if dbIOVBase == "lumiid":
00573         lastUploadedIOV = unpackLumiid(lastUploadedIOV)["run"]
00574 
00575     ######### Get list of files processed after the last IOV  
00576     print "Getting list of files processed after IOV " + str(lastUploadedIOV)
00577     newProcessedRunList      = getNewRunList(sourceDir,lastUploadedIOV)
00578     if len(newProcessedRunList) == 0:
00579         exit("There are no new runs after " + str(lastUploadedIOV))
00580 
00581     ######### Copy files to archive directory
00582     print "Copying files to archive directory"
00583     copiedFiles = []
00584     for i in range(3):
00585         copiedFiles = cp(sourceDir,archiveDir,newProcessedRunList)    
00586         if len(copiedFiles) == len(newProcessedRunList):
00587             break;
00588     if len(copiedFiles) != len(newProcessedRunList):
00589         error = "ERROR: I can't copy more than " + str(len(copiedFiles)) + " files out of " + str(len(newProcessedRunList)) 
00590         sendEmail(mailList,error)
00591         exit(error)
00592 
00593 
00594     ######### Get from DBS the list of files after last IOV    
00595     #listOfFilesToProcess = getListOfFilesToProcess(dataSet,lastUploadedIOV) 
00596     print "Getting list of files from DBS"
00597     listOfRunsAndLumiFromDBS = getListOfRunsAndLumiFromDBS(dataSet,lastUploadedIOV)
00598     if len(listOfRunsAndLumiFromDBS) == 0:
00599        exit("There are no files in DBS to process") 
00600     print "Getting list of files from RR"
00601     listOfRunsAndLumiFromRR  = getListOfRunsAndLumiFromRR(lastUploadedIOV) 
00602     ######### Get list of files to process for DB
00603     #selectedFilesToProcess = selectFilesToProcess(listOfFilesToProcess,copiedFiles)
00604     #completeProcessedRuns = removeUncompleteRuns(copiedFiles,dataSet)
00605     #print copiedFiles
00606     #print completeProcessedRuns
00607     #exit("complete")
00608     print "Getting list of files to process"
00609     selectedFilesToProcess = selectFilesToProcess(listOfRunsAndLumiFromDBS,listOfRunsAndLumiFromRR,copiedFiles,archiveDir,dataSet,mailList,dbsTolerance,dbsTolerancePercent,rrTolerance,missingFilesTolerance,missingLumisTimeout)
00610     if len(selectedFilesToProcess) == 0:
00611        exit("There are no files to process")
00612         
00613     #print selectedFilesToProcess
00614     ######### Copy files to working directory
00615     print "Copying files from archive to working directory"
00616     copiedFiles = []
00617     for i in range(3):
00618         copiedFiles = cp(archiveDir,workingDir,selectedFilesToProcess)    
00619         if len(copiedFiles) == len(selectedFilesToProcess):
00620             break;
00621         else:
00622             commands.getstatusoutput("rm -rf " + workingDir)
00623     if len(copiedFiles) != len(selectedFilesToProcess):
00624         error = "ERROR: I can't copy more than " + str(len(copiedFiles)) + " files out of " + str(len(selectedFilesToProcess)) + " from " + archiveDir + " to " + workingDir 
00625         sendEmail(mailList,error)
00626         exit(error)
00627 
00628     print "Sorting and cleaning beamlist"
00629     beamSpotObjList = []
00630     for fileName in copiedFiles:
00631         readBeamSpotFile(workingDir+fileName,beamSpotObjList,fileIOVBase)
00632 
00633     sortAndCleanBeamList(beamSpotObjList,fileIOVBase)
00634 
00635     if len(beamSpotObjList) == 0:
00636         error = "WARNING: None of the processed and copied payloads has a valid fit so there are no results. This shouldn't happen since we are filtering using the run register, so there should be at least one good run."
00637         exit(error)
00638 
00639     payloadFileName = "PayloadFile.txt"
00640 
00641     runBased = False
00642     if dbIOVBase == "runnumber":
00643         runBased = True
00644         
00645     payloadList = createWeightedPayloads(workingDir+payloadFileName,beamSpotObjList,runBased)
00646     if len(payloadList) == 0:
00647         error = "WARNING: I wasn't able to create any payload even if I have some BeamSpot objects."
00648         exit(error)
00649        
00650 
00651     tmpPayloadFileName = workingDir + "SingleTmpPayloadFile.txt"
00652     tmpSqliteFileName  = workingDir + "SingleTmpSqliteFile.db"
00653 
00654     writeDBTemplate = os.getenv("CMSSW_BASE") + "/src/RecoVertex/BeamSpotProducer/test/write2DB_template.py"
00655     readDBTemplate  = os.getenv("CMSSW_BASE") + "/src/RecoVertex/BeamSpotProducer/test/readDB_template.py"
00656     payloadNumber = -1
00657     iovSinceFirst = '0';
00658     iovTillLast   = '0';
00659 
00660     #Creating the final name for the combined sqlite file
00661     uuid = commands.getstatusoutput('uuidgen -t')[1]
00662     final_sqlite_file_name = databaseTag + '@' + uuid
00663     sqlite_file     = workingDir + final_sqlite_file_name + ".db"
00664     metadata_file   = workingDir + final_sqlite_file_name + ".txt"
00665 
00666     for payload in payloadList:
00667         payloadNumber += 1
00668         if option.zlarge:
00669             payload.sigmaZ = 10
00670             payload.sigmaZerr = 2.5e-05
00671         tmpFile = file(tmpPayloadFileName,'w')
00672         dumpValues(payload,tmpFile)
00673         tmpFile.close()
00674         if not writeSqliteFile(tmpSqliteFileName,databaseTag,dbIOVBase,tmpPayloadFileName,writeDBTemplate,workingDir):
00675             error = "An error occurred while writing the sqlite file: " + tmpSqliteFileName
00676             exit(error)
00677         readSqliteFile(tmpSqliteFileName,databaseTag,readDBTemplate,workingDir)
00678         
00679         ##############################################################
00680         #WARNING I am not sure if I am packing the right values
00681         if dbIOVBase == "runnumber":
00682             iov_since = str(payload.Run)
00683             iov_till  = iov_since
00684         elif dbIOVBase == "lumiid":
00685             iov_since = str( pack(int(payload.Run), int(payload.IOVfirst)) )
00686             iov_till  = str( pack(int(payload.Run), int(payload.IOVlast)) )
00687         elif dbIOVBase == "timestamp":
00688             error = "ERROR: IOV " + dbIOVBase + " still not implemented."
00689             exit(error)
00690         else:
00691             error = "ERROR: IOV " + dbIOVBase + " unrecognized!"
00692             exit(error)
00693 
00694         if payloadNumber == 0:
00695             iovSinceFirst = iov_since
00696         if payloadNumber == len(payloadList)-1:
00697             iovTillLast   = iov_till
00698             
00699         appendSqliteFile(final_sqlite_file_name + ".db", tmpSqliteFileName, databaseTag, iov_since, iov_till ,workingDir)
00700         os.system("rm -f " + tmpPayloadFileName + " " + tmpSqliteFileName)
00701 
00702         
00703     #### CREATE payload for merged output
00704 
00705     print " create MERGED payload card for dropbox ..."
00706 
00707     dfile = open(metadata_file,'w')
00708 
00709     dfile.write('destDB '  + destDB        +'\n')
00710     dfile.write('tag '     + databaseTag   +'\n')
00711     dfile.write('inputtag'                 +'\n')
00712     dfile.write('since '   + iovSinceFirst +'\n')
00713     #dfile.write('till '    + iov_till      +'\n')
00714     dfile.write('Timetype '+ dbIOVBase     +'\n')
00715 
00716     ###################################################
00717     # WARNING tagType forced to offline
00718     print "WARNING TAG TYPE forced to be just offline"
00719     tagType = "offline"
00720     checkType = tagType
00721     if tagType == "express":
00722         checkType = "hlt"
00723     dfile.write('IOVCheck ' + checkType + '\n')
00724     dfile.write('usertext Beam spot position\n')
00725             
00726     dfile.close()
00727 
00728                                                                                                 
00729 
00730     if option.upload:
00731         print " scp files to offline Drop Box"
00732         dropbox = "/DropBox"
00733         if option.Test:
00734             dropbox = "/DropBox_test"
00735         print "UPLOADING TO TEST DB"
00736         uploadSqliteFile(workingDir, final_sqlite_file_name, dropbox)
00737                    
00738     archive_sqlite_file_name = "Payloads_" + iovSinceFirst + "_" + iovTillLast + "_" + final_sqlite_file_name
00739     archive_results_file_name = "Payloads_" + iovSinceFirst + "_" + iovTillLast + "_" + databaseTag + ".txt"
00740     if not os.path.isdir(archiveDir + 'payloads'):
00741         os.mkdir(archiveDir + 'payloads')
00742     commands.getstatusoutput('mv ' + sqlite_file   + ' ' + archiveDir + 'payloads/' + archive_sqlite_file_name + '.db')
00743     commands.getstatusoutput('mv ' + metadata_file + ' ' + archiveDir + 'payloads/' + archive_sqlite_file_name + '.txt')
00744     commands.getstatusoutput('cp ' + workingDir + payloadFileName + ' ' + archiveDir + 'payloads/' + archive_results_file_name)
00745   
00746     print archiveDir + "payloads/" + archive_sqlite_file_name + '.db'
00747     print archiveDir + "payloads/" + archive_sqlite_file_name + '.txt'
00748 
00749     rmLock()
00750     
00751 if __name__ == '__main__':  # script entry point: run the workflow only when executed directly, not when imported
00752     main()