
JetHT.py
import copy
import os
import math
import re
from datetime import date
import Alignment.OfflineValidation.TkAlAllInOneTool.findAndChange as fnc

# Find the number of files in a file list. If the list defines a run number before each file,
# find the number of unique runs instead and return the list of runs together with their count.
def findNumberOfUnits(fileList):

    with open(fileList,"r") as inputFiles:

        fileContent = inputFiles.readlines()
        firstLine = fileContent[0].rstrip()
        runsInFiles = []

        # If each line only contains one file, return the number of non-empty lines, i.e. the number of files
        if len(firstLine.split()) == 1:
            nInputFiles = sum(1 for line in fileContent if line.rstrip())
            return runsInFiles, nInputFiles

        # We now know that the input file is in the format "run file". Return the list of unique runs together with their count.
        for line in fileContent:

            # Skip empty lines to avoid an index error in the split below
            if not line.rstrip():
                continue

            run = line.split()[0]
            if not run in runsInFiles:
                runsInFiles.append(run)

        return runsInFiles, len(runsInFiles)
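
# Illustration only (not part of the original tool): a minimal sketch of the two list formats
# accepted by findNumberOfUnits, assuming hypothetical list files with the contents shown below.
#
#   files.txt                       runsAndFiles.txt
#   /store/data/fileA.root          305081 /store/data/fileA.root
#   /store/data/fileB.root          305081 /store/data/fileB.root
#                                   305112 /store/data/fileC.root
#
#   findNumberOfUnits("files.txt")         -> ([], 2)
#   findNumberOfUnits("runsAndFiles.txt")  -> (["305081", "305112"], 2)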

def JetHT(config, validationDir):

    # List with all and merge jobs
    jobs = []
    mergeJobs = []
    runType = "single"

    # Find today
    today = date.today()
    dayFormat = today.strftime("%Y-%m-%d")

    # Start with single JetHT jobs
    if not runType in config["validations"]["JetHT"]:
        raise Exception("No 'single' key word in config for JetHT")
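
    # Illustration only (assumed layout, not part of the original file): the loop below expects
    # the "single" block of the validation configuration to look roughly like the sketch here,
    # with hypothetical names used for the dataset and alignment keys.
    #
    #   "validations": {
    #     "JetHT": {
    #       "single": {
    #         "myDataset": {
    #           "alignments": ["prompt", "rereco"],
    #           "dataset": "myFileList.txt",
    #           "filesPerJob": 5
    #         }
    #       }
    #     }
    #   }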

    for datasetName in config["validations"]["JetHT"][runType]:

        for alignment in config["validations"]["JetHT"][runType][datasetName]["alignments"]:
            # Work directory for each alignment
            workDir = "{}/JetHT/{}/{}/{}".format(validationDir, runType, datasetName, alignment)

            # Write local config
            local = {}
            local["output"] = "{}/{}/JetHT/{}/{}/{}".format(config["LFS"], config["name"], runType, datasetName, alignment)
            local["alignment"] = copy.deepcopy(config["alignments"][alignment])
            local["validation"] = copy.deepcopy(config["validations"]["JetHT"][runType][datasetName])
            local["validation"].pop("alignments")

            useCMSdataset = False
            nInputFiles = 1
            runsInFiles = []
            if "dataset" in config["validations"]["JetHT"][runType][datasetName]:
                inputList = config["validations"]["JetHT"][runType][datasetName]["dataset"]

                # Check if the input is a CMS dataset instead of a file list
                if re.match( r'^/[^/.]+/[^/.]+/[^/.]+$', inputList ):
                    useCMSdataset = True

                # If it is not, read the number of files in the given file list
                else:
                    runsInFiles, nInputFiles = findNumberOfUnits(inputList)
            else:
                inputList = "needToHaveSomeDefaultFileHere.txt"
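
            # Illustration only (hypothetical names): the regular expression above treats a
            # three-component path such as
            #   /JetHT/Run2018A-TkAlMinBias-MyReprocessing-v2/ALCARECO
            # as a CMS dataset, while anything else, e.g. "myFileList.txt", is interpreted as
            # a local file list and passed to findNumberOfUnits.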

            if "filesPerJob" in config["validations"]["JetHT"][runType][datasetName]:
                filesPerJob = config["validations"]["JetHT"][runType][datasetName]["filesPerJob"]
            else:
                filesPerJob = 5

            # If we have defined which runs can be found from which files, we want to define one condor job per run number. Otherwise we do file based splitting.
            oneJobForEachRun = (len(runsInFiles) > 0)
            if oneJobForEachRun:
                nCondorJobs = nInputFiles
                local["runsInFiles"] = runsInFiles
            else:
                nCondorJobs = math.ceil(nInputFiles / filesPerJob)
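
            # Illustration only: with file based splitting, 12 input files and the default of
            # 5 files per job give math.ceil(12 / 5) = 3 condor jobs, whereas a run-based list
            # with 4 unique runs always gives 4 jobs, one per run.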

            # Define lines that need to be changed from the template crab configuration
            crabCustomConfiguration = {"overwrite":[], "remove":[], "add":[]}
            crabCustomConfiguration["overwrite"].append("inputList = \'{}\'".format(inputList))
            crabCustomConfiguration["overwrite"].append("jobTag = \'TkAlJetHTAnalysis_{}_{}_{}_{}\'".format(runType, datasetName, alignment, dayFormat))
            crabCustomConfiguration["overwrite"].append("config.Data.unitsPerJob = {}".format(filesPerJob))

            # If a CMS dataset is defined instead of an input file list, make the corresponding changes in the configuration file
            if useCMSdataset:
                crabCustomConfiguration["remove"].append("inputList")
                crabCustomConfiguration["remove"].append("config.Data.userInputFiles")
                crabCustomConfiguration["remove"].append("config.Data.totalUnits")
                crabCustomConfiguration["remove"].append("config.Data.outputPrimaryDataset")
                crabCustomConfiguration["overwrite"].pop(0) # Remove inputList from the overwrite actions, it is not needed for a CMS dataset
                crabCustomConfiguration["add"].append("config.Data.inputDataset = \'{}\'".format(inputList))
                crabCustomConfiguration["add"].append("config.Data.inputDBS = \'global\'")
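
            # Illustration only (hypothetical names and date): for a CMS dataset the resulting
            # dictionary could look roughly like
            #   {"overwrite": ["jobTag = 'TkAlJetHTAnalysis_single_myDataset_prompt_2024-01-31'",
            #                  "config.Data.unitsPerJob = 5"],
            #    "remove":    ["inputList", "config.Data.userInputFiles",
            #                  "config.Data.totalUnits", "config.Data.outputPrimaryDataset"],
            #    "add":       ["config.Data.inputDataset = '/JetHT/.../ALCARECO'",
            #                  "config.Data.inputDBS = 'global'"]}
            # presumably consumed when the template crab configuration is written out for submission.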

            local["crabCustomConfiguration"] = crabCustomConfiguration

            # Write job info
            job = {
                "name": "JetHT_{}_{}_{}".format(runType, alignment, datasetName),
                "dir": workDir,
                "exe": "cmsRun",
                "cms-config": "{}/src/Alignment/OfflineValidation/python/TkAlAllInOneTool/JetHT_cfg.py".format(os.environ["CMSSW_BASE"]),
                "run-mode": "Condor",
                "nCondorJobs": nCondorJobs,
                "exeArguments": "validation_cfg.py config=validation.json jobNumber=$JOBNUMBER",
                "dependencies": [],
                "config": local,
            }

            jobs.append(job)

    # Merge jobs for JetHT
    if "merge" in config["validations"]["JetHT"]:

        runType = "merge"

        for datasetName in config["validations"]["JetHT"][runType]:

            for alignment in config["validations"]["JetHT"][runType][datasetName]["alignments"]:

                # Work directory for each alignment
                workDir = "{}/JetHT/{}/{}/{}".format(validationDir, runType, datasetName, alignment)

                inputDirectory = "{}/{}/JetHT/single/{}/{}".format(config["LFS"], config["name"], datasetName, alignment)
                outputDirectory = "{}/{}/JetHT/{}/{}/{}".format(config["LFS"], config["name"], runType, datasetName, alignment)

                # Configuration for validateAlignments script
                local = {}
                local["output"] = outputDirectory

                # For eos directory, remove /eos/cms from the beginning of LFS
                eosInputDirectory = inputDirectory
                eosOutputDirectory = outputDirectory

                if inputDirectory.startswith("/eos/cms"):
                    eosInputDirectory = inputDirectory[8:]
                    eosOutputDirectory = outputDirectory[8:]

                # If the directory name starts with /store, we must be working with eos files
                localRun = "true"
                if eosInputDirectory.startswith("/store"):
                    localRun = "false"
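
                # Illustration only (hypothetical path): an LFS such as
                #   /eos/cms/store/group/alca_trackeralign/myValidation
                # is trimmed to /store/group/alca_trackeralign/myValidation and localRun is set
                # to "false"; both are then passed as arguments to addHistograms.sh below.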

                # Write job info
                job = {
                    "name": "JetHT_{}_{}_{}".format(runType, alignment, datasetName),
                    "dir": workDir,
                    "exe": "addHistograms.sh",
                    "exeArguments": "{} {} {} JetHTAnalysis_merged".format(localRun, eosInputDirectory, eosOutputDirectory),
                    "run-mode": "Condor",
                    "flavour": "espresso",
                    "config": local,
                    "dependencies": [],
                }

                for singleJob in jobs:

                    singleAlignment, singleDatasetName = singleJob["name"].split("_")[2:]

                    if singleDatasetName in config["validations"]["JetHT"][runType][datasetName]["singles"]:
                        if singleAlignment == alignment:
                            job["dependencies"].append(singleJob["name"])
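
                # Illustration only (hypothetical names): a single job named
                # "JetHT_single_prompt_myDataset" splits into singleAlignment = "prompt" and
                # singleDatasetName = "myDataset" (the unpacking assumes the alignment and
                # dataset names contain no underscores). If "myDataset" appears in this merge
                # dataset's "singles" list and the alignment matches, the single job is added
                # as a dependency of the merge job.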

                mergeJobs.append(job)

        jobs.extend(mergeJobs)

    # Plotting for JetHT
    if "plot" in config["validations"]["JetHT"]:

        plotJobs = []
        runType = "plot"

        for datasetName in config["validations"]["JetHT"][runType]:

            # Work and output directories for each dataset
            workDir = "{}/JetHT/{}/{}".format(validationDir, runType, datasetName)
            outputDirectory = "{}/{}/JetHT/{}/{}".format(config["LFS"], config["name"], runType, datasetName)

            # Configuration for validateAlignments script
            local = {}
            if "jethtplot" in config["validations"]["JetHT"][runType][datasetName]:
                local["jethtplot"] = copy.deepcopy(config["validations"]["JetHT"][runType][datasetName]["jethtplot"])
            else:
                # Make sure the plotting configuration exists even if it is not given in the main configuration
                local["jethtplot"] = {}
            local["output"] = outputDirectory

            # If the pT binning was changed for the validation job, it needs to be changed for plotting as well
            if "profilePtBorders" in config["validations"]["JetHT"]["single"][datasetName]:
                local["jethtplot"]["widePtBinBorders"] = config["validations"]["JetHT"]["single"][datasetName]["profilePtBorders"]

            local["jethtplot"]["alignments"] = {}

            # Draw all the alignments for each dataset to the same plot
            for alignment in config["validations"]["JetHT"][runType][datasetName]["alignments"]:

                inputDirectory = "{}/{}/JetHT/merge/{}/{}".format(config["LFS"], config["name"], datasetName, alignment)

                eosInputFile = inputDirectory + "/JetHTAnalysis_merged.root"

                # If an eos file path is given, remove /eos/cms from the beginning of the file name
                if eosInputFile.startswith("/eos/cms"):
                    eosInputFile = eosInputFile[8:]

                # If the file name starts with /store, add the CERN EOS path to the file name
                if eosInputFile.startswith("/store"):
                    eosInputFile = "root://eoscms.cern.ch/" + eosInputFile
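
                # Illustration only (hypothetical path): an input file under
                #   /eos/cms/store/group/alca_trackeralign/myValidation/JetHTAnalysis_merged.root
                # first becomes /store/group/alca_trackeralign/myValidation/JetHTAnalysis_merged.root
                # and is then prefixed to
                #   root://eoscms.cern.ch//store/group/alca_trackeralign/myValidation/JetHTAnalysis_merged.root
                # so that the plotter can open it remotely via xrootd.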

                local["jethtplot"]["alignments"][alignment] = copy.deepcopy(config["alignments"][alignment])
                local["jethtplot"]["alignments"][alignment]["inputFile"] = eosInputFile
                local["jethtplot"]["alignments"][alignment]["legendText"] = config["alignments"][alignment]["title"]

            # Check that luminosity per IOV file is defined
            if not "lumiPerIovFile" in local["jethtplot"]:
                local["jethtplot"]["lumiPerIovFile"] = fnc.digest_path("Alignment/OfflineValidation/data/lumiPerRun_Run2.txt")

            # Write job info
            job = {
                "name": "JetHT_{}_{}".format(runType, datasetName),
                "dir": workDir,
                "exe": "jetHtPlotter",
                "run-mode": "Condor",
                "flavour": "espresso",
                "config": local,
                "dependencies": [],
            }

            for mergeJob in mergeJobs:

                mergeAlignment, mergeDatasetName = mergeJob["name"].split("_")[2:]

                if mergeDatasetName in config["validations"]["JetHT"][runType][datasetName]["merges"]:
                    job["dependencies"].append(mergeJob["name"])

            plotJobs.append(job)

        jobs.extend(plotJobs)

    return jobs
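
# Illustration only (not part of the original file): this module is meant to be driven by the
# all-in-one validation tool, which roughly amounts to the sketch below, where
# "myConfig.json" and "/tmp/validation" are hypothetical arguments.
#
#   import json
#   with open("myConfig.json") as configFile:
#       config = json.load(configFile)
#   jobs = JetHT(config, "/tmp/validation")   # list of job dictionaries for condor submission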