
makePileupJSON.py
#!/usr/bin/env python
from __future__ import print_function
import argparse
import RecoLuminosity.LumiDB.LumiConstants as LumiConstants
import re
from math import sqrt
import six

# This script takes a .csv file containing the per-BX luminosity values from brilcalc and processes them to
# produce an output file containing the average and RMS pileup for each lumi section. This can then be fed
# into pileupCalc.py to calculate a final pileup histogram. For more documentation see:
# https://twiki.cern.ch/twiki/bin/viewauth/CMS/PileupJSONFileforData
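#
# Example usage (file names and BX numbers below are placeholders, not values taken from this
# package). The input CSV must contain the per-bunch-crossing data, i.e. brilcalc must be run
# with the --xing option; see the TWiki above for the exact recommended brilcalc options,
# which are roughly along the lines of:
#
#   brilcalc lumi --byls --xing -i <certification JSON> --normtag <normtag file> -o lumi_perBX.csv
#   makePileupJSON.py lumi_perBX.csv pileup_JSON.txt
#   makePileupJSON.py lumi_perBX.csv pileup_JSON.txt --selBX 1,101,201 --no-threshold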

# modified from the estimatePileup.py script in RecoLuminosity/LumiDB
# originally 5 Jan, 2012 Mike Hildreth

if __name__ == '__main__':
    # provides orbitLength and lumiSectionLength, used in the pileup conversion below
    parameters = LumiConstants.ParametersObject()

    parser = argparse.ArgumentParser(description="Script to estimate average and RMS pileup using the per-bunch luminosity information provided by brilcalc. The output is a JSON file containing a dictionary by runs with one entry per lumi section.")
    parser.add_argument('inputFile', help='CSV input file as produced from brilcalc')
    parser.add_argument('outputFile', help='Name of JSON output file')
    parser.add_argument('-b', '--selBX', metavar='BXLIST', help='Comma-separated list of BXs to use (will use all by default)')
    parser.add_argument('-n', '--no-threshold', action='store_true', help='By default, to avoid including spurious luminosity from afterglow in the pileup calculation, bunches with luminosity below a given threshold are excluded. This threshold is 8.0/ub/LS for HFOC at nominal energy, 2.0/ub/LS for HFOC at low energy, and 1.2/ub/LS for other luminometers. If the input data has already been preprocessed (e.g. by using the --xingTr argument in brilcalc) to exclude these bunches, or if you are running on special data with low overall luminosity, then use this flag to disable the application of the threshold.')
    args = parser.parse_args()

    output = args.outputFile

    sel_bx = set()
    if args.selBX:
        for ibx in args.selBX.split(","):
            try:
                bx = int(ibx)
                sel_bx.add(bx)
            except:
                print(ibx, "is not an int")
        print("Processing", args.inputFile, "with selected BXs:", sorted(sel_bx))
    else:
        print("Processing", args.inputFile, "with all BX")

    # The "CSV" file actually is a little complicated, since we also want to split on the colons separating
    # run/fill as well as the spaces separating the per-BX information.
    sepRE = re.compile(r'[\]\[\s,;:]+')
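    # For reference, the field layout assumed by the parsing below (inferred purely from the
    # indices used in this script, not from the brilcalc documentation) is, after splitting a
    # line with sepRE:
    #   pieces[0]   -> run number
    #   pieces[2]   -> lumi section
    #   pieces[10]  -> beam energy
    #   pieces[11]  -> total delivered lumi (unused below)
    #   pieces[12]  -> total recorded lumi (unused below)
    #   pieces[14]  -> luminometer name
    #   pieces[15:] -> repeating triplets of (BX id, per-BX delivered lumi, per-BX recorded lumi)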
    csv_input = open(args.inputFile, 'r')
    last_run = -1

    last_valid_lumi = []

    output_line = '{'
    for line in csv_input:
        if line[0] == '#':
            continue  # skip comment lines

        pieces = sepRE.split(line.strip())

        if len(pieces) < 15:
            # The most likely cause of this is that we're using a csv file without the bunch data, so might as well
            # just give up now.
            raise RuntimeError("Not enough fields in input line; maybe you forgot to include --xing in your brilcalc command?\n"+line)
        try:
            run = int(pieces[0])
            lumi_section = int(pieces[2])
            beam_energy = float(pieces[10])
            #tot_del_lumi = float(pieces[11])
            #tot_rec_lumi = float(pieces[12])
            luminometer = pieces[14]

            if luminometer == "HFOC":
                if beam_energy > 3000:
                    # Use a higher threshold for nominal runs, otherwise we'll get a lot of junk.
                    threshold = 8.0
                else:
                    # Use a lower threshold for 5.02 TeV runs since the overall lumi is quite low.
                    threshold = 2.0
            else:
                threshold = 1.2

            xing_lumi_array = []
            for bxid, bunch_del_lumi, bunch_rec_lumi in zip(pieces[15::3], pieces[16::3], pieces[17::3]):
                if sel_bx and int(bxid) not in sel_bx:
                    continue
                if args.no_threshold or float(bunch_del_lumi) > threshold:
                    xing_lumi_array.append([int(bxid), float(bunch_del_lumi), float(bunch_rec_lumi)])
        except:
            print("Failed to parse line: check if the input format has changed")
            print(pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7], pieces[8], pieces[9])
            continue

        # In principle we could also handle the case where len(pieces) == 15, i.e. no BX data is present but
        # total luminosity is, which implies we're using a luminometer without BX data. In that case we
        # could just extrapolate from the previous good LS (scaling the pileup by the ratio of
        # luminosity). In practice this is an extremely small number of lumi sections, and in 2018 the 14
        # lumisections in normtag_PHYSICS without BX data (all RAMSES) are all in periods with zero recorded
        # lumi, so they don't affect the resulting pileup at all. So for Run 2 I haven't bothered to implement
        # this.

        if run != last_run:
            # The script also used to add a dummy LS at the end of runs, but this is not necessary in Run 2.
            if last_run > 0:
                output_line = output_line[:-1] + '], '
            last_run = run
            output_line += ('\n"%d":' % run)
            output_line += ' ['

        # Now do the actual parsing.
        total_lumi = 0
        total_int = 0
        total_int2 = 0
        total_weight2 = 0

        # first loop to get the sums for the (weighted) mean
        for bxid, bunch_del_lumi, bunch_rec_lumi in xing_lumi_array:
            total_lumi += bunch_rec_lumi
            # this will eventually be pileup*bunch_rec_lumi, but it's quicker to apply the conversion factor once outside the loop
            total_int += bunch_del_lumi*bunch_rec_lumi
        # filled_xings = len(xing_lumi_array)

        # convert the sum to pileup and get the mean
        total_int *= parameters.orbitLength / parameters.lumiSectionLength
        if total_lumi > 0:
            mean_int = total_int/total_lumi
        else:
            mean_int = 0
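        # Restating the averaging above in formula form (documentation only, no new behavior):
        # with the per-BX pileup PU_i = bunch_del_lumi_i * orbitLength / lumiSectionLength and
        # weights w_i = bunch_rec_lumi_i, the mean computed here is
        #   mean_int = sum_i(w_i * PU_i) / sum_i(w_i)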

        # second loop to get the (weighted) RMS
        for bxid, bunch_del_lumi, bunch_rec_lumi in xing_lumi_array:
            mean_pileup = bunch_del_lumi * parameters.orbitLength / parameters.lumiSectionLength
            if mean_pileup > 100:
                print("mean number of pileup events > 100 for run %d, lum %d : m %f l %f" % \
                      (run, lumi_section, mean_pileup, bunch_del_lumi))
                #print "mean number of pileup events for lum %d: m %f idx %d l %f" % (lumi_section, mean_pileup, bxid, bunch_rec_lumi)

            total_int2 += bunch_rec_lumi*(mean_pileup-mean_int)*(mean_pileup-mean_int)
            total_weight2 += bunch_rec_lumi*bunch_rec_lumi

        # compute the final RMS and write it out
        #print " LS, Total lumi, filled xings %d, %f, %d" %(lumi_section,total_lumi,filled_xings)
        bunch_rms_lumi = 0
        denom = total_lumi*total_lumi - total_weight2
        if total_lumi > 0 and denom > 0:
            bunch_rms_lumi = sqrt(total_lumi*total_int2/denom)
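        # Written out, with the same PU_i and w_i = bunch_rec_lumi_i as above, this is the
        # weighted standard deviation with the usual unbiased-weighted-sample correction
        # (a restatement of the expression just above, added for readability):
        #   bunch_rms_lumi = sqrt( sum(w) * sum_i(w_i*(PU_i - mean_int)^2) / ((sum(w))^2 - sum_i(w_i^2)) )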

        output_line += "[%d,%2.4e,%2.4e,%2.4e]," % (lumi_section, total_lumi, bunch_rms_lumi, mean_int)
        last_valid_lumi = [lumi_section, total_lumi, bunch_rms_lumi, mean_int]

    output_line = output_line[:-1] + ']}'
    csv_input.close()

    outputfile = open(output, 'w')
    if not outputfile:
        raise RuntimeError("Could not open '%s' as an output JSON file" % output)

    outputfile.write(output_line)
    outputfile.close()
    print("Output written to", output)
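    # For reference, the output written above is a JSON dictionary keyed by run number (as a
    # string), with one quadruplet per lumi section; schematically (restating the format
    # strings used above, no values invented here):
    #   {"<run>": [[<LS>, <recorded lumi>, <RMS pileup>, <mean pileup>], ...], ...}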