CMS 3D CMS Logo

makePileupJSON.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 from __future__ import print_function
3 import argparse
4 import RecoLuminosity.LumiDB.LumiConstants as LumiConstants
5 import re
6 from math import sqrt
7 
8 
15 
16 # This script takes a .csv file containing the per-BX luminosity values from brilcalc and processes them to
17 # produce an output file containing the average and RMS pileup for each lumi section. This can then be fed
18 # into pileupCalc.py to calculate a final pileup histogram. For more documentation see:
19 # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PileupJSONFileforData
20 
21 # modified from the estimatePileup.py script in RecoLuminosity/LumiDB
22 # originally 5 Jan, 2012 Mike Hildreth
23 
24 if __name__ == '__main__':
26 
27  parser = argparse.ArgumentParser(description="Script to estimate average and RMS pileup using the per-bunch luminosity information provided by brilcalc. The output is a JSON file containing a dictionary by runs with one entry per lumi section.")
28  parser.add_argument('inputFile', help='CSV input file as produced from brilcalc')
29  parser.add_argument('outputFile', help='Name of JSON output file')
30  parser.add_argument('-b', '--selBX', metavar='BXLIST', help='Comma-separated list of BXs to use (will use all by default)')
31  parser.add_argument('-n', '--no-threshold', action='store_true', help='By default, to avoid including spurious luminosity from afterglow in the pileup calculation, bunches with luminosity below a given threshold are excluded. This threshold is 8.0/ub/LS for HFOC at nominal energy, 2.0/ub/LS for HFOC at low energy, and 1.2/ub/LS for other luminometers. If the input data has already been preprocessed (e.g. by using the --xingTr argument in brilcalc) to exclude these bunches, or if you are running on special data with low overall luminosity, then use this flag to disable the application of the threshold.')
32  args = parser.parse_args()
33 
34  output = args.outputFile
35 
36  sel_bx = set()
37  if args.selBX:
38  for ibx in args.selBX.split(","):
39  try:
40  bx = int(ibx)
41  sel_bx.add(bx)
42  except:
43  print(ibx,"is not an int")
44  print("Processing",args.inputFile,"with selected BXs:", sorted(sel_bx))
45  else:
46  print("Processing",args.inputFile,"with all BX")
47 
48  # The "CSV" file actually is a little complicated, since we also want to split on the colons separating
49  # run/fill as well as the spaces separating the per-BX information.
50  sepRE = re.compile(r'[\]\[\s,;:]+')
51  csv_input = open(args.inputFile, 'r')
52  last_run = -1
53 
54  last_valid_lumi = []
55 
56  output_line = '{'
57  for line in csv_input:
58  if line[0] == '#':
59  continue # skip comment lines
60 
61  pieces = sepRE.split(line.strip())
62 
63  if len(pieces) < 15:
64  # The most likely cause of this is that we're using a csv file without the bunch data, so might as well
65  # just give up now.
66  raise RuntimeError("Not enough fields in input line; maybe you forgot to include --xing in your brilcalc command?\n"+line)
67  try:
68  run = int(pieces[0])
69  lumi_section = int(pieces[2])
70  beam_energy = float(pieces[10])
71  #tot_del_lumi = float(pieces[11])
72  #tot_rec_lumi = float(pieces[12])
73  luminometer = pieces[14]
74 
75  if luminometer == "HFOC":
76  if beam_energy > 3000:
77  # Use higher threshold for nominal runs otherwise we'll get a lot of junk.
78  threshold = 8.0
79  else:
80  # Use lower threshold for 5.02 GeV runs since the overall lumi is quite low.
81  threshold = 2.0
82  else:
83  threshold = 1.2
84 
85  xing_lumi_array = []
86  for bxid, bunch_del_lumi, bunch_rec_lumi in zip(pieces[15::3], pieces[16::3], pieces[17::3]):
87  if sel_bx and int(bxid) not in sel_bx:
88  continue
89  if args.no_threshold or float(bunch_del_lumi) > threshold:
90  xing_lumi_array.append([int(bxid), float(bunch_del_lumi), float(bunch_rec_lumi)])
91  except:
92  print("Failed to parse line: check if the input format has changed")
93  print(pieces[0],pieces[1],pieces[2],pieces[3],pieces[4],pieces[5],pieces[6],pieces[7],pieces[8],pieces[9])
94  continue
95 
96  # In principle we could also have a check for if len(pieces) == 15 (i.e. no BX data is present) but
97  # luminosity is present, which implies we're using a luminometer without BX data. In this case we
98  # could just extrapolate from the previous good LS (just scaling the pileup by the ratio of
99  # luminosity). In practice this is an extremely small number of lumi sections, and in 2018 the 14
100  # lumisections in normtag_PHYSICS without BX data (all RAMSES) are all in periods with zero recorded
101  # lumi, so they don't affect the resulting pileup at all. So for run 2 I haven't bothered to implement
102  # this.
103 
104  if run != last_run:
105  # the script also used to add a dummy LS at the end of runs but this is not necessary in run 2
106  if last_run > 0:
107  output_line = output_line[:-1] + '], '
108  last_run = run
109  output_line += ('\n"%d":' % run )
110  output_line += ' ['
111 
112  # Now do the actual parsing.
113  total_lumi = 0
114  total_int = 0
115  total_int2 = 0
116  total_weight2 = 0
117 
118  # first loop to get sum for (weighted) mean
119  for bxid, bunch_del_lumi, bunch_rec_lumi in xing_lumi_array:
120  total_lumi += bunch_rec_lumi
121  # this will eventually be_pileup*bunch_rec_lumi but it's quicker to apply the factor once outside the loop
122  total_int += bunch_del_lumi*bunch_rec_lumi
123  # filled_xings = len(xing_lumi_array)
124 
125  # convert sum to pileup and get the mean
126  total_int *= parameters.orbitLength / parameters.lumiSectionLength
127  if total_lumi > 0:
128  mean_int = total_int/total_lumi
129  else:
130  mean_int = 0
131 
132  # second loop to get (weighted) RMS
133  for bxid, bunch_del_lumi, bunch_rec_lumi in xing_lumi_array:
134  mean_pileup = bunch_del_lumi * parameters.orbitLength / parameters.lumiSectionLength
135  if mean_pileup > 100:
136  print("mean number of pileup events > 100 for run %d, lum %d : m %f l %f" % \
137  (runNumber, lumi_section, mean_pileup, bunch_del_lumi))
138  #print "mean number of pileup events for lum %d: m %f idx %d l %f" % (lumi_section, mean_pileup, bxid, bunch_rec_lumi)
139 
140  total_int2 += bunch_rec_lumi*(mean_pileup-mean_int)*(mean_pileup-mean_int)
141  total_weight2 += bunch_rec_lumi*bunch_rec_lumi
142 
143  # compute final RMS and write it out
144  #print " LS, Total lumi, filled xings %d, %f, %d" %(lumi_section,total_lumi,filled_xings)
145  bunch_rms_lumi = 0
146  denom = total_lumi*total_lumi-total_weight2
147  if total_lumi > 0 and denom > 0:
148  bunch_rms_lumi = sqrt(total_lumi*total_int2/denom)
149 
150  output_line += "[%d,%2.4e,%2.4e,%2.4e]," % (lumi_section, total_lumi, bunch_rms_lumi, mean_int)
151  last_valid_lumi = [lumi_section, total_lumi, bunch_rms_lumi, mean_int]
152 
153  output_line = output_line[:-1] + ']}'
154  csv_input.close()
155 
156  outputfile = open(output,'w')
157  if not outputfile:
158  raise RuntimeError("Could not open '%s' as an output JSON file" % output)
159 
160  outputfile.write(output_line)
161  outputfile.close()
162  print("Output written to", output)
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
T sqrt(T t)
Definition: SSEVec.h:19
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47