
dqmiodatasetharvest.py
#!/usr/bin/env python
from __future__ import print_function
import re
import json
import ROOT
import sqlite3
import argparse
import subprocess
import multiprocessing


ROOTPREFIX = "root://cms-xrd-global.cern.ch/"
#ROOTPREFIX = "root://eoscms//eos/cms" # for more local files

parser = argparse.ArgumentParser(description="Collect MEs from DQMIO data, with maximum possible granularity")
16 
17 parser.add_argument('dataset', help='dataset name, like "/StreamHIExpress/HIRun2018A-Express-v1/DQMIO"')
18 parser.add_argument('-o', '--output', help='SQLite file to write', default='dqmio.sqlite')
19 parser.add_argument('-j', '--njobs', help='Number of threads to read files', type=int, default=1)
20 parser.add_argument('-l', '--limit', help='Only load up to LIMIT files', type=int, default=-1)
21 args = parser.parse_args()
22 
23 
# we can save a lot of time by only scanning some types, if we know all interesting MEs are of these types.
interesting_types = {
    "TH2Fs",
}

interesting_mes = {
    "PixelPhase1/Phase1_MechanicalView/PXBarrel/digi_occupancy_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_1",
    "PixelPhase1/Phase1_MechanicalView/PXBarrel/digi_occupancy_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_2",
    "PixelPhase1/Phase1_MechanicalView/PXBarrel/digi_occupancy_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_3",
    "PixelPhase1/Phase1_MechanicalView/PXBarrel/digi_occupancy_per_SignedModuleCoord_per_SignedLadderCoord_PXLayer_4",
    "PixelPhase1/Phase1_MechanicalView/PXForward/digi_occupancy_per_SignedDiskCoord_per_SignedBladePanelCoord_PXRing_1",
    "PixelPhase1/Phase1_MechanicalView/PXForward/digi_occupancy_per_SignedDiskCoord_per_SignedBladePanelCoord_PXRing_2",
    "SiStrip/MechanicalView/TEC/MINUS/wheel_1/TkHMap_NumberValidHits_TECM_W1",
    "SiStrip/MechanicalView/TEC/MINUS/wheel_2/TkHMap_NumberValidHits_TECM_W2",
    "SiStrip/MechanicalView/TEC/MINUS/wheel_3/TkHMap_NumberValidHits_TECM_W3",
    "SiStrip/MechanicalView/TEC/MINUS/wheel_4/TkHMap_NumberValidHits_TECM_W4",
    "SiStrip/MechanicalView/TEC/MINUS/wheel_5/TkHMap_NumberValidHits_TECM_W5",
    "SiStrip/MechanicalView/TEC/MINUS/wheel_6/TkHMap_NumberValidHits_TECM_W6",
    "SiStrip/MechanicalView/TEC/MINUS/wheel_7/TkHMap_NumberValidHits_TECM_W7",
    "SiStrip/MechanicalView/TEC/MINUS/wheel_8/TkHMap_NumberValidHits_TECM_W8",
    "SiStrip/MechanicalView/TEC/MINUS/wheel_9/TkHMap_NumberValidHits_TECM_W9",
    "SiStrip/MechanicalView/TEC/PLUS/wheel_1/TkHMap_NumberValidHits_TECP_W1",
    "SiStrip/MechanicalView/TEC/PLUS/wheel_2/TkHMap_NumberValidHits_TECP_W2",
    "SiStrip/MechanicalView/TEC/PLUS/wheel_3/TkHMap_NumberValidHits_TECP_W3",
    "SiStrip/MechanicalView/TEC/PLUS/wheel_4/TkHMap_NumberValidHits_TECP_W4",
    "SiStrip/MechanicalView/TEC/PLUS/wheel_5/TkHMap_NumberValidHits_TECP_W5",
    "SiStrip/MechanicalView/TEC/PLUS/wheel_6/TkHMap_NumberValidHits_TECP_W6",
    "SiStrip/MechanicalView/TEC/PLUS/wheel_7/TkHMap_NumberValidHits_TECP_W7",
    "SiStrip/MechanicalView/TEC/PLUS/wheel_8/TkHMap_NumberValidHits_TECP_W8",
    "SiStrip/MechanicalView/TEC/PLUS/wheel_9/TkHMap_NumberValidHits_TECP_W9",
    "SiStrip/MechanicalView/TIB/layer_1/TkHMap_NumberValidHits_TIB_L1",
    "SiStrip/MechanicalView/TIB/layer_2/TkHMap_NumberValidHits_TIB_L2",
    "SiStrip/MechanicalView/TIB/layer_3/TkHMap_NumberValidHits_TIB_L3",
    "SiStrip/MechanicalView/TIB/layer_4/TkHMap_NumberValidHits_TIB_L4",
    "SiStrip/MechanicalView/TID/MINUS/wheel_1/TkHMap_NumberValidHits_TIDM_D1",
    "SiStrip/MechanicalView/TID/MINUS/wheel_2/TkHMap_NumberValidHits_TIDM_D2",
    "SiStrip/MechanicalView/TID/MINUS/wheel_3/TkHMap_NumberValidHits_TIDM_D3",
    "SiStrip/MechanicalView/TID/PLUS/wheel_1/TkHMap_NumberValidHits_TIDP_D1",
    "SiStrip/MechanicalView/TID/PLUS/wheel_2/TkHMap_NumberValidHits_TIDP_D2",
    "SiStrip/MechanicalView/TID/PLUS/wheel_3/TkHMap_NumberValidHits_TIDP_D3",
    "SiStrip/MechanicalView/TOB/layer_1/TkHMap_NumberValidHits_TOB_L1",
    "SiStrip/MechanicalView/TOB/layer_2/TkHMap_NumberValidHits_TOB_L2",
    "SiStrip/MechanicalView/TOB/layer_3/TkHMap_NumberValidHits_TOB_L3",
    "SiStrip/MechanicalView/TOB/layer_4/TkHMap_NumberValidHits_TOB_L4",
    "SiStrip/MechanicalView/TOB/layer_5/TkHMap_NumberValidHits_TOB_L5",
    "SiStrip/MechanicalView/TOB/layer_6/TkHMap_NumberValidHits_TOB_L6",
    "EcalBarrel/EBOccupancyTask/EBOT digi occupancy",
    "EcalEndcap/EEOccupancyTask/EEOT digi occupancy EE -",
    "EcalEndcap/EEOccupancyTask/EEOT digi occupancy EE +",
    "EcalPreshower/ESOccupancyTask/ES Energy Density Z -1 P 1",
    "EcalPreshower/ESOccupancyTask/ES Energy Density Z -1 P 2",
    "EcalPreshower/ESOccupancyTask/ES Energy Density Z 1 P 1",
    "EcalPreshower/ESOccupancyTask/ES Energy Density Z 1 P 2",
    "Hcal/DigiRunHarvesting/Occupancy/depth/depth1",
    "Hcal/DigiRunHarvesting/Occupancy/depth/depth2",
    "Hcal/DigiRunHarvesting/Occupancy/depth/depth3",
    "Hcal/DigiRunHarvesting/Occupancy/depth/depth4",
    "CSC/CSCOfflineMonitor/Occupancy/hOStripsAndWiresAndCLCT",
    "RPC/AllHits/SummaryHistograms/Occupancy_for_Barrel",
    "RPC/AllHits/SummaryHistograms/Occupancy_for_Endcap",
    "DT/02-Segments/Wheel-1/numberOfSegments_W-1",
    "DT/02-Segments/Wheel-2/numberOfSegments_W-2",
    "DT/02-Segments/Wheel0/numberOfSegments_W0",
    "DT/02-Segments/Wheel1/numberOfSegments_W1",
    "DT/02-Segments/Wheel2/numberOfSegments_W2",

    "L1T/L1TObjects/L1TEGamma/timing/egamma_eta_phi_bx_0",
    "L1T/L1TObjects/L1TJet/timing/jet_eta_phi_bx_0",
    "L1T/L1TObjects/L1TMuon/timing/muons_eta_phi_bx_0",
    "L1T/L1TObjects/L1TTau/timing/tau_eta_phi_bx_0",
    "L1T/L1TObjects/L1TEGamma/timing/denominator_egamma",
    "L1T/L1TObjects/L1TJet/timing/denominator_jet",
    "L1T/L1TObjects/L1TMuon/timing/denominator_muons",
    "L1T/L1TObjects/L1TTau/timing/denominator_tau",
}

inf = re.compile("([- \[])inf([,}\]])")
nan = re.compile("([- \[])nan([,}\]])")

def tosqlite(x):
    if isinstance(x, ROOT.string):
        try:
            return unicode(x.data())
        except:
            return buffer(x.data())
    if isinstance(x, int):
        return x
    if isinstance(x, float):
        return x
    if isinstance(x, long):
        return x
    else:
        try:
            rootobj = unicode(ROOT.TBufferJSON.ConvertToJSON(x))
            # turns out ROOT does not generate valid JSON for NaN/inf
            clean = nan.sub('\\g<1>0\\g<2>', inf.sub('\\g<1>1e38\\g<2>', rootobj))
            obj = json.loads(clean)
            jsonobj = json.dumps(obj, allow_nan=False)
            return jsonobj
        except Exception as e:
            return json.dumps({"root2sqlite_error": e.__repr__(), "root2sqlite_object": x.__repr__()})
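
# Illustrative note: the substitutions above turn the non-standard "inf"/"nan"
# tokens that TBufferJSON may emit into parseable numbers, e.g.
#   nan.sub('\\g<1>0\\g<2>', inf.sub('\\g<1>1e38\\g<2>', '[1.5, inf, nan]'))
# yields '[1.5, 1e38, 0]', which json.loads() accepts.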

def dasquery(dataset):
    if not dataset.endswith("DQMIO"):
        raise Exception("This tool probably cannot read the dataset you specified. The name should end with DQMIO.")
    dasquery = ["dasgoclient", "-query=file dataset=%s" % dataset]
    print("Querying das ... %s" % dasquery)
    files = subprocess.check_output(dasquery)
    files = files.splitlines()
    print("Got %d files." % len(files))
    return files
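
# Note: dasgoclient is expected to print one file LFN per line (paths of the
# form "/store/.../DQMIO/..."); the exact output depends on the DAS client
# setup, so this is only a rough description.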


treenames = {
    0: "Ints",
    1: "Floats",
    2: "Strings",
    3: "TH1Fs",
    4: "TH1Ss",
    5: "TH1Ds",
    6: "TH2Fs",
    7: "TH2Ss",
    8: "TH2Ds",
    9: "TH3Fs",
    10: "TProfiles",
    11: "TProfile2Ds",
}
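
# The numeric keys above are the ME type codes stored in the "Type" branch of
# the per-file "Indices" tree; harvestfile() below uses them to pick the
# matching per-type TTree (e.g. "TH2Fs") in each DQMIO file.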

maketable = """
  CREATE TABLE IF NOT EXISTS monitorelements (
    name,
    fromrun, fromlumi, torun, tolumi,
    metype,
    value
  ); """
makeindex = """
  CREATE INDEX runorder ON monitorelements(fromrun, fromlumi);
"""
insertinto = """
  INSERT INTO monitorelements (
    name,
    fromrun, fromlumi, torun, tolumi,
    metype,
    value
  ) VALUES (
    ?, ?, ?, ?, ?, ?, ?
  ); """
dumpmes = """
  SELECT fromlumi, tolumi, fromrun, name, value FROM monitorelements ORDER BY fromrun, fromlumi ASC;
"""
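
# For a quick look at the harvested data one could, for example, run a query
# like dumpmes from the sqlite3 command line (illustrative, not part of this script):
#   sqlite3 dqmio.sqlite "SELECT name, fromrun, fromlumi, tolumi FROM monitorelements LIMIT 10;"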

db = sqlite3.connect(args.output)
db.execute(maketable)
db.execute(makeindex)

def harvestfile(fname):
    f = ROOT.TFile.Open(ROOTPREFIX + fname)
    idxtree = getattr(f, "Indices")
    #idxtree.GetEntry._threaded = True # now the blocking call should release the GIL...

    # we have no good way to find out which lumis were processed in a job.
    # so we watch the per-lumi indices and assume that all mentioned lumis
    # are covered in the end-of-job MEs. This might fail if there are no
    # per-lumi MEs.
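    # For example, if per-lumi index entries for lumis {1, 2, 5} were seen,
    # a per-job ME (lumi == 0) is stored below as covering lumis 1 to 5.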
    knownlumis = set()
    mes_to_store = []

    for i in range(idxtree.GetEntries()):
        idxtree.GetEntry(i)
        run, lumi, metype = idxtree.Run, idxtree.Lumi, idxtree.Type
        if lumi != 0:
            knownlumis.add(lumi)

        if not treenames[metype] in interesting_types:
            continue


        endrun = run # assume no multi-run files for now
        if lumi == 0: # per-job ME
            endlumi = max(knownlumis)
            lumi = min(knownlumis)
        else:
            endlumi = lumi

        # inclusive range -- for 0 entries, row is left out
        firstidx, lastidx = idxtree.FirstIndex, idxtree.LastIndex
        metree = getattr(f, treenames[metype])
        metree.SetBranchStatus("*", 0)
        metree.SetBranchStatus("FullName", 1)

        for x in range(firstidx, lastidx+1):
            metree.GetEntry(x)
            mename = str(metree.FullName)
            if mename in interesting_mes:
                metree.GetEntry(x, 1)
                value = metree.Value

                mes_to_store.append((
                    mename,
                    run, lumi, endrun, endlumi,
                    metype,
                    tosqlite(value),
                ))

    return mes_to_store

files = dasquery(args.dataset)
if args.limit > 0: files = files[:args.limit]

pool = multiprocessing.Pool(processes=args.njobs)
ctr = 0
for mes_to_store in pool.imap_unordered(harvestfile, files):
#for mes_to_store in map(harvestfile, files):
    db.executemany(insertinto, mes_to_store)
    db.commit()
    ctr += 1
    print("Processed %d files of %d, got %d MEs...\r" % (ctr, len(files), len(mes_to_store)), end='')
print("\nDone.")
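
# Read-back sketch (illustrative, not executed here): since only TH2Fs are
# harvested, every stored value is a JSON string produced by tosqlite() above,
# so the output can be inspected with plain sqlite3 + json:
#   import sqlite3, json
#   readdb = sqlite3.connect("dqmio.sqlite")
#   for fromlumi, tolumi, fromrun, name, value in readdb.execute(dumpmes):
#       histdict = json.loads(value)  # dict of the TH2F fields as serialized by TBufferJSON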

sqlite2tree = """
// Convert the sqlite format saved above back into a TTree.
// Saving TTrees with objects (TH1's) seems to be close to impossible in Python,
// so we do the roundtrip via SQLite and JSON in a ROOT macro.
// This needs a ROOT with TBufferJSON::FromJSON, which the 6.12 in CMSSW
// for now does not have. We can load a newer version from SFT (on lxplus6,
// in (!) a cmsenv):
// source /cvmfs/sft.cern.ch/lcg/releases/ROOT/6.16.00-f8770/x86_64-slc6-gcc8-opt/bin/thisroot.sh
// root sqlite2tree.C
// It is rather slow, but the root file is a lot more compact.

int run;
int fromlumi;
int tolumi;
TString* name;
TH2F* value;

int sqlite2tree() {

  auto sql = TSQLiteServer("sqlite:///dev/shm/schneiml/CMSSW_10_5_0_pre1/src/dqmio.sqlite");
  auto query = "SELECT fromlumi, tolumi, fromrun, name, value FROM monitorelements ORDER BY fromrun, fromlumi ASC;";
  auto res = sql.Query(query);

  TFile outfile("/dev/shm/dqmio.root", "RECREATE");
  auto outtree = new TTree("MEs", "MonitorElements by run and lumisection");
  auto nameb = outtree->Branch("name", &name);
  auto valueb = outtree->Branch("value", &value, 128*1024);
  auto runb = outtree->Branch("run", &run);
  auto fromlumib = outtree->Branch("fromlumi", &fromlumi);
  auto tolumib = outtree->Branch("tolumi", &tolumi);


  while (auto row = res->Next()) {
    fromlumi = atoi(row->GetField(0));
    tolumi = atoi(row->GetField(1));
    run = atoi(row->GetField(2));
    name = new TString(row->GetField(3));
    value = nullptr;
    TBufferJSON::FromJSON(value, row->GetField(4));
    outtree->Fill();
  }
  outtree->Write(); // persist the TTree before the TFile goes out of scope
  return 0;
}
"""
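
# The macro above is only stored as a string; one way to use it (illustrative)
# is to dump it to a file and run it with a sufficiently recent ROOT, e.g.:
#   with open("sqlite2tree.C", "w") as f:
#       f.write(sqlite2tree)
# and then, in a shell with the ROOT version mentioned in the macro comments:
#   root -b -q sqlite2tree.C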