CMS 3D CMS Logo

isotrackNtupler.py
Go to the documentation of this file.
1 
7 
8 import uproot3
9 import numpy as np
10 import pandas as pd
11 import argparse
12 import matplotlib.pyplot as plt
13 
# Command-line interface.
#
# Every option is consumed unconditionally further down (the file names are
# opened, the output prefix is concatenated into file names, and start/end are
# converted with int()), so they are declared as required: a missing option is
# reported by argparse as a usage error instead of surfacing later as a
# TypeError on None.
parser = argparse.ArgumentParser()

parser.add_argument("-PU", "--filePU", required=True, help="input PU file")
parser.add_argument("-NPU", "--fileNPU", required=True, help="input no PU file")
parser.add_argument("-O", "--opfilename", required=True, help="output file name")
parser.add_argument("-s", "--start", required=True, help="start entry for input PU file")
parser.add_argument("-e", "--end", required=True, help="end entry for input PU file")

# Parse the command line once and read every option from the same namespace.
args = parser.parse_args()

fName1 = args.filePU
fName2 = args.fileNPU
foutput = args.opfilename
# start/stop are intentionally kept as strings: they are embedded verbatim in
# the output file names and converted with int() where an entry index is needed.
start = args.start
stop = args.end
27 
# Both input files are read from the same tree path.
tree_path = 'hcalIsoTrkAnalyzer/CalibTree'

# tree1 comes from the PU file, tree2 from the no-PU file.
tree1 = uproot3.open(fName1)[tree_path]
tree2 = uproot3.open(fName2)[tree_path]

print("loaded files")

# Branches read from the PU tree.
branchespu = [
    't_Run',
    't_Event',
    't_nVtx',
    't_ieta',
    't_iphi',
    't_p',
    't_pt',
    't_gentrackP',
    't_eMipDR',
    't_eHcal',
    't_eHcal10',
    't_eHcal30',
    't_hmaxNearP',
    't_emaxNearP',
    't_hAnnular',
    't_eAnnular',
    't_rhoh',
    't_selectTk',
    't_qltyFlag',
]

# The no-PU tree is read with exactly the same branch list (independent copy).
branchesnpu = list(branchespu)

# Read only the requested entry window [start, stop) of the PU tree.
dictpu = tree1.arrays(
    branchespu,
    entrystart=int(start),
    entrystop=int(stop),
)

# Total number of entries in the no-PU tree.
npu_entries = tree2.numentries
45 
def _track_selection(df):
    """Return the boolean row mask for the selection applied to both samples.

    A row passes when ``t_selectTk`` and ``t_qltyFlag`` are set,
    ``t_hmaxNearP < 20``, ``t_eMipDR < 1``, ``|t_p - 50| < 10`` and
    ``t_eHcal > 10``.
    """
    return (
        (df['t_selectTk'])
        & (df['t_qltyFlag'])
        & (df['t_hmaxNearP'] < 20)
        & (df['t_eMipDR'] < 1)
        & (abs(df['t_p'] - 50) < 10)
        & (df['t_eHcal'] > 10)
    )


def _write_outputs(df, chunk_index, tag):
    """Write *df* as ``<prefix>_<chunk>_<start>_<stop>_<tag>`` .parquet and .txt (CSV)."""
    stem = foutput + '_' + str(chunk_index) + "_" + start + "_" + stop + "_" + tag
    df.to_parquet(stem + ".parquet")
    df.to_csv(stem + ".txt")


# Number of no-PU entries read per pass over tree2.
scale = 5000000

# Columns kept from the no-PU sample before matching.
branches_skim = ['t_Event', 't_ieta', 't_iphi', 't_eHcal']

# Columns written to the output files. After the merge, 't_eHcal_x' is the PU
# value and 't_eHcal_y' the no-PU value (pandas' default merge suffixes).
keepvars = [
    't_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta',
    't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh',
    't_pt', 't_eHcal_x', 't_eHcal_y', 't_p', 't_eMipDR',
]

# The PU window does not depend on the no-PU chunk, so build and filter it
# once instead of repeating the identical work on every iteration.
dfspu_raw = pd.DataFrame.from_dict(dictpu)
dfspu_raw.columns = branchespu
pu_sample_size = dfspu_raw.shape[0]
dfspu = dfspu_raw.loc[_track_selection(dfspu_raw)].reset_index(drop=True)
del dfspu_raw

for i, npu_start in enumerate(range(0, npu_entries, scale)):
    # Clamp the last chunk to the end of the tree.
    npu_stop = min(npu_start + scale, npu_entries)
    dictnpu = tree2.arrays(branchesnpu, entrystart=npu_start, entrystop=npu_stop)

    dfsnpu = pd.DataFrame.from_dict(dictnpu)
    dfsnpu.columns = branchesnpu
    print("loaded % of nopile file is =", (npu_stop / npu_entries) * 100)
    print("PU sample size:", pu_sample_size)
    print("noPU sample size:", dfsnpu.shape[0])

    # Apply the same selection to the no-PU chunk, then keep only the match
    # keys and the no-PU t_eHcal value.
    dfsnpu = dfsnpu.loc[_track_selection(dfsnpu)].reset_index(drop=True)
    dfsnpu = dfsnpu[branches_skim]

    # Pair each selected PU row with the no-PU row sharing event and tower.
    # NOTE(review): t_Run is not part of the key - confirm event numbers are
    # unique across runs in these samples.
    merged = pd.merge(dfspu, dfsnpu, on=['t_Event', 't_ieta', 't_iphi'])
    print(merged.keys())
    print("selected common events before cut:", merged.shape[0])

    # Computed once on the merged frame; every region below inherits it.
    merged['t_delta'] = merged['t_eHcal30'] - merged['t_eHcal10']

    abs_ieta = abs(merged['t_ieta'])
    regions = (
        ("all", "selected events after cut for all ietas:", None),
        ("lo", "selected events after cut for ieta < 16:", abs_ieta < 16),
        ("hi", "selected events after cut for ieta > 15:", abs_ieta > 15),
    )
    for tag, label, mask in regions:
        if mask is None:
            selected = merged
        else:
            selected = merged.loc[mask].reset_index(drop=True)
        print(label, selected.shape[0])
        _write_outputs(selected[keepvars], i, tag)
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
#define str(s)