CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
isotrackNtupler.py
Go to the documentation of this file.
1 ######################################################################################
2 # Matches tracks between a PU and a noPU sample and writes the selected variables to pkl and text files (e.g. for training a regressor)
3 # Usage:
4 # python3 isotrackNtupler.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -NPU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_noPU.root -O isotk_relval
5 ######################################################################################
6 
7 
8 
9 import uproot
10 import numpy as np
11 import pandas as pd
12 import argparse
13 import matplotlib.pyplot as plt
14 
# Command-line interface: two input ROOT files (with and without pile-up)
# and the stem used to name every output file.
parser = argparse.ArgumentParser()
parser.add_argument("-PU", "--filePU",help="input PU file",default="root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root")
parser.add_argument("-NPU", "--fileNPU",help="input no PU file",default="root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_noPU.root")
parser.add_argument("-O", "--opfilename",help="ouput file name",default="isotk_relval")

# Parse the command line once and read every option from the same namespace
# (previously parse_args() was re-run for each attribute).
args = parser.parse_args()

fName1 = args.filePU        # PU input file
fName2 = args.fileNPU       # noPU input file
foutput = args.opfilename   # output stem; suffixes such as "_all.pkl" are appended later
24 
25 
# Both inputs are read from the same tree path and opened with the same
# xrootd source settings (1024**3 bytes for both chunkbytes and limitbytes).
_TREE_PATH = 'HcalIsoTrkAnalyzer/CalibTree'
_SOURCE_BYTES = 1024**3

# PU sample
_pu_file = uproot.open(
    fName1,
    xrootdsource=dict(chunkbytes=_SOURCE_BYTES, limitbytes=_SOURCE_BYTES),
)
tree1 = _pu_file[_TREE_PATH]

# noPU sample
_npu_file = uproot.open(
    fName2,
    xrootdsource=dict(chunkbytes=_SOURCE_BYTES, limitbytes=_SOURCE_BYTES),
)
tree2 = _npu_file[_TREE_PATH]
print ("loaded files")

# Branches read from the PU tree.
branchespu = [
    't_Run', 't_Event', 't_nVtx', 't_ieta', 't_iphi', 't_p', 't_pt',
    't_gentrackP', 't_eMipDR', 't_eHcal', 't_eHcal10', 't_eHcal30',
    't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh',
    't_selectTk', 't_qltyFlag',
]
# From the noPU tree only the matching keys and 't_eHcal' are read.
branchesnpu = ['t_Event', 't_ieta', 't_iphi', 't_eHcal']

# Read the requested branches in full (PU first, then noPU).
dictpu = tree1.arrays(branches=branchespu)
dictnpu = tree2.arrays(branches=branchesnpu)

# Build one DataFrame per sample and relabel the columns with the requested
# branch names.
# NOTE(review): the relabelling relies on arrays() returning the branches in
# the order they were requested - confirm for the uproot version in use.
dfspu = pd.DataFrame.from_dict(dictpu)
dfspu.columns = branchespu
dfsnpu = pd.DataFrame.from_dict(dictnpu)
dfsnpu.columns = branchesnpu
print ("loaded dicts and dfs")
print ("PU sample size:", len(dfspu))
print ("noPU sample size:", len(dfsnpu))

# Keep only rows whose (event, ieta, iphi) appear in both samples. 't_eHcal'
# exists on both sides, so the merge renames it to 't_eHcal_x' (PU, left)
# and 't_eHcal_y' (noPU, right).
_match_keys = ['t_Event', 't_ieta', 't_iphi']
merged = dfspu.merge(dfsnpu, on=_match_keys)
print ("selected common events before cut:", len(merged))
47 
# Columns written to every output file. 't_eHcal_x' and 't_eHcal_y' are the
# PU and noPU copies of 't_eHcal' produced by the merge; 't_delta' is added
# by _select_and_save() below.
keepvars = ['t_nVtx','t_ieta','t_eHcal10','t_eHcal30','t_delta','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh','t_pt','t_eHcal_x','t_eHcal_y','t_p','t_eMipDR']


def _select_and_save(frame, mask, message, columns, stem):
    """Select rows of *frame*, add 't_delta', and write the result to disk.

    Parameters
    ----------
    frame : pandas.DataFrame
        Merged PU/noPU table; must contain 't_eHcal30', 't_eHcal10' and
        every column named in *columns* except 't_delta'.
    mask : boolean pandas.Series
        Row selection aligned with *frame*.
    message : str
        Text printed in front of the number of selected rows.
    columns : list of str
        Columns kept in the written table.
    stem : str
        Output path without extension; "<stem>.pkl" (pickle) and
        "<stem>.txt" (CSV) are written.

    Returns
    -------
    pandas.DataFrame
        The table that was written.
    """
    # Re-number the surviving rows from zero so the index written to the
    # text file is contiguous.
    picked = frame.loc[mask].reset_index(drop=True)
    print (message, picked.shape[0])
    picked['t_delta'] = picked['t_eHcal30'] - picked['t_eHcal10']
    final = picked[columns]
    final.to_pickle(stem + ".pkl")
    final.to_csv(stem + ".txt")
    return final


# Requirements shared by all three outputs: both selection flags set,
# t_hmaxNearP below 20, t_eMipDR below 1, t_p within 10 of 50, and the PU
# t_eHcal above 10.
base_cuts = (merged['t_selectTk'])&(merged['t_qltyFlag'])&(merged['t_hmaxNearP']<20)&(merged['t_eMipDR']<1)&(abs(merged['t_p'] - 50)<10)&(merged['t_eHcal_x']>10)

#########################all ietas
cuts1 = base_cuts
final_df_all = _select_and_save(merged, cuts1, "selected events after cut for all ietas:", keepvars, foutput+"_all")

#########################split ieta < 16
cuts2 = base_cuts&(abs(merged['t_ieta'])<16)
final_df_low = _select_and_save(merged, cuts2, "selected events after cut for ieta < 16:", keepvars, foutput+"_lo")

#########################split ieta > 15
cuts3 = base_cuts&(abs(merged['t_ieta'])>15)
final_df_hi = _select_and_save(merged, cuts3, "selected events after cut for ieta > 15:", keepvars, foutput+"_hi")
Abs< T >::type abs(const T &t)
Definition: Abs.h:22