CMS 3D CMS Logo

isotrackApplyRegressor.py
Go to the documentation of this file.
1 
9 
10 # In[1]:
11 
12 import pandas as pd
13 import numpy as np
14 import matplotlib.pyplot as plt
15 import argparse
16 import tensorflow.keras
17 from tensorflow.keras.models import Sequential
18 from tensorflow.keras.layers import Dense
19 from tensorflow.keras.layers import Dropout
20 from tensorflow.keras.utils import to_categorical
21 from tensorflow.keras.utils import plot_model
22 from tensorflow.keras import regularizers
23 from sklearn.metrics import roc_curve, auc
24 from tensorflow.keras.layers import Activation
25 from tensorflow.keras import backend as K
26 from tensorflow.keras.models import load_model
27 import uproot
28 
29 
30 # In[2]:
31 
32 parser = argparse.ArgumentParser()
33 parser.add_argument("-PU", "--filePU",help="input PU file",default="root://eoscms.cern.ch//eos/cms/store/group/dpg_hcal/comm_hcal/ISOTRACK/SinglePion_E-50_Eta-0to3_Run3Winter21_112X_PU.root")
34 parser.add_argument("-B", "--ifbarrel",help="barrel / endcap",default='barrel')
35 parser.add_argument("-M", "--modelname",help="model file name",default="./models/model.h5")
36 parser.add_argument("-O", "--opfilename",help="output text file name",default="corrfac_regression.txt")
37 
38 
39 fName1 = parser.parse_args().filePU
40 modeln = parser.parse_args().modelname
41 foutput = parser.parse_args().opfilename
42 barrelflag = parser.parse_args().ifbarrel
43 
44 
45 
46 #fName1='/eos/uscms/store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root'
47 tree1 = uproot.open(fName1,xrootdsource=dict(chunkbytes=1024**3, limitbytes=1024**3))['HcalIsoTrkAnalyzer/CalibTree']
48 print ("loaded files")
49 
50 branchespu = ['t_Run','t_Event','t_nVtx','t_ieta','t_iphi','t_p','t_pt','t_gentrackP','t_eMipDR','t_eHcal','t_eHcal10','t_eHcal30','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh']
51 dictpu = tree1.arrays(branches=branchespu)
52 #dictpu = tree1.arrays(branches=branchespu,entrystart=0, entrystop=100000)
53 dfspu = pd.DataFrame.from_dict(dictpu)
54 dfspu.columns=branchespu
55 print ("sample size:",dfspu.shape[0])
56 
57 
58 # In[3]:
59 
60 
61 dfspu['t_delta']=dfspu['t_eHcal30']-dfspu['t_eHcal10']
62 keepvars =['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt','t_eHcal', 't_p', 't_eMipDR']
63 
64 df = dfspu[keepvars].copy()
65 df['t_eHcal_xun'] = df['t_eHcal']
66 df['t_delta_un'] = df['t_delta']
67 df['t_ieta_un'] = df['t_ieta']
68 
69 #cols_to_stand = ['t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal_x']
70 #cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt']
71 
72 
75 
76 
77 if barrelflag=='barrel':
78  var = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal_x', 't_eHcal_y', 't_p', 't_eMipDR', 't_eHcal_xun', 't_delta_un', 't_ieta_un']
79  mina = [20, -15, 17.33086721925065, 17.655660001095384, 0.0, -1.0, -1.0, 0.0, -5.33359869197011, 4.093925265397289, 20.783629520782718, 16.998163268435746, 20.000221125315875, 40.074083721419896, 0.0, 16.998163268435746, 0.0, -15]
80  maxa = [138, 15, 138.26640254695667, 155.83508832909865, 50.9643486259738, 17.140547914961605, 2870.424876287056, 35.727171580074355, 17.763740802183747, 26.38359781195008, 59.169594172331905, 133.07561272289604, 122.8542027361691, 59.977312414583295, 0.9999987781047821, 133.07561272289604, 50.9643486259738, 15]
81 else:
82  var = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal_x', 't_eHcal_y', 't_p', 't_eMipDR', 't_eHcal_xun', 't_delta_un', 't_ieta_un']
83  mina = [20, -27, 20.934556022286415, 27.213239994598553, 0.7803188574616797, -1.0, -1.0, 0.0, -57.422806948423386, 4.435386319143829, 6.013227444563335, 18.5278014652431, 20.00695458613336, 40.00339106433163, 0.0, 18.5278014652431, 0.7803188574616797, -27]
84  maxa = [117, 28, 584.1150933708996, 822.056631873711, 424.1259534251876, 19.999632518345965, 13218.009997109137, 185.96148682758212, 128.60208600759506, 25.911101538538524, 28.928444814127907, 428.99590471945703, 111.25579111767001, 59.98427126709689, 0.9999948740005493, 428.99590471945703, 424.1259534251876, 28]
85 
86 print(mina)
87 print(maxa)
88 
89 normkey = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal']
90 k = 0
91 for i in df.keys():
92  if i not in normkey:
93  break
94  #print(normkey[k],mina[k],maxa[k])
95  df[i]=abs(df[i]-mina[k])/(maxa[k] - mina[k])
96  k+=1
97 
98 
99 df['t_eHcal_xun'] = df['t_eHcal_xun'].apply(lambda x: 1.0 if (x==0) else x)
100 uncorrected_values = df['t_eHcal_xun'].values
101 print (uncorrected_values.shape)
102 print ('data vars:',df.keys())
103 data = df.values
104 X_train = data[:,0:12]
105 
106 
107 # In[4]:
108 
109 
110 model = load_model(modeln)
111 preds = model.predict(X_train,verbose=1)
112 preds = preds.reshape(preds.shape[0])
113 print (preds.shape)
114 
115 
116 # In[5]:
117 if barrelflag=='barrel':
118  tag = 'barrel'
119 else:
120  tag = 'endcap'
121 
122 print(tag)
123 
124 plt.hist(preds, bins =100, range=(0,100),label='predicted enerhy',alpha=0.6)
125 plt.savefig('predicted_Edist_'+tag+'.png')
126 plt.close()
127 
128 parray = dfspu['t_p'].values
129 mipdr = dfspu['t_eMipDR'].values
130 plt.hist(preds/(parray - mipdr), bins =100, range=(0,10),label='predicted e/p-ecal',alpha=0.6)
131 plt.savefig('predicted_eopdist_'+tag+'.png')
132 plt.close()
133 
134 
135 
136 # In[6]:
137 eventnumarray = np.arange(0,X_train.shape[0],1,dtype=int)
138 eventnumarray = eventnumarray.reshape(eventnumarray.shape[0],1)
139 #runnumarray = dfspu['t_Run'].values
140 #runnumarray = runnumarray.reshape(runnumarray.shape[0],1)
141 #eventnumarray = dfspu['t_Event'].values
142 #eventnumarray = eventnumarray.reshape(eventnumarray.shape[0],1)
143 ietaarray = dfspu['t_ieta'].values
144 ietaarray = ietaarray.reshape(ietaarray.shape[0],1)
145 #iphiarray = dfspu['t_iphi'].values
146 #iphiarray = iphiarray.reshape(iphiarray.shape[0],1)
147 corrfac = preds/uncorrected_values
148 corrfac = corrfac.reshape(corrfac.shape[0],1)
149 fileo = np.hstack((eventnumarray,ietaarray,corrfac))
150 #fileo = np.hstack((runnumarray,eventnumarray,ietaarray,iphiarray,corrfac))
151 np.savetxt(foutput, fileo,fmt=['%d','%d','%.10f'])
152 
153 
154 # In[ ]:
155 
156 
157 
158 
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
Vec apply(Vec v, F f)
Definition: ExtVec.h:81