CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
isotrackApplyRegressor.py
Go to the documentation of this file.
1 ######################################################################################
2 # Evaluates regressor from loaded model
3 # Usage:
4 # python3 isotrackApplyRegressor.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -M ./models/model1.h5 -B endcap -O corrfac1.txt
5 # python3 isotrackApplyRegressor.py -PU root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root -M ./models/model2.h5 -B barrel -O corrfac2.txt
6 ######################################################################################
7 # coding: utf-8
8 
9 # In[1]:
10 
11 
12 import pandas as pd
13 import numpy as np
14 import matplotlib.pyplot as plt
15 import argparse
16 import tensorflow.keras
17 from tensorflow.keras.models import Sequential
18 from tensorflow.keras.layers import Dense
19 from tensorflow.keras.layers import Dropout
20 from tensorflow.keras.utils import to_categorical
21 from tensorflow.keras.utils import plot_model
22 from tensorflow.keras import regularizers
23 from sklearn.metrics import roc_curve, auc
24 from tensorflow.keras.layers import Activation
25 from tensorflow.keras import backend as K
26 from tensorflow.keras.models import load_model
27 import uproot
28 
29 
30 # In[2]:
31 
32 parser = argparse.ArgumentParser()
33 parser.add_argument("-PU", "--filePU",help="input PU file",default="root://cmseos.fnal.gov//store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root")
34 #parser.add_argument("-PU", "--filePU",help="input PU file",default="/eos/uscms/store/user/sghosh/ISOTRACK/DIPI_2021_noPU.root")
35 parser.add_argument("-B", "--ifbarrel",help="barrel / endcap",default='barrel')
36 parser.add_argument("-M", "--modelname",help="model file name",default="./models/model.h5")
37 parser.add_argument("-O", "--opfilename",help="output text file name",default="corrfac_regression.txt")
38 
39 
40 fName1 = parser.parse_args().filePU
41 modeln = parser.parse_args().modelname
42 foutput = parser.parse_args().opfilename
43 barrelflag = parser.parse_args().ifbarrel
44 
45 
46 
47 #fName1='/eos/uscms/store/user/sghosh/ISOTRACK/DIPI_2021_PUpart.root'
48 tree1 = uproot.open(fName1,xrootdsource=dict(chunkbytes=1024**3, limitbytes=1024**3))['HcalIsoTrkAnalyzer/CalibTree']
49 print ("loaded files")
50 
51 branchespu = ['t_Run','t_Event','t_nVtx','t_ieta','t_iphi','t_p','t_pt','t_gentrackP','t_eMipDR','t_eHcal','t_eHcal10','t_eHcal30','t_hmaxNearP','t_emaxNearP','t_hAnnular','t_eAnnular','t_rhoh']
52 dictpu = tree1.arrays(branches=branchespu)
53 #dictpu = tree1.arrays(branches=branchespu,entrystart=0, entrystop=100000)
54 dfspu = pd.DataFrame.from_dict(dictpu)
55 dfspu.columns=branchespu
56 print ("sample size:",dfspu.shape[0])
57 
58 
59 # In[3]:
60 
61 
62 dfspu['t_delta']=dfspu['t_eHcal30']-dfspu['t_eHcal10']
63 keepvars =['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt','t_eHcal', 't_p', 't_eMipDR']
64 
65 df = dfspu[keepvars].copy()
66 df['t_eHcal_xun'] = df['t_eHcal']
67 df['t_delta_un'] = df['t_delta']
68 df['t_ieta_un'] = df['t_ieta']
69 
70 #cols_to_stand = ['t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal_x']
71 #cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt']
72 
73 #### if using global norm
74 #cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt','t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal']
75 #df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.min()) / (x.max() - x.min()) if (x.max() - x.min() > 0) else 1.0/200.0)
76 
77 #### if using training norm
78 if barrelflag=='barrel':
79  var = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal_x', 't_eHcal_y', 't_p', 't_eMipDR', 't_eHcal_xun', 't_delta_un', 't_ieta_un']
80  mina = [20, -15, 17.33086721925065, 17.655660001095384, 0.0, -1.0, -1.0, 0.0, -5.33359869197011, 4.093925265397289, 20.783629520782718, 16.998163268435746, 20.000221125315875, 40.074083721419896, 0.0, 16.998163268435746, 0.0, -15]
81  maxa = [138, 15, 138.26640254695667, 155.83508832909865, 50.9643486259738, 17.140547914961605, 2870.424876287056, 35.727171580074355, 17.763740802183747, 26.38359781195008, 59.169594172331905, 133.07561272289604, 122.8542027361691, 59.977312414583295, 0.9999987781047821, 133.07561272289604, 50.9643486259738, 15]
82 else:
83  var = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal_x', 't_eHcal_y', 't_p', 't_eMipDR', 't_eHcal_xun', 't_delta_un', 't_ieta_un']
84  mina = [20, -27, 20.934556022286415, 27.213239994598553, 0.7803188574616797, -1.0, -1.0, 0.0, -57.422806948423386, 4.435386319143829, 6.013227444563335, 18.5278014652431, 20.00695458613336, 40.00339106433163, 0.0, 18.5278014652431, 0.7803188574616797, -27]
85  maxa = [117, 28, 584.1150933708996, 822.056631873711, 424.1259534251876, 19.999632518345965, 13218.009997109137, 185.96148682758212, 128.60208600759506, 25.911101538538524, 28.928444814127907, 428.99590471945703, 111.25579111767001, 59.98427126709689, 0.9999948740005493, 428.99590471945703, 424.1259534251876, 28]
86 
87 print(mina)
88 print(maxa)
89 
90 normkey = ['t_nVtx', 't_ieta', 't_eHcal10', 't_eHcal30', 't_delta', 't_hmaxNearP', 't_emaxNearP', 't_hAnnular', 't_eAnnular', 't_rhoh', 't_pt', 't_eHcal']
91 k = 0
92 for i in df.keys():
93  if i not in normkey:
94  break
95  #print(normkey[k],mina[k],maxa[k])
96  df[i]=abs(df[i]-mina[k])/(maxa[k] - mina[k])
97  k+=1
98 
99 
100 df['t_eHcal_xun'] = df['t_eHcal_xun'].apply(lambda x: 1.0 if (x==0) else x)
101 uncorrected_values = df['t_eHcal_xun'].values
102 print (uncorrected_values.shape)
103 print ('data vars:',df.keys())
104 data = df.values
105 X_train = data[:,0:12]
106 
107 
108 # In[4]:
109 
110 
111 model = load_model(modeln)
112 preds = model.predict(X_train,verbose=1)
113 preds = preds.reshape(preds.shape[0])
114 print (preds.shape)
115 
116 
117 # In[5]:
118 if barrelflag=='barrel':
119  tag = 'barrel'
120 else:
121  tag = 'endcap'
122 
123 print(tag)
124 
125 plt.hist(preds, bins =100, range=(0,100),label='predicted enerhy',alpha=0.6)
126 plt.savefig('predicted_Edist_'+tag+'.png')
127 plt.close()
128 
129 parray = dfspu['t_p'].values
130 mipdr = dfspu['t_eMipDR'].values
131 plt.hist(preds/(parray - mipdr), bins =100, range=(0,10),label='predicted e/p-ecal',alpha=0.6)
132 plt.savefig('predicted_eopdist_'+tag+'.png')
133 plt.close()
134 
135 
136 
137 # In[6]:
138 eventnumarray = np.arange(0,X_train.shape[0],1,dtype=int)
139 eventnumarray = eventnumarray.reshape(eventnumarray.shape[0],1)
140 #runnumarray = dfspu['t_Run'].values
141 #runnumarray = runnumarray.reshape(runnumarray.shape[0],1)
142 #eventnumarray = dfspu['t_Event'].values
143 #eventnumarray = eventnumarray.reshape(eventnumarray.shape[0],1)
144 ietaarray = dfspu['t_ieta'].values
145 ietaarray = ietaarray.reshape(ietaarray.shape[0],1)
146 #iphiarray = dfspu['t_iphi'].values
147 #iphiarray = iphiarray.reshape(iphiarray.shape[0],1)
148 corrfac = preds/uncorrected_values
149 corrfac = corrfac.reshape(corrfac.shape[0],1)
150 fileo = np.hstack((eventnumarray,ietaarray,corrfac))
151 #fileo = np.hstack((runnumarray,eventnumarray,ietaarray,iphiarray,corrfac))
152 np.savetxt(foutput, fileo,fmt=['%d','%d','%.10f'])
153 
154 
155 # In[ ]:
156 
157 
158 
159 
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
Vec apply(Vec v, F f)
Definition: ExtVec.h:81