CMS 3D CMS Logo

isotrackTrainRegressorNoOptimization.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # coding: utf-8
3 
4 # In[1]:
5 
6 
7 
13 
14 
15 import pandas as pd
16 import numpy as np
17 import matplotlib.pyplot as plt
18 import argparse
19 import tensorflow.keras
20 from tensorflow.keras.models import Sequential
21 from tensorflow.keras.layers import Dense
22 from tensorflow.keras.layers import Dropout
23 from tensorflow.keras.utils import to_categorical
24 from tensorflow.keras.utils import plot_model
25 from tensorflow.keras import regularizers
26 from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
27 from sklearn.model_selection import GridSearchCV, RandomizedSearchCV,train_test_split, cross_val_score
28 from sklearn.metrics import roc_curve, auc
29 from tensorflow.keras.layers import Activation
30 from tensorflow.keras import backend as K
31 from tensorflow.keras.models import save_model
32 import tensorflow as tf
33 from tensorflow import keras
34 
35 
36 # In[37]:
37 
38 
39 parser1 = argparse.ArgumentParser()
40 parser1.add_argument("-I", "--input",help="input file",default="isotk_relval_lowinter1.pkl")
41 parser1.add_argument("-V", "--modelv",help="model version (any number) ",default="2")
42 
43 args = parser1.parse_args(args=[])
44 
45 fName = args.input
46 modv = args.modelv
47 
48 
49 # In[38]:
50 
51 
52 print ("file Name:", fName)
53 print ("modv :", modv)
54 
55 
56 # In[39]:
57 
58 
59 df = pd.read_pickle(fName)
60 print ("vars in file:",df.keys())
61 print("events in df original:",df.shape[0])
62 df = df.loc[df['t_eHcal_y'] > 20]
63 print("events in df after energy cut:",df.shape[0])
64 df['t_eHcal_xun'] = df['t_eHcal_x']
65 df['t_delta_un'] = df['t_delta']
66 df['t_ieta_un'] = df['t_ieta']
67 
68 
69 # In[40]:
70 
71 
72 mina = []
73 maxa = []
74 keya = []
75 
76 
77 for i in df.keys():
78  keya.append(i)
79  mina.append(df[i].min())
80  maxa.append(df[i].max())
81 
82 print('var = ',keya)
83 print('mina = ',mina)
84 print('maxa = ',maxa)
85 
86 
87 # In[41]:
88 
89 
90 cols_to_minmax = ['t_delta', 't_hmaxNearP','t_emaxNearP', 't_hAnnular', 't_eAnnular','t_pt','t_nVtx','t_ieta','t_eHcal10', 't_eHcal30','t_rhoh','t_eHcal_x']
91 #df[cols_to_stand] = df[cols_to_stand].apply(lambda x: (x - x.mean()) /(x.std()))
92 #df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.mean()) / (x.max() - x.min()))
93 # #(x.max() - x.min()))
94 df[cols_to_minmax] = df[cols_to_minmax].apply(lambda x: (x - x.min()) / (x.max() - x.min()))
95 
96 data = df.values
97 print ('data shape:',data.shape)
98 targets = data[:,12]
99 targets.shape
100 print ('targets shape:', targets.shape)
101 print ("vars in file:",df.keys())
102 
103 
104 # In[42]:
105 
106 
107 data = df.values
108 ntest = 20000
109 testindx = data.shape[0] - ntest
110 # data.shape[0]: 438118 is no of events
111 # data.shape[1] : 18 ==> columns
112 # testindx = 438118-20000
113 print ('data shape:',data.shape[0])
114 print ('testindx: ' ,testindx)
115 # this :testindx = 438118-20000 = 418118
116 X_train = data[:testindx,0:12]
117 Y_train = data[:testindx,12]
118 X_test = data[testindx:,:]
119 print ("shape of X_train:",X_train.shape)
120 print ("shape of Y_train:",Y_train.shape)
121 print ("shape of X_test:",X_test.shape)
122 meany = np.mean(Y_train)
123 print ("mean y:",meany)
124 stdy = np.std(Y_train)
125 print ("std y:", stdy)
126 
127 
128 # In[43]:
129 
130 
131 
132 a0 = [0.973, 0.998, 0.992, 0.965 ]
133 a1 = [0, -0.318, -0.261, -0.406]
134 a2 = [0, 0, 0.047, 0.089]
135 def fac0(jeta):
136  PU_IETA_1 = 9
137  PU_IETA_2 = 16
138  PU_IETA_3 = 25
139  idx = (int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3))
140  return a0[idx]
141 def fac1(jeta):
142  PU_IETA_1 = 9
143  PU_IETA_2 = 16
144  PU_IETA_3 = 25
145  idx = (int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3))
146  return a1[idx]
147 def fac2(jeta):
148  PU_IETA_1 = 9
149  PU_IETA_2 = 16
150  PU_IETA_3 = 25
151  idx = (int(jeta >= PU_IETA_1) + int(jeta >= PU_IETA_2) + int(jeta >= PU_IETA_3))
152  return a2[idx]
153 
154 
155 # In[44]:
156 
157 
158 vec0 = np.vectorize(fac0)
159 vec1 = np.vectorize(fac1)
160 vec2 = np.vectorize(fac2)
161 
162 X_test[:,17]
163 eop = (X_test[:,15]/X_test[:,13])
164 dop = (X_test[:,16]/X_test[:,13])
165 print ("eop: ", eop)
166 #mcorrval = vec0(abs(X_test[:,17])) + vec1(abs(X_test[:,17]))*(X_test[:,15]/X_test[:,13])*(X_test[:,16]/X_test[:,13])*( 1 + vec2(fac(abs(X_test[:,17])))*(X_test[:,16]/X_test[:,13]))
167 
168 mcorrval = vec0(abs(X_test[:,17])) + vec1(abs(X_test[:,17]))*eop*dop*( 1 + vec2(abs(X_test[:,17]))*dop)
169 
170 
171 # In[45]:
172 
173 
174 def propweights(y_true):
175  weight = np.copy(y_true)
176  weight[abs(y_true - meany) > 0] = 1.90*abs(y_true - meany)/stdy #1.25
177 # weight[abs(y_true - meany) > stdy] = 1.75*abs((weight[abs(y_true - meany) > stdy]) - meany)/(stdy)
178  weight[abs(y_true - meany) < stdy] = 1
179  print ("wieght : ", weight)
180  return weight
181 
182 
183 # In[46]:
184 
185 
186 from keras import optimizers
187 print ("creating model=========>")
188 model = Sequential()
189 model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
190 #model.add(Dropout(0.2))
191 model.add(Dense(271, activation='relu',kernel_regularizer=regularizers.l2(0.01)))
192 model.add(Dense(301, activation='relu',kernel_regularizer=regularizers.l2(0.01)))
193 model.add(Dense(241, activation='relu',kernel_regularizer=regularizers.l2(0.01)))
194 model.add(Dense(1))
195 
196 
197 # In[47]:
198 
199 
200 print ("compilation up next=======>")
201 model.compile(loss='logcosh',optimizer='adam')
202 model.summary()
203 #print ("Y_train : ", Y_train)
204 #fitting
205 print ("fitting now=========>")
206 history = model.fit(X_train,Y_train , batch_size=5000, epochs=50, validation_split=0.2, verbose=1,sample_weight=propweights(Y_train))
207 
208 
209 # In[48]:
210 
211 
212 plt.plot(history.history['loss'])
213 plt.plot(history.history['val_loss'])
214 plt.title('model loss')
215 plt.ylabel('loss')
216 plt.xlabel('epoch')
217 plt.legend(['train', 'validation'], loc='upper left')
218 plt.savefig(modv+'_loss_distributions_lowWinter.png')
219 #plt.show()
220 plt.close()
221 
222 
223 # In[49]:
224 
225 
226 preds = model.predict(X_test[:,0:12])
227 targets = X_test[:,12]
228 uncorrected = X_test[:,15]
229 marinascorr = X_test[:,15]*mcorrval
230 plt.hist(preds, bins =100, range=(0,200),label='PU regression',alpha=0.6)
231 plt.hist(targets, bins =100, range=(0,200),label='no PU',alpha=0.6)
232 plt.hist(uncorrected, bins =100, range=(0,200),label='uncorrected',alpha=0.6)
233 #plt.hist(marinascorr, bins =100, range=(0,200),label='marinas correction',alpha=0.6)
234 plt.yscale('log')
235 plt.title("Energy distribution")
236 plt.legend(loc='upper right')
237 plt.savefig(modv+'_energy_distributions_lowWinter.png')
238 #plt.show()
239 plt.close()
240 
241 preds = preds.reshape(preds.shape[0])
242 print (preds.shape)
243 
244 
245 # In[50]:
246 
247 
248 plt.hist(abs(uncorrected -targets)/targets*100, bins =100, range=(0,100),label='uncorrected',alpha=0.6)
249 #plt.hist(abs(marinascorr -targets)/targets*100, bins =100, range=(0,100),label='marinas correction',alpha=0.6)
250 plt.hist(100*abs(preds -targets)/targets, bins =100, range=(0,100),label='PU correction',alpha=0.6)
251 #plt.yscale('log')
252 plt.title("error distribution")
253 plt.legend(loc='upper right')
254 plt.savefig(modv+'_errors_low.png')
255 #plt.show()
256 plt.close()
257 
258 
259 # In[51]:
260 
261 
262 plt.scatter(targets, uncorrected,alpha=0.3,label='uncorr')
263 plt.scatter(targets, preds,alpha=0.3,label='PUcorr')
264 plt.xlabel('True Values ')
265 plt.ylabel('Predictions ')
266 plt.legend(loc='upper right')
267 lims = [0, 200]
268 plt.xlim(lims)
269 plt.ylim(lims)
270 _ = plt.plot(lims, lims)
271 plt.savefig(modv+'_energyscatt_lowWinter.png')
272 #plt.show()
273 plt.close()
274 
275 
276 # In[52]:
277 
278 
279 pmom= X_test[:,13]
280 #get_ipython().run_line_magic('matplotlib', 'inline')
281 plt.hist(targets/pmom, bins =100, range=(0,5),label='E/p noPU',alpha=0.6)
282 #plt.hist(preds/pmom, bins =100, range=(0,5),label='E/p PUcorr',alpha=0.6)
283 #plt.hist(uncorrected/pmom, bins =100, range=(0,5),label='E/p uncorr',alpha=0.6)
284 plt.hist(marinascorr/pmom, bins =100, range=(0,5),label='E/p marina corr',alpha=0.6)
285 #plt.hist(np.exp(preds.reshape((preds.shape[0])))/pmom[0:n_test_events], bins =100, range=(0,5),label='corrEnp/p',alpha=0.3)
286 #plt.hist(preds.reshape((preds.shape[0]))/pmom[0:n_test_events], bins =100, range=(0,5),label='corrEnp/p',alpha=0.3)
287 plt.legend(loc='upper right')
288 plt.yscale('log')
289 plt.title("E/p distributions")
290 plt.savefig(modv+'_eopdist_lowWinter.png')
291 #plt.show()
292 plt.close()
293 
294 
295 # In[53]:
296 
297 
298 import os
299 
300 if not os.path.exists('models'):
301  os.makedirs('models')
302 model.save('models/model_BarrelWinter'+modv+'.h5')
303 #new_model_2 = load_model('my_model.h5')
304 
305 
306 # In[ ]:
307 
308 
309 
310 
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
Abs< T >::type abs(const T &t)
Definition: Abs.h:22
Vec apply(Vec v, F f)
Definition: ExtVec.h:77