11 import matplotlib.pyplot
as plt
14 from keras.models
import Sequential
15 from keras.layers
import Dense
16 from keras.layers
import Dropout
17 from keras.utils.np_utils
import to_categorical
18 from keras.utils
import plot_model
19 from keras
import regularizers
20 from sklearn.metrics
import roc_curve, auc
21 from keras.layers
import Activation
22 from keras
import backend
as K
23 from keras.models
import save_model
27 parser = argparse.ArgumentParser()
28 parser.add_argument(
"-I",
"--input",help=
"input file",default=
"isotk_relval_hi.pkl")
29 parser.add_argument(
"-V",
"--modelv",help=
"model version (any number) ",default=
"1")
33 fName = parser.parse_args().input
34 modv = parser.parse_args().modelv
38 df = pd.read_pickle(fName)
39 print (
"vars in file:",df.keys())
40 print(
"events in df original:",df.shape[0])
41 df = df.loc[df[
't_eHcal_y'] > 20]
42 print(
"events in df after energy cut:",df.shape[0])
43 df[
't_eHcal_xun'] = df[
't_eHcal_x']
44 df[
't_delta_un'] = df[
't_delta']
45 df[
't_ieta_un'] = df[
't_ieta']
54 mina.append(df[i].
min())
55 maxa.append(df[i].
max())
64 cols_to_minmax = [
't_delta',
't_hmaxNearP',
't_emaxNearP',
't_hAnnular',
't_eAnnular',
't_pt',
't_nVtx',
't_ieta',
't_eHcal10',
't_eHcal30',
't_rhoh',
't_eHcal_x']
68 df[cols_to_minmax] = df[cols_to_minmax].
apply(
lambda x: (x - x.min()) / (x.max() - x.min()))
71 print (
'data shape:',data.shape)
81 testindx = data.shape[0] - ntest
82 X_train = data[:testindx,0:12]
83 Y_train = data[:testindx,12]
84 X_test = data[testindx:,:]
85 print (
"shape of X_train:",X_train.shape)
86 print (
"shape of Y_train:",Y_train.shape)
87 print (
"shape of X_test:",X_test.shape)
88 meany = np.mean(Y_train)
89 print (
"mean y:",meany)
90 stdy = np.std(Y_train)
91 print (
"std y:", stdy)
98 a0 = [0.973, 0.998, 0.992, 0.965 ]
99 a1 = [0, -0.318, -0.261, -0.406]
100 a2 = [0, 0, 0.047, 0.089]
105 idx = (
int(jeta >= PU_IETA_1) +
int(jeta >= PU_IETA_2) +
int(jeta >= PU_IETA_3))
111 idx = (
int(jeta >= PU_IETA_1) +
int(jeta >= PU_IETA_2) +
int(jeta >= PU_IETA_3))
117 idx = (
int(jeta >= PU_IETA_1) +
int(jeta >= PU_IETA_2) +
int(jeta >= PU_IETA_3))
120 vec0 = np.vectorize(fac0)
121 vec1 = np.vectorize(fac1)
122 vec2 = np.vectorize(fac2)
125 eop = (X_test[:,15]/X_test[:,13])
126 dop = (X_test[:,16]/X_test[:,13])
129 mcorrval = vec0(
abs(X_test[:,17])) + vec1(
abs(X_test[:,17]))*eop*dop*( 1 + vec2(
abs(X_test[:,17]))*dop)
136 weight = np.copy(y_true)
137 weight[
abs(y_true - meany) > 0] = 1.90*
abs(y_true - meany)/stdy
139 weight[
abs(y_true - meany) < stdy] = 1
146 from keras
import optimizers
147 print (
"creating model=========>")
149 model.add(Dense(128, input_shape=(X_train.shape[1],), activation=
'relu'))
150 model.add(Dropout(0.2))
151 model.add(Dense(500, activation=
'relu',kernel_regularizer=regularizers.l2(0.01)))
152 model.add(Dense(500, activation=
'relu',kernel_regularizer=regularizers.l2(0.01)))
153 model.add(Dense(60, activation=
'relu',kernel_regularizer=regularizers.l2(0.01)))
156 RMS = keras.optimizers.RMSprop(lr=0.001, rho=0.9)
158 print (
"compilation up next=======>")
159 model.compile(loss=
'logcosh',optimizer=
'adam')
162 print (
"fitting now=========>")
163 history = model.fit(X_train,Y_train , batch_size=5000, epochs=100, validation_split=0.2, verbose=1,sample_weight=
propweights(Y_train))
167 plt.plot(history.history[
'loss'])
168 plt.plot(history.history[
'val_loss'])
169 plt.title(
'model loss')
172 plt.legend([
'train',
'validation'], loc=
'upper left')
173 plt.savefig(modv+
'_loss_distributions.png')
177 preds = model.predict(X_test[:,0:12])
178 targets = X_test[:,12]
179 uncorrected = X_test[:,15]
180 marinascorr = X_test[:,15]*mcorrval
181 plt.hist(preds, bins =100, range=(0,200),label=
'PU regression',alpha=0.6)
182 plt.hist(targets, bins =100, range=(0,200),label=
'no PU',alpha=0.6)
183 plt.hist(uncorrected, bins =100, range=(0,200),label=
'uncorrected',alpha=0.6)
186 plt.title(
"Energy distribution")
187 plt.legend(loc=
'upper right')
188 plt.savefig(modv+
'_energy_distributions.png')
191 preds = preds.reshape(preds.shape[0])
199 plt.hist(
abs(uncorrected -targets)/targets*100, bins =100, range=(0,100),label=
'uncorrected',alpha=0.6)
201 plt.hist(100*
abs(preds -targets)/targets, bins =100, range=(0,100),label=
'PU correction',alpha=0.6)
203 plt.title(
"error distribution")
204 plt.legend(loc=
'upper right')
205 plt.savefig(modv+
'_errors.png')
209 plt.scatter(targets, uncorrected,alpha=0.3,label=
'uncorr')
210 plt.scatter(targets, preds,alpha=0.3,label=
'PUcorr')
211 plt.xlabel(
'True Values ')
212 plt.ylabel(
'Predictions ')
213 plt.legend(loc=
'upper right')
217 _ = plt.plot(lims, lims)
218 plt.savefig(modv+
'_energyscatt.png')
225 plt.hist(targets/pmom, bins =100, range=(0,5),label=
'E/p noPU',alpha=0.6)
228 plt.hist(marinascorr/pmom, bins =100, range=(0,5),label=
'E/p marina corr',alpha=0.6)
231 plt.legend(loc=
'upper right')
233 plt.title(
"E/p distributions")
234 plt.savefig(modv+
'_eopdist.png')
242 if not os.path.exists(
'models'):
243 os.makedirs(
'models')
244 model.save(
'models/model'+modv+
'.h5')