#ifndef PhysicsTools_PatAlgos_BaseMVAValueMapProducer
#define PhysicsTools_PatAlgos_BaseMVAValueMapProducer
#include "TMVA/Factory.h"
#include "TMVA/Reader.h"
    if (backend == "TF") {
      // TF backend: deserialize the graph and open a session on it
      graph_.reset(tensorflow::loadGraphDef(model_path));
      tf_session_ = tensorflow::createSession(graph_.get());
    } else if (backend == "ONNX") {
      ort_ = std::make_unique<cms::Ort::ONNXRuntime>(model_path);
    }

  std::shared_ptr<tensorflow::GraphDef> graph_;
  tensorflow::Session* tf_session_ = nullptr;
  std::unique_ptr<cms::Ort::ONNXRuntime> ort_;
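The cache is built once per module and shared by all stream instances through the producer's global-cache hooks (initializeGlobalCache / globalEndJob, declared further down). A minimal sketch of that wiring, assuming the weight file is passed through a "weightFile" FileInPath parameter (only the member weightfilename_ appears in this listing):

template <typename T>
std::unique_ptr<BaseMVACache> BaseMVAValueMapProducer<T>::initializeGlobalCache(const edm::ParameterSet& cfg) {
  // one model per module, built once and shared by every stream instance
  return std::make_unique<BaseMVACache>(cfg.getParameter<edm::FileInPath>("weightFile").fullPath(),
                                        cfg.getParameter<std::string>("backend"));
}

template <typename T>
void BaseMVAValueMapProducer<T>::globalEndJob(const BaseMVACache* cache) {}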
        batch_eval_(iConfig.getParameter<bool>("batch_eval")) {
    if (!(tmva_ || tf_ || onnx_)) {  // flags set from the "backend" parameter in the initializer list (elided above)
      throw cms::Exception("ConfigError") << "Only 'TF', 'ONNX' and 'TMVA' backends are supported\n";
    }
    for (const auto& s : iConfig.getParameter<std::vector<std::string>>("outputFormulas")) {
      output_formulas_.push_back(StringObjectFunction<std::vector<float>>(s));
    }

    // TMVA yields a single, unlabeled ValueMap; TF and ONNX yield one labeled ValueMap per output name
    if (tmva_) {
      produces<edm::ValueMap<float>>();
    } else {
      for (const auto& n : output_names_) {
        produces<edm::ValueMap<float>>(n);
      }
    }
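Each outputFormulas string is compiled into a StringObjectFunction over the raw model output vector, so the stored score can be an arbitrary expression of the network outputs. An illustrative example (the expression and values are invented, not taken from this file):

#include <vector>
#include "CommonTools/Utils/interface/StringObjectFunction.h"

double exampleDiscriminant() {
  // combine two raw softmax outputs into a single discriminant
  StringObjectFunction<std::vector<float>> f("at(0)/(at(0)+at(1))");
  std::vector<float> raw{0.7f, 0.2f};
  return f(raw);  // 0.7 / (0.7 + 0.2)
}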
  std::vector<std::pair<std::string, StringObjectFunction<T, true>>> funcs_;
template <typename T>
void BaseMVAValueMapProducer<T>::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) {
  edm::Handle<edm::View<T>> src;
  iEvent.getByToken(src_, src);
  // let derived classes fetch any extra event content they need
  readAdditionalCollections(iEvent, iSetup);
  std::vector<std::vector<float>> mvaOut((tmva_) ? 1 : output_names_.size());
  for (auto& v : mvaOut)
    v.reserve(src->size());
  // batch evaluation: flatten all objects' input variables into one contiguous buffer
  std::vector<float> data;
  data.reserve(src->size() * positions_.size());
  for (auto const& o : *src) {
    for (auto const& p : funcs_) {
      setValue(p.first, p.second(o));  // evaluate each configured string function on the object
    }
    fillAdditionalVariables(o);
    data.insert(data.end(), values_.begin(), values_.end());
  }
  std::vector<float> outputs;
  // TF backend: one rank-2 input tensor of shape (number of objects) x (number of input variables)
  tensorflow::TensorShape input_size{(long long int)src->size(), (long long int)positions_.size()};
  tensorflow::NamedTensorList input_tensors;
  input_tensors.resize(1);
  input_tensors[0] = tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
  for (unsigned i = 0; i < data.size(); ++i) {
    input_tensors[0].second.flat<float>()(i) = data[i];
  }
  std::vector<tensorflow::Tensor> output_tensors;
  tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &output_tensors);
  for (unsigned i = 0; i < output_tensors.at(0).NumElements(); ++i) {
    outputs.push_back(output_tensors.at(0).flat<float>()(i));
  }
  // ONNX backend: a single flat input array covering the whole batch
  cms::Ort::FloatArrays inputs{data};
  outputs = globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_}, src->size())[0];
  // slice the flat batch output back into one vector per object and apply the post-processing formulas
  const unsigned outdim = outputs.size() / src->size();
  for (unsigned i = 0; i < src->size(); ++i) {
    std::vector<float> tmpOut(outputs.begin() + i * outdim, outputs.begin() + (i + 1) * outdim);
    for (size_t k = 0; k < output_names_.size(); k++) {
      mvaOut[k].push_back(output_formulas_[k](tmpOut));
    }
  }
  // per-object evaluation
  for (auto const& o : *src) {
    for (auto const& p : funcs_) {
      setValue(p.first, p.second(o));
    }
    fillAdditionalVariables(o);
    // TMVA backend evaluates directly through the booked reader
    mvaOut[0].push_back(isClassifier_ ? reader_->EvaluateMVA(name_) : reader_->EvaluateRegression(name_)[0]);
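EvaluateMVA / EvaluateRegression require a TMVA::Reader that was booked beforehand (this producer does that in beginStream). A minimal sketch of such a booking, assuming each input variable is bound to its slot in values_ and using the loadTMVAWeights helper declared in the symbol list below:

reader_ = new TMVA::Reader();
for (auto const& v : variablesOrder_) {
  // bind each named input variable to its position in the values_ buffer
  reader_->AddVariable(v, &values_[positions_[v]]);
}
loadTMVAWeights(reader_, name_, weightfilename_);  // load the weight file and book the method under name_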
    std::vector<float> tmpOut;
    // TF backend: a 1 x (number of input variables) tensor filled from the current object's values
    tensorflow::TensorShape input_size{1, (long long int)positions_.size()};
    tensorflow::NamedTensorList input_tensors;
    input_tensors.resize(1);
    input_tensors[0] = tensorflow::NamedTensor(inputTensorName_, tensorflow::Tensor(tensorflow::DT_FLOAT, input_size));
    for (size_t j = 0; j < values_.size(); j++) {
      input_tensors[0].second.matrix<float>()(0, j) = values_[j];
    }
    std::vector<tensorflow::Tensor> outputs;
    tensorflow::run(globalCache()->getTFSession(), input_tensors, {outputTensorName_}, &outputs);
    for (int k = 0; k < outputs.at(0).matrix<float>().dimension(1); k++)
      tmpOut.push_back(outputs.at(0).matrix<float>()(0, k));
    // ONNX backend: the input is just the per-object vector of values
    cms::Ort::FloatArrays inputs{values_};
    tmpOut = globalCache()->getONNXSession().run({inputTensorName_}, inputs, {}, {outputTensorName_})[0];

    for (size_t k = 0; k < output_names_.size(); k++)
      mvaOut[k].push_back(output_formulas_[k](tmpOut));
  // store each output column as its own edm::ValueMap keyed to the input collection
  size_t k = 0;
  for (auto& m : mvaOut) {
    auto mvaV = std::make_unique<edm::ValueMap<float>>();
    edm::ValueMap<float>::Filler filler(*mvaV);
    filler.insert(src, m.begin(), m.end());
    filler.fill();
    // TMVA: single unlabeled product; TF/ONNX: the configured output names become instance labels
    iEvent.put(std::move(mvaV), (tmva_) ? "" : output_names_[k]);
    ++k;
  }
}
// (the bodies of two further template member-function definitions are elided in this listing)

template <typename T>
edm::ParameterSetDescription BaseMVAValueMapProducer<T>::getDescription() {
  edm::ParameterSetDescription desc;
  desc.add<edm::InputTag>("src")->setComment("input physics object collection");
  desc.add<std::vector<std::string>>("variablesOrder")->setComment("ordered list of MVA input variable names");
  desc.add<std::string>("name")->setComment("output score variable name");
  desc.add<bool>("isClassifier")->setComment("is a classifier discriminator");
  // (a few additional parameters are elided in this listing)
  desc.add<std::string>("backend", "TMVA")->setComment("TMVA, TF or ONNX");
  desc.add<std::string>("inputTensorName", "")->setComment("Name of tensorflow input tensor in the model");
  desc.add<std::string>("outputTensorName", "")->setComment("Name of tensorflow output tensor in the model");
  desc.add<std::vector<std::string>>("outputNames", std::vector<std::string>())
      ->setComment("Names of the output values to be used in the output valuemap");
  desc.add<std::vector<std::string>>("outputFormulas", std::vector<std::string>())
      ->setComment("Formulas to be used to post process the output");
  desc.add<bool>("batch_eval", false)->setComment("Run inference in batch instead of per-object");
  return desc;
}
template <typename T>
void BaseMVAValueMapProducer<T>::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
  edm::ParameterSetDescription desc = getDescription();
  // the registered module name is built from the wrapped object type (type-specific prefixes elided here)
  std::string modname;
  modname += "BaseMVAValueMapProducer";
  descriptions.add(modname, desc);
}
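A concrete module is obtained either by instantiating the template directly on a pat type or by deriving from it and overriding the two hooks listed below (readAdditionalCollections and fillAdditionalVariables). The following sketch is illustrative only: the class name ExampleJetMVAProducer, the pileup input and the "nVtx" variable are invented for the example:

#include "DataFormats/PatCandidates/interface/Jet.h"
#include "SimDataFormats/PileupSummaryInfo/interface/PileupSummaryInfo.h"
#include "FWCore/Framework/interface/MakerMacros.h"

class ExampleJetMVAProducer : public BaseMVAValueMapProducer<pat::Jet> {
public:
  ExampleJetMVAProducer(const edm::ParameterSet& cfg, const BaseMVACache* cache)
      : BaseMVAValueMapProducer<pat::Jet>(cfg, cache),
        puToken_(consumes<std::vector<PileupSummaryInfo>>(cfg.getParameter<edm::InputTag>("pileup"))) {}

  // fetch the extra event content once per event ...
  void readAdditionalCollections(edm::Event& iEvent, const edm::EventSetup&) override {
    iEvent.getByToken(puToken_, pu_);
  }
  // ... and feed it into the input-variable buffer next to the per-object string functions
  void fillAdditionalVariables(const pat::Jet&) override { setValue("nVtx", pu_->size()); }

private:
  edm::EDGetTokenT<std::vector<PileupSummaryInfo>> puToken_;
  edm::Handle<std::vector<PileupSummaryInfo>> pu_;
};

DEFINE_FWK_MODULE(ExampleJetMVAProducer);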
Symbols referenced in this listing:

BaseMVACache:
  BaseMVACache(const std::string& model_path, const std::string& backend)
  tensorflow::Session* getTFSession() const
  const cms::Ort::ONNXRuntime& getONNXSession() const
  std::shared_ptr<tensorflow::GraphDef> graph_
  tensorflow::Session* tf_session_
  std::unique_ptr<cms::Ort::ONNXRuntime> ort_

BaseMVAValueMapProducer<T>:
  BaseMVAValueMapProducer(const edm::ParameterSet& iConfig, const BaseMVACache* cache)
  ~BaseMVAValueMapProducer() override
  void beginStream(edm::StreamID) override
  void endStream() override
  void produce(edm::Event&, const edm::EventSetup&) override
  void setValue(const std::string var, float val)
  virtual void readAdditionalCollections(edm::Event&, const edm::EventSetup&)  -- hook for derived classes to read additional collections from the event
  virtual void fillAdditionalVariables(const T&)  -- to be implemented in derived classes, filling values for additional variables
  static std::unique_ptr<BaseMVACache> initializeGlobalCache(const edm::ParameterSet& cfg)
  static void globalEndJob(const BaseMVACache* cache)
  static edm::ParameterSetDescription getDescription()
  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions)
  edm::EDGetTokenT<edm::View<T>> src_
  std::map<std::string, size_t> positions_
  std::vector<float> values_
  std::vector<std::pair<std::string, StringObjectFunction<T, true>>> funcs_
  std::vector<std::string> variablesOrder_
  std::vector<std::string> output_names_
  std::vector<StringObjectFunction<std::vector<float>>> output_formulas_
  std::string inputTensorName_
  std::string outputTensorName_
  std::string weightfilename_

Framework, TensorFlow and ONNX helpers:
  OrphanHandle<PROD> put(std::unique_ptr<PROD> product)  -- put a new product into the event
  bool getByToken(EDGetToken token, Handle<PROD>& result) const
  T getParameter(std::string const&) const
  std::vector<std::string> getParameterNamesForType(bool trackiness = true) const
  void insert(const H& h, I begin, I end)
  ParameterDescriptionBase* add(U const& iLabel, T const& value)
  void setComment(std::string const& value)
  void setAllowAnything()  -- allow any parameter label/value pairs
  void add(std::string const& label, ParameterSetDescription const& psetDescription)
  GraphDef* loadGraphDef(const std::string& pbFile)
  Session* createSession(SessionOptions& sessionOptions)
  bool closeSession(Session*& session)
  void run(Session* session, const NamedTensorList& inputs, const std::vector<std::string>& outputNames, std::vector<Tensor>* outputs, const thread::ThreadPoolOptions& threadPoolOptions)
  std::pair<std::string, Tensor> NamedTensor
  std::vector<NamedTensor> NamedTensorList
  std::vector<std::vector<float>> FloatArrays
  TMVA::IMethod* loadTMVAWeights(TMVA::Reader* reader, const std::string& method, const std::string& weightFile, bool verbose = false)