d6/d55/MLClient_8cc_source.html

 #include "DQM/EcalMonitorClient/interface/MLClient.h"


 #include "DataFormats/EcalDetId/interface/EcalTrigTowerDetId.h"


 #include "CondFormats/EcalObjects/interface/EcalDQMStatusHelper.h"


 #include "DQM/EcalCommon/interface/EcalDQMCommonUtils.h"


 #include "FWCore/ParameterSet/interface/ParameterSet.h"


 #include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"


 #include "DQM/EcalCommon/interface/MESetNonObject.h"


 using namespace cms::Ort;


 namespace ecaldqm {

   MLClient::MLClient() : DQWorkerClient() { qualitySummaries_.insert("MLQualitySummary"); }


   void MLClient::setParams(edm::ParameterSet const& _params) {

     MLThreshold_ = _params.getUntrackedParameter<double>("MLThreshold");

     PUcorr_slope_ = _params.getUntrackedParameter<double>("PUcorr_slope");

     PUcorr_intercept_ = _params.getUntrackedParameter<double>("PUcorr_intercept");

     avgOcc_ = _params.getUntrackedParameter<std::vector<double>>("avgOcc");

     if (!onlineMode_) {

       MEs_.erase(std::string("MLQualitySummary"));

       MEs_.erase(std::string("EventsperMLImage"));

       sources_.erase(std::string("PU"));

       sources_.erase(std::string("NumEvents"));

       sources_.erase(std::string("DigiAllByLumi"));

       sources_.erase(std::string("AELoss"));

     }

   }


   void MLClient::producePlots(ProcessType) {

     if (!onlineMode_)

       return;

     using namespace std;

     MESet& meMLQualitySummary(MEs_.at("MLQualitySummary"));

     MESet& meEventsperMLImage(MEs_.at("EventsperMLImage"));


     MESetNonObject const& sPU(static_cast<MESetNonObject&>(sources_.at("PU")));

     MESetNonObject const& sNumEvents(static_cast<MESetNonObject&>(sources_.at("NumEvents")));


     //Get the no.of events and the PU per LS calculated in OccupancyTask

     int nEv = sNumEvents.getFloatValue();

     double pu = sPU.getFloatValue();

     //Do not compute ML quality if PU is non existent.

     if (pu < 0.) {

       return;

     }

     uint32_t mask(1 << EcalDQMStatusHelper::PEDESTAL_ONLINE_HIGH_GAIN_RMS_ERROR |

                   1 << EcalDQMStatusHelper::PHYSICS_BAD_CHANNEL_WARNING |

                   1 << EcalDQMStatusHelper::PHYSICS_BAD_CHANNEL_ERROR);


     //Inorder to feed the data into the ML model we apply some preprocessing.

     //We use the Digi Occupancy per Lumisection as the input source.

     //The model was trained on each occupancy plot having 500 events.

     //In apprehension of the low luminosity in the beginning of Run3, where in online DQM

     //the no.of events per LS could be lower than 500, we sum the occupancies over a fixed no.of lumisections as a running sum,

     //and require that the total no.of events on this summed occupancy to be atleast 200.

     //(This no.of LS and the no.of events are parameters which would require tuning later)

     //This summed occupancy is now the input image, which is then corrected for PileUp(PU) dependence and

     //change in no.of events, which are derived from training.

     //The input image is also padded by replicating the top and bottom rows so as to prevent the "edge effect"

     //wherein the ML model's learning degrades near the edge of the data set it sees.

     //This padding is then removed during inference on the model output.


     //Get the histogram of the input digi occupancy per lumisection.

     TH2F* hEbDigiMap((sources_.at("DigiAllByLumi")).getME(1)->getTH2F());


     size_t nTowers = nEtaTowers * nPhiTowers;  //Each occupancy map is of size 34x72 towers

     std::vector<float> ebOccMap1dCumulPad;     //Vector to feed into the ML network

     std::valarray<float> ebOccMap1d(nTowers);  //Array to store occupancy map of size 34x72

     //Store the values from the input histogram into the array

     //to do preprocessing

     for (int i = 0; i < hEbDigiMap->GetNbinsY(); i++) {  //NbinsY = 34, NbinsX = 72

       for (int j = 0; j < hEbDigiMap->GetNbinsX(); j++) {

         int bin = hEbDigiMap->GetBin(j + 1, i + 1);

         int k = (i * nPhiTowers) + j;

         ebOccMap1d[k] = hEbDigiMap->GetBinContent(bin);

       }

     }

     ebOccMap1dQ.push_back(ebOccMap1d);  //Queue which stores input occupancy maps for nLS lumis

     NEventQ.push_back(nEv);             //Queue which stores the no.of events per LS for nLS lumis


     if (NEventQ.size() < nLS) {

       return;  //Should have nLS lumis to add the occupancy over.

     }

     if (NEventQ.size() > nLS) {

       NEventQ.pop_front();  //Keep only nLS consecutive LS. Pop the first one if size greater than nLS

     }

     if (ebOccMap1dQ.size() > nLS) {

       ebOccMap1dQ.pop_front();  //Same conditon for the input occupancy maps.

     }


     int TNum = 0;

     for (size_t i = 0; i < nLS; i++) {

       TNum += NEventQ[i];  //Total no.of events over nLS lumis

     }

     if (TNum < 200) {

       return;  //The total no.of events should be atleast 200 over nLS for meaningful statistics

     }

     //Fill the ME to monitor the trend of the total no.of events in each input image to the ML model

     meEventsperMLImage.fill(getEcalDQMSetupObjects(), EcalBarrel, double(timestamp_.iLumi), double(TNum));


     //Array to hold the sum of inputs, which make atleast 200 events.

     std::valarray<float> ebOccMap1dCumul(0., nTowers);


     for (size_t i = 0; i < ebOccMap1dQ.size(); i++) {

       ebOccMap1dCumul += ebOccMap1dQ[i];  //Sum the input arrays of N LS.

     }

     //Applying PU correction derived from training

     ebOccMap1dCumul = ebOccMap1dCumul / (PUcorr_slope_ * pu + PUcorr_intercept_);


     //Scaling up to match input dimensions. 36*72 used instead of 34*72 to accommodate the additional padding

     //of 2 rows to prevent the "edge effect" which is done below

     ebOccMap1dCumul = ebOccMap1dCumul * (nEtaTowersPad * nPhiTowers);


     //Correction for no.of events in each input image as originally model trained with 500 events per image

     ebOccMap1dCumul = ebOccMap1dCumul * (500. / TNum);


     //The pre-processed input is now fed into the input tensor vector which will go into the ML model

     ebOccMap1dCumulPad.assign(std::begin(ebOccMap1dCumul), std::end(ebOccMap1dCumul));


     //Replicate and pad with the first and last row to prevent the edge effect

     for (int k = 0; k < nPhiTowers; k++) {

       float val = ebOccMap1dCumulPad[nPhiTowers - 1];

       ebOccMap1dCumulPad.insert(ebOccMap1dCumulPad.begin(),

                                 val);  //padding in the beginning with the first row elements

     }


     int size = ebOccMap1dCumulPad.size();

     for (int k = (size - nPhiTowers); k < size; k++) {

       float val = ebOccMap1dCumulPad[k];

       ebOccMap1dCumulPad.push_back(val);  //padding at the end with the last row elements

     }


     //An Autoencoder (AE) network with resnet architecture is used here which is trained on

     //certified good data (EB digi occupancy) from Run 2018 data.

     //On giving an input occupancy map, the encoder part of the AE compresses and reduces the input data, learning its features,

     //and the decoder reconstructs the data from the encoded form into a representation as close to the original input as possible.

     //We then compute the Mean squared error (MSE) between the input and output image, also called the Reconstruction Loss,

     //calculated at a tower by tower basis.

     //Thus, given an anomalous tower the loss should be significantly higher than the loss with respect to good towers, which the model

     //has already seen --> anomaly detection.

     //When calculating the loss we also apply a response correction by dividing each input and output image with the average occupancy from

     //all 2018 data (also to be tuned),to accommodate the difference in response of crystals in different regions of the Ecal Barrel

     //Further each loss map from each input image is then multiplied by the last N loss maps,

     //A quality threshold is then applied on this time multiplied loss map, to mark them as GOOD or BAD,

     //after which it is stored as a quality summary ME.


     std::string instanceName{"AE-DQM-inference"};

     std::string modelFilepath = edm::FileInPath("DQM/EcalMonitorClient/data/onnxModels/resnet.onnx").fullPath();


     Ort::SessionOptions sessionOptions;

     sessionOptions.SetIntraOpNumThreads(1);

     Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, instanceName.c_str());

     Ort::Session session(env, modelFilepath.c_str(), sessionOptions);


     Ort::AllocatorWithDefaultOptions allocator;


     const char* inputName = session.GetInputName(0, allocator);


     Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);

     auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();


     std::vector<int64_t> inputDims = inputTensorInfo.GetShape();


     const char* outputName = session.GetOutputName(0, allocator);


     Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);

     auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();


     std::vector<int64_t> outputDims = outputTensorInfo.GetShape();


     size_t TensorSize = nEtaTowersPad * nPhiTowers;

     std::vector<float> ebRecoOccMap1dPad(TensorSize);  //To store the output reconstructed occupancy


     std::vector<const char*> inputNames{inputName};

     std::vector<const char*> outputNames{outputName};

     std::vector<Ort::Value> inputTensors;

     std::vector<Ort::Value> outputTensors;


     Ort::MemoryInfo memoryInfo =

         Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);

     inputTensors.push_back(Ort::Value::CreateTensor<float>(

         memoryInfo, ebOccMap1dCumulPad.data(), TensorSize, inputDims.data(), inputDims.size()));


     outputTensors.push_back(Ort::Value::CreateTensor<float>(

         memoryInfo, ebRecoOccMap1dPad.data(), TensorSize, outputDims.data(), outputDims.size()));


     session.Run(Ort::RunOptions{nullptr},

                 inputNames.data(),

                 inputTensors.data(),

                 1,

                 outputNames.data(),

                 outputTensors.data(),

                 1);


     //2D Loss map to store tower by tower loss between the output (reconstructed) and input occupancies,

     //Have same dimensions as the occupancy plot

     std::valarray<std::valarray<float>> lossMap2d(std::valarray<float>(nPhiTowers), nEtaTowers);


     //1D val arrays to store row wise information corresponding to the reconstructed, input and average occupancies, and loss.

     //and to do element wise (tower wise) operations on them to calculate the MSE loss between the reco and input occupancy.

     std::valarray<float> recoOcc1d(0., nPhiTowers);

     std::valarray<float> inputOcc1d(0., nPhiTowers);

     std::valarray<float> avgOcc1d(0., nPhiTowers);

     std::valarray<float> loss_;


     //Loss calculation

     //Ignore the top and bottom replicated padded rows when doing inference

     //by making index i run over (1,35) instead of (0,36)

     for (int i = 1; i < 35; i++) {

       for (int j = 0; j < nPhiTowers; j++) {

         int k = (i * nPhiTowers) + j;

         recoOcc1d[j] = ebRecoOccMap1dPad[k];

         inputOcc1d[j] = ebOccMap1dCumulPad[k];

         avgOcc1d[j] = avgOcc_[k];

       }

       //Calculate the MSE loss = (output-input)^2, with avg response correction

       loss_ = std::pow((recoOcc1d / avgOcc1d - inputOcc1d / avgOcc1d), 2);

       lossMap2d[i - 1] = (loss_);

     }


     lossMap2dQ.push_back(lossMap2d);  //Store each loss map from the output in the queue

     if (lossMap2dQ.size() > nLSloss) {

       lossMap2dQ.pop_front();  //Keep exactly nLSloss loss maps to multiply

     }

     if (lossMap2dQ.size() < nLSloss) {  //Exit if there are not nLSloss loss maps

       return;

     }

     //To hold the final multiplied loss

     std::valarray<std::valarray<float>> lossMap2dMult(std::valarray<float>(1., nPhiTowers), nEtaTowers);


     //Multiply together the last nLSloss loss maps

     //So that real anomalies which persist with time are enhanced and fluctuations are suppressed.

     for (size_t i = 0; i < lossMap2dQ.size(); i++) {

       lossMap2dMult *= lossMap2dQ[i];

     }


     //Fill the AELoss ME with the values of this time multiplied loss map

     MESet const& sAELoss(sources_.at("AELoss"));

     TH2F* hLossMap2dMult(sAELoss.getME(1)->getTH2F());

     for (int i = 0; i < hLossMap2dMult->GetNbinsY(); i++) {

       for (int j = 0; j < hLossMap2dMult->GetNbinsX(); j++) {

         int bin_ = hLossMap2dMult->GetBin(j + 1, i + 1);

         double content = lossMap2dMult[i][j];

         hLossMap2dMult->SetBinContent(bin_, content);

       }

     }

     //Apply the quality threshold on the time multiplied loss map stored in the ME AELoss

     //If anomalous, the tower entry will have a large loss value. If good, the value will be close to zero.


     MESet::const_iterator dAEnd(sAELoss.end(GetElectronicsMap()));

     for (MESet::const_iterator dItr(sAELoss.beginChannel(GetElectronicsMap())); dItr != dAEnd;

          dItr.toNextChannel(GetElectronicsMap())) {

       DetId id(dItr->getId());


       bool doMaskML(meMLQualitySummary.maskMatches(id, mask, statusManager_, GetTrigTowerMap()));


       float entries(dItr->getBinContent());

       int quality(doMaskML ? kMGood : kGood);

       //If a trigger tower entry is greater than the ML threshold, set it to Bad quality, otherwise Good.

       if (entries > MLThreshold_) {

         quality = doMaskML ? kMBad : kBad;

       }

       //Fill the quality summary with the quality of the given tower id.

       meMLQualitySummary.setBinContent(getEcalDQMSetupObjects(), id, double(quality));

     }  // ML Quality Summary

   }    // producePlots()


   DEFINE_ECALDQM_WORKER(MLClient);

 }  // namespace ecaldqm

ecaldqm::MLClient::nEtaTowersPad
static const int nEtaTowersPad
Definition: MLClient.h:26

edm::ParameterSet::getUntrackedParameter
T getUntrackedParameter(std::string const &, T const &) const

run_AlCaRecoTriggerBitsUpdateWorkflow.session
tuple session
Definition: run_AlCaRecoTriggerBitsUpdateWorkflow.py:57

ecaldqm::DQWorkerClient::kMBad
Definition: DQWorkerClient.h:44

dqmiolumiharvest.j
tuple j
Definition: dqmiolumiharvest.py:66

mps_fire.i
i
Definition: mps_fire.py:428

ecaldqm::MLClient::setParams
void setParams(edm::ParameterSet const &) override
Definition: MLClient.cc:20

DEFINE_ECALDQM_WORKER
#define DEFINE_ECALDQM_WORKER(TYPE)
Definition: DQWorker.h:162

ecaldqm::MLClient::MLThreshold_
float MLThreshold_
Definition: MLClient.h:27

ecaldqm::DQWorkerClient
Definition: DQWorkerClient.h:17

gpuClustering::id
uint16_t *__restrict__ id
Definition: gpuClusterChargeCut.h:20

ecaldqm::DQWorker::Timestamp::iLumi
edm::LuminosityBlockNumber_t iLumi
Definition: DQWorker.h:48

EcalDQMStatusHelper::PHYSICS_BAD_CHANNEL_WARNING
static const int PHYSICS_BAD_CHANNEL_WARNING
Definition: EcalDQMStatusHelper.h:46

ecaldqm::MESetCollection::at
MESet & at(const std::string &key)
Definition: MESet.h:399

isotrackApplyRegressor.k
int k
Definition: isotrackApplyRegressor.py:91

quality
uint32_t const *__restrict__ Quality * quality
Definition: CAHitNtupletGeneratorKernelsImpl.h:122

SplitLinear.begin
list begin
Definition: SplitLinear.py:25

ecaldqm::DQWorkerClient::kMGood
Definition: DQWorkerClient.h:44

hgcalPerformanceValidation.val
tuple val
Definition: hgcalPerformanceValidation.py:364

EcalDQMStatusHelper::PEDESTAL_ONLINE_HIGH_GAIN_RMS_ERROR
static const int PEDESTAL_ONLINE_HIGH_GAIN_RMS_ERROR
Definition: EcalDQMStatusHelper.h:25

EcalDQMStatusHelper::PHYSICS_BAD_CHANNEL_ERROR
static const int PHYSICS_BAD_CHANNEL_ERROR
Definition: EcalDQMStatusHelper.h:47

ecaldqm::MLClient::producePlots
void producePlots(ProcessType) override
Definition: MLClient.cc:35

AlCaHLTBitMon_QueryRunRegistry.string
string string
Definition: AlCaHLTBitMon_QueryRunRegistry.py:256

l1extraParticles_cfi._params
tuple _params
Definition: l1extraParticles_cfi.py:29

ONNXRuntime.h

ParameterSet.h

ecaldqm::DQWorker::GetTrigTowerMap
EcalTrigTowerConstituentsMap const * GetTrigTowerMap()
Definition: DQWorker.cc:124

ecaldqm::MESetNonObject
Definition: MESetNonObject.h:7

ws_sso_content_reader.content
tuple content
Definition: ws_sso_content_reader.py:118

ecaldqm::DQWorkerClient::qualitySummaries_
std::set< std::string > qualitySummaries_
Definition: DQWorkerClient.h:63

newFWLiteAna.bin
string bin
Definition: newFWLiteAna.py:161

ecaldqm::DQWorkerClient::statusManager_
StatusManager const * statusManager_
Definition: DQWorkerClient.h:67

ecaldqm::MLClient::lossMap2dQ
std::deque< std::valarray< std::valarray< float > > > lossMap2dQ
Definition: MLClient.h:36

ecaldqm::nTowers
Definition: EcalDQMCommonUtils.h:114

ecaldqm::MLClient::nEtaTowers
static const int nEtaTowers
Definition: MLClient.h:23

ecaldqm::MLClient::nLSloss
size_t nLSloss
Definition: MLClient.h:31

ecaldqm::MLClient
Definition: MLClient.h:11

ecaldqm::DQWorkerClient::kBad
Definition: DQWorkerClient.h:44

EcalDQMStatusHelper.h

ecaldqm::MESet::const_iterator
Definition: MESet.h:273

ecaldqm::MESet
Definition: MESet.h:42

ecaldqm::DQWorkerClient::sources_
MESetCollection sources_
Definition: DQWorkerClient.h:62

EcalDQMCommonUtils.h

edm::FileInPath
Definition: FileInPath.h:61

ecaldqm::DQWorker::getEcalDQMSetupObjects
EcalDQMSetupObjects const getEcalDQMSetupObjects()
Definition: DQWorker.cc:142

DetId
Definition: DetId.h:17

ecaldqm::DQWorker::timestamp_
Timestamp timestamp_
Definition: DQWorker.h:128

ecaldqm::DQWorker::MEs_
MESetCollection MEs_
Definition: DQWorker.h:125

ecaldqm::MLClient::NEventQ
std::deque< int > NEventQ
Definition: MLClient.h:33

MLClient.h

ecaldqm::MLClient::PUcorr_intercept_
float PUcorr_intercept_
Definition: MLClient.h:29

ecaldqm::MESet::const_iterator::toNextChannel
const_iterator & toNextChannel(EcalElectronicsMapping const *)
Definition: MESet.cc:413

ecaldqm::DQWorkerClient::kGood
Definition: DQWorkerClient.h:44

ecaldqm::MESetNonObject::getFloatValue
double getFloatValue() const
Definition: MESetNonObject.cc:275

ecaldqm::MLClient::PUcorr_slope_
float PUcorr_slope_
Definition: MLClient.h:28

web.browse_db.env
tuple env
Definition: browse_db.py:18

makeMuonMisalignmentScenario.outputName
list outputName
Definition: makeMuonMisalignmentScenario.py:46

dataset.end
string end
Definition: dataset.py:937

EcalBarrel
Definition: EcalSubdetector.h:10

edm::FileInPath::fullPath
std::string fullPath() const
Definition: FileInPath.cc:161

edm::ParameterSet
Definition: ParameterSet.h:47

ecaldqm::DQWorker::GetElectronicsMap
EcalElectronicsMapping const * GetElectronicsMap()
Definition: DQWorker.cc:118

MESetNonObject.h

ecaldqm::DQWorker::onlineMode_
bool onlineMode_
Definition: DQWorker.h:132

EcalTrigTowerDetId.h

ecaldqm::MLClient::nPhiTowers
static const int nPhiTowers
Definition: MLClient.h:24

ecaldqm::DQWorkerClient::ProcessType
ProcessType
Definition: DQWorkerClient.h:19

findQualityFiles.size
tuple size
Write out results.
Definition: findQualityFiles.py:443

ecaldqm::MESetCollection::erase
void erase(const std::string &key)
Definition: MESet.h:390

ecaldqm::MLClient::ebOccMap1dQ
std::deque< std::valarray< float > > ebOccMap1dQ
Definition: MLClient.h:34

funct::pow
Power< A, B >::type pow(const A &a, const B &b)
Definition: Power.h:29

ecaldqm::MLClient::avgOcc_
std::vector< double > avgOcc_
Definition: MLClient.h:35

ecaldqm::MLClient::nLS
size_t nLS
Definition: MLClient.h:30