CMS 3D CMS Logo

List of all members | Classes | Public Member Functions | Static Public Member Functions | Protected Member Functions | Protected Attributes | Private Member Functions | Private Attributes
TritonClient Class Reference

#include <TritonClient.h>

Inheritance diagram for TritonClient:
SonicClient< TritonInputMap, TritonOutputMap > SonicClientBase SonicClientTypes< TritonInputMap, TritonOutputMap >

Classes

struct  ServerSideStats
 

Public Member Functions

TritonBatchMode batchMode () const
 
unsigned batchSize () const
 
bool isLocal () const
 
void reset () override
 
void resetBatchMode ()
 
TritonServerType serverType () const
 
void setBatchMode (TritonBatchMode batchMode)
 
bool setBatchSize (unsigned bsize)
 
void setUseSharedMemory (bool useShm)
 
 TritonClient (const edm::ParameterSet &params, const std::string &debugName)
 
bool useSharedMemory () const
 
bool verbose () const
 
 ~TritonClient () override
 
- Public Member Functions inherited from SonicClient< TritonInputMap, TritonOutputMap >
 SonicClient (const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
 
- Public Member Functions inherited from SonicClientBase
const std::string & clientName () const
 
const std::string & debugName () const
 
virtual void dispatch (edm::WaitingTaskWithArenaHolder holder)
 
virtual void dispatch ()
 
SonicMode mode () const
 
 SonicClientBase (const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
 
virtual ~SonicClientBase ()=default
 
- Public Member Functions inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
Input & input ()
 
const Output & output () const
 
virtual ~SonicClientTypes ()=default
 

Static Public Member Functions

static void fillPSetDescription (edm::ParameterSetDescription &iDesc)
 
- Static Public Member Functions inherited from SonicClientBase
static void fillBasePSetDescription (edm::ParameterSetDescription &desc, bool allowRetry=true)
 

Protected Member Functions

void evaluate () override
 
void getResults (const std::vector< std::shared_ptr< triton::client::InferResult >> &results)
 
inference::ModelStatistics getServerSideStatus () const
 
template<typename F >
bool handle_exception (F &&call)
 
unsigned nEntries () const
 
bool noOuterDim () const
 
unsigned outerDim () const
 
void reportServerSideStats (const ServerSideStats &stats) const
 
ServerSideStats summarizeServerStats (const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
 
- Protected Member Functions inherited from SonicClientBase
void finish (bool success, std::exception_ptr eptr=std::exception_ptr{})
 
void setMode (SonicMode mode)
 
void start (edm::WaitingTaskWithArenaHolder holder)
 
void start ()
 

Protected Attributes

TritonBatchMode batchMode_
 
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
 
grpc_compression_algorithm compressionAlgo_
 
triton::client::Headers headers_
 
bool isLocal_
 
bool manualBatchMode_
 
unsigned maxOuterDim_
 
unsigned nEntries_
 
bool noOuterDim_
 
std::vector< triton::client::InferOptions > options_
 
unsigned outerDim_
 
TritonServerType serverType_
 
bool useSharedMemory_
 
bool verbose_
 
- Protected Attributes inherited from SonicClientBase
unsigned allowedTries_
 
std::string clientName_
 
std::string debugName_
 
std::unique_ptr< SonicDispatcher > dispatcher_
 
std::string fullDebugName_
 
std::optional< edm::WaitingTaskWithArenaHolder > holder_
 
SonicMode mode_
 
unsigned tries_
 
bool verbose_
 
- Protected Attributes inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
Input input_
 
Output output_
 

Private Member Functions

void addEntry (unsigned entry)
 
auto client ()
 
void resizeEntries (unsigned entry)
 

Private Attributes

friend TritonInputData
 
friend TritonOutputData
 

Additional Inherited Members

- Public Types inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
typedef TritonInputMap Input
 
typedef TritonOutputMap Output
 

Detailed Description

Definition at line 21 of file TritonClient.h.

Constructor & Destructor Documentation

◆ TritonClient()

TritonClient::TritonClient ( const edm::ParameterSet &  params,
const std::string &  debugName 
)

Definition at line 56 of file TritonClient.cc.

References a, client_, edm::conversion(), SiPixelPhase1Clusters_cfi::e3, Exception, SonicClientBase::fullDebugName_, edm::FileInPath::fullPath(), SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, isLocal_, submitPVResolutionJobs::key, LocalCPU, LocalGPU, SiStripPI::max, maxOuterDim_, mps_check::msg, noOuterDim_, heppy_report::oname, options_, or, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, submitPVValidationJobs::params, TritonService::pid(), triton_utils::printColl(), Rectangular, contentValuesFiles::server, TritonService::serverInfo(), serverType_, setBatchSize(), SonicClientBase::setMode(), AlCaHLTBitMon_QueryRunRegistry::string, summarizeEdmComparisonLogfiles::success, Sync, TRITON_THROW_IF_ERROR, heppy_batch::val, and verbose_.

57  : SonicClient(params, debugName, "TritonClient"),
59  manualBatchMode_(false),
60  verbose_(params.getUntrackedParameter<bool>("verbose")),
61  useSharedMemory_(params.getUntrackedParameter<bool>("useSharedMemory")),
62  compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter<std::string>("compression"))) {
63  options_.emplace_back(params.getParameter<std::string>("modelName"));
64  //get appropriate server for this model
66  const auto& server =
67  ts->serverInfo(options_[0].model_name_, params.getUntrackedParameter<std::string>("preferredServer"));
68  serverType_ = server.type;
69  if (verbose_)
70  edm::LogInfo(fullDebugName_) << "Using server: " << server.url;
71  //enforce sync mode for fallback CPU server to avoid contention
72  //todo: could enforce async mode otherwise (unless mode was specified by user?)
76 
77  //connect to the server
79  tc::InferenceServerGrpcClient::Create(&client_, server.url, false, server.useSsl, server.sslOptions),
80  "TritonClient(): unable to create inference context",
81  isLocal_);
82 
83  //set options
84  options_[0].model_version_ = params.getParameter<std::string>("modelVersion");
85  options_[0].client_timeout_ = params.getUntrackedParameter<unsigned>("timeout");
86  //convert to microseconds
87  const auto& timeoutUnit = params.getUntrackedParameter<std::string>("timeoutUnit");
88  unsigned conversion = 1;
89  if (timeoutUnit == "seconds")
90  conversion = 1e6;
91  else if (timeoutUnit == "milliseconds")
92  conversion = 1e3;
93  else if (timeoutUnit == "microseconds")
94  conversion = 1;
95  else
96  throw cms::Exception("Configuration") << "Unknown timeout unit: " << timeoutUnit;
97  options_[0].client_timeout_ *= conversion;
98 
99  //get fixed parameters from local config
100  inference::ModelConfig localModelConfig;
101  {
102  const std::string& localModelConfigPath(params.getParameter<edm::FileInPath>("modelConfigPath").fullPath());
103  int fileDescriptor = open(localModelConfigPath.c_str(), O_RDONLY);
104  if (fileDescriptor < 0)
105  throw TritonException("LocalFailure")
106  << "TritonClient(): unable to open local model config: " << localModelConfigPath;
107  google::protobuf::io::FileInputStream localModelConfigInput(fileDescriptor);
108  localModelConfigInput.SetCloseOnDelete(true);
109  if (!google::protobuf::TextFormat::Parse(&localModelConfigInput, &localModelConfig))
110  throw TritonException("LocalFailure")
111  << "TritonClient(): unable to parse local model config: " << localModelConfigPath;
112  }
113 
114  //check batch size limitations (after i/o setup)
115  //triton uses max batch size = 0 to denote a model that does not support native batching (using the outer dimension)
116  //but for models that do support batching (native or otherwise), a given event may set batch size 0 to indicate no valid input is present
117  //so set the local max to 1 and keep track of "no outer dim" case
118  maxOuterDim_ = localModelConfig.max_batch_size();
119  noOuterDim_ = maxOuterDim_ == 0;
121  //propagate batch size
122  setBatchSize(1);
123 
124  //compare model checksums to remote config to enforce versioning
125  inference::ModelConfigResponse modelConfigResponse;
126  TRITON_THROW_IF_ERROR(client_->ModelConfig(&modelConfigResponse, options_[0].model_name_, options_[0].model_version_),
127  "TritonClient(): unable to get model config",
128  isLocal_);
129  inference::ModelConfig remoteModelConfig(modelConfigResponse.config());
130 
131  std::map<std::string, std::array<std::string, 2>> checksums;
132  size_t fileCounter = 0;
133  for (const auto& modelConfig : {localModelConfig, remoteModelConfig}) {
134  const auto& agents = modelConfig.model_repository_agents().agents();
135  auto agent = std::find_if(agents.begin(), agents.end(), [](auto const& a) { return a.name() == "checksum"; });
136  if (agent != agents.end()) {
137  const auto& params = agent->parameters();
138  for (const auto& [key, val] : params) {
139  // only check the requested version
140  if (key.compare(0, options_[0].model_version_.size() + 1, options_[0].model_version_ + "/") == 0)
141  checksums[key][fileCounter] = val;
142  }
143  }
144  ++fileCounter;
145  }
146  std::vector<std::string> incorrect;
147  for (const auto& [key, val] : checksums) {
148  if (checksums[key][0] != checksums[key][1])
149  incorrect.push_back(key);
150  }
151  if (!incorrect.empty())
152  throw TritonException("ModelVersioning") << "The following files have incorrect checksums on the remote server: "
153  << triton_utils::printColl(incorrect, ", ");
154 
155  //get model info
156  inference::ModelMetadataResponse modelMetadata;
157  TRITON_THROW_IF_ERROR(client_->ModelMetadata(&modelMetadata, options_[0].model_name_, options_[0].model_version_),
158  "TritonClient(): unable to get model metadata",
159  isLocal_);
160 
161  //get input and output (which know their sizes)
162  const auto& nicInputs = modelMetadata.inputs();
163  const auto& nicOutputs = modelMetadata.outputs();
164 
165  //report all model errors at once
166  std::stringstream msg;
167  std::string msg_str;
168 
169  //currently no use case is foreseen for a model with zero inputs or outputs
170  if (nicInputs.empty())
171  msg << "Model on server appears malformed (zero inputs)\n";
172 
173  if (nicOutputs.empty())
174  msg << "Model on server appears malformed (zero outputs)\n";
175 
176  //stop if errors
177  msg_str = msg.str();
178  if (!msg_str.empty())
179  throw cms::Exception("ModelErrors") << msg_str;
180 
181  //setup input map
182  std::stringstream io_msg;
183  if (verbose_)
184  io_msg << "Model inputs: "
185  << "\n";
186  for (const auto& nicInput : nicInputs) {
187  const auto& iname = nicInput.name();
188  auto [curr_itr, success] = input_.emplace(std::piecewise_construct,
189  std::forward_as_tuple(iname),
190  std::forward_as_tuple(iname, nicInput, this, ts->pid()));
191  auto& curr_input = curr_itr->second;
192  if (verbose_) {
193  io_msg << " " << iname << " (" << curr_input.dname() << ", " << curr_input.byteSize()
194  << " b) : " << triton_utils::printColl(curr_input.shape()) << "\n";
195  }
196  }
197 
198  //allow selecting only some outputs from server
199  const auto& v_outputs = params.getUntrackedParameter<std::vector<std::string>>("outputs");
200  std::unordered_set s_outputs(v_outputs.begin(), v_outputs.end());
201 
202  //setup output map
203  if (verbose_)
204  io_msg << "Model outputs: "
205  << "\n";
206  for (const auto& nicOutput : nicOutputs) {
207  const auto& oname = nicOutput.name();
208  if (!s_outputs.empty() and s_outputs.find(oname) == s_outputs.end())
209  continue;
210  auto [curr_itr, success] = output_.emplace(std::piecewise_construct,
211  std::forward_as_tuple(oname),
212  std::forward_as_tuple(oname, nicOutput, this, ts->pid()));
213  auto& curr_output = curr_itr->second;
214  if (verbose_) {
215  io_msg << " " << oname << " (" << curr_output.dname() << ", " << curr_output.byteSize()
216  << " b) : " << triton_utils::printColl(curr_output.shape()) << "\n";
217  }
218  if (!s_outputs.empty())
219  s_outputs.erase(oname);
220  }
221 
222  //check if any requested outputs were not available
223  if (!s_outputs.empty())
224  throw cms::Exception("MissingOutput")
225  << "Some requested outputs were not available on the server: " << triton_utils::printColl(s_outputs);
226 
227  //print model info
228  std::stringstream model_msg;
229  if (verbose_) {
230  model_msg << "Model name: " << options_[0].model_name_ << "\n"
231  << "Model version: " << options_[0].model_version_ << "\n"
232  << "Model max outer dim: " << (noOuterDim_ ? 0 : maxOuterDim_) << "\n";
233  edm::LogInfo(fullDebugName_) << model_msg.str() << io_msg.str();
234  }
235 }
const std::string & pid() const
const std::string & debugName() const
unsigned maxOuterDim_
Definition: TritonClient.h:73
bool setBatchSize(unsigned bsize)
bool noOuterDim_
Definition: TritonClient.h:75
std::string fullPath() const
Definition: FileInPath.cc:161
bool manualBatchMode_
Definition: TritonClient.h:78
bool useSharedMemory_
Definition: TritonClient.h:80
void setMode(SonicMode mode)
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:86
TritonServerType serverType_
Definition: TritonClient.h:81
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
SonicClient(const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
Definition: SonicClient.h:12
key
prepare the HTCondor submission files and eventually submit them
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:83
#define TRITON_THROW_IF_ERROR(X, MSG, NOTIFY)
Definition: triton_utils.h:78
Log< level::Info, false > LogInfo
void conversion(EventAux const &from, EventAuxiliary &to)
Definition: EventAux.cc:9
tuple msg
Definition: mps_check.py:286
Server serverInfo(const std::string &model, const std::string &preferred="") const
double a
Definition: hdecay.h:121
std::string fullDebugName_
TritonBatchMode batchMode_
Definition: TritonClient.h:77
std::string printColl(const C &coll, const std::string &delim=", ")
Definition: triton_utils.cc:10
std::vector< triton::client::InferOptions > options_
Definition: TritonClient.h:88

◆ ~TritonClient()

TritonClient::~TritonClient ( )
override

Definition at line 237 of file TritonClient.cc.

References SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, and SonicClientTypes< TritonInputMap, TritonOutputMap >::output_.

237  {
238  //by default: members of this class destroyed before members of base class
239  //in shared memory case, TritonMemResource (member of TritonData) unregisters from client_ in its destructor
240  //but input/output objects are member of base class, so destroyed after client_ (member of this class)
241  //therefore, clear the maps here
242  input_.clear();
243  output_.clear();
244 }

Member Function Documentation

◆ addEntry()

void TritonClient::addEntry ( unsigned  entry)
private

Definition at line 297 of file TritonClient.cc.

References batchMode_, mps_splice::entry, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, outerDim_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, and Ragged.

Referenced by resizeEntries().

297  {
298  for (auto& element : input_) {
299  element.second.addEntryImpl(entry);
300  }
301  for (auto& element : output_) {
302  element.second.addEntryImpl(entry);
303  }
304  if (entry > 0) {
306  outerDim_ = 1;
307  }
308 }
unsigned outerDim_
Definition: TritonClient.h:74
TritonBatchMode batchMode_
Definition: TritonClient.h:77

◆ batchMode()

TritonBatchMode TritonClient::batchMode ( ) const
inline

Definition at line 42 of file TritonClient.h.

References batchMode_.

Referenced by setBatchMode().

42 { return batchMode_; }
TritonBatchMode batchMode_
Definition: TritonClient.h:77

◆ batchSize()

unsigned TritonClient::batchSize ( ) const

◆ client()

auto TritonClient::client ( )
inline private

Definition at line 95 of file TritonClient.h.

References client_.

Referenced by TritonData< IO >::client().

95 { return client_.get(); }
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:86

◆ evaluate()

void TritonClient::evaluate ( )
override protected virtual

Implements SonicClientBase.

Definition at line 364 of file TritonClient.cc.

References Async, batchSize(), client_, compressionAlgo_, SonicClientBase::finish(), getResults(), getServerSideStatus(), handle_exception(), headers_, mps_fire::i, ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets::if(), SonicClientTypes< TritonInputMap, TritonOutputMap >::input(), SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, isLocal_, SonicClientBase::mode_, nEntries(), TritonService::notifyCallStatus(), heppy_report::oname, options_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, reportServerSideStats(), mysort::results, dqmMemoryStats::stats, summarizeEdmComparisonLogfiles::success, summarizeServerStats(), SonicClientBase::tries_, TRITON_THROW_IF_ERROR, and verbose().

364  {
365  //undo previous signal from TritonException
366  if (tries_ > 0) {
368  ts->notifyCallStatus(true);
369  }
370 
371  //in case there is nothing to process
372  if (batchSize() == 0) {
373  //call getResults on an empty vector
374  std::vector<std::shared_ptr<tc::InferResult>> empty_results;
375  getResults(empty_results);
376  finish(true);
377  return;
378  }
379 
380  //set up input pointers for triton (generalized for multi-request ragged batching case)
381  //one vector<InferInput*> per request
382  unsigned nEntriesVal = nEntries();
383  std::vector<std::vector<triton::client::InferInput*>> inputsTriton(nEntriesVal);
384  for (auto& inputTriton : inputsTriton) {
385  inputTriton.reserve(input_.size());
386  }
387  for (auto& [iname, input] : input_) {
388  for (unsigned i = 0; i < nEntriesVal; ++i) {
389  inputsTriton[i].push_back(input.data(i));
390  }
391  }
392 
393  //set up output pointers similarly
394  std::vector<std::vector<const triton::client::InferRequestedOutput*>> outputsTriton(nEntriesVal);
395  for (auto& outputTriton : outputsTriton) {
396  outputTriton.reserve(output_.size());
397  }
398  for (auto& [oname, output] : output_) {
399  for (unsigned i = 0; i < nEntriesVal; ++i) {
400  outputsTriton[i].push_back(output.data(i));
401  }
402  }
403 
404  //set up shared memory for output
405  auto success = handle_exception([&]() {
406  for (auto& element : output_) {
407  element.second.prepare();
408  }
409  });
410  if (!success)
411  return;
412 
413  // Get the status of the server prior to the request being made.
414  inference::ModelStatistics start_status;
415  success = handle_exception([&]() {
416  if (verbose())
417  start_status = getServerSideStatus();
418  });
419  if (!success)
420  return;
421 
422  if (mode_ == SonicMode::Async) {
423  //non-blocking call
424  success = handle_exception([&]() {
425  TRITON_THROW_IF_ERROR(client_->AsyncInferMulti(
426  [start_status, this](std::vector<tc::InferResult*> resultsTmp) {
427  //immediately convert to shared_ptr
428  const auto& results = convertToShared(resultsTmp);
429  //check results
430  for (auto ptr : results) {
431  auto success = handle_exception([&]() {
432  TRITON_THROW_IF_ERROR(
433  ptr->RequestStatus(), "evaluate(): unable to get result(s)", isLocal_);
434  });
435  if (!success)
436  return;
437  }
438 
439  if (verbose()) {
440  inference::ModelStatistics end_status;
441  auto success = handle_exception([&]() { end_status = getServerSideStatus(); });
442  if (!success)
443  return;
444 
445  const auto& stats = summarizeServerStats(start_status, end_status);
447  }
448 
449  //check result
450  auto success = handle_exception([&]() { getResults(results); });
451  if (!success)
452  return;
453 
454  //finish
455  finish(true);
456  },
457  options_,
458  inputsTriton,
459  outputsTriton,
460  headers_,
462  "evaluate(): unable to launch async run",
463  isLocal_);
464  });
465  if (!success)
466  return;
467  } else {
468  //blocking call
469  std::vector<tc::InferResult*> resultsTmp;
470  success = handle_exception([&]() {
472  client_->InferMulti(&resultsTmp, options_, inputsTriton, outputsTriton, headers_, compressionAlgo_),
473  "evaluate(): unable to run and/or get result",
474  isLocal_);
475  });
476  //immediately convert to shared_ptr
477  const auto& results = convertToShared(resultsTmp);
478  if (!success)
479  return;
480 
481  if (verbose()) {
482  inference::ModelStatistics end_status;
483  success = handle_exception([&]() { end_status = getServerSideStatus(); });
484  if (!success)
485  return;
486 
487  const auto& stats = summarizeServerStats(start_status, end_status);
489  }
490 
492  if (!success)
493  return;
494 
495  finish(true);
496  }
497 }
bool verbose() const
Definition: TritonClient.h:43
void getResults(const std::vector< std::shared_ptr< triton::client::InferResult >> &results)
void notifyCallStatus(bool status) const
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:86
void finish(bool success, std::exception_ptr eptr=std::exception_ptr{})
ServerSideStats summarizeServerStats(const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
bool handle_exception(F &&call)
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:83
#define TRITON_THROW_IF_ERROR(X, MSG, NOTIFY)
Definition: triton_utils.h:78
inference::ModelStatistics getServerSideStatus() const
triton::client::Headers headers_
Definition: TritonClient.h:84
unsigned nEntries() const
unsigned batchSize() const
void reportServerSideStats(const ServerSideStats &stats) const
results
Definition: mysort.py:8
Definition: output.py:1
std::vector< triton::client::InferOptions > options_
Definition: TritonClient.h:88
if(threadIdxLocalY==0 &&threadIdxLocalX==0)

◆ fillPSetDescription()

void TritonClient::fillPSetDescription ( edm::ParameterSetDescription &  iDesc)
static

Definition at line 569 of file TritonClient.cc.

References edm::ParameterSetDescription::add(), edm::ParameterSetDescription::addUntracked(), SonicClientBase::fillBasePSetDescription(), edm::ParameterSetDescription::ifValue(), and AlCaHLTBitMon_QueryRunRegistry::string.

Referenced by DeepTauIdSonicProducer::fillDescriptions(), DeepMETSonicProducer::fillDescriptions(), ParticleNetSonicJetTagsProducer::fillDescriptions(), SCEnergyCorrectorDRNProducer::fillDescriptions(), and DRNCorrectionProducerT< T >::fillDescriptions().

569  {
570  edm::ParameterSetDescription descClient;
571  fillBasePSetDescription(descClient);
572  descClient.add<std::string>("modelName");
573  descClient.add<std::string>("modelVersion", "");
574  descClient.add<edm::FileInPath>("modelConfigPath");
575  //server parameters should not affect the physics results
576  descClient.addUntracked<std::string>("preferredServer", "");
577  descClient.addUntracked<unsigned>("timeout");
578  descClient.ifValue(edm::ParameterDescription<std::string>("timeoutUnit", "seconds", false),
579  edm::allowedValues<std::string>("seconds", "milliseconds", "microseconds"));
580  descClient.addUntracked<bool>("useSharedMemory", true);
581  descClient.addUntracked<std::string>("compression", "");
582  descClient.addUntracked<std::vector<std::string>>("outputs", {});
583  iDesc.add<edm::ParameterSetDescription>("Client", descClient);
584 }
ParameterDescriptionNode * ifValue(ParameterDescription< T > const &switchParameter, std::unique_ptr< ParameterDescriptionCases< T >> cases)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
ParameterDescriptionBase * add(U const &iLabel, T const &value)
static void fillBasePSetDescription(edm::ParameterSetDescription &desc, bool allowRetry=true)

◆ getResults()

void TritonClient::getResults ( const std::vector< std::shared_ptr< triton::client::InferResult >> &  results)
protected

Definition at line 341 of file TritonClient.cc.

References mps_fire::i, noOuterDim_, heppy_report::oname, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, mps_fire::result, mysort::results, and TRITON_THROW_IF_ERROR.

Referenced by evaluate().

341  {
342  for (unsigned i = 0; i < results.size(); ++i) {
343  const auto& result = results[i];
344  for (auto& [oname, output] : output_) {
345  //set shape here before output becomes const
346  if (output.variableDims()) {
347  std::vector<int64_t> tmp_shape;
349  result->Shape(oname, &tmp_shape), "getResults(): unable to get output shape for " + oname, false);
350  if (!noOuterDim_)
351  tmp_shape.erase(tmp_shape.begin());
352  output.setShape(tmp_shape, i);
353  }
354  //extend lifetime
355  output.setResult(result, i);
356  //compute size after getting all result entries
357  if (i == results.size() - 1)
358  output.computeSizes();
359  }
360  }
361 }
bool noOuterDim_
Definition: TritonClient.h:75
#define TRITON_THROW_IF_ERROR(X, MSG, NOTIFY)
Definition: triton_utils.h:78
results
Definition: mysort.py:8
Definition: output.py:1

◆ getServerSideStatus()

inference::ModelStatistics TritonClient::getServerSideStatus ( ) const
protected

Definition at line 557 of file TritonClient.cc.

References client_, isLocal_, options_, TRITON_THROW_IF_ERROR, and verbose_.

Referenced by evaluate().

557  {
558  if (verbose_) {
559  inference::ModelStatisticsResponse resp;
560  TRITON_THROW_IF_ERROR(client_->ModelInferenceStatistics(&resp, options_[0].model_name_, options_[0].model_version_),
561  "getServerSideStatus(): unable to get model statistics",
562  isLocal_);
563  return *(resp.model_stats().begin());
564  }
565  return inference::ModelStatistics{};
566 }
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:86
#define TRITON_THROW_IF_ERROR(X, MSG, NOTIFY)
Definition: triton_utils.h:78
std::vector< triton::client::InferOptions > options_
Definition: TritonClient.h:88

◆ handle_exception()

template<typename F >
bool TritonClient::handle_exception ( F &&  call)
protected

Definition at line 322 of file TritonClient.cc.

References CMS_SA_ALLOW, MillePedeFileConverter_cfg::e, and SonicClientBase::finish().

Referenced by evaluate().

322  {
323  //caught exceptions will be propagated to edm::WaitingTaskWithArenaHolder
324  CMS_SA_ALLOW try {
325  call();
326  return true;
327  }
328  //TritonExceptions are intended/expected to be recoverable, i.e. retries should be allowed
329  catch (TritonException& e) {
330  e.convertToWarning();
331  finish(false);
332  return false;
333  }
334  //other exceptions are not: execution should stop if they are encountered
335  catch (...) {
336  finish(false, std::current_exception());
337  return false;
338  }
339 }
#define CMS_SA_ALLOW
void finish(bool success, std::exception_ptr eptr=std::exception_ptr{})

◆ isLocal()

bool TritonClient::isLocal ( ) const
inline

Definition at line 51 of file TritonClient.h.

References isLocal_.

51 { return isLocal_; }

◆ nEntries()

unsigned TritonClient::nEntries ( ) const
protected

Definition at line 260 of file TritonClient.cc.

References SonicClientTypes< TritonInputMap, TritonOutputMap >::input_.

Referenced by batchSize(), evaluate(), and resizeEntries().

260 { return !input_.empty() ? input_.begin()->second.entries_.size() : 0; }

◆ noOuterDim()

bool TritonClient::noOuterDim ( ) const
inline protected

Definition at line 58 of file TritonClient.h.

References noOuterDim_.

Referenced by TritonData< IO >::fullLoc(), and TritonData< IO >::toServer().

58 { return noOuterDim_; }
bool noOuterDim_
Definition: TritonClient.h:75

◆ outerDim()

unsigned TritonClient::outerDim ( ) const
inline protected

Definition at line 59 of file TritonClient.h.

References outerDim_.

Referenced by TritonData< IO >::computeSizes(), TritonData< IO >::fromServer(), and TritonData< IO >::toServer().

59 { return outerDim_; }
unsigned outerDim_
Definition: TritonClient.h:74

◆ reportServerSideStats()

void TritonClient::reportServerSideStats ( const ServerSideStats &  stats) const
protected

Definition at line 499 of file TritonClient.cc.

References ALPAKA_ACCELERATOR_NAMESPACE::brokenline::constexpr(), submitPVResolutionJobs::count, SonicClientBase::debugName_, SonicClientBase::fullDebugName_, mps_check::msg, and dqmMemoryStats::stats.

Referenced by evaluate().

499  {
500  std::stringstream msg;
501 
502  // https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/perf_client/inference_profiler.cc
503  const uint64_t count = stats.success_count_;
504  msg << " Inference count: " << stats.inference_count_ << "\n";
505  msg << " Execution count: " << stats.execution_count_ << "\n";
506  msg << " Successful request count: " << count << "\n";
507 
508  if (count > 0) {
509  auto get_avg_us = [count](uint64_t tval) {
510  constexpr uint64_t us_to_ns = 1000;
511  return tval / us_to_ns / count;
512  };
513 
514  const uint64_t cumm_avg_us = get_avg_us(stats.cumm_time_ns_);
515  const uint64_t queue_avg_us = get_avg_us(stats.queue_time_ns_);
516  const uint64_t compute_input_avg_us = get_avg_us(stats.compute_input_time_ns_);
517  const uint64_t compute_infer_avg_us = get_avg_us(stats.compute_infer_time_ns_);
518  const uint64_t compute_output_avg_us = get_avg_us(stats.compute_output_time_ns_);
519  const uint64_t compute_avg_us = compute_input_avg_us + compute_infer_avg_us + compute_output_avg_us;
520  const uint64_t overhead =
521  (cumm_avg_us > queue_avg_us + compute_avg_us) ? (cumm_avg_us - queue_avg_us - compute_avg_us) : 0;
522 
523  msg << " Avg request latency: " << cumm_avg_us << " usec"
524  << "\n"
525  << " (overhead " << overhead << " usec + "
526  << "queue " << queue_avg_us << " usec + "
527  << "compute input " << compute_input_avg_us << " usec + "
528  << "compute infer " << compute_infer_avg_us << " usec + "
529  << "compute output " << compute_output_avg_us << " usec)" << std::endl;
530  }
531 
532  if (!debugName_.empty())
533  edm::LogInfo(fullDebugName_) << msg.str();
534 }
std::string debugName_
unsigned long long uint64_t
Definition: Time.h:13
tuple msg
Definition: mps_check.py:286
std::string fullDebugName_

◆ reset()

void TritonClient::reset ( void  )
override virtual

Reimplemented from SonicClientBase.

Definition at line 310 of file TritonClient.cc.

References batchMode_, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, manualBatchMode_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, and Rectangular.

310  {
311  if (!manualBatchMode_)
313  for (auto& element : input_) {
314  element.second.reset();
315  }
316  for (auto& element : output_) {
317  element.second.reset();
318  }
319 }
bool manualBatchMode_
Definition: TritonClient.h:78
TritonBatchMode batchMode_
Definition: TritonClient.h:77

◆ resetBatchMode()

void TritonClient::resetBatchMode ( )

Definition at line 255 of file TritonClient.cc.

References batchMode_, manualBatchMode_, and Rectangular.

255  {
257  manualBatchMode_ = false;
258 }
bool manualBatchMode_
Definition: TritonClient.h:78
TritonBatchMode batchMode_
Definition: TritonClient.h:77

◆ resizeEntries()

void TritonClient::resizeEntries ( unsigned  entry)
private

Definition at line 283 of file TritonClient.cc.

References addEntry(), mps_splice::entry, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, nEntries(), and SonicClientTypes< TritonInputMap, TritonOutputMap >::output_.

Referenced by setBatchSize().

283  {
284  if (entry > nEntries())
285  //addEntry(entry) extends the vector to size entry+1
286  addEntry(entry - 1);
287  else if (entry < nEntries()) {
288  for (auto& element : input_) {
289  element.second.entries_.resize(entry);
290  }
291  for (auto& element : output_) {
292  element.second.entries_.resize(entry);
293  }
294  }
295 }
void addEntry(unsigned entry)
unsigned nEntries() const

◆ serverType()

TritonServerType TritonClient::serverType ( ) const
inline

Definition at line 50 of file TritonClient.h.

References serverType_.

Referenced by TritonData< IO >::updateMem().

50 { return serverType_; }
TritonServerType serverType_
Definition: TritonClient.h:81

◆ setBatchMode()

void TritonClient::setBatchMode ( TritonBatchMode  batchMode)

Definition at line 246 of file TritonClient.cc.

References batchMode(), batchMode_, batchSize(), manualBatchMode_, and setBatchSize().

246  {
247  unsigned oldBatchSize = batchSize();
248  batchMode_ = batchMode;
249  manualBatchMode_ = true;
250  //this allows calling setBatchSize() and setBatchMode() in either order consistently to change back and forth
251  //includes handling of change from ragged to rectangular if multiple entries already created
252  setBatchSize(oldBatchSize);
253 }
bool setBatchSize(unsigned bsize)
bool manualBatchMode_
Definition: TritonClient.h:78
TritonBatchMode batchMode() const
Definition: TritonClient.h:42
unsigned batchSize() const
TritonBatchMode batchMode_
Definition: TritonClient.h:77

◆ setBatchSize()

bool TritonClient::setBatchSize ( unsigned  bsize)

Definition at line 264 of file TritonClient.cc.

References batchMode_, SonicClientBase::fullDebugName_, maxOuterDim_, SiStripPI::min, outerDim_, Rectangular, and resizeEntries().

Referenced by setBatchMode(), and TritonClient().

264  {
265  if (batchMode_ == TritonBatchMode::Rectangular) {
266  if (bsize > maxOuterDim_) {
267  edm::LogWarning(fullDebugName_) << "Requested batch size " << bsize << " exceeds server-specified max batch size "
268  << maxOuterDim_ << ". Batch size will remain as " << outerDim_;
269  return false;
270  } else {
271  outerDim_ = bsize;
272  //take min to allow resizing to 0
273  resizeEntries(std::min(outerDim_, 1u));
274  return true;
275  }
276  } else {
277  resizeEntries(bsize);
278  outerDim_ = 1;
279  return true;
280  }
281 }
unsigned maxOuterDim_
Definition: TritonClient.h:73
void resizeEntries(unsigned entry)
unsigned outerDim_
Definition: TritonClient.h:74
std::string fullDebugName_
Log< level::Warning, false > LogWarning
TritonBatchMode batchMode_
Definition: TritonClient.h:77

◆ setUseSharedMemory()

void TritonClient::setUseSharedMemory ( bool  useShm)
inline

Definition at line 45 of file TritonClient.h.

References useSharedMemory_.

45 { useSharedMemory_ = useShm; }
bool useSharedMemory_
Definition: TritonClient.h:80

◆ summarizeServerStats()

TritonClient::ServerSideStats TritonClient::summarizeServerStats ( const inference::ModelStatistics &  start_status,
const inference::ModelStatistics &  end_status 
) const
protected

Definition at line 536 of file TritonClient.cc.

References TritonClient::ServerSideStats::compute_infer_time_ns_, TritonClient::ServerSideStats::compute_input_time_ns_, TritonClient::ServerSideStats::compute_output_time_ns_, TritonClient::ServerSideStats::cumm_time_ns_, TritonClient::ServerSideStats::execution_count_, TritonClient::ServerSideStats::inference_count_, TritonClient::ServerSideStats::queue_time_ns_, and TritonClient::ServerSideStats::success_count_.

Referenced by evaluate().

537  {
538  TritonClient::ServerSideStats server_stats;
539 
540  server_stats.inference_count_ = end_status.inference_count() - start_status.inference_count();
541  server_stats.execution_count_ = end_status.execution_count() - start_status.execution_count();
542  server_stats.success_count_ =
543  end_status.inference_stats().success().count() - start_status.inference_stats().success().count();
544  server_stats.cumm_time_ns_ =
545  end_status.inference_stats().success().ns() - start_status.inference_stats().success().ns();
546  server_stats.queue_time_ns_ = end_status.inference_stats().queue().ns() - start_status.inference_stats().queue().ns();
547  server_stats.compute_input_time_ns_ =
548  end_status.inference_stats().compute_input().ns() - start_status.inference_stats().compute_input().ns();
549  server_stats.compute_infer_time_ns_ =
550  end_status.inference_stats().compute_infer().ns() - start_status.inference_stats().compute_infer().ns();
551  server_stats.compute_output_time_ns_ =
552  end_status.inference_stats().compute_output().ns() - start_status.inference_stats().compute_output().ns();
553 
554  return server_stats;
555 }

◆ useSharedMemory()

bool TritonClient::useSharedMemory ( ) const
inline

Definition at line 44 of file TritonClient.h.

References useSharedMemory_.

44 { return useSharedMemory_; }
bool useSharedMemory_
Definition: TritonClient.h:80

◆ verbose()

bool TritonClient::verbose ( ) const
inline

Definition at line 43 of file TritonClient.h.

References verbose_.

Referenced by evaluate().

43 { return verbose_; }

Member Data Documentation

◆ batchMode_

TritonBatchMode TritonClient::batchMode_
protected

Definition at line 77 of file TritonClient.h.

◆ client_

std::unique_ptr<triton::client::InferenceServerGrpcClient> TritonClient::client_
protected

Definition at line 86 of file TritonClient.h.

Referenced by client(), evaluate(), getServerSideStatus(), and TritonClient().

◆ compressionAlgo_

grpc_compression_algorithm TritonClient::compressionAlgo_
protected

Definition at line 83 of file TritonClient.h.

Referenced by evaluate().

◆ headers_

triton::client::Headers TritonClient::headers_
protected

Definition at line 84 of file TritonClient.h.

Referenced by evaluate().

◆ isLocal_

bool TritonClient::isLocal_
protected

Definition at line 82 of file TritonClient.h.

Referenced by evaluate(), getServerSideStatus(), isLocal(), and TritonClient().

◆ manualBatchMode_

bool TritonClient::manualBatchMode_
protected

Definition at line 78 of file TritonClient.h.

Referenced by reset(), resetBatchMode(), and setBatchMode().

◆ maxOuterDim_

unsigned TritonClient::maxOuterDim_
protected

Definition at line 73 of file TritonClient.h.

Referenced by setBatchSize(), and TritonClient().

◆ nEntries_

unsigned TritonClient::nEntries_
protected

Definition at line 76 of file TritonClient.h.

◆ noOuterDim_

bool TritonClient::noOuterDim_
protected

Definition at line 75 of file TritonClient.h.

Referenced by getResults(), noOuterDim(), and TritonClient().

◆ options_

std::vector<triton::client::InferOptions> TritonClient::options_
protected

◆ outerDim_

unsigned TritonClient::outerDim_
protected

Definition at line 74 of file TritonClient.h.

Referenced by addEntry(), batchSize(), outerDim(), and setBatchSize().

◆ serverType_

TritonServerType TritonClient::serverType_
protected

Definition at line 81 of file TritonClient.h.

Referenced by serverType(), and TritonClient().

◆ TritonInputData

friend TritonClient::TritonInputData
private

Definition at line 91 of file TritonClient.h.

◆ TritonOutputData

friend TritonClient::TritonOutputData
private

Definition at line 92 of file TritonClient.h.

◆ useSharedMemory_

bool TritonClient::useSharedMemory_
protected

Definition at line 80 of file TritonClient.h.

Referenced by setUseSharedMemory(), and useSharedMemory().

◆ verbose_

bool TritonClient::verbose_
protected

Definition at line 79 of file TritonClient.h.

Referenced by getServerSideStatus(), TritonClient(), and verbose().