CMS 3D CMS Logo

List of all members | Classes | Public Member Functions | Static Public Member Functions | Protected Member Functions | Protected Attributes | Private Member Functions | Private Attributes
TritonClient Class Reference

#include <TritonClient.h>

Inheritance diagram for TritonClient:
SonicClient< TritonInputMap, TritonOutputMap > SonicClientBase SonicClientTypes< TritonInputMap, TritonOutputMap >

Classes

struct  ServerSideStats
 

Public Member Functions

unsigned batchSize () const
 
bool noBatch () const
 
void reset () override
 
TritonServerType serverType () const
 
bool setBatchSize (unsigned bsize)
 
void setUseSharedMemory (bool useShm)
 
 TritonClient (const edm::ParameterSet &params, const std::string &debugName)
 
bool useSharedMemory () const
 
bool verbose () const
 
 ~TritonClient () override
 
- Public Member Functions inherited from SonicClient< TritonInputMap, TritonOutputMap >
 SonicClient (const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
 
- Public Member Functions inherited from SonicClientBase
const std::string & clientName () const
 
const std::string & debugName () const
 
virtual void dispatch (edm::WaitingTaskWithArenaHolder holder)
 
virtual void dispatch ()
 
SonicMode mode () const
 
 SonicClientBase (const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
 
virtual ~SonicClientBase ()=default
 
- Public Member Functions inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
Input & input ()
 
const Output & output () const
 
virtual ~SonicClientTypes ()=default
 

Static Public Member Functions

static void fillPSetDescription (edm::ParameterSetDescription &iDesc)
 
- Static Public Member Functions inherited from SonicClientBase
static void fillBasePSetDescription (edm::ParameterSetDescription &desc, bool allowRetry=true)
 

Protected Member Functions

void evaluate () override
 
void getResults (std::shared_ptr< triton::client::InferResult > results)
 
inference::ModelStatistics getServerSideStatus () const
 
template<typename F >
bool handle_exception (F &&call)
 
void reportServerSideStats (const ServerSideStats &stats) const
 
ServerSideStats summarizeServerStats (const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
 
- Protected Member Functions inherited from SonicClientBase
void finish (bool success, std::exception_ptr eptr=std::exception_ptr{})
 
void setMode (SonicMode mode)
 
void start (edm::WaitingTaskWithArenaHolder holder)
 
void start ()
 

Protected Attributes

unsigned batchSize_
 
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
 
grpc_compression_algorithm compressionAlgo_
 
triton::client::Headers headers_
 
std::vector< triton::client::InferInput * > inputsTriton_
 
unsigned maxBatchSize_
 
bool noBatch_
 
triton::client::InferOptions options_
 
std::vector< const triton::client::InferRequestedOutput * > outputsTriton_
 
TritonServerType serverType_
 
bool useSharedMemory_
 
bool verbose_
 
- Protected Attributes inherited from SonicClientBase
unsigned allowedTries_
 
std::string clientName_
 
std::string debugName_
 
std::unique_ptr< SonicDispatcher > dispatcher_
 
std::string fullDebugName_
 
std::optional< edm::WaitingTaskWithArenaHolder > holder_
 
SonicMode mode_
 
unsigned tries_
 
bool verbose_
 
- Protected Attributes inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
Input input_
 
Output output_
 

Private Member Functions

auto client ()
 

Private Attributes

friend TritonInputData
 
friend TritonOutputData
 

Additional Inherited Members

- Public Types inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
typedef TritonInputMap Input
 
typedef TritonOutputMap Output
 

Detailed Description

Definition at line 19 of file TritonClient.h.

Constructor & Destructor Documentation

◆ TritonClient()

TritonClient::TritonClient ( const edm::ParameterSet &  params,
const std::string &  debugName 
)

Definition at line 39 of file TritonClient.cc.

References client_, SonicClientBase::fullDebugName_, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, inputsTriton_, LocalCPU, SiStripPI::max, maxBatchSize_, mps_check::msg, noBatch_, heppy_report::oname, options_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, outputsTriton_, submitPVValidationJobs::params, TritonService::pid(), triton_utils::printColl(), contentValuesFiles::server, TritonService::serverInfo(), serverType_, setBatchSize(), SonicClientBase::setMode(), AlCaHLTBitMon_QueryRunRegistry::string, summarizeEdmComparisonLogfiles::success, Sync, TRITON_THROW_IF_ERROR, and verbose_.

40  : SonicClient(params, debugName, "TritonClient"),
41  verbose_(params.getUntrackedParameter<bool>("verbose")),
42  useSharedMemory_(params.getUntrackedParameter<bool>("useSharedMemory")),
43  compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter<std::string>("compression"))),
44  options_(params.getParameter<std::string>("modelName")) {
45  //get appropriate server for this model
47  const auto& server =
48  ts->serverInfo(options_.model_name_, params.getUntrackedParameter<std::string>("preferredServer"));
49  serverType_ = server.type;
50  if (verbose_)
51  edm::LogInfo(fullDebugName_) << "Using server: " << server.url;
52  //enforce sync mode for fallback CPU server to avoid contention
53  //todo: could enforce async mode otherwise (unless mode was specified by user?)
56 
57  //connect to the server
59  tc::InferenceServerGrpcClient::Create(&client_, server.url, false, server.useSsl, server.sslOptions),
60  "TritonClient(): unable to create inference context");
61 
62  //set options
63  options_.model_version_ = params.getParameter<std::string>("modelVersion");
64  //convert seconds to microseconds
65  options_.client_timeout_ = params.getUntrackedParameter<unsigned>("timeout") * 1e6;
66 
67  //config needed for batch size
68  inference::ModelConfigResponse modelConfigResponse;
69  TRITON_THROW_IF_ERROR(client_->ModelConfig(&modelConfigResponse, options_.model_name_, options_.model_version_),
70  "TritonClient(): unable to get model config");
71  inference::ModelConfig modelConfig(modelConfigResponse.config());
72 
73  //check batch size limitations (after i/o setup)
74  //triton uses max batch size = 0 to denote a model that does not support batching
75  //but for models that do support batching, a given event may set batch size 0 to indicate no valid input is present
76  //so set the local max to 1 and keep track of "no batch" case
77  maxBatchSize_ = modelConfig.max_batch_size();
78  noBatch_ = maxBatchSize_ == 0;
80 
81  //get model info
82  inference::ModelMetadataResponse modelMetadata;
83  TRITON_THROW_IF_ERROR(client_->ModelMetadata(&modelMetadata, options_.model_name_, options_.model_version_),
84  "TritonClient(): unable to get model metadata");
85 
86  //get input and output (which know their sizes)
87  const auto& nicInputs = modelMetadata.inputs();
88  const auto& nicOutputs = modelMetadata.outputs();
89 
90  //report all model errors at once
91  std::stringstream msg;
92  std::string msg_str;
93 
94  //currently no use case is foreseen for a model with zero inputs or outputs
95  if (nicInputs.empty())
96  msg << "Model on server appears malformed (zero inputs)\n";
97 
98  if (nicOutputs.empty())
99  msg << "Model on server appears malformed (zero outputs)\n";
100 
101  //stop if errors
102  msg_str = msg.str();
103  if (!msg_str.empty())
104  throw cms::Exception("ModelErrors") << msg_str;
105 
106  //setup input map
107  std::stringstream io_msg;
108  if (verbose_)
109  io_msg << "Model inputs: "
110  << "\n";
111  inputsTriton_.reserve(nicInputs.size());
112  for (const auto& nicInput : nicInputs) {
113  const auto& iname = nicInput.name();
114  auto [curr_itr, success] = input_.emplace(std::piecewise_construct,
115  std::forward_as_tuple(iname),
116  std::forward_as_tuple(iname, nicInput, this, ts->pid()));
117  auto& curr_input = curr_itr->second;
118  inputsTriton_.push_back(curr_input.data());
119  if (verbose_) {
120  io_msg << " " << iname << " (" << curr_input.dname() << ", " << curr_input.byteSize()
121  << " b) : " << triton_utils::printColl(curr_input.shape()) << "\n";
122  }
123  }
124 
125  //allow selecting only some outputs from server
126  const auto& v_outputs = params.getUntrackedParameter<std::vector<std::string>>("outputs");
127  std::unordered_set s_outputs(v_outputs.begin(), v_outputs.end());
128 
129  //setup output map
130  if (verbose_)
131  io_msg << "Model outputs: "
132  << "\n";
133  outputsTriton_.reserve(nicOutputs.size());
134  for (const auto& nicOutput : nicOutputs) {
135  const auto& oname = nicOutput.name();
136  if (!s_outputs.empty() and s_outputs.find(oname) == s_outputs.end())
137  continue;
138  auto [curr_itr, success] = output_.emplace(std::piecewise_construct,
139  std::forward_as_tuple(oname),
140  std::forward_as_tuple(oname, nicOutput, this, ts->pid()));
141  auto& curr_output = curr_itr->second;
142  outputsTriton_.push_back(curr_output.data());
143  if (verbose_) {
144  io_msg << " " << oname << " (" << curr_output.dname() << ", " << curr_output.byteSize()
145  << " b) : " << triton_utils::printColl(curr_output.shape()) << "\n";
146  }
147  if (!s_outputs.empty())
148  s_outputs.erase(oname);
149  }
150 
151  //check if any requested outputs were not available
152  if (!s_outputs.empty())
153  throw cms::Exception("MissingOutput")
154  << "Some requested outputs were not available on the server: " << triton_utils::printColl(s_outputs);
155 
156  //propagate batch size to inputs and outputs
157  setBatchSize(1);
158 
159  //print model info
160  std::stringstream model_msg;
161  if (verbose_) {
162  model_msg << "Model name: " << options_.model_name_ << "\n"
163  << "Model version: " << options_.model_version_ << "\n"
164  << "Model max batch size: " << (noBatch_ ? 0 : maxBatchSize_) << "\n";
165  edm::LogInfo(fullDebugName_) << model_msg.str() << io_msg.str();
166  }
167 }
const std::string & pid() const
const std::string & debugName() const
bool setBatchSize(unsigned bsize)
#define TRITON_THROW_IF_ERROR(X, MSG)
Definition: triton_utils.h:75
std::vector< const triton::client::InferRequestedOutput * > outputsTriton_
Definition: TritonClient.h:76
bool useSharedMemory_
Definition: TritonClient.h:69
void setMode(SonicMode mode)
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:78
unsigned maxBatchSize_
Definition: TritonClient.h:65
TritonServerType serverType_
Definition: TritonClient.h:70
SonicClient(const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
Definition: SonicClient.h:12
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:71
Log< level::Info, false > LogInfo
std::vector< triton::client::InferInput * > inputsTriton_
Definition: TritonClient.h:75
tuple msg
Definition: mps_check.py:286
triton::client::InferOptions options_
Definition: TritonClient.h:80
Server serverInfo(const std::string &model, const std::string &preferred="") const
std::string fullDebugName_
std::string printColl(const C &coll, const std::string &delim=", ")
Definition: triton_utils.cc:9

◆ ~TritonClient()

TritonClient::~TritonClient ( )
override

Definition at line 169 of file TritonClient.cc.

References SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, and SonicClientTypes< TritonInputMap, TritonOutputMap >::output_.

169  {
170  //by default: members of this class destroyed before members of base class
171  //in shared memory case, TritonMemResource (member of TritonData) unregisters from client_ in its destructor
172  //but input/output objects are member of base class, so destroyed after client_ (member of this class)
173  //therefore, clear the maps here
174  input_.clear();
175  output_.clear();
176 }

Member Function Documentation

◆ batchSize()

unsigned TritonClient::batchSize ( ) const
inline

Definition at line 39 of file TritonClient.h.

References batchSize_.

39 { return batchSize_; }
unsigned batchSize_
Definition: TritonClient.h:66

◆ client()

auto TritonClient::client ( )
inline, private

Definition at line 87 of file TritonClient.h.

References client_.

87 { return client_.get(); }
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:78

◆ evaluate()

void TritonClient::evaluate ( )
override, protected, virtual

Implements SonicClientBase.

Definition at line 242 of file TritonClient.cc.

References Async, batchSize_, client_, compressionAlgo_, SonicClientBase::finish(), getResults(), getServerSideStatus(), handle_exception(), headers_, inputsTriton_, SonicClientBase::mode_, options_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, outputsTriton_, reportServerSideStats(), bookConverter::results, dqmMemoryStats::stats, summarizeEdmComparisonLogfiles::success, summarizeServerStats(), TRITON_THROW_IF_ERROR, and verbose().

242  {
243  //in case there is nothing to process
244  if (batchSize_ == 0) {
245  finish(true);
246  return;
247  }
248 
249  //set up shared memory for output
250  auto success = handle_exception([&]() {
251  for (auto& element : output_) {
252  element.second.prepare();
253  }
254  });
255  if (!success)
256  return;
257 
258  // Get the status of the server prior to the request being made.
259  inference::ModelStatistics start_status;
260  success = handle_exception([&]() {
261  if (verbose())
262  start_status = getServerSideStatus();
263  });
264  if (!success)
265  return;
266 
267  if (mode_ == SonicMode::Async) {
268  //non-blocking call
269  success = handle_exception([&]() {
271  client_->AsyncInfer(
272  [start_status, this](tc::InferResult* results) {
273  //get results
274  std::shared_ptr<tc::InferResult> results_ptr(results);
275  auto success = handle_exception(
276  [&]() { TRITON_THROW_IF_ERROR(results_ptr->RequestStatus(), "evaluate(): unable to get result"); });
277  if (!success)
278  return;
279 
280  if (verbose()) {
281  inference::ModelStatistics end_status;
282  success = handle_exception([&]() { end_status = getServerSideStatus(); });
283  if (!success)
284  return;
285 
286  const auto& stats = summarizeServerStats(start_status, end_status);
288  }
289 
290  //check result
291  success = handle_exception([&]() { getResults(results_ptr); });
292  if (!success)
293  return;
294 
295  //finish
296  finish(true);
297  },
298  options_,
301  headers_,
303  "evaluate(): unable to launch async run");
304  });
305  if (!success)
306  return;
307  } else {
308  //blocking call
309  tc::InferResult* results;
310  success = handle_exception([&]() {
313  "evaluate(): unable to run and/or get result");
314  });
315  if (!success)
316  return;
317 
318  if (verbose()) {
319  inference::ModelStatistics end_status;
320  success = handle_exception([&]() { end_status = getServerSideStatus(); });
321  if (!success)
322  return;
323 
324  const auto& stats = summarizeServerStats(start_status, end_status);
326  }
327 
328  std::shared_ptr<tc::InferResult> results_ptr(results);
329  success = handle_exception([&]() { getResults(results_ptr); });
330  if (!success)
331  return;
332 
333  finish(true);
334  }
335 }
bool verbose() const
Definition: TritonClient.h:40
#define TRITON_THROW_IF_ERROR(X, MSG)
Definition: triton_utils.h:75
std::vector< const triton::client::InferRequestedOutput * > outputsTriton_
Definition: TritonClient.h:76
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:78
void finish(bool success, std::exception_ptr eptr=std::exception_ptr{})
ServerSideStats summarizeServerStats(const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
bool handle_exception(F &&call)
unsigned batchSize_
Definition: TritonClient.h:66
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:71
inference::ModelStatistics getServerSideStatus() const
triton::client::Headers headers_
Definition: TritonClient.h:72
std::vector< triton::client::InferInput * > inputsTriton_
Definition: TritonClient.h:75
triton::client::InferOptions options_
Definition: TritonClient.h:80
void getResults(std::shared_ptr< triton::client::InferResult > results)
void reportServerSideStats(const ServerSideStats &stats) const

◆ fillPSetDescription()

void TritonClient::fillPSetDescription ( edm::ParameterSetDescription &  iDesc)
static

Definition at line 406 of file TritonClient.cc.

References edm::ParameterSetDescription::add(), edm::ParameterSetDescription::addUntracked(), SonicClientBase::fillBasePSetDescription(), and AlCaHLTBitMon_QueryRunRegistry::string.

Referenced by DeepMETSonicProducer::fillDescriptions(), ParticleNetSonicJetTagsProducer::fillDescriptions(), SCEnergyCorrectorDRNProducer::fillDescriptions(), and DRNCorrectionProducerT< T >::fillDescriptions().

406  {
407  edm::ParameterSetDescription descClient;
408  fillBasePSetDescription(descClient);
409  descClient.add<std::string>("modelName");
410  descClient.add<std::string>("modelVersion", "");
411  descClient.add<edm::FileInPath>("modelConfigPath");
412  //server parameters should not affect the physics results
413  descClient.addUntracked<std::string>("preferredServer", "");
414  descClient.addUntracked<unsigned>("timeout");
415  descClient.addUntracked<bool>("useSharedMemory", true);
416  descClient.addUntracked<std::string>("compression", "");
417  descClient.addUntracked<std::vector<std::string>>("outputs", {});
418  iDesc.add<edm::ParameterSetDescription>("Client", descClient);
419 }
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
ParameterDescriptionBase * add(U const &iLabel, T const &value)
static void fillBasePSetDescription(edm::ParameterSetDescription &desc, bool allowRetry=true)

◆ getResults()

void TritonClient::getResults ( std::shared_ptr< triton::client::InferResult >  results)
protected

Definition at line 225 of file TritonClient.cc.

References noBatch_, heppy_report::oname, SonicClientTypes< TritonInputMap, TritonOutputMap >::output(), SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, bookConverter::results, and TRITON_THROW_IF_ERROR.

Referenced by evaluate().

225  {
226  for (auto& [oname, output] : output_) {
227  //set shape here before output becomes const
228  if (output.variableDims()) {
229  std::vector<int64_t> tmp_shape;
230  TRITON_THROW_IF_ERROR(results->Shape(oname, &tmp_shape), "getResults(): unable to get output shape for " + oname);
231  if (!noBatch_)
232  tmp_shape.erase(tmp_shape.begin());
233  output.setShape(tmp_shape);
234  output.computeSizes();
235  }
236  //extend lifetime
237  output.setResult(results);
238  }
239 }
#define TRITON_THROW_IF_ERROR(X, MSG)
Definition: triton_utils.h:75

◆ getServerSideStatus()

inference::ModelStatistics TritonClient::getServerSideStatus ( ) const
protected

Definition at line 395 of file TritonClient.cc.

References client_, options_, TRITON_THROW_IF_ERROR, and verbose_.

Referenced by evaluate().

395  {
396  if (verbose_) {
397  inference::ModelStatisticsResponse resp;
398  TRITON_THROW_IF_ERROR(client_->ModelInferenceStatistics(&resp, options_.model_name_, options_.model_version_),
399  "getServerSideStatus(): unable to get model statistics");
400  return *(resp.model_stats().begin());
401  }
402  return inference::ModelStatistics{};
403 }
#define TRITON_THROW_IF_ERROR(X, MSG)
Definition: triton_utils.h:75
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:78
triton::client::InferOptions options_
Definition: TritonClient.h:80

◆ handle_exception()

template<typename F >
bool TritonClient::handle_exception ( F &&  call)
protected

Definition at line 206 of file TritonClient.cc.

References CMS_SA_ALLOW, MillePedeFileConverter_cfg::e, and SonicClientBase::finish().

Referenced by evaluate().

206  {
207  //caught exceptions will be propagated to edm::WaitingTaskWithArenaHolder
208  CMS_SA_ALLOW try {
209  call();
210  return true;
211  }
212  //TritonExceptions are intended/expected to be recoverable, i.e. retries should be allowed
213  catch (TritonException& e) {
214  e.convertToWarning();
215  finish(false);
216  return false;
217  }
218  //other exceptions are not: execution should stop if they are encountered
219  catch (...) {
220  finish(false, std::current_exception());
221  return false;
222  }
223 }
#define CMS_SA_ALLOW
void finish(bool success, std::exception_ptr eptr=std::exception_ptr{})

◆ noBatch()

bool TritonClient::noBatch ( ) const
inline

Definition at line 45 of file TritonClient.h.

References noBatch_.

45 { return noBatch_; }

◆ reportServerSideStats()

void TritonClient::reportServerSideStats ( const ServerSideStats &  stats) const
protected

Definition at line 337 of file TritonClient.cc.

References submitPVResolutionJobs::count, SonicClientBase::debugName_, SonicClientBase::fullDebugName_, mps_check::msg, and dqmMemoryStats::stats.

Referenced by evaluate().

337  {
338  std::stringstream msg;
339 
340  // https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/perf_client/inference_profiler.cc
341  const uint64_t count = stats.success_count_;
342  msg << " Inference count: " << stats.inference_count_ << "\n";
343  msg << " Execution count: " << stats.execution_count_ << "\n";
344  msg << " Successful request count: " << count << "\n";
345 
346  if (count > 0) {
347  auto get_avg_us = [count](uint64_t tval) {
348  constexpr uint64_t us_to_ns = 1000;
349  return tval / us_to_ns / count;
350  };
351 
352  const uint64_t cumm_avg_us = get_avg_us(stats.cumm_time_ns_);
353  const uint64_t queue_avg_us = get_avg_us(stats.queue_time_ns_);
354  const uint64_t compute_input_avg_us = get_avg_us(stats.compute_input_time_ns_);
355  const uint64_t compute_infer_avg_us = get_avg_us(stats.compute_infer_time_ns_);
356  const uint64_t compute_output_avg_us = get_avg_us(stats.compute_output_time_ns_);
357  const uint64_t compute_avg_us = compute_input_avg_us + compute_infer_avg_us + compute_output_avg_us;
358  const uint64_t overhead =
359  (cumm_avg_us > queue_avg_us + compute_avg_us) ? (cumm_avg_us - queue_avg_us - compute_avg_us) : 0;
360 
361  msg << " Avg request latency: " << cumm_avg_us << " usec"
362  << "\n"
363  << " (overhead " << overhead << " usec + "
364  << "queue " << queue_avg_us << " usec + "
365  << "compute input " << compute_input_avg_us << " usec + "
366  << "compute infer " << compute_infer_avg_us << " usec + "
367  << "compute output " << compute_output_avg_us << " usec)" << std::endl;
368  }
369 
370  if (!debugName_.empty())
371  edm::LogInfo(fullDebugName_) << msg.str();
372 }
std::string debugName_
unsigned long long uint64_t
Definition: Time.h:13
tuple msg
Definition: mps_check.py:286
std::string fullDebugName_

◆ reset()

void TritonClient::reset ( void  )
override, virtual

Reimplemented from SonicClientBase.

Definition at line 196 of file TritonClient.cc.

References SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, and SonicClientTypes< TritonInputMap, TritonOutputMap >::output_.

196  {
197  for (auto& element : input_) {
198  element.second.reset();
199  }
200  for (auto& element : output_) {
201  element.second.reset();
202  }
203 }

◆ serverType()

TritonServerType TritonClient::serverType ( ) const
inline

Definition at line 46 of file TritonClient.h.

References serverType_.

46 { return serverType_; }
TritonServerType serverType_
Definition: TritonClient.h:70

◆ setBatchSize()

bool TritonClient::setBatchSize ( unsigned  bsize)

Definition at line 178 of file TritonClient.cc.

References batchSize_, SonicClientBase::fullDebugName_, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, maxBatchSize_, and SonicClientTypes< TritonInputMap, TritonOutputMap >::output_.

Referenced by TritonClient().

178  {
179  if (bsize > maxBatchSize_) {
180  edm::LogWarning(fullDebugName_) << "Requested batch size " << bsize << " exceeds server-specified max batch size "
181  << maxBatchSize_ << ". Batch size will remain as" << batchSize_;
182  return false;
183  } else {
184  batchSize_ = bsize;
185  //set for input and output
186  for (auto& element : input_) {
187  element.second.setBatchSize(bsize);
188  }
189  for (auto& element : output_) {
190  element.second.setBatchSize(bsize);
191  }
192  return true;
193  }
194 }
unsigned maxBatchSize_
Definition: TritonClient.h:65
unsigned batchSize_
Definition: TritonClient.h:66
std::string fullDebugName_
Log< level::Warning, false > LogWarning

◆ setUseSharedMemory()

void TritonClient::setUseSharedMemory ( bool  useShm)
inline

Definition at line 42 of file TritonClient.h.

References useSharedMemory_.

42 { useSharedMemory_ = useShm; }
bool useSharedMemory_
Definition: TritonClient.h:69

◆ summarizeServerStats()

TritonClient::ServerSideStats TritonClient::summarizeServerStats ( const inference::ModelStatistics &  start_status,
const inference::ModelStatistics &  end_status 
) const
protected

Definition at line 374 of file TritonClient.cc.

References TritonClient::ServerSideStats::compute_infer_time_ns_, TritonClient::ServerSideStats::compute_input_time_ns_, TritonClient::ServerSideStats::compute_output_time_ns_, TritonClient::ServerSideStats::cumm_time_ns_, TritonClient::ServerSideStats::execution_count_, TritonClient::ServerSideStats::inference_count_, TritonClient::ServerSideStats::queue_time_ns_, and TritonClient::ServerSideStats::success_count_.

Referenced by evaluate().

375  {
376  TritonClient::ServerSideStats server_stats;
377 
378  server_stats.inference_count_ = end_status.inference_count() - start_status.inference_count();
379  server_stats.execution_count_ = end_status.execution_count() - start_status.execution_count();
380  server_stats.success_count_ =
381  end_status.inference_stats().success().count() - start_status.inference_stats().success().count();
382  server_stats.cumm_time_ns_ =
383  end_status.inference_stats().success().ns() - start_status.inference_stats().success().ns();
384  server_stats.queue_time_ns_ = end_status.inference_stats().queue().ns() - start_status.inference_stats().queue().ns();
385  server_stats.compute_input_time_ns_ =
386  end_status.inference_stats().compute_input().ns() - start_status.inference_stats().compute_input().ns();
387  server_stats.compute_infer_time_ns_ =
388  end_status.inference_stats().compute_infer().ns() - start_status.inference_stats().compute_infer().ns();
389  server_stats.compute_output_time_ns_ =
390  end_status.inference_stats().compute_output().ns() - start_status.inference_stats().compute_output().ns();
391 
392  return server_stats;
393 }

◆ useSharedMemory()

bool TritonClient::useSharedMemory ( ) const
inline

Definition at line 41 of file TritonClient.h.

References useSharedMemory_.

41 { return useSharedMemory_; }
bool useSharedMemory_
Definition: TritonClient.h:69

◆ verbose()

bool TritonClient::verbose ( ) const
inline

Definition at line 40 of file TritonClient.h.

References verbose_.

Referenced by evaluate().

40 { return verbose_; }

Member Data Documentation

◆ batchSize_

unsigned TritonClient::batchSize_
protected

Definition at line 66 of file TritonClient.h.

Referenced by batchSize(), evaluate(), and setBatchSize().

◆ client_

std::unique_ptr<triton::client::InferenceServerGrpcClient> TritonClient::client_
protected

Definition at line 78 of file TritonClient.h.

Referenced by client(), evaluate(), getServerSideStatus(), and TritonClient().

◆ compressionAlgo_

grpc_compression_algorithm TritonClient::compressionAlgo_
protected

Definition at line 71 of file TritonClient.h.

Referenced by evaluate().

◆ headers_

triton::client::Headers TritonClient::headers_
protected

Definition at line 72 of file TritonClient.h.

Referenced by evaluate().

◆ inputsTriton_

std::vector<triton::client::InferInput*> TritonClient::inputsTriton_
protected

Definition at line 75 of file TritonClient.h.

Referenced by evaluate(), and TritonClient().

◆ maxBatchSize_

unsigned TritonClient::maxBatchSize_
protected

Definition at line 65 of file TritonClient.h.

Referenced by setBatchSize(), and TritonClient().

◆ noBatch_

bool TritonClient::noBatch_
protected

Definition at line 67 of file TritonClient.h.

Referenced by getResults(), noBatch(), and TritonClient().

◆ options_

triton::client::InferOptions TritonClient::options_
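protected

Definition at line 80 of file TritonClient.h.

Referenced by evaluate(), getServerSideStatus(), and TritonClient().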

◆ outputsTriton_

std::vector<const triton::client::InferRequestedOutput*> TritonClient::outputsTriton_
protected

Definition at line 76 of file TritonClient.h.

Referenced by evaluate(), and TritonClient().

◆ serverType_

TritonServerType TritonClient::serverType_
protected

Definition at line 70 of file TritonClient.h.

Referenced by serverType(), and TritonClient().

◆ TritonInputData

friend TritonClient::TritonInputData
private

Definition at line 83 of file TritonClient.h.

◆ TritonOutputData

friend TritonClient::TritonOutputData
private

Definition at line 84 of file TritonClient.h.

◆ useSharedMemory_

bool TritonClient::useSharedMemory_
protected

Definition at line 69 of file TritonClient.h.

Referenced by setUseSharedMemory(), and useSharedMemory().

◆ verbose_

bool TritonClient::verbose_
protected

Definition at line 68 of file TritonClient.h.

Referenced by getServerSideStatus(), TritonClient(), and verbose().