CMS 3D CMS Logo

List of all members | Classes | Public Member Functions | Static Public Member Functions | Protected Member Functions | Protected Attributes | Private Member Functions | Private Attributes
TritonClient Class Reference

#include <TritonClient.h>

Inheritance diagram for TritonClient:
SonicClient< TritonInputMap, TritonOutputMap > SonicClientBase SonicClientTypes< TritonInputMap, TritonOutputMap >

Classes

struct  ServerSideStats
 

Public Member Functions

unsigned batchSize () const
 
bool noBatch () const
 
void reset () override
 
TritonServerType serverType () const
 
bool setBatchSize (unsigned bsize)
 
void setUseSharedMemory (bool useShm)
 
 TritonClient (const edm::ParameterSet &params, const std::string &debugName)
 
bool useSharedMemory () const
 
bool verbose () const
 
 ~TritonClient () override
 
- Public Member Functions inherited from SonicClient< TritonInputMap, TritonOutputMap >
 SonicClient (const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
 
- Public Member Functions inherited from SonicClientBase
const std::string & clientName () const
 
const std::string & debugName () const
 
virtual void dispatch ()
 
virtual void dispatch (edm::WaitingTaskWithArenaHolder holder)
 
SonicMode mode () const
 
 SonicClientBase (const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
 
virtual ~SonicClientBase ()=default
 
- Public Member Functions inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
Input & input ()
 
const Output & output () const
 
virtual ~SonicClientTypes ()=default
 

Static Public Member Functions

static void fillPSetDescription (edm::ParameterSetDescription &iDesc)
 
- Static Public Member Functions inherited from SonicClientBase
static void fillBasePSetDescription (edm::ParameterSetDescription &desc, bool allowRetry=true)
 

Protected Member Functions

void evaluate () override
 
void getResults (std::shared_ptr< triton::client::InferResult > results)
 
inference::ModelStatistics getServerSideStatus () const
 
template<typename F >
bool handle_exception (F &&call)
 
void reportServerSideStats (const ServerSideStats &stats) const
 
ServerSideStats summarizeServerStats (const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
 
- Protected Member Functions inherited from SonicClientBase
void finish (bool success, std::exception_ptr eptr=std::exception_ptr{})
 
void setMode (SonicMode mode)
 
void start ()
 
void start (edm::WaitingTaskWithArenaHolder holder)
 

Protected Attributes

unsigned batchSize_
 
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
 
grpc_compression_algorithm compressionAlgo_
 
triton::client::Headers headers_
 
std::vector< triton::client::InferInput * > inputsTriton_
 
unsigned maxBatchSize_
 
bool noBatch_
 
triton::client::InferOptions options_
 
std::vector< const triton::client::InferRequestedOutput * > outputsTriton_
 
TritonServerType serverType_
 
bool useSharedMemory_
 
bool verbose_
 
- Protected Attributes inherited from SonicClientBase
unsigned allowedTries_
 
std::string clientName_
 
std::string debugName_
 
std::unique_ptr< SonicDispatcher > dispatcher_
 
std::string fullDebugName_
 
std::optional< edm::WaitingTaskWithArenaHolder > holder_
 
SonicMode mode_
 
unsigned tries_
 
- Protected Attributes inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
Input input_
 
Output output_
 

Private Member Functions

auto client ()
 

Private Attributes

friend TritonInputData
 
friend TritonOutputData
 

Additional Inherited Members

- Public Types inherited from SonicClientTypes< TritonInputMap, TritonOutputMap >
typedef TritonInputMap Input
 
typedef TritonOutputMap Output
 

Detailed Description

Definition at line 19 of file TritonClient.h.

Constructor & Destructor Documentation

◆ TritonClient()

TritonClient::TritonClient ( const edm::ParameterSet & params,
const std::string &  debugName 
)

Definition at line 39 of file TritonClient.cc.

40  : SonicClient(params, debugName, "TritonClient"),
41  verbose_(params.getUntrackedParameter<bool>("verbose")),
42  useSharedMemory_(params.getUntrackedParameter<bool>("useSharedMemory")),
43  compressionAlgo_(getCompressionAlgo(params.getUntrackedParameter<std::string>("compression"))),
44  options_(params.getParameter<std::string>("modelName")) {
45  //get appropriate server for this model
47  const auto& server =
48  ts->serverInfo(options_.model_name_, params.getUntrackedParameter<std::string>("preferredServer"));
49  serverType_ = server.type;
50  if (verbose_)
51  edm::LogInfo(fullDebugName_) << "Using server: " << server.url;
52  //enforce sync mode for fallback CPU server to avoid contention
53  //todo: could enforce async mode otherwise (unless mode was specified by user?)
56 
57  //connect to the server
59  tc::InferenceServerGrpcClient::Create(&client_, server.url, false, server.useSsl, server.sslOptions),
60  "TritonClient(): unable to create inference context");
61 
62  //set options
63  options_.model_version_ = params.getParameter<std::string>("modelVersion");
64  //convert seconds to microseconds
65  options_.client_timeout_ = params.getUntrackedParameter<unsigned>("timeout") * 1e6;
66 
67  //config needed for batch size
68  inference::ModelConfigResponse modelConfigResponse;
69  triton_utils::throwIfError(client_->ModelConfig(&modelConfigResponse, options_.model_name_, options_.model_version_),
70  "TritonClient(): unable to get model config");
71  inference::ModelConfig modelConfig(modelConfigResponse.config());
72 
73  //check batch size limitations (after i/o setup)
74  //triton uses max batch size = 0 to denote a model that does not support batching
75  //but for models that do support batching, a given event may set batch size 0 to indicate no valid input is present
76  //so set the local max to 1 and keep track of "no batch" case
77  maxBatchSize_ = modelConfig.max_batch_size();
78  noBatch_ = maxBatchSize_ == 0;
80 
81  //get model info
82  inference::ModelMetadataResponse modelMetadata;
83  triton_utils::throwIfError(client_->ModelMetadata(&modelMetadata, options_.model_name_, options_.model_version_),
84  "TritonClient(): unable to get model metadata");
85 
86  //get input and output (which know their sizes)
87  const auto& nicInputs = modelMetadata.inputs();
88  const auto& nicOutputs = modelMetadata.outputs();
89 
90  //report all model errors at once
91  std::stringstream msg;
92  std::string msg_str;
93 
94  //currently no use case is foreseen for a model with zero inputs or outputs
95  if (nicInputs.empty())
96  msg << "Model on server appears malformed (zero inputs)\n";
97 
98  if (nicOutputs.empty())
99  msg << "Model on server appears malformed (zero outputs)\n";
100 
101  //stop if errors
102  msg_str = msg.str();
103  if (!msg_str.empty())
104  throw cms::Exception("ModelErrors") << msg_str;
105 
106  //setup input map
107  std::stringstream io_msg;
108  if (verbose_)
109  io_msg << "Model inputs: "
110  << "\n";
111  inputsTriton_.reserve(nicInputs.size());
112  for (const auto& nicInput : nicInputs) {
113  const auto& iname = nicInput.name();
114  auto [curr_itr, success] = input_.emplace(std::piecewise_construct,
115  std::forward_as_tuple(iname),
116  std::forward_as_tuple(iname, nicInput, this, ts->pid()));
117  auto& curr_input = curr_itr->second;
118  inputsTriton_.push_back(curr_input.data());
119  if (verbose_) {
120  io_msg << " " << iname << " (" << curr_input.dname() << ", " << curr_input.byteSize()
121  << " b) : " << triton_utils::printColl(curr_input.shape()) << "\n";
122  }
123  }
124 
125  //allow selecting only some outputs from server
126  const auto& v_outputs = params.getUntrackedParameter<std::vector<std::string>>("outputs");
127  std::unordered_set s_outputs(v_outputs.begin(), v_outputs.end());
128 
129  //setup output map
130  if (verbose_)
131  io_msg << "Model outputs: "
132  << "\n";
133  outputsTriton_.reserve(nicOutputs.size());
134  for (const auto& nicOutput : nicOutputs) {
135  const auto& oname = nicOutput.name();
136  if (!s_outputs.empty() and s_outputs.find(oname) == s_outputs.end())
137  continue;
138  auto [curr_itr, success] = output_.emplace(std::piecewise_construct,
139  std::forward_as_tuple(oname),
140  std::forward_as_tuple(oname, nicOutput, this, ts->pid()));
141  auto& curr_output = curr_itr->second;
142  outputsTriton_.push_back(curr_output.data());
143  if (verbose_) {
144  io_msg << " " << oname << " (" << curr_output.dname() << ", " << curr_output.byteSize()
145  << " b) : " << triton_utils::printColl(curr_output.shape()) << "\n";
146  }
147  if (!s_outputs.empty())
148  s_outputs.erase(oname);
149  }
150 
151  //check if any requested outputs were not available
152  if (!s_outputs.empty())
153  throw cms::Exception("MissingOutput")
154  << "Some requested outputs were not available on the server: " << triton_utils::printColl(s_outputs);
155 
156  //propagate batch size to inputs and outputs
157  setBatchSize(1);
158 
159  //print model info
160  std::stringstream model_msg;
161  if (verbose_) {
162  model_msg << "Model name: " << options_.model_name_ << "\n"
163  << "Model version: " << options_.model_version_ << "\n"
164  << "Model max batch size: " << (noBatch_ ? 0 : maxBatchSize_) << "\n";
165  edm::LogInfo(fullDebugName_) << model_msg.str() << io_msg.str();
166  }
167 }

References client_, SonicClientBase::fullDebugName_, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, inputsTriton_, LocalCPU, SiStripPI::max, maxBatchSize_, mps_check::msg, noBatch_, heppy_report::oname, options_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, outputsTriton_, CalibrationSummaryClient_cfi::params, TritonService::pid(), triton_utils::printColl(), contentValuesFiles::server, TritonService::serverInfo(), serverType_, setBatchSize(), SonicClientBase::setMode(), AlCaHLTBitMon_QueryRunRegistry::string, summarizeEdmComparisonLogfiles::success, Sync, triton_utils::throwIfError(), and verbose_.

◆ ~TritonClient()

TritonClient::~TritonClient ( )
override

Definition at line 169 of file TritonClient.cc.

169  {
170  //by default: members of this class destroyed before members of base class
171  //in shared memory case, TritonMemResource (member of TritonData) unregisters from client_ in its destructor
172  //but input/output objects are member of base class, so destroyed after client_ (member of this class)
173  //therefore, clear the maps here
174  input_.clear();
175  output_.clear();
176 }

References SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, and SonicClientTypes< TritonInputMap, TritonOutputMap >::output_.

Member Function Documentation

◆ batchSize()

unsigned TritonClient::batchSize ( ) const
inline

Definition at line 39 of file TritonClient.h.

39 { return batchSize_; }

References batchSize_.

◆ client()

auto TritonClient::client ( )
inline private

Definition at line 87 of file TritonClient.h.

87 { return client_.get(); }

References client_.

◆ evaluate()

void TritonClient::evaluate ( )
override protected virtual

Implements SonicClientBase.

Definition at line 241 of file TritonClient.cc.

241  {
242  //in case there is nothing to process
243  if (batchSize_ == 0) {
244  finish(true);
245  return;
246  }
247 
248  //set up shared memory for output
249  auto success = handle_exception([&]() {
250  for (auto& element : output_) {
251  element.second.prepare();
252  }
253  });
254  if (!success)
255  return;
256 
257  // Get the status of the server prior to the request being made.
258  inference::ModelStatistics start_status;
259  success = handle_exception([&]() {
260  if (verbose())
261  start_status = getServerSideStatus();
262  });
263  if (!success)
264  return;
265 
266  if (mode_ == SonicMode::Async) {
267  //non-blocking call
268  success = handle_exception([&]() {
270  [start_status, this](tc::InferResult* results) {
271  //get results
272  std::shared_ptr<tc::InferResult> results_ptr(results);
273  auto success = handle_exception([&]() {
274  triton_utils::throwIfError(results_ptr->RequestStatus(),
275  "evaluate(): unable to get result");
276  });
277  if (!success)
278  return;
279 
280  if (verbose()) {
281  inference::ModelStatistics end_status;
282  success = handle_exception([&]() { end_status = getServerSideStatus(); });
283  if (!success)
284  return;
285 
286  const auto& stats = summarizeServerStats(start_status, end_status);
288  }
289 
290  //check result
291  success = handle_exception([&]() { getResults(results_ptr); });
292  if (!success)
293  return;
294 
295  //finish
296  finish(true);
297  },
298  options_,
301  headers_,
303  "evaluate(): unable to launch async run");
304  });
305  if (!success)
306  return;
307  } else {
308  //blocking call
309  tc::InferResult* results;
310  success = handle_exception([&]() {
313  "evaluate(): unable to run and/or get result");
314  });
315  if (!success)
316  return;
317 
318  if (verbose()) {
319  inference::ModelStatistics end_status;
320  success = handle_exception([&]() { end_status = getServerSideStatus(); });
321  if (!success)
322  return;
323 
324  const auto& stats = summarizeServerStats(start_status, end_status);
326  }
327 
328  std::shared_ptr<tc::InferResult> results_ptr(results);
329  success = handle_exception([&]() { getResults(results_ptr); });
330  if (!success)
331  return;
332 
333  finish(true);
334  }
335 }

References Async, batchSize_, client_, compressionAlgo_, SonicClientBase::finish(), getResults(), getServerSideStatus(), handle_exception(), headers_, inputsTriton_, SonicClientBase::mode_, options_, SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, outputsTriton_, reportServerSideStats(), bookConverter::results, dqmMemoryStats::stats, summarizeEdmComparisonLogfiles::success, summarizeServerStats(), triton_utils::throwIfError(), and verbose().

◆ fillPSetDescription()

void TritonClient::fillPSetDescription ( edm::ParameterSetDescription & iDesc)
static

Definition at line 406 of file TritonClient.cc.

406  {
407  edm::ParameterSetDescription descClient;
408  fillBasePSetDescription(descClient);
409  descClient.add<std::string>("modelName");
410  descClient.add<std::string>("modelVersion", "");
411  descClient.add<edm::FileInPath>("modelConfigPath");
412  //server parameters should not affect the physics results
413  descClient.addUntracked<std::string>("preferredServer", "");
414  descClient.addUntracked<unsigned>("timeout");
415  descClient.addUntracked<bool>("verbose", false);
416  descClient.addUntracked<bool>("useSharedMemory", true);
417  descClient.addUntracked<std::string>("compression", "");
418  descClient.addUntracked<std::vector<std::string>>("outputs", {});
419  iDesc.add<edm::ParameterSetDescription>("Client", descClient);
420 }

References edm::ParameterSetDescription::add(), edm::ParameterSetDescription::addUntracked(), SonicClientBase::fillBasePSetDescription(), and AlCaHLTBitMon_QueryRunRegistry::string.

◆ getResults()

void TritonClient::getResults ( std::shared_ptr< triton::client::InferResult >  results)
protected

Definition at line 225 of file TritonClient.cc.

225  {
226  for (auto& [oname, output] : output_) {
227  //set shape here before output becomes const
228  if (output.variableDims()) {
229  std::vector<int64_t> tmp_shape;
230  triton_utils::throwIfError(results->Shape(oname, &tmp_shape),
231  "getResults(): unable to get output shape for " + oname);
232  output.setShape(tmp_shape);
233  output.computeSizes();
234  }
235  //extend lifetime
236  output.setResult(results);
237  }
238 }

References heppy_report::oname, SonicClientTypes< TritonInputMap, TritonOutputMap >::output(), SonicClientTypes< TritonInputMap, TritonOutputMap >::output_, bookConverter::results, and triton_utils::throwIfError().

Referenced by evaluate().

◆ getServerSideStatus()

inference::ModelStatistics TritonClient::getServerSideStatus ( ) const
protected

Definition at line 395 of file TritonClient.cc.

395  {
396  if (verbose_) {
397  inference::ModelStatisticsResponse resp;
398  triton_utils::throwIfError(client_->ModelInferenceStatistics(&resp, options_.model_name_, options_.model_version_),
399  "getServerSideStatus(): unable to get model statistics");
400  return *(resp.model_stats().begin());
401  }
402  return inference::ModelStatistics{};
403 }

References client_, options_, triton_utils::throwIfError(), and verbose_.

Referenced by evaluate().

◆ handle_exception()

template<typename F >
bool TritonClient::handle_exception ( F &&  call)
protected

Definition at line 206 of file TritonClient.cc.

206  {
207  //caught exceptions will be propagated to edm::WaitingTaskWithArenaHolder
208  CMS_SA_ALLOW try {
209  call();
210  return true;
211  }
212  //TritonExceptions are intended/expected to be recoverable, i.e. retries should be allowed
213  catch (TritonException& e) {
214  e.convertToWarning();
215  finish(false);
216  return false;
217  }
218  //other exceptions are not: execution should stop if they are encountered
219  catch (...) {
220  finish(false, std::current_exception());
221  return false;
222  }
223 }

References CMS_SA_ALLOW, MillePedeFileConverter_cfg::e, and SonicClientBase::finish().

Referenced by evaluate().

◆ noBatch()

bool TritonClient::noBatch ( ) const
inline

Definition at line 45 of file TritonClient.h.

45 { return noBatch_; }

References noBatch_.

◆ reportServerSideStats()

void TritonClient::reportServerSideStats ( const ServerSideStats & stats) const
protected

Definition at line 337 of file TritonClient.cc.

337  {
338  std::stringstream msg;
339 
340  // https://github.com/triton-inference-server/server/blob/v2.3.0/src/clients/c++/perf_client/inference_profiler.cc
341  const uint64_t count = stats.success_count_;
342  msg << " Inference count: " << stats.inference_count_ << "\n";
343  msg << " Execution count: " << stats.execution_count_ << "\n";
344  msg << " Successful request count: " << count << "\n";
345 
346  if (count > 0) {
347  auto get_avg_us = [count](uint64_t tval) {
348  constexpr uint64_t us_to_ns = 1000;
349  return tval / us_to_ns / count;
350  };
351 
352  const uint64_t cumm_avg_us = get_avg_us(stats.cumm_time_ns_);
353  const uint64_t queue_avg_us = get_avg_us(stats.queue_time_ns_);
354  const uint64_t compute_input_avg_us = get_avg_us(stats.compute_input_time_ns_);
355  const uint64_t compute_infer_avg_us = get_avg_us(stats.compute_infer_time_ns_);
356  const uint64_t compute_output_avg_us = get_avg_us(stats.compute_output_time_ns_);
357  const uint64_t compute_avg_us = compute_input_avg_us + compute_infer_avg_us + compute_output_avg_us;
358  const uint64_t overhead =
359  (cumm_avg_us > queue_avg_us + compute_avg_us) ? (cumm_avg_us - queue_avg_us - compute_avg_us) : 0;
360 
361  msg << " Avg request latency: " << cumm_avg_us << " usec"
362  << "\n"
363  << " (overhead " << overhead << " usec + "
364  << "queue " << queue_avg_us << " usec + "
365  << "compute input " << compute_input_avg_us << " usec + "
366  << "compute infer " << compute_infer_avg_us << " usec + "
367  << "compute output " << compute_output_avg_us << " usec)" << std::endl;
368  }
369 
370  if (!debugName_.empty())
371  edm::LogInfo(fullDebugName_) << msg.str();
372 }

References submitPVResolutionJobs::count, SonicClientBase::debugName_, SonicClientBase::fullDebugName_, mps_check::msg, and dqmMemoryStats::stats.

Referenced by evaluate().

◆ reset()

void TritonClient::reset ( void  )
override virtual

Reimplemented from SonicClientBase.

Definition at line 196 of file TritonClient.cc.

196  {
197  for (auto& element : input_) {
198  element.second.reset();
199  }
200  for (auto& element : output_) {
201  element.second.reset();
202  }
203 }

References SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, and SonicClientTypes< TritonInputMap, TritonOutputMap >::output_.

◆ serverType()

TritonServerType TritonClient::serverType ( ) const
inline

Definition at line 46 of file TritonClient.h.

46 { return serverType_; }

References serverType_.

◆ setBatchSize()

bool TritonClient::setBatchSize ( unsigned  bsize)

Definition at line 178 of file TritonClient.cc.

178  {
179  if (bsize > maxBatchSize_) {
180  edm::LogWarning(fullDebugName_) << "Requested batch size " << bsize << " exceeds server-specified max batch size "
181  << maxBatchSize_ << ". Batch size will remain as" << batchSize_;
182  return false;
183  } else {
184  batchSize_ = bsize;
185  //set for input and output
186  for (auto& element : input_) {
187  element.second.setBatchSize(bsize);
188  }
189  for (auto& element : output_) {
190  element.second.setBatchSize(bsize);
191  }
192  return true;
193  }
194 }

References batchSize_, SonicClientBase::fullDebugName_, SonicClientTypes< TritonInputMap, TritonOutputMap >::input_, maxBatchSize_, and SonicClientTypes< TritonInputMap, TritonOutputMap >::output_.

Referenced by TritonClient().

◆ setUseSharedMemory()

void TritonClient::setUseSharedMemory ( bool  useShm)
inline

Definition at line 42 of file TritonClient.h.

42 { useSharedMemory_ = useShm; }

References useSharedMemory_.

◆ summarizeServerStats()

TritonClient::ServerSideStats TritonClient::summarizeServerStats ( const inference::ModelStatistics &  start_status,
const inference::ModelStatistics &  end_status 
) const
protected

Definition at line 374 of file TritonClient.cc.

375  {
376  TritonClient::ServerSideStats server_stats;
377 
378  server_stats.inference_count_ = end_status.inference_count() - start_status.inference_count();
379  server_stats.execution_count_ = end_status.execution_count() - start_status.execution_count();
380  server_stats.success_count_ =
381  end_status.inference_stats().success().count() - start_status.inference_stats().success().count();
382  server_stats.cumm_time_ns_ =
383  end_status.inference_stats().success().ns() - start_status.inference_stats().success().ns();
384  server_stats.queue_time_ns_ = end_status.inference_stats().queue().ns() - start_status.inference_stats().queue().ns();
385  server_stats.compute_input_time_ns_ =
386  end_status.inference_stats().compute_input().ns() - start_status.inference_stats().compute_input().ns();
387  server_stats.compute_infer_time_ns_ =
388  end_status.inference_stats().compute_infer().ns() - start_status.inference_stats().compute_infer().ns();
389  server_stats.compute_output_time_ns_ =
390  end_status.inference_stats().compute_output().ns() - start_status.inference_stats().compute_output().ns();
391 
392  return server_stats;
393 }

References TritonClient::ServerSideStats::compute_infer_time_ns_, TritonClient::ServerSideStats::compute_input_time_ns_, TritonClient::ServerSideStats::compute_output_time_ns_, TritonClient::ServerSideStats::cumm_time_ns_, TritonClient::ServerSideStats::execution_count_, TritonClient::ServerSideStats::inference_count_, TritonClient::ServerSideStats::queue_time_ns_, and TritonClient::ServerSideStats::success_count_.

Referenced by evaluate().

◆ useSharedMemory()

bool TritonClient::useSharedMemory ( ) const
inline

Definition at line 41 of file TritonClient.h.

41 { return useSharedMemory_; }

References useSharedMemory_.

◆ verbose()

bool TritonClient::verbose ( ) const
inline

Definition at line 40 of file TritonClient.h.

40 { return verbose_; }

References verbose_.

Referenced by evaluate().

Member Data Documentation

◆ batchSize_

unsigned TritonClient::batchSize_
protected

Definition at line 66 of file TritonClient.h.

Referenced by batchSize(), evaluate(), and setBatchSize().

◆ client_

std::unique_ptr<triton::client::InferenceServerGrpcClient> TritonClient::client_
protected

Definition at line 78 of file TritonClient.h.

Referenced by client(), evaluate(), getServerSideStatus(), and TritonClient().

◆ compressionAlgo_

grpc_compression_algorithm TritonClient::compressionAlgo_
protected

Definition at line 71 of file TritonClient.h.

Referenced by evaluate().

◆ headers_

triton::client::Headers TritonClient::headers_
protected

Definition at line 72 of file TritonClient.h.

Referenced by evaluate().

◆ inputsTriton_

std::vector<triton::client::InferInput*> TritonClient::inputsTriton_
protected

Definition at line 75 of file TritonClient.h.

Referenced by evaluate(), and TritonClient().

◆ maxBatchSize_

unsigned TritonClient::maxBatchSize_
protected

Definition at line 65 of file TritonClient.h.

Referenced by setBatchSize(), and TritonClient().

◆ noBatch_

bool TritonClient::noBatch_
protected

Definition at line 67 of file TritonClient.h.

Referenced by noBatch(), and TritonClient().

◆ options_

triton::client::InferOptions TritonClient::options_
protected

◆ outputsTriton_

std::vector<const triton::client::InferRequestedOutput*> TritonClient::outputsTriton_
protected

Definition at line 76 of file TritonClient.h.

Referenced by evaluate(), and TritonClient().

◆ serverType_

TritonServerType TritonClient::serverType_
protected

Definition at line 70 of file TritonClient.h.

Referenced by serverType(), and TritonClient().

◆ TritonInputData

friend TritonClient::TritonInputData
private

Definition at line 83 of file TritonClient.h.

◆ TritonOutputData

friend TritonClient::TritonOutputData
private

Definition at line 84 of file TritonClient.h.

◆ useSharedMemory_

bool TritonClient::useSharedMemory_
protected

Definition at line 69 of file TritonClient.h.

Referenced by setUseSharedMemory(), and useSharedMemory().

◆ verbose_

bool TritonClient::verbose_
protected

Definition at line 68 of file TritonClient.h.

Referenced by getServerSideStatus(), TritonClient(), and verbose().

TritonClient::maxBatchSize_
unsigned maxBatchSize_
Definition: TritonClient.h:65
TritonClient::useSharedMemory_
bool useSharedMemory_
Definition: TritonClient.h:69
TritonClient::ServerSideStats::inference_count_
uint64_t inference_count_
Definition: TritonClient.h:22
SonicClientTypes< TritonInputMap, TritonOutputMap >::input_
Input input_
Definition: SonicClientTypes.h:19
TritonClient::noBatch_
bool noBatch_
Definition: TritonClient.h:67
edm::ParameterSetDescription::add
ParameterDescriptionBase * add(U const &iLabel, T const &value)
Definition: ParameterSetDescription.h:95
TritonClient::outputsTriton_
std::vector< const triton::client::InferRequestedOutput * > outputsTriton_
Definition: TritonClient.h:76
SonicClientBase::fillBasePSetDescription
static void fillBasePSetDescription(edm::ParameterSetDescription &desc, bool allowRetry=true)
Definition: SonicClientBase.cc:73
CalibrationSummaryClient_cfi.params
params
Definition: CalibrationSummaryClient_cfi.py:14
SonicClientTypes< TritonInputMap, TritonOutputMap >::output_
Output output_
Definition: SonicClientTypes.h:20
SonicClientTypes< TritonInputMap, TritonOutputMap >::output
const Output & output() const
Definition: SonicClientTypes.h:16
TritonClient::ServerSideStats
Definition: TritonClient.h:21
TritonClient::summarizeServerStats
ServerSideStats summarizeServerStats(const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
Definition: TritonClient.cc:374
edm::ParameterSetDescription
Definition: ParameterSetDescription.h:52
TritonClient::setBatchSize
bool setBatchSize(unsigned bsize)
Definition: TritonClient.cc:178
SonicClientBase::finish
void finish(bool success, std::exception_ptr eptr=std::exception_ptr{})
Definition: SonicClientBase.cc:45
bookConverter.results
results
Definition: bookConverter.py:144
mps_check.msg
tuple msg
Definition: mps_check.py:285
edm::LogInfo
Log< level::Info, false > LogInfo
Definition: MessageLogger.h:125
edm::LogWarning
Log< level::Warning, false > LogWarning
Definition: MessageLogger.h:122
CMS_SA_ALLOW
#define CMS_SA_ALLOW
Definition: thread_safety_macros.h:5
SonicClientBase::debugName
const std::string & debugName() const
Definition: SonicClientBase.h:26
edm::FileInPath
Definition: FileInPath.h:61
triton_utils::throwIfError
void throwIfError(const Error &err, std::string_view msg)
Definition: triton_utils.cc:21
TritonClient::client_
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
Definition: TritonClient.h:78
TritonClient::ServerSideStats::compute_infer_time_ns_
uint64_t compute_infer_time_ns_
Definition: TritonClient.h:28
TritonClient::ServerSideStats::compute_input_time_ns_
uint64_t compute_input_time_ns_
Definition: TritonClient.h:27
TritonClient::compressionAlgo_
grpc_compression_algorithm compressionAlgo_
Definition: TritonClient.h:71
summarizeEdmComparisonLogfiles.success
success
Definition: summarizeEdmComparisonLogfiles.py:114
submitPVResolutionJobs.count
count
Definition: submitPVResolutionJobs.py:352
TritonClient::serverType_
TritonServerType serverType_
Definition: TritonClient.h:70
SonicMode::Sync
edm::ParameterSetDescription::addUntracked
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
Definition: ParameterSetDescription.h:100
TritonClient::inputsTriton_
std::vector< triton::client::InferInput * > inputsTriton_
Definition: TritonClient.h:75
TritonClient::verbose_
bool verbose_
Definition: TritonClient.h:68
TritonServerType::LocalCPU
SonicClientBase::mode_
SonicMode mode_
Definition: SonicClientBase.h:54
SonicMode::Async
SonicClient< TritonInputMap, TritonOutputMap >::SonicClient
SonicClient(const edm::ParameterSet &params, const std::string &debugName, const std::string &clientName)
Definition: SonicClient.h:12
TritonClient::reportServerSideStats
void reportServerSideStats(const ServerSideStats &stats) const
Definition: TritonClient.cc:337
SonicClientBase::fullDebugName_
std::string fullDebugName_
Definition: SonicClientBase.h:60
SiStripPI::max
Definition: SiStripPayloadInspectorHelper.h:169
contentValuesFiles.server
server
Definition: contentValuesFiles.py:37
TritonClient::ServerSideStats::execution_count_
uint64_t execution_count_
Definition: TritonClient.h:23
TritonClient::ServerSideStats::success_count_
uint64_t success_count_
Definition: TritonClient.h:24
SonicClientBase::debugName_
std::string debugName_
Definition: SonicClientBase.h:60
edm::Service
Definition: Service.h:30
dqmMemoryStats.stats
stats
Definition: dqmMemoryStats.py:134
TritonClient::ServerSideStats::compute_output_time_ns_
uint64_t compute_output_time_ns_
Definition: TritonClient.h:29
TritonService::serverInfo
Server serverInfo(const std::string &model, const std::string &preferred="") const
Definition: TritonService.cc:178
AlCaHLTBitMon_QueryRunRegistry.string
string string
Definition: AlCaHLTBitMon_QueryRunRegistry.py:256
TritonClient::batchSize_
unsigned batchSize_
Definition: TritonClient.h:66
TritonService::pid
const std::string & pid() const
Definition: TritonService.h:103
TritonClient::handle_exception
bool handle_exception(F &&call)
Definition: TritonClient.cc:206
TritonClient::ServerSideStats::queue_time_ns_
uint64_t queue_time_ns_
Definition: TritonClient.h:26
TritonException
Definition: TritonException.h:8
TritonClient::headers_
triton::client::Headers headers_
Definition: TritonClient.h:72
TritonClient::verbose
bool verbose() const
Definition: TritonClient.h:40
cond::uint64_t
unsigned long long uint64_t
Definition: Time.h:13
cms::Exception
Definition: Exception.h:70
TritonClient::getServerSideStatus
inference::ModelStatistics getServerSideStatus() const
Definition: TritonClient.cc:395
TritonClient::getResults
void getResults(std::shared_ptr< triton::client::InferResult > results)
Definition: TritonClient.cc:225
TritonClient::ServerSideStats::cumm_time_ns_
uint64_t cumm_time_ns_
Definition: TritonClient.h:25
edm::Log
Definition: MessageLogger.h:70
heppy_report.oname
oname
Definition: heppy_report.py:57
triton_utils::printColl
std::string printColl(const C &coll, const std::string &delim=", ")
Definition: triton_utils.cc:12
TritonClient::options_
triton::client::InferOptions options_
Definition: TritonClient.h:80
MillePedeFileConverter_cfg.e
e
Definition: MillePedeFileConverter_cfg.py:37
SonicClientBase::setMode
void setMode(SonicMode mode)
Definition: SonicClientBase.cc:26