#include "grpc_client.h"
#include "grpc_service.pb.h"
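
// TritonClient constructor (fragments): configuration is read via getUntrackedParameter
// (presumably from a CMSSW edm::ParameterSet) and a gRPC inference client is created.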
// ...
      verbose_(params.getUntrackedParameter<bool>("verbose")),
  // ...
      std::to_string(params.getUntrackedParameter<unsigned>("port")));
  // ...
      "TritonClient(): unable to create inference context");
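
  // The request timeout is taken from the "timeout" parameter and scaled by 1e6
  // (presumably seconds to microseconds) before being stored in the client options: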
  options_.client_timeout_ = params.getUntrackedParameter<unsigned>("timeout") * 1e6;

  inference::ModelConfigResponse modelConfigResponse;
  // ...
      "TritonClient(): unable to get model config");
  inference::ModelConfig modelConfig(modelConfigResponse.config());
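
  // The model metadata is queried next; its input and output descriptions are used to
  // fill the client's input and output collections below.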
  inference::ModelMetadataResponse modelMetadata;
  // ...
      "TritonClient(): unable to get model metadata");

  const auto& nicInputs = modelMetadata.inputs();
  const auto& nicOutputs = modelMetadata.outputs();

  std::stringstream msg;
  // ...
  if (nicInputs.empty())
    msg << "Model on server appears malformed (zero inputs)\n";
  if (nicOutputs.empty())
    msg << "Model on server appears malformed (zero outputs)\n";

  // A printable summary of the model inputs (io_msg) is collected while each input
  // reported by the server is registered in input_.
  std::stringstream io_msg;
  // ...
  io_msg << "Model inputs: "
  // ...
  for (const auto& nicInput : nicInputs) {
    const auto& iname = nicInput.name();
    // ...
        std::piecewise_construct, std::forward_as_tuple(iname), std::forward_as_tuple(iname, nicInput, noBatch_));
    auto& curr_input = curr_itr->second;
    // ...
    io_msg << " " << iname << " (" << curr_input.dname() << ", " << curr_input.byteSize()
  // ...

  const auto& v_outputs = params.getUntrackedParameter<std::vector<std::string>>("outputs");
  std::unordered_set s_outputs(v_outputs.begin(), v_outputs.end());
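
  // Only outputs listed in the optional "outputs" parameter are kept; an empty list
  // (the default, per the parameter description at the end) selects all model outputs.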
  // ...
  io_msg << "Model outputs: "
  // ...
  for (const auto& nicOutput : nicOutputs) {
    const auto& oname = nicOutput.name();
    // skip any output that was not requested
    if (!s_outputs.empty() and s_outputs.find(oname) == s_outputs.end())
      continue;
    // ...
        std::piecewise_construct, std::forward_as_tuple(oname), std::forward_as_tuple(oname, nicOutput, noBatch_));
    auto& curr_output = curr_itr->second;
    // ...
    io_msg << " " << oname << " (" << curr_output.dname() << ", " << curr_output.byteSize()
    // ...
    if (!s_outputs.empty())
      s_outputs.erase(oname);
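
  // Outputs that are found are erased from s_outputs, so once the loop finishes anything
  // left over is a requested output that the model does not provide.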
  if (!s_outputs.empty())
    // ...

  std::stringstream model_msg;
  // ...
  model_msg << "Model name: " << options_.model_name_ << "\n"
            << "Model version: " << options_.model_version_ << "\n"
  // ...

  // Batch-size handling (fragment): the new batch size is propagated to every registered
  // input and output.
  for (auto& element : input_) {
    element.second.setBatchSize(bsize);
  }
  for (auto& element : output_) {
    element.second.setBatchSize(bsize);
  }
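
  // The same loop structure is used to reset every input and output.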
  for (auto& element : input_) {
    element.second.reset();
  }
  for (auto& element : output_) {
    element.second.reset();
  }
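
// getResults() (fragment): outputs with variable dimensions get their concrete shape from
// the server response before any further processing.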
    if (output.variableDims()) {
      std::vector<int64_t> tmp_shape;
      // ...
          "getResults(): unable to get output shape for " + oname);
      // ...
      output.setShape(tmp_shape, false);
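
// evaluate() (fragment): in the asynchronous path the result is handled in a callback
// that takes ownership of the raw nic::InferResult* via a shared_ptr.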
      [t1, start_status, this](nic::InferResult* results) {
        std::shared_ptr<nic::InferResult> results_ptr(results);
        bool status = triton_utils::warnIfError(results_ptr->RequestStatus(), "evaluate(): unable to get result");
        // ...
            << "Remote time: " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
        // ...
        "evaluate(): unable to launch async run");
    // ...
        "evaluate(): unable to run and/or get result");
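
    // Synchronous path (fragment): the remote call time is logged, server-side statistics
    // snapshots from before and after the request are summarized and reported, and the
    // results are unpacked via getResults().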
    if (!debugName_.empty())
      // ...
          << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();

    const auto& end_status = getServerSideStatus();
    // ...
    const auto& stats = summarizeServerStats(start_status, end_status);
    reportServerSideStats(stats);
    // ...
    std::shared_ptr<nic::InferResult> results_ptr(results);
    status = getResults(results_ptr);
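
// reportServerSideStats() (fragment): the collected server-side counters are formatted
// into a single log message.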
  std::stringstream msg;
  // ...
  msg << " Inference count: " << stats.inference_count_ << "\n";
  msg << " Execution count: " << stats.execution_count_ << "\n";
  msg << " Successful request count: " << count << "\n";

      // cumulative ns are converted to an average per successful request, in microseconds
      return tval / us_to_ns / count;

    const uint64_t cumm_avg_us = get_avg_us(stats.cumm_time_ns_);
    const uint64_t queue_avg_us = get_avg_us(stats.queue_time_ns_);
    const uint64_t compute_input_avg_us = get_avg_us(stats.compute_input_time_ns_);
    const uint64_t compute_infer_avg_us = get_avg_us(stats.compute_infer_time_ns_);
    const uint64_t compute_output_avg_us = get_avg_us(stats.compute_output_time_ns_);
    const uint64_t compute_avg_us = compute_input_avg_us + compute_infer_avg_us + compute_output_avg_us;
    const uint64_t overhead =
        (cumm_avg_us > queue_avg_us + compute_avg_us) ? (cumm_avg_us - queue_avg_us - compute_avg_us) : 0;

    msg << " Avg request latency: " << cumm_avg_us << " usec"
        // ...
        << " (overhead " << overhead << " usec + "
        << "queue " << queue_avg_us << " usec + "
        << "compute input " << compute_input_avg_us << " usec + "
        << "compute infer " << compute_infer_avg_us << " usec + "
        << "compute output " << compute_output_avg_us << " usec)" << std::endl;

// summarizeServerStats() (fragment): each counter is the difference between the
// statistics snapshots taken after and before the request.
// ...
                                           const inference::ModelStatistics& end_status) const {
  // ...
  server_stats.inference_count_ = end_status.inference_count() - start_status.inference_count();
  server_stats.execution_count_ = end_status.execution_count() - start_status.execution_count();
  server_stats.success_count_ =  // member name assumed from context (successful-request count)
      end_status.inference_stats().success().count() - start_status.inference_stats().success().count();
  server_stats.cumm_time_ns_ =
      end_status.inference_stats().success().ns() - start_status.inference_stats().success().ns();
  server_stats.queue_time_ns_ = end_status.inference_stats().queue().ns() - start_status.inference_stats().queue().ns();
  server_stats.compute_input_time_ns_ =
      end_status.inference_stats().compute_input().ns() - start_status.inference_stats().compute_input().ns();
  server_stats.compute_infer_time_ns_ =
      end_status.inference_stats().compute_infer().ns() - start_status.inference_stats().compute_infer().ns();
  server_stats.compute_output_time_ns_ =
      end_status.inference_stats().compute_output().ns() - start_status.inference_stats().compute_output().ns();
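
// getServerSideStatus() (fragment): model statistics are requested from the server,
// falling back to a default-constructed inference::ModelStatistics.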
    inference::ModelStatisticsResponse resp;
    // ...
        "getServerSideStatus(): unable to get model statistics");
    return *(resp.model_stats().begin());
  // ...
  return inference::ModelStatistics{};
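
// Parameter description (fragment): the untracked "outputs" parameter defaults to an
// empty list, which selects all model outputs.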
  descClient.addUntracked<std::vector<std::string>>("outputs", {});