#ifndef HeterogeneousCore_SonicTriton_TritonClient
#define HeterogeneousCore_SonicTriton_TritonClient

// (other includes elided in the original listing)
#include <unordered_map>

#include "grpc_client.h"
#include "grpc_service.pb.h"
// NB: the original listing elides several spans of lines, including the class's
// base class (note that reset() below is declared override) and any other members.
class TritonClient {
public:
  // cumulative server-side statistics for the model; the uint64_t counters from
  // the member index are grouped here under the ServerSideStats name used by
  // summarizeServerStats() and reportServerSideStats() (times in nanoseconds)
  struct ServerSideStats {
    uint64_t inference_count_;
    uint64_t execution_count_;
    uint64_t compute_input_time_ns_;
    uint64_t compute_infer_time_ns_;
    uint64_t compute_output_time_ns_;
  };

  TritonClient(const edm::ParameterSet& params);

  // accessors
  unsigned batchSize() const;
  bool setBatchSize(unsigned bsize);
  void reset() override;

  // registers the client's configuration parameters for CMSSW fillDescriptions()
  static void fillPSetDescription(edm::ParameterSetDescription& iDesc);

protected:
  // extracts the output data from a completed inference request
  bool getResults(std::shared_ptr<nvidia::inferenceserver::client::InferResult> results);

  // server-side monitoring: fetch a statistics snapshot, summarize the interval
  // between two snapshots, and report the summary
  inference::ModelStatistics getServerSideStatus() const;
  ServerSideStats summarizeServerStats(const inference::ModelStatistics& start_status,
                                       const inference::ModelStatistics& end_status) const;
  void reportServerSideStats(const ServerSideStats& stats) const;

  // Triton gRPC handles: input/output tensors, the server connection, and per-request options
  std::vector<nvidia::inferenceserver::client::InferInput*> inputsTriton_;
  std::vector<const nvidia::inferenceserver::client::InferRequestedOutput*> outputsTriton_;
  std::unique_ptr<nvidia::inferenceserver::client::InferenceServerGrpcClient> client_;
  nvidia::inferenceserver::client::InferOptions options_;
};

#endif
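For orientation, here is a minimal, self-contained sketch of how the gRPC handle types held by TritonClient (InferInput, InferRequestedOutput, InferOptions, InferenceServerGrpcClient) are typically exercised with the Triton client library of this era. The server address, model name, and tensor names are hypothetical placeholders, and error handling is abbreviated; this is a usage sketch, not the CMSSW implementation itself.

#include <iostream>
#include <memory>
#include <vector>

#include "grpc_client.h"

namespace nic = nvidia::inferenceserver::client;

int main() {
  // connect to a Triton server (address is a placeholder)
  std::unique_ptr<nic::InferenceServerGrpcClient> client;
  nic::InferenceServerGrpcClient::Create(&client, "localhost:8001");

  // describe one input tensor (name, shape, and datatype are placeholders)
  nic::InferInput* input;
  nic::InferInput::Create(&input, "input__0", {1, 4}, "FP32");
  std::vector<float> data{0.f, 1.f, 2.f, 3.f};
  input->AppendRaw(reinterpret_cast<const uint8_t*>(data.data()), data.size() * sizeof(float));

  // request one output tensor by name
  nic::InferRequestedOutput* output;
  nic::InferRequestedOutput::Create(&output, "output__0");

  // run a synchronous inference; options are keyed on the model name
  nic::InferOptions options("mymodel");
  std::vector<nic::InferInput*> inputs{input};
  std::vector<const nic::InferRequestedOutput*> outputs{output};
  nic::InferResult* result;
  nic::Error err = client->Infer(&result, options, inputs, outputs);
  if (!err.IsOk()) {
    std::cerr << "inference failed: " << err << std::endl;
    return 1;
  }

  // read back the raw bytes of the output tensor
  const uint8_t* buf;
  size_t byte_size;
  result->RawData("output__0", &buf, &byte_size);
  std::cout << "received " << byte_size << " bytes" << std::endl;

  delete result;
  return 0;
}

Since TritonClient stores these same handle types as members (inputsTriton_, outputsTriton_, client_, options_), the setup above is presumably performed once and reused across inference requests. Similarly, a plausible sketch of summarizeServerStats(), assuming the standard ModelStatistics protobuf fields from grpc_service.pb.h (inference_count, execution_count, and the compute_* durations under inference_stats); the actual implementation may differ:

TritonClient::ServerSideStats stats;
// counters in ModelStatistics are cumulative, so the interval is the difference of two snapshots
stats.inference_count_ = end_status.inference_count() - start_status.inference_count();
stats.execution_count_ = end_status.execution_count() - start_status.execution_count();
const auto& start_infer = start_status.inference_stats();
const auto& end_infer = end_status.inference_stats();
stats.compute_input_time_ns_ = end_infer.compute_input().ns() - start_infer.compute_input().ns();
stats.compute_infer_time_ns_ = end_infer.compute_infer().ns() - start_infer.compute_infer().ns();
stats.compute_output_time_ns_ = end_infer.compute_output().ns() - start_infer.compute_output().ns();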