1 #ifndef HeterogeneousCore_SonicTriton_TritonClient
2 #define HeterogeneousCore_SonicTriton_TritonClient
14 #include <unordered_map>
16 #include "grpc_client.h"
17 #include "grpc_service.pb.h"
44 void reset() override;
60 const inference::ModelStatistics& end_status) const;
78 std::unique_ptr<triton::client::InferenceServerGrpcClient> client_;
uint64_t execution_count_
bool setBatchSize(unsigned bsize)
inference::ModelStatistics getServerSideStatus() const
std::vector< const triton::client::InferRequestedOutput * > outputsTriton_
unsigned batchSize() const
const std::string & debugName() const
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
TritonClient(const edm::ParameterSet &params, const std::string &debugName)
uint64_t compute_infer_time_ns_
uint64_t inference_count_
void reportServerSideStats(const ServerSideStats &stats) const
TritonServerType serverType_
ServerSideStats summarizeServerStats(const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
bool handle_exception(F &&call)
grpc_compression_algorithm compressionAlgo_
triton::client::Headers headers_
unsigned long long uint64_t
std::vector< triton::client::InferInput * > inputsTriton_
bool useSharedMemory() const
void setUseSharedMemory(bool useShm)
triton::client::InferOptions options_
void getResults(std::shared_ptr< triton::client::InferResult > results)
uint64_t compute_output_time_ns_
static void fillPSetDescription(edm::ParameterSetDescription &iDesc)
static uInt32 F(BLOWFISH_CTX *ctx, uInt32 x)
TritonServerType serverType() const
uint64_t compute_input_time_ns_