1 #ifndef HeterogeneousCore_SonicTriton_TritonClient
2 #define HeterogeneousCore_SonicTriton_TritonClient
14 #include <unordered_map>
16 #include "grpc_client.h"
17 #include "grpc_service.pb.h"
49 void reset() override;
68 const inference::ModelStatistics& end_status)
const;
86 std::unique_ptr<triton::client::InferenceServerGrpcClient>
client_;
88 std::vector<triton::client::InferOptions>
options_;
uint64_t execution_count_
void getResults(const std::vector< std::shared_ptr< triton::client::InferResult >> &results)
const std::string & debugName() const
bool setBatchSize(unsigned bsize)
void addEntry(unsigned entry)
TritonBatchMode batchMode() const
std::unique_ptr< triton::client::InferenceServerGrpcClient > client_
TritonClient(const edm::ParameterSet &params, const std::string &debugName)
uint64_t compute_infer_time_ns_
uint64_t inference_count_
ServerSideStats summarizeServerStats(const inference::ModelStatistics &start_status, const inference::ModelStatistics &end_status) const
TritonServerType serverType_
bool handle_exception(F &&call)
grpc_compression_algorithm compressionAlgo_
TritonServerType serverType() const
void resizeEntries(unsigned entry)
inference::ModelStatistics getServerSideStatus() const
triton::client::Headers headers_
unsigned nEntries() const
unsigned long long uint64_t
void setUseSharedMemory(bool useShm)
unsigned batchSize() const
unsigned outerDim() const
void setBatchMode(TritonBatchMode batchMode)
void reportServerSideStats(const ServerSideStats &stats) const
bool useSharedMemory() const
uint64_t compute_output_time_ns_
static void fillPSetDescription(edm::ParameterSetDescription &iDesc)
static uInt32 F(BLOWFISH_CTX *ctx, uInt32 x)
TritonBatchMode batchMode_
std::vector< triton::client::InferOptions > options_
uint64_t compute_input_time_ns_