CMS 3D CMS Logo

TritonData.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_SonicTriton_TritonData
2 #define HeterogeneousCore_SonicTriton_TritonData
3 
7 
8 #include <vector>
9 #include <string>
10 #include <unordered_map>
11 #include <numeric>
12 #include <algorithm>
13 #include <memory>
14 #include <atomic>
15 #include <typeinfo>
16 
17 #include "grpc_client.h"
18 #include "grpc_service.pb.h"
19 
20 //forward declarations: TritonClient is only used through a pointer, and the resource templates
   //are only named in friend declarations inside TritonData, so their definitions are not needed here
21 class TritonClient;
22 template <typename IO>
23 class TritonMemResource;
24 template <typename IO>
25 class TritonHeapResource;
26 template <typename IO>
27 class TritonCpuShmResource;
28 #ifdef TRITON_ENABLE_GPU
29 template <typename IO>
30 class TritonGpuShmResource;
31 #endif
32 
33 //aliases for local input and output types: inputs are nested vectors of DT, outputs are vectors of edm::Span over const DT*
34 template <typename DT>
35 using TritonInput = std::vector<std::vector<DT>>;
36 template <typename DT>
37 using TritonOutput = std::vector<edm::Span<const DT*>>;
38 
39 //other useful typedefs: a TritonInput held through a shared_ptr
40 template <typename DT>
41 using TritonInputContainer = std::shared_ptr<TritonInput<DT>>;
42 
43 //store all the info needed for triton input and output
44 //NOTE: this class is not const-thread-safe, and should only be used with stream or one modules
45 //(generally recommended for SONIC, but especially necessary here)
46 template <typename IO>
47 class TritonData {
48 public:
49  using Result = triton::client::InferResult;
50  using TensorMetadata = inference::ModelMetadataResponse_TensorMetadata;
51  using ShapeType = std::vector<int64_t>;
53 
54  //constructor
55  TritonData(const std::string& name, const TensorMetadata& model_info, TritonClient* client, const std::string& pid);
56 
57  //some members can be modified
58  void setShape(const ShapeType& newShape);
59  void setShape(unsigned loc, int64_t val);
60 
61  //io accessors
62  template <typename DT>
63  TritonInputContainer<DT> allocate(bool reserve = true);
64  template <typename DT>
66  void prepare();
67  template <typename DT>
69 
70  //const accessors
71  const ShapeView& shape() const { return shape_; }
72  int64_t byteSize() const { return byteSize_; }
73  const std::string& dname() const { return dname_; }
74  unsigned batchSize() const { return batchSize_; }
75 
76  //utilities
77  bool variableDims() const { return variableDims_; }
78  int64_t sizeDims() const { return productDims_; }
79  //default to dims if shape isn't filled
80  int64_t sizeShape() const { return variableDims_ ? dimProduct(shape_) : sizeDims(); }
81 
82 private:
83  friend class TritonClient;
84  friend class TritonMemResource<IO>;
85  friend class TritonHeapResource<IO>;
86  friend class TritonCpuShmResource<IO>;
87 #ifdef TRITON_ENABLE_GPU
88  friend class TritonGpuShmResource<IO>;
89 #endif
90 
91  //private accessors only used internally or by client
92  unsigned fullLoc(unsigned loc) const { return loc + (noBatch_ ? 0 : 1); }
93  void setBatchSize(unsigned bsize);
94  void reset();
95  void setResult(std::shared_ptr<Result> result) { result_ = result; }
96  IO* data() { return data_.get(); }
97  void updateMem(size_t size);
98  void computeSizes();
99  void resetSizes();
100  triton::client::InferenceServerGrpcClient* client();
101  template <typename DT>
102  void checkType() const {
103  if (!triton_utils::checkType<DT>(dtype_))
104  throw cms::Exception("TritonDataError")
105  << name_ << ": inconsistent data type " << typeid(DT).name() << " for " << dname_;
106  }
107 
108  //helpers
109  bool anyNeg(const ShapeView& vec) const {
110  return std::any_of(vec.begin(), vec.end(), [](int64_t i) { return i < 0; });
111  }
112  int64_t dimProduct(const ShapeView& vec) const {
113  return std::accumulate(vec.begin(), vec.end(), 1, std::multiplies<int64_t>());
114  }
115  void createObject(IO** ioptr);
116  //generates a unique id number for each instance of the class
117  unsigned uid() const {
118  static std::atomic<unsigned> uid{0};
119  return ++uid;
120  }
121  std::string xput() const;
122 
123  //members
125  std::shared_ptr<IO> data_;
127  bool useShm_;
130  bool noBatch_;
131  unsigned batchSize_;
135  int64_t productDims_;
138  int64_t byteSize_;
139  size_t sizeShape_;
142  //can be modified in otherwise-const fromServer() method in TritonMemResource::copyOutput():
143  //TritonMemResource holds a non-const pointer to an instance of this class
144  //so that TritonOutputGpuShmResource can store data here
145  std::shared_ptr<void> holder_;
146  std::shared_ptr<TritonMemResource<IO>> memResource_;
147  std::shared_ptr<Result> result_;
148  //can be modified in otherwise-const fromServer() method to prevent multiple calls
149  CMS_SA_ALLOW mutable bool done_{};
150 };
151 
//concrete TritonData flavors (IO types match the createObject() specializations declared below)
//and string-keyed maps of them
152 using TritonInputData = TritonData<triton::client::InferInput>;
153 using TritonInputMap = std::unordered_map<std::string, TritonInputData>;
154 using TritonOutputData = TritonData<triton::client::InferRequestedOutput>;
155 using TritonOutputMap = std::unordered_map<std::string, TritonOutputData>;
156 
157 //avoid "explicit specialization after instantiation" error
    //(member specializations are declared here, ahead of the explicit instantiation declarations that follow)
    //NOTE(review): this listing has lost the declarations that followed several of the template heads below
    //(listing lines 159, 161, 164, 169, 172, 174, 176); restore them from the original header before compiling
158 template <>
160 template <>
162 template <>
163 template <typename DT>
165 template <>
166 template <typename DT>
167 void TritonInputData::toServer(std::shared_ptr<TritonInput<DT>> ptr);
168 template <>
170 template <>
171 template <typename DT>
173 template <>
175 template <>
177 template <>
178 void TritonInputData::createObject(triton::client::InferInput** ioptr);
179 template <>
180 void TritonOutputData::createObject(triton::client::InferRequestedOutput** ioptr);
181 
182 //explicit template instantiation declarations
    //(one per IO flavor used in this file; the matching definitions must live in a source file)
183 extern template class TritonData<triton::client::InferInput>;
184 extern template class TritonData<triton::client::InferRequestedOutput>;
185 
186 #endif
std::vector< std::vector< DT > > TritonInput
Definition: TritonData.h:35
size
Write out results.
std::shared_ptr< IO > data_
Definition: TritonData.h:125
void setBatchSize(unsigned bsize)
Definition: TritonData.cc:98
unsigned uid() const
Definition: TritonData.h:117
#define CMS_SA_ALLOW
std::shared_ptr< void > holder_
Definition: TritonData.h:145
bool done_
Definition: TritonData.h:149
size_t totalByteSize_
Definition: TritonData.h:141
unsigned fullLoc(unsigned loc) const
Definition: TritonData.h:92
int64_t byteSize() const
Definition: TritonData.h:72
size_t byteSizePerBatch_
Definition: TritonData.h:140
void computeSizes()
Definition: TritonData.cc:105
const ShapeType dims_
Definition: TritonData.h:129
const ShapeView & shape() const
Definition: TritonData.h:71
void setResult(std::shared_ptr< Result > result)
Definition: TritonData.h:95
bool useShm_
Definition: TritonData.h:127
const std::string & dname() const
Definition: TritonData.h:73
ShapeView shape_
Definition: TritonData.h:133
void updateMem(size_t size)
Definition: TritonData.cc:120
void checkType() const
Definition: TritonData.h:102
int64_t productDims_
Definition: TritonData.h:135
int64_t byteSize_
Definition: TritonData.h:138
TritonInputContainer< DT > allocate(bool reserve=true)
Definition: TritonData.cc:149
size_t sizeShape_
Definition: TritonData.h:139
std::shared_ptr< Result > result_
Definition: TritonData.h:147
T begin() const
Definition: Span.h:20
std::vector< edm::Span< const DT * > > TritonOutput
Definition: TritonData.h:37
void reset()
Definition: TritonData.cc:231
IO * data()
Definition: TritonData.h:96
triton::client::InferResult Result
Definition: TritonData.h:49
unsigned batchSize() const
Definition: TritonData.h:74
TritonClient * client_
Definition: TritonData.h:126
bool anyNeg(const ShapeView &vec) const
Definition: TritonData.h:109
void prepare()
Definition: TritonData.cc:196
TritonData(const std::string &name, const TensorMetadata &model_info, TritonClient *client, const std::string &pid)
Definition: TritonData.cc:18
int64_t sizeShape() const
Definition: TritonData.h:80
std::unordered_map< std::string, TritonInputData > TritonInputMap
Definition: TritonData.h:153
void setShape(const ShapeType &newShape)
inference::ModelMetadataResponse_TensorMetadata TensorMetadata
Definition: TritonData.h:50
void createObject(IO **ioptr)
std::shared_ptr< TritonInput< DT > > TritonInputContainer
Definition: TritonData.h:41
TritonOutput< DT > fromServer() const
Definition: TritonData.cc:204
triton::client::InferenceServerGrpcClient * client()
Definition: TritonData.cc:67
int64_t sizeDims() const
Definition: TritonData.h:78
void resetSizes()
Definition: TritonData.cc:111
inference::DataType dtype_
Definition: TritonData.h:137
bool noBatch_
Definition: TritonData.h:130
std::shared_ptr< TritonMemResource< IO > > memResource_
Definition: TritonData.h:146
int64_t dimProduct(const ShapeView &vec) const
Definition: TritonData.h:112
std::string xput() const
Definition: TritonData.cc:57
std::string dname_
Definition: TritonData.h:136
ShapeType fullShape_
Definition: TritonData.h:132
std::string name_
Definition: TritonData.h:124
std::vector< int64_t > ShapeType
Definition: TritonData.h:51
bool variableDims() const
Definition: TritonData.h:77
unsigned batchSize_
Definition: TritonData.h:131
std::string shmName_
Definition: TritonData.h:128
bool variableDims_
Definition: TritonData.h:134
void toServer(TritonInputContainer< DT > ptr)
Definition: TritonData.cc:163
std::unordered_map< std::string, TritonOutputData > TritonOutputMap
Definition: TritonData.h:155
T end() const
Definition: Span.h:21