CMS 3D CMS Logo

TritonData.h
Go to the documentation of this file.
1 #ifndef HeterogeneousCore_SonicTriton_TritonData
2 #define HeterogeneousCore_SonicTriton_TritonData
3 
7 
8 #include <vector>
9 #include <string>
10 #include <unordered_map>
11 #include <numeric>
12 #include <algorithm>
13 #include <memory>
14 #include <atomic>
15 #include <typeinfo>
16 
17 #include "grpc_client.h"
18 #include "grpc_service.pb.h"
19 
20 //forward declaration
21 class TritonClient;
22 template <typename IO>
24 template <typename IO>
26 template <typename IO>
28 #ifdef TRITON_ENABLE_GPU
29 template <typename IO>
30 class TritonGpuShmResource;
31 #endif
32 
33 //aliases for local input and output types
34 template <typename DT>
35 using TritonInput = std::vector<std::vector<DT>>;
36 template <typename DT>
37 using TritonOutput = std::vector<edm::Span<const DT*>>;
38 
39 //other useful typedefs
40 template <typename DT>
41 using TritonInputContainer = std::shared_ptr<TritonInput<DT>>;
42 
43 //store all the info needed for triton input and output
44 //NOTE: this class is not const-thread-safe, and should only be used with stream or one modules
45 //(generally recommended for SONIC, but especially necessary here)
46 template <typename IO>
47 class TritonData {
48 public:
49  using Result = triton::client::InferResult;
50  using TensorMetadata = inference::ModelMetadataResponse_TensorMetadata;
51  using ShapeType = std::vector<int64_t>;
53 
54  //constructor
55  TritonData(const std::string& name, const TensorMetadata& model_info, TritonClient* client, const std::string& pid);
56 
57  //some members can be modified
58  void setShape(const ShapeType& newShape, unsigned entry = 0);
59  void setShape(unsigned loc, int64_t val, unsigned entry = 0);
60 
61  //io accessors
62  template <typename DT>
63  TritonInputContainer<DT> allocate(bool reserve = true);
64  template <typename DT>
66  void prepare();
67  template <typename DT>
69 
70  //const accessors
71  const ShapeView& shape(unsigned entry = 0) const { return entries_.at(entry).shape_; }
72  int64_t byteSize() const { return byteSize_; }
73  const std::string& dname() const { return dname_; }
74 
75  //utilities
76  bool variableDims() const { return variableDims_; }
77  int64_t sizeDims() const { return productDims_; }
78  //default to dims if shape isn't filled
79  int64_t sizeShape(unsigned entry = 0) const {
80  return variableDims_ ? dimProduct(entries_.at(entry).shape_) : sizeDims();
81  }
82 
83 private:
84  friend class TritonClient;
85  friend class TritonMemResource<IO>;
86  friend class TritonHeapResource<IO>;
87  friend class TritonCpuShmResource<IO>;
88 #ifdef TRITON_ENABLE_GPU
89  friend class TritonGpuShmResource<IO>;
90 #endif
91 
92  //group together all relevant information for a single request
93  //helpful for organizing multi-request ragged batching case
95  public:
96  //constructors
97  TritonDataEntry(const ShapeType& dims, bool noOuterDim, const std::string& name, const std::string& dname)
98  : fullShape_(dims),
99  shape_(fullShape_.begin() + (noOuterDim ? 0 : 1), fullShape_.end()),
100  sizeShape_(0),
102  totalByteSize_(0),
103  offset_(0),
104  output_(nullptr) {
105  //create input or output object
106  IO* iotmp;
107  createObject(&iotmp, name, dname);
108  data_.reset(iotmp);
109  }
110  //default needed to be able to use std::vector resize()
112  : shape_(fullShape_.begin(), fullShape_.end()),
113  sizeShape_(0),
115  totalByteSize_(0),
116  offset_(0),
117  output_(nullptr) {}
118 
119  private:
120  friend class TritonData<IO>;
121  friend class TritonClient;
122  friend class TritonMemResource<IO>;
123  friend class TritonHeapResource<IO>;
124  friend class TritonCpuShmResource<IO>;
125 #ifdef TRITON_ENABLE_GPU
126  friend class TritonGpuShmResource<IO>;
127 #endif
128 
129  //accessors
130  void createObject(IO** ioptr, const std::string& name, const std::string& dname);
131  void computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize);
132 
133  //members
137  std::shared_ptr<IO> data_;
138  std::shared_ptr<Result> result_;
139  unsigned offset_;
140  const uint8_t* output_;
141  };
142 
143  //private accessors only used internally or by client
144  void checkShm() {}
145  unsigned fullLoc(unsigned loc) const;
146  void reset();
147  void setResult(std::shared_ptr<Result> result, unsigned entry = 0) { entries_[entry].result_ = result; }
148  IO* data(unsigned entry = 0) { return entries_[entry].data_.get(); }
149  void updateMem(size_t size);
150  void computeSizes();
151  triton::client::InferenceServerGrpcClient* client();
152  template <typename DT>
153  void checkType() const {
154  if (!triton_utils::checkType<DT>(dtype_))
155  throw cms::Exception("TritonDataError")
156  << name_ << ": inconsistent data type " << typeid(DT).name() << " for " << dname_;
157  }
158 
159  //helpers
160  bool anyNeg(const ShapeView& vec) const {
161  return std::any_of(vec.begin(), vec.end(), [](int64_t i) { return i < 0; });
162  }
163  int64_t dimProduct(const ShapeView& vec) const {
164  //lambda treats negative dimensions as 0 to avoid overflows
165  return std::accumulate(
166  vec.begin(), vec.end(), 1, [](int64_t dim1, int64_t dim2) { return dim1 * std::max(0l, dim2); });
167  }
168  //generates a unique id number for each instance of the class
169  unsigned uid() const {
170  static std::atomic<unsigned> uid{0};
171  return ++uid;
172  }
173  std::string xput() const;
174  void addEntry(unsigned entry);
175  void addEntryImpl(unsigned entry);
176 
177  //members
180  bool useShm_;
184  int64_t productDims_;
187  int64_t byteSize_;
188  std::vector<TritonDataEntry> entries_;
190  //can be modified in otherwise-const fromServer() method in TritonMemResource::copyOutput():
191  //TritonMemResource holds a non-const pointer to an instance of this class
192  //so that TritonOutputGpuShmResource can store data here
193  std::shared_ptr<void> holder_;
194  std::shared_ptr<TritonMemResource<IO>> memResource_;
195  //can be modified in otherwise-const fromServer() method to prevent multiple calls
196  CMS_SA_ALLOW mutable bool done_{};
197 };
198 
200 using TritonInputMap = std::unordered_map<std::string, TritonInputData>;
202 using TritonOutputMap = std::unordered_map<std::string, TritonOutputData>;
203 
204 //avoid "explicit specialization after instantiation" error
205 template <>
206 void TritonInputData::TritonDataEntry::createObject(triton::client::InferInput** ioptr,
207  const std::string& name,
208  const std::string& dname);
209 template <>
210 void TritonOutputData::TritonDataEntry::createObject(triton::client::InferRequestedOutput** ioptr,
211  const std::string& name,
212  const std::string& dname);
213 template <>
215 template <>
217 template <>
219 template <>
220 template <typename DT>
222 template <>
223 template <typename DT>
224 void TritonInputData::toServer(std::shared_ptr<TritonInput<DT>> ptr);
225 template <>
227 template <>
228 template <typename DT>
230 
231 //explicit template instantiation declarations
232 extern template class TritonData<triton::client::InferInput>;
234 
235 #endif
std::vector< std::vector< DT > > TritonInput
Definition: TritonData.h:35
std::vector< TritonDataEntry > entries_
Definition: TritonData.h:188
size
Write out results.
int64_t sizeShape(unsigned entry=0) const
Definition: TritonData.h:79
unsigned uid() const
Definition: TritonData.h:169
#define CMS_SA_ALLOW
void addEntryImpl(unsigned entry)
Definition: TritonData.cc:53
const uint8_t * output_
Definition: TritonData.h:140
std::shared_ptr< void > holder_
Definition: TritonData.h:193
bool done_
Definition: TritonData.h:196
size_t totalByteSize_
Definition: TritonData.h:189
unsigned fullLoc(unsigned loc) const
Definition: TritonData.cc:286
void createObject(IO **ioptr, const std::string &name, const std::string &dname)
std::shared_ptr< IO > data_
Definition: TritonData.h:137
int64_t byteSize() const
Definition: TritonData.h:72
void computeSizes()
Definition: TritonData.cc:128
const ShapeType dims_
Definition: TritonData.h:182
bool useShm_
Definition: TritonData.h:180
const std::string & dname() const
Definition: TritonData.h:73
void computeSizes(int64_t shapeSize, int64_t byteSize, int64_t batchSize)
Definition: TritonData.cc:121
void updateMem(size_t size)
Definition: TritonData.cc:141
void reset()
Definition: TritonData.cc:276
void checkType() const
Definition: TritonData.h:153
void checkShm()
Definition: TritonData.h:144
const ShapeView & shape(unsigned entry=0) const
Definition: TritonData.h:71
IO * data(unsigned entry=0)
Definition: TritonData.h:148
int64_t productDims_
Definition: TritonData.h:184
int64_t byteSize_
Definition: TritonData.h:187
TritonInputContainer< DT > allocate(bool reserve=true)
Definition: TritonData.cc:170
T begin() const
Definition: Span.h:20
std::shared_ptr< Result > result_
Definition: TritonData.h:138
std::vector< edm::Span< const DT * > > TritonOutput
Definition: TritonData.h:37
void setResult(std::shared_ptr< Result > result, unsigned entry=0)
Definition: TritonData.h:147
triton::client::InferResult Result
Definition: TritonData.h:49
void setShape(const ShapeType &newShape, unsigned entry=0)
TritonDataEntry(const ShapeType &dims, bool noOuterDim, const std::string &name, const std::string &dname)
Definition: TritonData.h:97
TritonClient * client_
Definition: TritonData.h:179
bool anyNeg(const ShapeView &vec) const
Definition: TritonData.h:160
void prepare()
Definition: TritonData.cc:236
TritonData(const std::string &name, const TensorMetadata &model_info, TritonClient *client, const std::string &pid)
Definition: TritonData.cc:18
std::unordered_map< std::string, TritonInputData > TritonInputMap
Definition: TritonData.h:200
inference::ModelMetadataResponse_TensorMetadata TensorMetadata
Definition: TritonData.h:50
std::shared_ptr< TritonInput< DT > > TritonInputContainer
Definition: TritonData.h:41
TritonOutput< DT > fromServer() const
Definition: TritonData.cc:244
triton::client::InferenceServerGrpcClient * client()
Definition: TritonData.cc:87
int64_t sizeDims() const
Definition: TritonData.h:77
inference::DataType dtype_
Definition: TritonData.h:186
std::shared_ptr< TritonMemResource< IO > > memResource_
Definition: TritonData.h:194
int64_t dimProduct(const ShapeView &vec) const
Definition: TritonData.h:163
std::string xput() const
Definition: TritonData.cc:77
std::string dname_
Definition: TritonData.h:185
std::string name_
Definition: TritonData.h:178
std::vector< int64_t > ShapeType
Definition: TritonData.h:51
bool variableDims() const
Definition: TritonData.h:76
std::string shmName_
Definition: TritonData.h:181
bool variableDims_
Definition: TritonData.h:183
void toServer(TritonInputContainer< DT > ptr)
Definition: TritonData.cc:188
std::unordered_map< std::string, TritonOutputData > TritonOutputMap
Definition: TritonData.h:202
void addEntry(unsigned entry)
Definition: TritonData.cc:47
T end() const
Definition: Span.h:21