#include "grpc_client.h"
#include "grpc_service.pb.h"
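
// helper that runs a shell command via popen(), with stderr redirected into stdout,
// and returns the captured output together with the command's exit status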
  auto pipe = popen((cmd + " 2>&1").c_str(), "r");
  int thisErrno = errno;
  if (!pipe)
    throw cms::Exception("SystemError") << "popen() failed with errno " << thisErrno << " for command: " << cmd;
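
  // buffer for reading the command output in fixed-size chunks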
  constexpr static unsigned buffSize = 128;
  std::array<char, buffSize> buffer;
  thisErrno = ferror(pipe);
  if (thisErrno)
    throw cms::Exception("SystemError") << "failed reading command output with errno " << thisErrno;
  int rv = pclose(pipe);
  return std::make_pair(result, rv);
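
// TritonService constructor: initialize configuration flags and internal state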
    : verbose_(pset.getUntrackedParameter<bool>("verbose")),
      allowAddModel_(false),
      startedFallback_(false) {
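  // server entries are constructed in place in the servers_ map via piecewise construction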
  servers_.emplace(std::piecewise_construct,
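
  // loop over the configured servers and record which models each one provides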
  msg = "List of models for each server:\n";
  for (const auto& serverPset : pset.getUntrackedParameterSetVector("servers")) {
        << "Not allowed to specify more than one server with same name (" << serverName << ")";
    auto& serverInfo(sit->second);
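
    // open a gRPC client connection to this server; the Create call is wrapped in the
    // error-check helper, which throws with the given message on failure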
    std::unique_ptr<nic::InferenceServerGrpcClient> client;
    triton_utils::throwIfError(
        nic::InferenceServerGrpcClient::Create(&client, serverInfo.url, false),
        "TritonService(): unable to create inference context for " + serverName + " (" + serverInfo.url + ")");
    inference::RepositoryIndexResponse repoIndexResponse;
    triton_utils::throwIfError(
        client->ModelRepositoryIndex(&repoIndexResponse),
        "TritonService(): unable to get repository index for " + serverName + " (" + serverInfo.url + ")");
    msg += serverName + ": ";
    for (const auto& modelIndex : repoIndexResponse.models()) {
      const auto& modelName = modelIndex.name();
      auto& modelInfo(mit->second);
      modelInfo.servers.insert(serverName);
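
  // addModel() may only be called while a module constructor is running,
  // which is tracked by the allowAddModel_ flag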
  if (!allowAddModel_)
    throw cms::Exception("DisallowedAddModel") << "Attempt to call addModel() outside of module constructors";
  const auto& moduleInfo(oit->second);
  auto& modelInfo(mit->second);
  modelInfo.modules.erase(id);
  if (modelInfo.modules.empty())
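
  // choose a server that provides the requested model, honoring the preferred server when possible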
  if (mit == models_.end())
    throw cms::Exception("MissingModel") << "There are no servers that provide model " << model;
  const auto& modelInfo(mit->second);
  const auto& modelServers = modelInfo.servers;
  auto msit = modelServers.end();
  if (!preferred.empty()) {
    msit = modelServers.find(preferred);
    if (msit == modelServers.end())
      edm::LogWarning("PreferredServer") << "Preferred server " << preferred << " for model " << model
                                         << " not available, will choose another server";
  }
  const auto& serverName(msit == modelServers.end() ? *modelServers.begin() : preferred);
  const auto& serverInfo(servers_.find(serverName)->second);
  return std::make_pair(serverInfo.url, isFallbackCPU);
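
// fallback-server startup: summarize the models it must serve, launch it, and parse its port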
  msg = "List of models for fallback server: ";
  if (rv != 0)
    throw cms::Exception("FallbackFailed") << "Starting the fallback server failed with exit code " << rv;
  const std::string& portIndicator("CMS_TRITON_GRPC_PORT: ");
  if (pos != std::string::npos) {
    auto pos2 = pos + portIndicator.size();
    auto pos3 = output.find('\n', pos2);
    const auto& portNum = output.substr(pos2, pos3 - pos2);
    serverInfo.url += ":" + portNum;
  } else
    throw cms::Exception("FallbackFailed") << "Unknown port for fallback server, log follows:\n" << output;
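
// fillDescriptions(): declare the configuration parameters accepted by the service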
  desc.addUntracked<bool>("verbose", false);
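
  // validator for the entries of the "servers" VPSet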
  validator.addUntracked<unsigned>("port");

  desc.addVPSetUntracked("servers", validator, {});