13 #include "grpc_client.h" 14 #include "grpc_service.pb.h" 32 auto pipe = popen((
cmd +
" 2>&1").c_str(),
"r");
33 int thisErrno = errno;
36 <<
"TritonService: popen() failed with errno " << thisErrno <<
" for command: " <<
cmd;
40 std::array<char, buffSize>
buffer;
46 thisErrno = ferror(
pipe);
49 <<
"TritonService: failed reading command output with errno " << thisErrno;
53 int rv = pclose(
pipe);
54 return std::make_pair(
result, rv);
59 : verbose_(
pset.getUntrackedParameter<
bool>(
"verbose")),
62 allowAddModel_(
false),
63 startedFallback_(
false),
81 #ifdef TRITON_ENABLE_GPU 86 servers_.emplace(std::piecewise_construct,
94 msg =
"List of models for each server:\n";
95 for (
const auto& serverPset :
pset.getUntrackedParameterSetVector(
"servers")) {
101 <<
"TritonService: Not allowed to specify more than one server with same name (" << serverName <<
")";
102 auto&
server(sit->second);
104 std::unique_ptr<tc::InferenceServerGrpcClient>
client;
107 "TritonService(): unable to create inference context for " + serverName +
" (" +
server.url +
")");
110 inference::ServerMetadataResponse serverMetaResponse;
112 "TritonService(): unable to get metadata for " + serverName +
" (" +
server.url +
")");
114 <<
", version = " << serverMetaResponse.version();
117 inference::RepositoryIndexResponse repoIndexResponse;
119 client->ModelRepositoryIndex(&repoIndexResponse),
120 "TritonService(): unable to get repository index for " + serverName +
" (" +
server.url +
")");
124 msg += serverName +
": ";
125 for (
const auto& modelIndex : repoIndexResponse.models()) {
126 const auto&
modelName = modelIndex.name();
130 auto& modelInfo(mit->second);
131 modelInfo.servers.insert(serverName);
156 <<
"TritonService: Attempt to call addModel() outside of module constructors";
176 const auto& moduleInfo(oit->second);
179 auto& modelInfo(mit->second);
180 modelInfo.modules.erase(
id);
182 if (modelInfo.modules.empty())
193 throw cms::Exception(
"MissingModel") <<
"TritonService: There are no servers that provide model " <<
model;
194 const auto& modelInfo(mit->second);
195 const auto& modelServers = modelInfo.servers;
197 auto msit = modelServers.end();
198 if (!preferred.empty()) {
199 msit = modelServers.find(preferred);
201 if (msit == modelServers.end())
202 edm::LogWarning(
"PreferredServer") <<
"Preferred server " << preferred <<
" for model " <<
model 203 <<
" not available, will choose another server";
205 const auto& serverName(msit == modelServers.end() ? *modelServers.begin() : preferred);
219 msg =
"List of models for fallback server: ";
281 printFallbackServerLog<edm::LogError>();
283 <<
"TritonService: Starting the fallback server failed with exit code " << rv;
287 const std::string& portIndicator(
"CMS_TRITON_GRPC_PORT: ");
290 if (
pos != std::string::npos) {
291 auto pos2 =
pos + portIndicator.size();
292 auto pos3 =
output.find(
'\n', pos2);
293 const auto& portNum =
output.substr(pos2, pos3 - pos2);
294 server.url +=
":" + portNum;
296 throw cms::Exception(
"FallbackFailed") <<
"TritonService: Unknown port for fallback server, log follows:\n" 311 printFallbackServerLog<edm::LogError>();
313 <<
"TritonService: Stopping the fallback server failed with exit code " << rv;
316 printFallbackServerLog<edm::LogInfo>();
320 template <
typename LOG>
326 bool foundLog =
false;
327 for (
const auto&
logName : logNames) {
330 LOG(
"TritonService") <<
"TritonService: server log " <<
logName <<
"\n" <<
infile.rdbuf();
336 LOG(
"TritonService") <<
"TritonService: could not find server log " << logNames[0] <<
" in current directory or " 342 desc.addUntracked<
bool>(
"verbose",
false);
347 validator.addUntracked<
unsigned>(
"port");
348 validator.addUntracked<
bool>(
"useSsl",
false);
349 validator.addUntracked<
std::string>(
"rootCertificates",
"");
350 validator.addUntracked<
std::string>(
"privateKey",
"");
351 validator.addUntracked<
std::string>(
"certificateChain",
"");
353 desc.addVPSetUntracked(
"servers", validator, {});
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
void addWithDefaultLabel(ParameterSetDescription const &psetDescription)
std::unordered_map< std::string, Model > models_
void watchPreallocate(Preallocate::slot_type const &iSlot)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
std::unordered_map< std::string, Model > unservedModels_
#define TRITON_THROW_IF_ERROR(X, MSG)
void watchPreModuleConstruction(PreModuleConstruction::slot_type const &iSlot)
void postModuleConstruction(edm::ModuleDescription const &)
void watchPreModuleDestruction(PreModuleDestruction::slot_type const &iSlot)
unsigned currentModuleId_
void preallocate(edm::service::SystemBounds const &)
Log< level::Error, false > LogError
TritonService(const edm::ParameterSet &pset, edm::ActivityRegistry &areg)
static const std::string fallbackAddress
static std::string to_string(const XMLCh *ch)
void addModel(const std::string &modelName, const std::string &path)
FallbackOpts fallbackOpts_
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
def unique(seq, keepstr=True)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void printFallbackServerLog() const
def pipe(cmdline, input=None)
void preModuleConstruction(edm::ModuleDescription const &)
std::string createGlobalIdentifier(bool binary=false)
Log< level::Info, false > LogInfo
void preModuleDestruction(edm::ModuleDescription const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::unordered_map< unsigned, Module > modules_
void watchPreBeginJob(PreBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
ParameterSet const & getParameterSet(ParameterSetID const &id)
Server serverInfo(const std::string &model, const std::string &preferred="") const
if(threadIdxLocalY==0 &&threadIdxLocalX==0)
std::unordered_map< std::string, Server > servers_
static const std::string fallbackName