13 #include "grpc_client.h" 14 #include "grpc_service.pb.h" 32 auto pipe = popen((
cmd +
" 2>&1").c_str(),
"r");
33 int thisErrno = errno;
36 <<
"TritonService: popen() failed with errno " << thisErrno <<
" for command: " <<
cmd;
40 std::array<char, buffSize>
buffer;
46 thisErrno = ferror(
pipe);
49 <<
"TritonService: failed reading command output with errno " << thisErrno;
53 int rv = pclose(
pipe);
54 return std::make_pair(
result, rv);
59 : verbose_(
pset.getUntrackedParameter<
bool>(
"verbose")),
62 allowAddModel_(
false),
63 startedFallback_(
false),
82 #ifdef TRITON_ENABLE_GPU 87 servers_.emplace(std::piecewise_construct,
95 msg =
"List of models for each server:\n";
96 for (
const auto& serverPset :
pset.getUntrackedParameterSetVector(
"servers")) {
102 <<
"TritonService: Not allowed to specify more than one server with same name (" << serverName <<
")";
103 auto&
server(sit->second);
105 std::unique_ptr<tc::InferenceServerGrpcClient>
client;
108 "TritonService(): unable to create inference context for " + serverName +
" (" +
server.url +
")",
112 inference::ServerMetadataResponse serverMetaResponse;
114 "TritonService(): unable to get metadata for " + serverName +
" (" +
server.url +
")",
117 <<
", version = " << serverMetaResponse.version();
120 inference::RepositoryIndexResponse repoIndexResponse;
122 "TritonService(): unable to get repository index for " + serverName +
" (" +
server.url +
")",
127 msg += serverName +
": ";
128 for (
const auto& modelIndex : repoIndexResponse.models()) {
129 const auto&
modelName = modelIndex.name();
133 auto& modelInfo(mit->second);
134 modelInfo.servers.insert(serverName);
159 <<
"TritonService: Attempt to call addModel() outside of module constructors";
179 const auto& moduleInfo(oit->second);
182 auto& modelInfo(mit->second);
183 modelInfo.modules.erase(
id);
185 if (modelInfo.modules.empty())
196 throw cms::Exception(
"MissingModel") <<
"TritonService: There are no servers that provide model " <<
model;
197 const auto& modelInfo(mit->second);
198 const auto& modelServers = modelInfo.servers;
200 auto msit = modelServers.end();
201 if (!preferred.empty()) {
202 msit = modelServers.find(preferred);
204 if (msit == modelServers.end())
205 edm::LogWarning(
"PreferredServer") <<
"Preferred server " << preferred <<
" for model " <<
model 206 <<
" not available, will choose another server";
208 const auto& serverName(msit == modelServers.end() ? *modelServers.begin() : preferred);
222 msg =
"List of models for fallback server: ";
284 printFallbackServerLog<edm::LogError>();
286 <<
"TritonService: Starting the fallback server failed with exit code " << rv;
290 const std::string& portIndicator(
"CMS_TRITON_GRPC_PORT: ");
293 if (
pos != std::string::npos) {
294 auto pos2 =
pos + portIndicator.size();
295 auto pos3 =
output.find(
'\n', pos2);
296 const auto& portNum =
output.substr(pos2, pos3 - pos2);
297 server.url +=
":" + portNum;
299 throw cms::Exception(
"FallbackFailed") <<
"TritonService: Unknown port for fallback server, log follows:\n" 326 printFallbackServerLog<edm::LogError>();
329 std::string stopMsg =
fmt::format(
"TritonService: Stopping the fallback server failed with exit code {}", rv);
338 printFallbackServerLog<edm::LogInfo>();
342 template <
typename LOG>
348 bool foundLog =
false;
349 for (
const auto&
logName : logNames) {
352 LOG(
"TritonService") <<
"TritonService: server log " <<
logName <<
"\n" <<
infile.rdbuf();
358 LOG(
"TritonService") <<
"TritonService: could not find server log " << logNames[0] <<
" in current directory or " 364 desc.addUntracked<
bool>(
"verbose",
false);
369 validator.addUntracked<
unsigned>(
"port");
370 validator.addUntracked<
bool>(
"useSsl",
false);
371 validator.addUntracked<
std::string>(
"rootCertificates",
"");
372 validator.addUntracked<
std::string>(
"privateKey",
"");
373 validator.addUntracked<
std::string>(
"certificateChain",
"");
375 desc.addVPSetUntracked(
"servers", validator, {});
void watchPostModuleConstruction(PostModuleConstruction::slot_type const &iSlot)
void addWithDefaultLabel(ParameterSetDescription const &psetDescription)
std::unordered_map< std::string, Model > models_
void watchPreallocate(Preallocate::slot_type const &iSlot)
ParameterDescriptionBase * addUntracked(U const &iLabel, T const &value)
void watchPostEndJob(PostEndJob::slot_type const &iSlot)
void notifyCallStatus(bool status) const
std::unordered_map< std::string, Model > unservedModels_
void watchPreModuleConstruction(PreModuleConstruction::slot_type const &iSlot)
void postModuleConstruction(edm::ModuleDescription const &)
void watchPreModuleDestruction(PreModuleDestruction::slot_type const &iSlot)
unsigned currentModuleId_
void preallocate(edm::service::SystemBounds const &)
Log< level::Error, false > LogError
TritonService(const edm::ParameterSet &pset, edm::ActivityRegistry &areg)
static const std::string fallbackAddress
static std::string to_string(const XMLCh *ch)
void addModel(const std::string &modelName, const std::string &path)
FallbackOpts fallbackOpts_
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
def unique(seq, keepstr=True)
void preBeginJob(edm::PathsAndConsumesOfModulesBase const &, edm::ProcessContext const &)
void printFallbackServerLog() const
def pipe(cmdline, input=None)
#define TRITON_THROW_IF_ERROR(X, MSG, NOTIFY)
void preModuleConstruction(edm::ModuleDescription const &)
std::string createGlobalIdentifier(bool binary=false)
Log< level::Info, false > LogInfo
void preModuleDestruction(edm::ModuleDescription const &)
static void fillDescriptions(edm::ConfigurationDescriptions &descriptions)
std::unordered_map< unsigned, Module > modules_
std::atomic< int > callFails_
void watchPreBeginJob(PreBeginJob::slot_type const &iSlot)
convenience function for attaching to signal
ParameterSet const & getParameterSet(ParameterSetID const &id)
Server serverInfo(const std::string &model, const std::string &preferred="") const
Log< level::Warning, false > LogWarning
if(threadIdxLocalY==0 &&threadIdxLocalX==0)
std::unordered_map< std::string, Server > servers_
static const std::string fallbackName