4 #include "classlib/iobase/InetServerSocket.h" 5 #include "classlib/iobase/LocalServerSocket.h" 6 #include "classlib/iobase/Filename.h" 7 #include "classlib/sysapi/InetSocket.h" 8 #include "classlib/utils/TimeInfo.h" 9 #include "classlib/utils/StringList.h" 10 #include "classlib/utils/StringFormat.h" 11 #include "classlib/utils/StringOps.h" 12 #include "classlib/utils/SystemError.h" 13 #include "classlib/utils/Regexp.h" 26 #define MESSAGE_SIZE_LIMIT (1 * 1024 * 1024) 27 #define SOCKET_BUF_SIZE (1 * 1024 * 1024) 29 #define MESSAGE_SIZE_LIMIT (8 * 1024 * 1024) 30 #define SOCKET_BUF_SIZE (8 * 1024 * 1024) 32 #define SOCKET_READ_SIZE (SOCKET_BUF_SIZE / 8) 33 #define SOCKET_READ_GROWTH (SOCKET_BUF_SIZE) 37 static const Regexp
s_rxmeval(
"<(.*)>(i|f|s|qr)=(.*)</\\1>");
43 return std::cout << now.format(
true,
"%Y-%m-%d %H:%M:%S.") << now.nanoformat(3, 3) <<
" " << appname_ <<
"[" << pid_
49 b->
data.insert(b->
data.end(), (
const unsigned char *)data, (
const unsigned char *)data + len);
71 for (
auto i = waiting_.begin(),
e = waiting_.end();
i !=
e;)
97 (*msg)->
next =
nullptr;
100 words[0] =
sizeof(words) + len;
101 words[1] = DQM_MSG_GET_OBJECT;
103 copydata(*msg, words,
sizeof(words));
104 copydata(*msg, name, len);
115 requestObjectData(owner, !name.empty() ? &name[0] :
nullptr, name.size());
117 waiting_.push_back(wo);
128 (*msg)->
next =
nullptr;
130 releaseFromWait(*msg, *i, o);
132 assert(i->peer->waiting > 0);
139 for (
auto i = waiting_.begin(),
e = waiting_.end();
i !=
e;)
141 releaseFromWait(
i++, o);
151 std::ostringstream qrs;
152 QReports::const_iterator qi, qe;
153 for (qi = qr.begin(), qe = qr.end(); qi != qe; ++qi) {
155 sprintf(buf,
"%d%c%n%.*g", qi->code, 0, &pos, DBL_DIG + 2, qi->qtresult);
156 qrs << buf <<
'\0' << buf + pos <<
'\0' << qi->qtname <<
'\0' << qi->algorithm <<
'\0' << qi->message <<
'\0' 165 const char *qdata = from;
195 qv.
code = atoi(qdata);
236 if (buf.Length() == buf.BufferSize())
240 Int_t
pos = buf.Length();
241 TClass *
c = buf.ReadClass();
242 buf.SetBufferOffset(pos);
244 return c ? buf.ReadObject(c) : 0;
249 DQMNet::reconstructObject(
Object &
o)
251 TBufferFile buf(TBufferFile::kRead, o.
rawdata.size(), &o.
rawdata[0], kFALSE);
271 if (! reconstructObject (o))
276 store->setCurrentFolder(*o.
dirname);
277 switch (o.
flags & DQM_PROP_TYPE_MASK)
279 case DQM_PROP_TYPE_INT:
280 obj = store->bookInt(o.
objname);
284 case DQM_PROP_TYPE_REAL:
285 obj = store->bookFloat(
name);
289 case DQM_PROP_TYPE_STRING:
293 case DQM_PROP_TYPE_TH1F:
294 obj = store->book1D(
name, dynamic_cast<TH1F *>(o.object));
297 case DQM_PROP_TYPE_TH1S:
298 obj = store->book1S(
name, dynamic_cast<TH1S *>(o.object));
301 case DQM_PROP_TYPE_TH1D:
302 obj = store->book1DD(
name, dynamic_cast<TH1D *>(o.object));
305 case DQM_PROP_TYPE_TH2F:
306 obj = store->book2D(
name, dynamic_cast<TH2F *>(o.object));
309 case DQM_PROP_TYPE_TH2S:
310 obj = store->book2S(
name, dynamic_cast<TH2S *>(o.object));
313 case DQM_PROP_TYPE_TH2D:
314 obj = store->book2DD(
name, dynamic_cast<TH2D *>(o.object));
317 case DQM_PROP_TYPE_TH3F:
318 obj = store->book3D(
name, dynamic_cast<TH3F *>(o.object));
321 case DQM_PROP_TYPE_TH3S:
322 obj = store->book3S(
name, dynamic_cast<TH3S *>(o.object));
325 case DQM_PROP_TYPE_TH3D:
326 obj = store->book3DD(
name, dynamic_cast<TH3D *>(o.object));
329 case DQM_PROP_TYPE_PROF:
330 obj = store->bookProfile(
name, dynamic_cast<TProfile *>(o.object));
333 case DQM_PROP_TYPE_PROF2D:
334 obj = store->bookProfile2D(
name, dynamic_cast<TProfile2D *>(o.object));
339 <<
"ERROR: unexpected monitor element of type " 340 << (o.
flags & DQM_PROP_TYPE_MASK) <<
" called '" 365 sendObjectToPeer(msg, *o,
true);
368 words[0] =
sizeof(words) + w.
name.size();
369 words[1] = DQM_REPLY_NONE;
370 words[2] = w.
name.size();
372 msg->
data.reserve(msg->
data.size() + words[0]);
373 copydata(msg, &words[0],
sizeof(words));
374 copydata(msg, &w.
name[0], w.
name.size());
385 if ((flags & DQM_PROP_TYPE_MASK) <= DQM_PROP_TYPE_SCALAR)
392 uint32_t datalen = objdata.size();
393 uint32_t qlen = o.
qdata.size();
398 words[0] = 9 *
sizeof(uint32_t) + namelen + datalen + qlen;
399 words[1] = DQM_REPLY_OBJECT;
401 words[3] = (o.
version >> 0) & 0xffffffff;
402 words[4] = (o.
version >> 32) & 0xffffffff;
408 msg->
data.reserve(msg->
data.size() + words[0]);
409 copydata(msg, &words[0], 9 *
sizeof(uint32_t));
413 copydata(msg,
"/", 1);
417 copydata(msg, &objdata[0], datalen);
419 copydata(msg, &o.
qdata[0], qlen);
427 memcpy(&type, data +
sizeof(uint32_t),
sizeof(type));
429 case DQM_MSG_UPDATE_ME: {
430 if (len != 2 *
sizeof(uint32_t)) {
431 logme() <<
"ERROR: corrupt 'UPDATE_ME' message of length " << len <<
" from peer " << p->
peeraddr << std::endl;
436 logme() <<
"DEBUG: received message 'UPDATE ME' from peer " << p->
peeraddr <<
", size " << len << std::endl;
442 case DQM_MSG_LIST_OBJECTS: {
444 logme() <<
"DEBUG: received message 'LIST OBJECTS' from peer " << p->
peeraddr <<
", size " << len << std::endl;
447 sendObjectListToPeer(msg,
true,
false);
451 case DQM_MSG_GET_OBJECT: {
453 logme() <<
"DEBUG: received message 'GET OBJECT' from peer " << p->
peeraddr <<
", size " << len << std::endl;
455 if (len < 3 *
sizeof(uint32_t)) {
456 logme() <<
"ERROR: corrupt 'GET IMAGE' message of length " << len <<
" from peer " << p->
peeraddr << std::endl;
461 memcpy(&namelen, data + 2 *
sizeof(uint32_t),
sizeof(namelen));
462 if (len != 3 *
sizeof(uint32_t) + namelen) {
463 logme() <<
"ERROR: corrupt 'GET OBJECT' message of length " << len <<
" from peer " << p->
peeraddr 464 <<
", expected length " << (3 *
sizeof(uint32_t)) <<
" + " << namelen << std::endl;
469 Peer *owner =
nullptr;
470 Object *o = findObject(
nullptr, name, &owner);
472 o->
lastreq = Time::current().ns();
473 if ((o->
rawdata.empty() || (o->
flags & DQM_PROP_STALE)) &&
474 (o->
flags & DQM_PROP_TYPE_MASK) > DQM_PROP_TYPE_SCALAR)
475 waitForData(p, name,
"", owner);
477 sendObjectToPeer(msg, *o,
true);
480 words[0] =
sizeof(words) + name.size();
481 words[1] = DQM_REPLY_NONE;
482 words[2] = name.size();
484 msg->
data.reserve(msg->
data.size() + words[0]);
485 copydata(msg, &words[0],
sizeof(words));
486 copydata(msg, &name[0], name.size());
491 case DQM_REPLY_LIST_BEGIN: {
492 if (len != 4 *
sizeof(uint32_t)) {
493 logme() <<
"ERROR: corrupt 'LIST BEGIN' message of length " << len <<
" from peer " << p->
peeraddr << std::endl;
499 memcpy(&flags, data + 3 *
sizeof(uint32_t),
sizeof(uint32_t));
502 logme() <<
"DEBUG: received message 'LIST BEGIN " << (flags ?
"FULL" :
"INCREMENTAL") <<
"' from " 503 << p->
peeraddr <<
", size " << len << std::endl;
516 case DQM_REPLY_LIST_END: {
517 if (len != 4 *
sizeof(uint32_t)) {
518 logme() <<
"ERROR: corrupt 'LIST END' message of length " << len <<
" from peer " << p->
peeraddr << std::endl;
524 memcpy(&flags, data + 3 *
sizeof(uint32_t),
sizeof(uint32_t));
534 logme() <<
"DEBUG: received message 'LIST END " << (flags ?
"FULL" :
"INCREMENTAL") <<
"' from " << p->
peeraddr 535 <<
", size " << len << std::endl;
544 case DQM_REPLY_OBJECT: {
546 if (len <
sizeof(words)) {
547 logme() <<
"ERROR: corrupt 'OBJECT' message of length " << len <<
" from peer " << p->
peeraddr << std::endl;
551 memcpy(&words[0], data,
sizeof(words));
552 uint32_t &namelen = words[6];
553 uint32_t &datalen = words[7];
554 uint32_t &qlen = words[8];
556 if (len !=
sizeof(words) + namelen + datalen + qlen) {
557 logme() <<
"ERROR: corrupt 'OBJECT' message of length " << len <<
" from peer " << p->
peeraddr 558 <<
", expected length " <<
sizeof(words) <<
" + " << namelen <<
" + " << datalen <<
" + " << qlen
563 unsigned char *namedata = data +
sizeof(words);
564 unsigned char *objdata = namedata + namelen;
565 unsigned char *qdata = objdata + datalen;
566 unsigned char *enddata = qdata + qlen;
568 assert(enddata == data + len);
571 logme() <<
"DEBUG: received message 'OBJECT " << name <<
"' from " << p->
peeraddr <<
", size " << len
578 Object *o = findObject(p, name);
580 o = makeObject(p, name);
582 o->
flags = words[2] | DQM_PROP_NEW | DQM_PROP_RECEIVED;
587 if ((o->
flags & DQM_PROP_TYPE_MASK) <= DQM_PROP_TYPE_SCALAR) {
590 }
else if (datalen) {
593 }
else if (!o->
rawdata.empty())
594 o->
flags |= DQM_PROP_STALE;
595 o->
qdata.insert(o->
qdata.end(), qdata, enddata);
599 if (o->
lastreq && !datalen && (o->
flags & DQM_PROP_TYPE_MASK) > DQM_PROP_TYPE_SCALAR)
600 requestObjectData(p, (namelen ? &name[0] :
nullptr), namelen);
604 releaseWaiters(name, o);
608 case DQM_REPLY_NONE: {
610 if (len <
sizeof(words)) {
611 logme() <<
"ERROR: corrupt 'NONE' message of length " << len <<
" from peer " << p->
peeraddr << std::endl;
615 memcpy(&words[0], data,
sizeof(words));
616 uint32_t &namelen = words[2];
618 if (len !=
sizeof(words) + namelen) {
619 logme() <<
"ERROR: corrupt 'NONE' message of length " << len <<
" from peer " << p->
peeraddr 620 <<
", expected length " <<
sizeof(words) <<
" + " << namelen << std::endl;
624 unsigned char *namedata = data +
sizeof(words);
628 logme() <<
"DEBUG: received message 'NONE " << name <<
"' from " << p->
peeraddr <<
", size " << len
635 if (
Object *o = findObject(p, name)) {
636 o->
flags |= DQM_PROP_DEAD;
641 releaseWaiters(name,
nullptr);
646 logme() <<
"ERROR: unrecognised message of length " << len <<
" and type " << type <<
" from peer " << p->
peeraddr 656 assert(getPeer(dynamic_cast<Socket *>(ev->source)) == p);
664 logme() <<
"WARNING: connection to the DQM server at " << p->
peeraddr 665 <<
" lost (will attempt to reconnect in 15 seconds)\n";
666 losePeer(
nullptr, p, ev);
668 losePeer(
"WARNING: lost peer connection ", p, ev);
678 const void *
data = (len ? (
const void *)&
b->data[p->
sendpos] : (
const void *)&
data);
682 done = (len ? ev->source->write(data, len) : 0);
684 logme() <<
"DEBUG: sent " << done <<
" bytes to peer " << p->
peeraddr << std::endl;
686 losePeer(
"WARNING: unable to write to peer ", p, ev, &e);
708 if (ev->events &
IORead) {
716 if ((sz = ev->source->read(&buf[0], buf.size()))) {
718 logme() <<
"DEBUG: received " << sz <<
" bytes from peer " << p->
peeraddr << std::endl;
720 if (data.capacity() < data.size() + sz)
722 data.insert(data.end(), &buf[0], &buf[0] + sz);
724 while (sz ==
sizeof(buf));
726 auto *
next =
dynamic_cast<SystemError *
>(e.next());
727 if (
next &&
next->portable() == SysErr::ErrTryAgain)
731 losePeer(
"WARNING: failed to read from peer ", p, ev, &e);
740 while (data.size() - consumed >=
sizeof(uint32_t) && p->
waiting < MAX_PEER_WAITREQS) {
742 memcpy(&msglen, &data[0] + consumed,
sizeof(msglen));
745 losePeer(
"WARNING: excessively large message from ", p, ev);
750 if (data.size() - consumed >= msglen) {
752 if (msglen < 2 *
sizeof(uint32_t)) {
753 logme() <<
"ERROR: corrupt peer message of length " << msglen <<
" from peer " << p->
peeraddr << std::endl;
759 valid = onMessage(&msg, p, &data[0] + consumed, msglen);
762 if (!msg.
data.empty()) {
765 prev = &(*prev)->
next;
768 (*prev)->
next =
nullptr;
774 losePeer(
"WARNING: data stream error with ", p, ev);
784 data.erase(data.begin(), data.begin() + consumed);
804 assert(ev->source == server_);
807 Socket *
s = server_->accept();
809 assert(!s->isBlocking());
813 Peer *
p = createPeer(s);
815 if (
auto *inet = dynamic_cast<InetSocket *>(s)) {
816 InetAddress peeraddr = inet->peername();
817 InetAddress myaddr = inet->sockname();
818 p->
peeraddr = StringFormat(
"%1:%2").arg(peeraddr.hostname()).
arg(peeraddr.port()).
value();
819 localaddr = StringFormat(
"%1:%2").arg(myaddr.hostname()).
arg(myaddr.port()).
value();
820 }
else if (
auto *
local = dynamic_cast<LocalSocket *>(s)) {
822 localaddr =
local->sockname().path();
831 logme() <<
"INFO: new peer " << p->
peeraddr <<
" is now connected to " << localaddr << std::endl;
851 unsigned char buf[1024];
852 while ((sz = ev->source->read(buf,
sizeof(buf))))
855 auto *
next =
dynamic_cast<SystemError *
>(e.next());
856 if (
next &&
next->portable() == SysErr::ErrTryAgain)
859 logme() <<
"WARNING: error reading from notification pipe: " << e.explain() << std::endl;
876 unsigned oldmask = p->
mask;
883 if (debug_ && oldmask != p->
mask)
884 logme() <<
"DEBUG: updating mask for " << p->
peeraddr <<
" to " << p->
mask <<
" from " << oldmask << std::endl;
891 logme() <<
"INFO: connection closed to " << p->
peeraddr << std::endl;
892 losePeer(
nullptr, p,
nullptr);
899 appname_(appname.
empty() ?
"DQMNet" : appname.c_str()),
902 version_(
Time::current()),
903 communicate_((pthread_t)-1),
906 waitStale_(0, 0, 0, 0, 500000000 ),
907 waitMax_(0, 0, 0, 5 , 0),
945 logme() <<
"ERROR: DQM server was already started.\n";
950 InetAddress
addr(
"0.0.0.0", port);
951 auto *
s =
new InetSocket(SOCK_STREAM, 0, addr.family());
956 s->setBlocking(
false);
961 logme() <<
"ERROR: Failed to start server at port " << port <<
": " << e.explain() << std::endl;
964 "Failed to start server at port" 967 e.explain().c_str());
970 logme() <<
"INFO: DQM server started at port " << port << std::endl;
978 logme() <<
"ERROR: DQM server was already started.\n";
983 server_ =
new LocalServerSocket(path, 10);
991 logme() <<
"ERROR: Failed to start server at path " << path <<
": " << e.explain() << std::endl;
994 "Failed to start server at path" 997 e.explain().c_str());
1000 logme() <<
"INFO: DQM server started at path " << path << std::endl;
1048 pthread_sigmask(SIG_BLOCK, &sigs,
nullptr);
1056 pthread_mutex_lock(&
lock_);
1062 pthread_mutex_unlock(&
lock_);
1070 logme() <<
"ERROR: DQM networking thread has already been started\n";
1074 pthread_mutex_init(&
lock_,
nullptr);
1086 for (
auto ap : automatic) {
1090 if (!ap->host.empty() && !ap->peer && (now = Time::current()) > ap->next) {
1091 ap->
next = now + TimeSpan(0, 0, 0, 15 , 0);
1092 InetSocket *
s =
nullptr;
1094 InetAddress
addr(ap->host.c_str(), ap->port);
1095 s =
new InetSocket(SOCK_STREAM, 0,
addr.family());
1096 s->setBlocking(
false);
1101 auto *sys =
dynamic_cast<SystemError *
>(e.next());
1102 if (!sys || sys->portable() != SysErr::ErrOperationInProgress) {
1119 InetAddress peeraddr = ((InetSocket *)s)->peername();
1120 InetAddress myaddr = ((InetSocket *)s)->sockname();
1121 p->
peeraddr = StringFormat(
"%1:%2").arg(peeraddr.hostname()).
arg(peeraddr.port()).
value();
1136 logme() <<
"INFO: now connected to " << p->
peeraddr <<
" from " << myaddr.hostname() <<
":" << myaddr.port()
1144 now = Time::current();
1149 if (
flush_ && now > nextFlush) {
1151 nextFlush = now + TimeSpan(0, 0, 0, 15 , 0);
1170 if (
i->time < waitold) {
1171 logme() <<
"WARNING: source not responding in " << (waitMax_.ns() * 1
e-9) <<
"s to retrieval, releasing '" 1172 <<
i->name <<
"' from wait, have " << (o ? o->
rawdata.size() : 0) <<
" data available\n";
1175 logme() <<
"WARNING: source not responding in " << (waitStale_.ns() * 1
e-9) <<
"s to update, releasing '" 1176 <<
i->name <<
"' from wait, have " << o->
rawdata.size() <<
" data available\n";
1193 wakeup_.sink()->write(&byte, 1);
1210 std::pair<ObjectMap::iterator, bool>
info(
local_->objs.insert(o));
1215 auto &old =
const_cast<Object &
>(*
info.first);
1232 ObjectMap::iterator
i,
e;
1233 for (i =
local_->objs.begin(), e =
local_->objs.end(); i !=
e;) {
1235 path.reserve(i->dirname->size() + i->objname.size() + 2);
1236 path += *i->dirname;
1241 if (!known.count(path))
1242 ++removed,
local_->objs.erase(i++);
edm::ErrorSummaryEntry Error
static void unpackQualityData(QReports &qr, uint32_t &flags, const char *from)
DQMNet(const std::string &appname="")
static const uint32_t DQM_PROP_REPORT_WARN
static const uint32_t DQM_MSG_UPDATE_ME
bool onLocalNotify(lat::IOSelectEvent *ev)
virtual void sendObjectListToPeers(bool all)=0
void lock()
Acquire a lock on the DQM net layer.
#define MESSAGE_SIZE_LIMIT
TObject * extractNextObject(TBufferFile &buf)
static void discard(Bucket *&b)
virtual void updatePeerMasks()=0
virtual Peer * createPeer(lat::Socket *s)=0
void releaseWaiters(const std::string &name, Object *o)
void staleObjectWaitLimit(lat::TimeSpan time)
const std::string * dirname
static void * communicate(void *obj)
void shutdown()
Stop the network layer and wait for it to finish.
dqm::dqmstoreimpl::DQMStore DQMStore
void waitForData(Peer *p, const std::string &name, const std::string &info, Peer *owner)
virtual bool shouldStop()
void requestObjectData(Peer *p, const char *name, size_t len)
Queue an object request to the data server.
void swap(edm::DataFrameContainer &lhs, edm::DataFrameContainer &rhs)
virtual Object * findObject(Peer *p, const std::string &name, Peer **owner=nullptr)=0
virtual bool onMessage(Bucket *msg, Peer *p, unsigned char *data, size_t len)
bool onPeerData(lat::IOSelectEvent *ev, Peer *p)
Handle communication to a particular client.
static const uint32_t DQM_PROP_REPORT_ERROR
#define SOCKET_READ_GROWTH
static const uint32_t DQM_PROP_REPORT_OTHER
virtual void releaseFromWait(Bucket *msg, WaitObject &w, Object *o)
Peer * createPeer(lat::Socket *s) override
virtual void sendObjectToPeer(Bucket *msg, Object &o, bool data)
unsigned long long uint64_t
bool removeLocalExcept(const std::set< std::string > &known)
bool onPeerConnect(lat::IOSelectEvent *ev)
void startLocalServer(int port)
static void packQualityData(std::string &into, const QReports &qr)
static const uint32_t DQM_PROP_STALE
static const Regexp s_rxmeval("<(.*)>(i|f|s|qr)=(.*)</\\1>")
void losePeer(const char *reason, Peer *peer, lat::IOSelectEvent *event, lat::Error *err=nullptr)
char data[epos_bytes_allocation]
void unlock()
Release the lock on the DQM net layer.
void listenToCollector(const std::string &host, int port)
static const int STATUS_OK
static const uint32_t DQM_MSG_LIST_OBJECTS
static void copydata(Bucket *b, const void *data, size_t len)
void updateLocalObject(Object &o)
std::vector< QValue > QReports
void reserveLocalSpace(uint32_t size)
Give a hint of how much capacity to allocate for local objects.
void updateToCollector(const std::string &host, int port)
std::vector< unsigned char > DataBlob
DQMBasicNet(const std::string &appname="")
void raiseDQMError(const char *context, const char *fmt,...)