4 #include "classlib/iobase/InetServerSocket.h" 5 #include "classlib/iobase/LocalServerSocket.h" 6 #include "classlib/iobase/Filename.h" 7 #include "classlib/sysapi/InetSocket.h" 8 #include "classlib/utils/TimeInfo.h" 9 #include "classlib/utils/StringList.h" 10 #include "classlib/utils/StringFormat.h" 11 #include "classlib/utils/StringOps.h" 12 #include "classlib/utils/SystemError.h" 13 #include "classlib/utils/Regexp.h" 26 # define MESSAGE_SIZE_LIMIT (1*1024*1024) 27 # define SOCKET_BUF_SIZE (1*1024*1024) 29 # define MESSAGE_SIZE_LIMIT (8*1024*1024) 30 # define SOCKET_BUF_SIZE (8*1024*1024) 32 #define SOCKET_READ_SIZE (SOCKET_BUF_SIZE/8) 33 #define SOCKET_READ_GROWTH (SOCKET_BUF_SIZE) 37 static const Regexp
s_rxmeval(
"<(.*)>(i|f|s|qr)=(.*)</\\1>");
44 Time
now = Time::current();
46 << now.format(
true,
"%Y-%m-%d %H:%M:%S.")
47 << now.nanoformat(3, 3)
48 <<
" " << appname_ <<
"[" << pid_ <<
"]: ";
56 (
const unsigned char *)data,
57 (
const unsigned char *)data + len);
85 << (err ?
"; error was: " + err->explain() :
std::string(
""))
90 for (WaitList::iterator
i = waiting_.begin(),
e = waiting_.end();
i !=
e; )
118 (*msg)->
next =
nullptr;
121 words[0] =
sizeof(words) + len;
122 words[1] = DQM_MSG_GET_OBJECT;
124 copydata(*msg, words,
sizeof(words));
125 copydata(*msg, name, len);
138 requestObjectData(owner, !name.empty() ? &name[0] :
nullptr, name.size());
140 waiting_.push_back(wo);
153 (*msg)->
next =
nullptr;
155 releaseFromWait(*msg, *i, o);
157 assert(i->peer->waiting > 0);
166 for (WaitList::iterator
i = waiting_.begin(),
e = waiting_.end();
i !=
e; )
168 releaseFromWait(
i++, o);
180 std::ostringstream qrs;
181 QReports::const_iterator qi, qe;
182 for (qi = qr.begin(), qe = qr.end(); qi != qe; ++qi)
185 sprintf(buf,
"%d%c%n%.*g", qi->code, 0, &pos, DBL_DIG+2, qi->qtresult);
188 << qi->qtname <<
'\0' 189 << qi->algorithm <<
'\0' 190 << qi->message <<
'\0' 201 const char *qdata = from;
208 while (*qdata) ++qdata;
210 while (*qdata) ++qdata;
212 while (*qdata) ++qdata;
214 while (*qdata) ++qdata;
216 while (*qdata) ++qdata;
228 qv.
code = atoi(qdata);
229 while (*qdata) ++qdata;
246 while (*qdata) ++qdata;
249 while (*qdata) ++qdata;
252 while (*qdata) ++qdata;
255 while (*qdata) ++qdata;
265 if (buf.Length() == buf.BufferSize())
269 Int_t
pos = buf.Length();
270 TClass *
c = buf.ReadClass();
271 buf.SetBufferOffset(pos);
273 return c ? buf.ReadObject(c) : 0;
278 DQMNet::reconstructObject(
Object &
o)
280 TBufferFile buf(TBufferFile::kRead, o.
rawdata.size(), &o.
rawdata[0], kFALSE);
300 if (! reconstructObject (o))
305 store->setCurrentFolder(*o.
dirname);
306 switch (o.
flags & DQM_PROP_TYPE_MASK)
308 case DQM_PROP_TYPE_INT:
309 obj = store->bookInt(o.
objname);
313 case DQM_PROP_TYPE_REAL:
314 obj = store->bookFloat(
name);
318 case DQM_PROP_TYPE_STRING:
322 case DQM_PROP_TYPE_TH1F:
323 obj = store->book1D(
name, dynamic_cast<TH1F *>(o.object));
326 case DQM_PROP_TYPE_TH1S:
327 obj = store->book1S(
name, dynamic_cast<TH1S *>(o.object));
330 case DQM_PROP_TYPE_TH1D:
331 obj = store->book1DD(
name, dynamic_cast<TH1D *>(o.object));
334 case DQM_PROP_TYPE_TH2F:
335 obj = store->book2D(
name, dynamic_cast<TH2F *>(o.object));
338 case DQM_PROP_TYPE_TH2S:
339 obj = store->book2S(
name, dynamic_cast<TH2S *>(o.object));
342 case DQM_PROP_TYPE_TH2D:
343 obj = store->book2DD(
name, dynamic_cast<TH2D *>(o.object));
346 case DQM_PROP_TYPE_TH3F:
347 obj = store->book3D(
name, dynamic_cast<TH3F *>(o.object));
350 case DQM_PROP_TYPE_TH3S:
351 obj = store->book3S(
name, dynamic_cast<TH3S *>(o.object));
354 case DQM_PROP_TYPE_TH3D:
355 obj = store->book3DD(
name, dynamic_cast<TH3D *>(o.object));
358 case DQM_PROP_TYPE_PROF:
359 obj = store->bookProfile(
name, dynamic_cast<TProfile *>(o.object));
362 case DQM_PROP_TYPE_PROF2D:
363 obj = store->bookProfile2D(
name, dynamic_cast<TProfile2D *>(o.object));
368 <<
"ERROR: unexpected monitor element of type " 369 << (o.
flags & DQM_PROP_TYPE_MASK) <<
" called '" 400 sendObjectToPeer(msg, *o,
true);
404 words[0] =
sizeof(words) + w.
name.size();
405 words[1] = DQM_REPLY_NONE;
406 words[2] = w.
name.size();
408 msg->
data.reserve(msg->
data.size() + words[0]);
409 copydata(msg, &words[0],
sizeof(words));
410 copydata(msg, &w.
name[0], w.
name.size());
423 if ((flags & DQM_PROP_TYPE_MASK) <= DQM_PROP_TYPE_SCALAR)
424 objdata.insert(objdata.end(),
428 objdata.insert(objdata.end(),
434 uint32_t datalen = objdata.size();
435 uint32_t qlen = o.
qdata.size();
440 words[0] = 9*
sizeof(uint32_t) + namelen + datalen + qlen;
441 words[1] = DQM_REPLY_OBJECT;
443 words[3] = (o.
version >> 0 ) & 0xffffffff;
444 words[4] = (o.
version >> 32) & 0xffffffff;
450 msg->
data.reserve(msg->
data.size() + words[0]);
451 copydata(msg, &words[0], 9*
sizeof(uint32_t));
456 copydata(msg,
"/", 1);
460 copydata(msg, &objdata[0], datalen);
462 copydata(msg, &o.
qdata[0], qlen);
472 memcpy (&type, data +
sizeof(uint32_t),
sizeof (type));
475 case DQM_MSG_UPDATE_ME:
477 if (len != 2*
sizeof(uint32_t))
480 <<
"ERROR: corrupt 'UPDATE_ME' message of length " << len
481 <<
" from peer " << p->
peeraddr << std::endl;
487 <<
"DEBUG: received message 'UPDATE ME' from peer " 488 << p->
peeraddr <<
", size " << len << std::endl;
494 case DQM_MSG_LIST_OBJECTS:
498 <<
"DEBUG: received message 'LIST OBJECTS' from peer " 499 << p->
peeraddr <<
", size " << len << std::endl;
502 sendObjectListToPeer(msg,
true,
false);
506 case DQM_MSG_GET_OBJECT:
510 <<
"DEBUG: received message 'GET OBJECT' from peer " 511 << p->
peeraddr <<
", size " << len << std::endl;
513 if (len < 3*
sizeof(uint32_t))
516 <<
"ERROR: corrupt 'GET IMAGE' message of length " << len
517 <<
" from peer " << p->
peeraddr << std::endl;
522 memcpy (&namelen, data + 2*
sizeof(uint32_t),
sizeof(namelen));
523 if (len != 3*
sizeof(uint32_t) + namelen)
526 <<
"ERROR: corrupt 'GET OBJECT' message of length " << len
528 <<
", expected length " << (3*
sizeof(uint32_t))
529 <<
" + " << namelen << std::endl;
534 Peer *owner =
nullptr;
535 Object *o = findObject(
nullptr, name, &owner);
538 o->
lastreq = Time::current().ns();
540 && (o->
flags & DQM_PROP_TYPE_MASK) > DQM_PROP_TYPE_SCALAR)
541 waitForData(p, name,
"", owner);
543 sendObjectToPeer(msg, *o,
true);
548 words[0] =
sizeof(words) + name.size();
549 words[1] = DQM_REPLY_NONE;
550 words[2] = name.size();
552 msg->
data.reserve(msg->
data.size() + words[0]);
553 copydata(msg, &words[0],
sizeof(words));
554 copydata(msg, &name[0], name.size());
559 case DQM_REPLY_LIST_BEGIN:
561 if (len != 4*
sizeof(uint32_t))
564 <<
"ERROR: corrupt 'LIST BEGIN' message of length " << len
565 <<
" from peer " << p->
peeraddr << std::endl;
571 memcpy(&flags, data + 3*
sizeof(uint32_t),
sizeof(uint32_t));
575 <<
"DEBUG: received message 'LIST BEGIN " 576 << (flags ?
"FULL" :
"INCREMENTAL")
578 <<
", size " << len << std::endl;
591 case DQM_REPLY_LIST_END:
593 if (len != 4*
sizeof(uint32_t))
596 <<
"ERROR: corrupt 'LIST END' message of length " << len
597 <<
" from peer " << p->
peeraddr << std::endl;
603 memcpy(&flags, data + 3*
sizeof(uint32_t),
sizeof(uint32_t));
614 <<
"DEBUG: received message 'LIST END " 615 << (flags ?
"FULL" :
"INCREMENTAL")
617 <<
", size " << len << std::endl;
626 case DQM_REPLY_OBJECT:
629 if (len <
sizeof(words))
632 <<
"ERROR: corrupt 'OBJECT' message of length " << len
633 <<
" from peer " << p->
peeraddr << std::endl;
637 memcpy (&words[0], data,
sizeof(words));
638 uint32_t &namelen = words[6];
639 uint32_t &datalen = words[7];
640 uint32_t &qlen = words[8];
642 if (len !=
sizeof(words) + namelen + datalen + qlen)
645 <<
"ERROR: corrupt 'OBJECT' message of length " << len
647 <<
", expected length " <<
sizeof(words)
655 unsigned char *namedata = data +
sizeof(words);
656 unsigned char *objdata = namedata + namelen;
657 unsigned char *qdata = objdata + datalen;
658 unsigned char *enddata = qdata + qlen;
660 assert (enddata == data + len);
664 <<
"DEBUG: received message 'OBJECT " << name
666 <<
", size " << len << std::endl;
672 Object *o = findObject(p, name);
674 o = makeObject(p, name);
676 o->
flags = words[2] | DQM_PROP_NEW | DQM_PROP_RECEIVED;
681 if ((o->
flags & DQM_PROP_TYPE_MASK) <= DQM_PROP_TYPE_SCALAR)
692 o->
flags |= DQM_PROP_STALE;
693 o->
qdata.insert(o->
qdata.end(), qdata, enddata);
699 && (o->
flags & DQM_PROP_TYPE_MASK) > DQM_PROP_TYPE_SCALAR)
700 requestObjectData(p, (namelen ? &name[0] :
nullptr), namelen);
704 releaseWaiters(name, o);
711 if (len <
sizeof(words))
714 <<
"ERROR: corrupt 'NONE' message of length " << len
715 <<
" from peer " << p->
peeraddr << std::endl;
719 memcpy (&words[0], data,
sizeof(words));
720 uint32_t &namelen = words[2];
722 if (len !=
sizeof(words) + namelen)
725 <<
"ERROR: corrupt 'NONE' message of length " << len
727 <<
", expected length " <<
sizeof(words)
728 <<
" + " << namelen << std::endl;
732 unsigned char *namedata = data +
sizeof(words);
737 <<
"DEBUG: received message 'NONE " << name
739 <<
", size " << len << std::endl;
745 if (
Object *o = findObject(p, name))
747 o->
flags |= DQM_PROP_DEAD;
752 releaseWaiters(name,
nullptr);
758 <<
"ERROR: unrecognised message of length " << len
759 <<
" and type " << type <<
" from peer " << p->
peeraddr 771 assert (getPeer(dynamic_cast<Socket *> (ev->source)) == p);
782 <<
"WARNING: connection to the DQM server at " << p->
peeraddr 783 <<
" lost (will attempt to reconnect in 15 seconds)\n";
784 losePeer(
nullptr, p, ev);
787 losePeer(
"WARNING: lost peer connection ", p, ev);
799 const void *
data = (len ? (
const void *)&
b->data[p->
sendpos]
800 : (
const void *)&
data);
805 done = (len ? ev->source->write (data, len) : 0);
808 <<
"DEBUG: sent " << done <<
" bytes to peer " 813 losePeer(
"WARNING: unable to write to peer ", p, ev, &e);
846 if ((sz = ev->source->read(&buf[0], buf.size())))
850 <<
"DEBUG: received " << sz <<
" bytes from peer " 853 if (data.capacity () < data.size () + sz)
855 data.insert (data.end(), &buf[0], &buf[0] + sz);
857 while (sz ==
sizeof (buf));
861 SystemError *
next =
dynamic_cast<SystemError *
>(e.next());
862 if (next && next->portable() == SysErr::ErrTryAgain)
867 losePeer(
"WARNING: failed to read from peer ", p, ev, &e);
876 while (data.size()-consumed >=
sizeof(uint32_t)
877 && p->
waiting < MAX_PEER_WAITREQS)
880 memcpy (&msglen, &data[0]+consumed,
sizeof(msglen));
884 losePeer(
"WARNING: excessively large message from ", p, ev);
889 if (data.size()-consumed >= msglen)
892 if (msglen < 2*
sizeof(uint32_t))
895 <<
"ERROR: corrupt peer message of length " << msglen
896 <<
" from peer " << p->
peeraddr << std::endl;
904 valid = onMessage(&msg, p, &data[0]+consumed, msglen);
907 if (! msg.
data.empty())
911 prev = &(*prev)->
next;
914 (*prev)->
next =
nullptr;
921 losePeer(
"WARNING: data stream error with ", p, ev);
932 data.erase(data.begin(), data.begin()+consumed);
954 assert (ev->source == server_);
957 Socket *
s = server_->accept();
959 assert (! s->isBlocking());
963 Peer *
p = createPeer(s);
965 if (InetSocket *inet = dynamic_cast<InetSocket *>(s))
967 InetAddress peeraddr = inet->peername();
968 InetAddress myaddr = inet->sockname();
970 .arg(peeraddr.hostname())
972 localaddr = StringFormat(
"%1:%2")
973 .arg(myaddr.hostname())
976 else if (LocalSocket *
local = dynamic_cast<LocalSocket *>(s))
979 localaddr =
local->sockname().path();
990 <<
"INFO: new peer " << p->
peeraddr <<
" is now connected to " 991 << localaddr << std::endl;
1014 unsigned char buf [1024];
1015 while ((sz = ev->source->read(buf,
sizeof(buf))))
1020 SystemError *
next =
dynamic_cast<SystemError *
>(e.next());
1021 if (next && next->portable() == SysErr::ErrTryAgain)
1025 <<
"WARNING: error reading from notification pipe: " 1026 << e.explain() << std::endl;
1045 unsigned oldmask = p->
mask;
1052 if (debug_ && oldmask != p->
mask)
1054 <<
"DEBUG: updating mask for " << p->
peeraddr <<
" to " 1055 << p->
mask <<
" from " << oldmask << std::endl;
1061 assert (! p->
sendq);
1063 logme() <<
"INFO: connection closed to " << p->
peeraddr << std::endl;
1064 losePeer(
nullptr, p,
nullptr);
1071 appname_ (appname.
empty() ?
"DQMNet" : appname.c_str()),
1074 version_ (Time::current()),
1075 communicate_ ((pthread_t) -1),
1078 waitStale_ (0, 0, 0, 0, 500000000 ),
1079 waitMax_ (0, 0, 0, 5 , 0),
1134 logme() <<
"ERROR: DQM server was already started.\n";
1140 InetAddress addr(
"0.0.0.0", port);
1141 InetSocket *
s =
new InetSocket(SOCK_STREAM, 0, addr.family());
1146 s->setBlocking(
false);
1154 <<
"ERROR: Failed to start server at port " << port <<
": " 1155 << e.explain() << std::endl;
1157 raiseDQMError(
"DQMNet::startLocalServer",
"Failed to start server at port" 1158 " %d: %s", port, e.explain().c_str());
1161 logme() <<
"INFO: DQM server started at port " << port << std::endl;
1172 logme() <<
"ERROR: DQM server was already started.\n";
1178 server_ =
new LocalServerSocket(path, 10);
1189 <<
"ERROR: Failed to start server at path " << path <<
": " 1190 << e.explain() << std::endl;
1192 raiseDQMError(
"DQMNet::startLocalServer",
"Failed to start server at path" 1193 " %s: %s", path, e.explain().c_str());
1196 logme() <<
"INFO: DQM server started at path " << path << std::endl;
1208 <<
"ERROR: Already updating another collector at " 1227 <<
"ERROR: Already receiving data from another collector at " 1255 pthread_sigmask (SIG_BLOCK, &sigs,
nullptr);
1265 pthread_mutex_lock(&
lock_);
1273 pthread_mutex_unlock(&
lock_);
1285 <<
"ERROR: DQM networking thread has already been started\n";
1289 pthread_mutex_init(&
lock_,
nullptr);
1304 for (
int i = 0;
i < 2; ++
i)
1311 if (! ap->
host.empty()
1313 && (now = Time::current()) > ap->
next)
1315 ap->
next = now + TimeSpan(0, 0, 0, 15 , 0);
1316 InetSocket *
s =
nullptr;
1319 InetAddress addr(ap->
host.c_str(), ap->
port);
1320 s =
new InetSocket (SOCK_STREAM, 0, addr.family());
1321 s->setBlocking(
false);
1328 SystemError *sys =
dynamic_cast<SystemError *
>(e.next());
1329 if (! sys || sys->portable() != SysErr::ErrOperationInProgress)
1348 InetAddress peeraddr = ((InetSocket *) s)->peername();
1349 InetAddress myaddr = ((InetSocket *) s)->sockname();
1350 p->
peeraddr = StringFormat(
"%1:%2")
1351 .arg(peeraddr.hostname())
1370 <<
"INFO: now connected to " << p->
peeraddr <<
" from " 1371 << myaddr.hostname() <<
":" << myaddr.port() << std::endl;
1378 now = Time::current();
1383 if (
flush_ && now > nextFlush)
1386 nextFlush = now + TimeSpan(0, 0, 0, 15 , 0);
1406 if (
i->time < waitold)
1409 <<
"WARNING: source not responding in " << (waitMax_.ns() * 1
e-9)
1410 <<
"s to retrieval, releasing '" <<
i->name <<
"' from wait, have " 1411 << (o ? o->
rawdata.size() : 0) <<
" data available\n";
1417 <<
"WARNING: source not responding in " << (waitStale_.ns() * 1
e-9)
1418 <<
"s to update, releasing '" <<
i->name <<
"' from wait, have " 1419 << o->
rawdata.size() <<
" data available\n";
1438 wakeup_.sink()->write(&byte, 1);
1454 local_->objs.resize(size);
1463 std::pair<ObjectMap::iterator, bool>
info(
local_->objs.insert(o));
1488 ObjectMap::iterator
i,
e;
1489 for (i =
local_->objs.begin(), e =
local_->objs.end(); i !=
e; )
1492 path.reserve(i->dirname->size() + i->objname.size() + 2);
1493 path += *i->dirname;
1498 if (! known.count(path))
1499 ++removed,
local_->objs.erase(i++);
static void unpackQualityData(QReports &qr, uint32_t &flags, const char *from)
DQMNet(const std::string &appname="")
static const uint32_t DQM_PROP_REPORT_WARN
static const uint32_t DQM_MSG_UPDATE_ME
virtual bool shouldStop(void)
void shutdown(void)
Stop the network layer and wait it to finish.
void sendLocalChanges(void)
bool onLocalNotify(lat::IOSelectEvent *ev)
virtual void sendObjectListToPeers(bool all)=0
#define MESSAGE_SIZE_LIMIT
TObject * extractNextObject(TBufferFile &buf)
static void discard(Bucket *&b)
virtual Peer * createPeer(lat::Socket *s)=0
void releaseWaiters(const std::string &name, Object *o)
std::vector< Variable::Flags > flags
void staleObjectWaitLimit(lat::TimeSpan time)
const std::string * dirname
void lock(void)
Acquire a lock on the DQM net layer.
static void * communicate(void *obj)
void waitForData(Peer *p, const std::string &name, const std::string &info, Peer *owner)
void requestObjectData(Peer *p, const char *name, size_t len)
Queue an object request to the data server.
void swap(edm::DataFrameContainer &lhs, edm::DataFrameContainer &rhs)
virtual Object * findObject(Peer *p, const std::string &name, Peer **owner=nullptr)=0
virtual bool onMessage(Bucket *msg, Peer *p, unsigned char *data, size_t len)
std::vector< unsigned char > DataBlob
std::ostream & logme(void)
bool onPeerData(lat::IOSelectEvent *ev, Peer *p)
Handle communication to a particular client.
static const uint32_t DQM_PROP_REPORT_ERROR
#define SOCKET_READ_GROWTH
static const uint32_t DQM_PROP_REPORT_OTHER
virtual void releaseFromWait(Bucket *msg, WaitObject &w, Object *o)
Peer * createPeer(lat::Socket *s) override
void unlock(void)
Release the lock on the DQM net layer.
virtual void sendObjectToPeer(Bucket *msg, Object &o, bool data)
unsigned long long uint64_t
bool removeLocalExcept(const std::set< std::string > &known)
bool onPeerConnect(lat::IOSelectEvent *ev)
void startLocalServer(int port)
static void packQualityData(std::string &into, const QReports &qr)
static const uint32_t DQM_PROP_STALE
virtual void updatePeerMasks(void)=0
static const Regexp s_rxmeval("<(.*)>(i|f|s|qr)=(.*)</\\1>")
void losePeer(const char *reason, Peer *peer, lat::IOSelectEvent *event, lat::Error *err=nullptr)
char data[epos_bytes_allocation]
void listenToCollector(const std::string &host, int port)
static const int STATUS_OK
static const uint32_t DQM_MSG_LIST_OBJECTS
static void copydata(Bucket *b, const void *data, size_t len)
void updateLocalObject(Object &o)
std::vector< QValue > QReports
void reserveLocalSpace(uint32_t size)
Give a hint of how much capacity to allocate for local objects.
void updateToCollector(const std::string &host, int port)
DQMBasicNet(const std::string &appname="")
void raiseDQMError(const char *context, const char *fmt,...)