7 #include "XrdCl/XrdClFile.hh"
8 #include "XrdCl/XrdClDefaultEnv.hh"
9 #include "XrdCl/XrdClFileSystem.hh"
21 #include "Utilities/XrdAdaptor/src/XrdHostHandler.hh"
// Upper bound on a single chunk, 512 * 1024 (presumably bytes -- confirm at the use site).
// The expression is parenthesized so the macro behaves as one value inside
// larger expressions such as `n / XRD_CL_MAX_CHUNK` or `n % XRD_CL_MAX_CHUNK`.
#define XRD_CL_MAX_CHUNK (512 * 1024)
// Short open delay (presumably seconds -- confirm at the use site).
#define XRD_ADAPTOR_SHORT_OPEN_DELAY 5
27 #ifdef XRD_FAKE_OPEN_PROBE
28 #define XRD_ADAPTOR_OPEN_PROBE_PERCENT 100
29 #define XRD_ADAPTOR_LONG_OPEN_DELAY 20
31 #define XRD_ADAPTOR_SOURCE_QUALITY_FUDGE 0
33 #define XRD_ADAPTOR_OPEN_PROBE_PERCENT 10
// Long open delay, 2 * 60 (presumably seconds, i.e. two minutes -- confirm at the use site).
// Parenthesized so division, modulo or subtraction against the macro
// applies to the whole product rather than to the leading `2` only.
#define XRD_ADAPTOR_LONG_OPEN_DELAY (2 * 60)
35 #define XRD_ADAPTOR_SOURCE_QUALITY_FUDGE 100
38 #define XRD_ADAPTOR_CHUNK_THRESHOLD 1000
41 #include <mach/clock.h>
42 #include <mach/mach.h>
43 #define GET_CLOCK_MONOTONIC(ts) \
45 clock_serv_t cclock; \
46 mach_timespec_t mts; \
47 host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &cclock); \
48 clock_get_time(cclock, &mts); \
49 mach_port_deallocate(mach_task_self(), cclock); \
50 ts.tv_sec = mts.tv_sec; \
51 ts.tv_nsec = mts.tv_nsec; \
// Stores the current CLOCK_MONOTONIC reading into `ts` via clock_gettime().
// `ts` must be an lvalue of type `struct timespec`; it is parenthesized so that
// compound lvalue expressions (e.g. a conditional) have their address taken as a whole.
// The expansion keeps its trailing semicolon so existing call sites are unaffected.
#define GET_CLOCK_MONOTONIC(ts) clock_gettime(CLOCK_MONOTONIC, &(ts));
60 long long diff = (
a.tv_sec -
b.tv_sec) * 1000;
61 diff += (
a.tv_nsec -
b.tv_nsec) / 1e6;
72 XrdCl::Buffer *
buffer =
nullptr;
74 response->Set(static_cast<int *>(
nullptr));
96 file.GetProperty(
"LastURL", lastUrl);
97 if (jobId && !lastUrl.empty()) {
99 XrdCl::FileSystem fs(
url);
100 if (!(fs.SendInfo(jobId, &
nullHandler, 30).IsOK())) {
102 <<
"Failed to send the monitoring information, monitoring ID is " << jobId <<
".";
104 edm::LogInfo(
"XrdAdaptorInternal") <<
"Set monitoring ID to " << jobId <<
".";
109 : m_serverToAdvertise(nullptr),
110 m_timeout(XRD_DEFAULT_TIMEOUT),
111 m_nextInitialSourceToggle(
false),
115 m_distribution(0, 100),
116 m_excluded_active_count(0) {}
121 XrdCl::Env *
env = XrdCl::DefaultEnv::GetEnv();
134 std::unique_ptr<XrdCl::File>
file;
136 bool validFile =
false;
137 const int retries = 5;
139 for (
int idx = 0;
idx < retries;
idx++) {
143 m_name + (!opaque.empty() ? ((
m_name.find(
"?") ==
m_name.npos) ?
"?" :
"&") + opaque :
"");
144 SyncHostResponseHandler handler;
145 XrdCl::XRootDStatus openStatus =
file->Open(new_filename,
m_flags,
m_perms, &handler);
154 ex <<
"XrdCl::File::Open(name='" <<
m_name <<
"', flags=0x" << std::hex <<
m_flags <<
", permissions=0"
155 << std::oct <<
m_perms <<
std::dec <<
") => error '" << openStatus.ToStr() <<
"' (errno=" << openStatus.errNo
156 <<
", code=" << openStatus.code <<
")";
158 ex.
addAdditionalInfo(
"Remote server already encountered a fatal error; no redirections were performed.");
161 handler.WaitForResponse();
162 std::unique_ptr<XrdCl::XRootDStatus>
status = handler.GetStatus();
163 std::unique_ptr<XrdCl::HostList> hostList = handler.GetHosts();
173 ex <<
"XrdCl::File::Open(name='" <<
m_name <<
"', flags=0x" << std::hex <<
m_flags <<
", permissions=0"
175 <<
", code=" <<
status->code <<
")";
179 file->GetProperty(
"DataServer", dataServer);
180 file->GetProperty(
"LastURL", lastUrl);
181 if (!dataServer.empty()) {
184 if (!lastUrl.empty()) {
186 edm::LogWarning(
"XrdAdaptorInternal") <<
"Failed to open file at URL " << lastUrl <<
".";
190 ex <<
". No additional data servers were found.";
193 if (!dataServer.empty()) {
198 if (lastUrl == new_filename) {
199 edm::LogWarning(
"XrdAdaptorInternal") << lastUrl <<
", " << new_filename;
243 std::unique_ptr<std::string> hostname(hostname_ptr);
252 auto hostname = std::make_unique<std::string>(
id);
262 std::string formatSites(std::vector<std::shared_ptr<Source>>
const &iSources) {
264 if (!iSources.empty()) {
265 siteA = iSources[0]->Site();
267 if (iSources.size() == 2) {
268 siteB = iSources[1]->Site();
271 if (!siteB.empty() && (siteB != siteA)) {
272 siteList = siteA +
", " + siteB;
279 std::vector<std::shared_ptr<Source>>
const &iNew,
281 auto siteList = formatSites(iNew);
282 if (!orig_site.empty() && (orig_site != siteList)) {
283 edm::LogWarning(
"XrdAdaptor") <<
"Data is served from " << siteList <<
" instead of original site " << orig_site;
285 auto oldSites = formatSites(iOld);
286 if (orig_site.empty() && (siteList != oldSites)) {
287 if (!oldSites.empty())
288 edm::LogWarning(
"XrdAdaptor") <<
"Data is now served from " << siteList <<
" instead of previous " << oldSites;
296 std::vector<std::shared_ptr<Source>> &inactiveSources) {
315 std::vector<std::shared_ptr<Source>> &inactiveSources)
const {
320 bool findNewSource =
false;
325 <<
"Removing " <<
activeSources[
a]->PrettyID() <<
" from active sources due to poor quality ("
328 findNewSource =
true;
336 return findNewSource;
342 std::vector<std::shared_ptr<Source>> &inactiveSources) {
343 bool findNewSource =
false;
345 findNewSource =
true;
348 <<
", source 1 quality " <<
activeSources[1]->getQuality() << std::endl;
354 std::vector<std::shared_ptr<Source>> eligibleInactiveSources;
355 eligibleInactiveSources.reserve(inactiveSources.size());
356 for (
const auto &
source : inactiveSources) {
358 eligibleInactiveSources.push_back(
source);
361 auto bestInactiveSource =
362 std::min_element(eligibleInactiveSources.begin(),
363 eligibleInactiveSources.end(),
364 [](
const std::shared_ptr<Source> &s1,
const std::shared_ptr<Source> &
s2) {
365 return s1->getQuality() <
s2->getQuality();
367 auto worstActiveSource = std::max_element(
activeSources.cbegin(),
369 [](
const std::shared_ptr<Source> &s1,
const std::shared_ptr<Source> &
s2) {
370 return s1->getQuality() <
s2->getQuality();
372 if (bestInactiveSource != eligibleInactiveSources.end() && bestInactiveSource->get()) {
373 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Best inactive source: " << (*bestInactiveSource)->PrettyID()
374 <<
", quality " << (*bestInactiveSource)->getQuality();
376 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Worst active source: " << (*worstActiveSource)->PrettyID()
377 <<
", quality " << (*worstActiveSource)->getQuality();
380 if ((bestInactiveSource != eligibleInactiveSources.end()) &&
activeSources.size() == 1 &&
381 ((*bestInactiveSource)->getQuality() < 4 *
activeSources[0]->getQuality())) {
385 for (
auto it = inactiveSources.begin(); it != inactiveSources.end(); it++)
386 if (it->get() == bestInactiveSource->get()) {
387 inactiveSources.erase(it);
391 while ((bestInactiveSource != eligibleInactiveSources.end()) &&
392 (*worstActiveSource)->getQuality() >
395 <<
"Removing " << (*worstActiveSource)->PrettyID() <<
" from active sources due to quality ("
396 << (*worstActiveSource)->getQuality() <<
") and promoting " << (*bestInactiveSource)->PrettyID()
397 <<
" (quality: " << (*bestInactiveSource)->getQuality() <<
")" << std::endl;
398 (*worstActiveSource)->setLastDowngrade(
now);
399 for (
auto it = inactiveSources.begin(); it != inactiveSources.end(); it++)
400 if (it->get() == bestInactiveSource->get()) {
401 inactiveSources.erase(it);
404 inactiveSources.emplace_back(
std::move(*worstActiveSource));
409 eligibleInactiveSources.clear();
410 for (
const auto &
source : inactiveSources)
412 eligibleInactiveSources.push_back(
source);
413 bestInactiveSource = std::min_element(eligibleInactiveSources.begin(),
414 eligibleInactiveSources.end(),
415 [](
const std::shared_ptr<Source> &s1,
const std::shared_ptr<Source> &
s2) {
416 return s1->getQuality() <
s2->getQuality();
420 [](
const std::shared_ptr<Source> &s1,
const std::shared_ptr<Source> &
s2) {
421 return s1->getQuality() <
s2->getQuality();
427 findNewSource =
true;
449 ex <<
"XrdAdaptor::RequestManager::getActiveFile(name='" <<
m_name <<
"', flags=0x" << std::hex <<
m_flags
450 <<
", permissions=0" << std::oct <<
m_perms <<
std::dec <<
") => Source used after fatal exception.";
451 ex.
addContext(
"In XrdAdaptor::RequestManager::handle()");
482 std::vector<std::string>
sources;
495 std::shared_ptr<Source>
source =
nullptr;
508 ex <<
"XrdAdaptor::RequestManager::handle read(name='" <<
m_name <<
"', flags=0x" << std::hex <<
m_flags
509 <<
", permissions=0" << std::oct <<
m_perms <<
std::dec <<
") => Source used after fatal exception.";
510 ex.
addContext(
"In XrdAdaptor::RequestManager::handle()");
525 std::vector<std::shared_ptr<Source>>
activeSources, inactiveSources;
533 std::shared_ptr<void *> guard(
nullptr, [
this, &
activeSources, &inactiveSources](
void *) {
544 return c_ptr->get_future();
548 std::stringstream
ss;
556 ss << it->ExcludeID().substr(0, it->ExcludeID().find(
":")) <<
",";
560 ss << it->ExcludeID().substr(0, it->ExcludeID().find(
":")) <<
",";
565 ss << it.substr(0, it.find(
":")) <<
",";
569 return tmp_str.substr(0, tmp_str.size() - 1);
575 std::lock_guard<std::recursive_mutex> sentry(m_source_mutex);
577 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Successfully opened new source: " <<
source->PrettyID() << std::endl;
578 for (
const auto &
s : m_activeSources) {
581 <<
"Xrootd server returned excluded source " <<
source->PrettyID() <<
"; ignoring" << std::endl;
582 unsigned returned_count = ++m_excluded_active_count;
584 if (returned_count >= 3) {
590 for (
const auto &
s : m_inactiveSources) {
593 <<
"Xrootd server returned excluded inactive source " <<
source->PrettyID() <<
"; ignoring" << std::endl;
598 if (m_activeSources.size() < 2) {
599 auto oldSources = m_activeSources;
600 m_activeSources.push_back(
source);
601 reportSiteChange(oldSources, m_activeSources);
602 queueUpdateCurrentServer(
source->ID());
604 m_inactiveSources.push_back(
source);
607 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Got failure when trying to open a new source" << std::endl;
616 std::vector<std::shared_ptr<Source>>
activeSources, inactiveSources;
618 std::lock_guard<std::recursive_mutex> sentry(m_source_mutex);
620 inactiveSources = m_inactiveSources;
623 std::shared_ptr<void *> guard(
nullptr, [
this, &
activeSources, &inactiveSources](
void *) {
624 std::lock_guard<std::recursive_mutex> sentry(m_source_mutex);
626 m_inactiveSources =
std::move(inactiveSources);
629 updateCurrentServer();
638 auto c_ptr = std::make_shared<XrdAdaptor::ClientRequest>(*
this, iolist);
641 return c_ptr->get_future();
646 ex <<
"XrdAdaptor::RequestManager::handle readv(name='" << m_name <<
"', flags=0x" << std::hex << m_flags
647 <<
", permissions=0" << std::oct << m_perms <<
std::dec <<
") => Source used after fatal exception.";
648 ex.
addContext(
"In XrdAdaptor::RequestManager::handle()");
654 auto req1 = std::make_shared<std::vector<IOPosBuffer>>();
655 auto req2 = std::make_shared<std::vector<IOPosBuffer>>();
658 checkSources(
now, req1->size() + req2->size(),
activeSources, inactiveSources);
661 auto c_ptr = std::make_shared<XrdAdaptor::ClientRequest>(*
this, iolist);
663 return c_ptr->get_future();
666 std::shared_ptr<XrdAdaptor::ClientRequest> c_ptr1, c_ptr2;
667 std::future<IOSize> future1, future2;
668 if (!req1->empty()) {
671 future1 = c_ptr1->get_future();
673 if (!req2->empty()) {
676 future2 = c_ptr2->get_future();
678 if (!req1->empty() && !req2->empty()) {
679 std::future<IOSize>
task = std::async(
680 std::launch::deferred,
681 [](std::future<IOSize>
a, std::future<IOSize>
b) {
695 return b.get() +
a.get();
702 }
else if (!req1->empty()) {
704 }
else if (!req2->empty()) {
707 std::promise<IOSize>
p;
709 return p.get_future();
714 std::shared_ptr<Source> source_ptr = c_ptr->getCurrentSource();
717 if (c_status.code == XrdCl::errInvalidResponse) {
718 edm::LogWarning(
"XrdAdaptorInternal") <<
"Invalid response when reading from " << source_ptr->PrettyID();
720 ex <<
"XrdAdaptor::RequestManager::requestFailure readv(name='" <<
m_name <<
"', flags=0x" << std::hex <<
m_flags
721 <<
", permissions=0" << std::oct <<
m_perms <<
std::dec <<
", old source=" << source_ptr->PrettyID()
722 <<
") => Invalid ReadV response from server";
723 ex.
addContext(
"In XrdAdaptor::RequestManager::requestFailure()");
727 edm::LogWarning(
"XrdAdaptorInternal") <<
"Request failure when reading from " << source_ptr->PrettyID();
746 std::shared_ptr<Source> new_source;
748 std::shared_future<std::shared_ptr<Source>> future =
m_open_handler->open();
759 ex <<
"XrdAdaptor::RequestManager::requestFailure Open(name='" <<
m_name <<
"', flags=0x" << std::hex <<
m_flags
760 <<
", permissions=0" << std::oct <<
m_perms <<
std::dec <<
", old source=" << source_ptr->PrettyID()
761 <<
") => timeout when waiting for file open";
762 ex.
addContext(
"In XrdAdaptor::RequestManager::requestFailure()");
767 new_source = future.get();
769 ex.
addContext(
"Handling XrdAdaptor::RequestManager::requestFailure()");
779 ex <<
"XrdAdaptor::RequestManager::requestFailure Open(name='" <<
m_name <<
"', flags=0x" << std::hex <<
m_flags
780 <<
", permissions=0" << std::oct <<
m_perms <<
std::dec <<
", old source=" << source_ptr->PrettyID()
781 <<
", new source=" << new_source->PrettyID() <<
") => Xrootd server returned an excluded source";
782 ex.
addContext(
"In XrdAdaptor::RequestManager::requestFailure()");
794 new_source->handle(c_ptr);
798 std::vector<IOPosBuffer> &
input,
799 std::vector<IOPosBuffer> &
output,
804 if (io.
size() > chunksize) {
812 consumed = chunksize;
816 consumed = chunksize;
819 chunksize -= consumed;
822 void *newdata = static_cast<char *>(io.
data()) + consumed;
826 }
else if (io.
size() == 0) {
830 chunksize -= io.
size();
837 std::vector<IOPosBuffer> &
input,
838 std::vector<IOPosBuffer> &
output,
843 if (io.
size() > chunksize) {
851 consumed = chunksize;
855 consumed = chunksize;
858 chunksize -= consumed;
861 void *newdata = static_cast<char *>(io.
data()) + consumed;
865 }
else if (io.
size() == 0) {
869 chunksize -= io.
size();
877 off_t last_offset = -1;
878 for (
const auto &it : req) {
880 assert(it.offset() > last_offset);
881 last_offset = it.offset();
883 assert(it.offset() < 0x1ffffffffff);
885 assert(req.size() <= 1024);
890 std::vector<IOPosBuffer> &req1,
891 std::vector<IOPosBuffer> &req2,
892 std::vector<std::shared_ptr<Source>>
const &
activeSources)
const {
895 std::vector<IOPosBuffer> tmp_iolist(iolist.begin(), iolist.end());
896 req1.reserve(iolist.size() / 2 + 1);
897 req2.reserve(iolist.size() / 2 + 1);
906 static_cast<IOSize>(1024));
908 static_cast<IOSize>(1024));
911 for (
const auto &it : iolist)
912 size_orig += it.size();
914 while (tmp_iolist.size() - front > 0) {
923 ex <<
"XrdAdaptor::RequestManager::splitClientRequest(name='" << m_name <<
"', flags=0x" << std::hex << m_flags
924 <<
", permissions=0" << std::oct << m_perms <<
std::dec
925 <<
") => Unable to split request between active servers. This is an unexpected internal error and should be "
926 "reported to CMSSW developers.";
927 ex.
addContext(
"In XrdAdaptor::RequestManager::requestFailure()");
929 std::stringstream
ss;
930 ss <<
"Original request size " << iolist.size() <<
"(" << size_orig <<
" bytes)";
932 std::stringstream ss2;
933 ss2 <<
"Quality source 1 " <<
q1 - 5 <<
", quality source 2: " <<
q2 - 5;
945 return left.
offset() < right.offset();
948 return left.
offset() < right.offset();
954 assert(size_orig == size1 + size2);
956 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Original request size " << iolist.size() <<
" (" << size_orig
957 <<
" bytes) split into requests size " << req1.size() <<
" (" << size1
958 <<
" bytes) and " << req2.size() <<
" (" << size2 <<
" bytes)" << std::endl;
969 XrdCl::HostList *hostList_ptr) {
971 std::shared_ptr<OpenHandler>
self = m_self;
978 this, [&](
OpenHandler *) { m_outstanding_open =
false; });
980 std::shared_ptr<Source>
source;
981 std::unique_ptr<XrdCl::XRootDStatus>
status(status_ptr);
982 std::unique_ptr<XrdCl::HostList> hostList(hostList_ptr);
984 auto manager = m_manager.lock();
992 std::unique_ptr<XrdCl::File> releaseFile;
994 std::lock_guard<std::recursive_mutex> sentry(m_mutex);
1005 m_promise.set_value(
source);
1009 ex <<
"XrdCl::File::Open(name='" << manager->m_name <<
"', flags=0x" << std::hex << manager->m_flags
1010 <<
", permissions=0" << std::oct << manager->m_perms <<
std::dec <<
") => error '" <<
status->ToStr()
1011 <<
"' (errno=" <<
status->errNo <<
", code=" <<
status->code <<
")";
1012 ex.
addContext(
"In XrdAdaptor::RequestManager::OpenHandler::HandleResponseWithHosts()");
1013 manager->addConnections(ex);
1015 m_promise.set_exception(std::make_exception_ptr(ex));
1022 std::lock_guard<std::recursive_mutex> sentry(m_mutex);
1024 if (!m_file.get()) {
1025 return "(no open in progress)";
1028 m_file->GetProperty(
"DataServer", dataServer);
1029 if (dataServer.empty()) {
1030 return "(unknown source)";
1036 auto manager_ptr = m_manager.lock();
1039 ex <<
"XrdCl::File::Open() =>"
1040 <<
" error: OpenHandler called within an invalid RequestManager context."
1041 <<
" This is a logic error and should be reported to the CMSSW developers.";
1042 ex.
addContext(
"Calling XrdAdaptor::RequestManager::OpenHandler::open()");
1046 auto self_ptr = m_self_weak.lock();
1049 ex <<
"XrdCl::File::Open() => error: "
1050 <<
"OpenHandler called after it was deleted. This is a logic error "
1051 <<
"and should be reported to the CMSSW developers.";
1052 ex.
addContext(
"Calling XrdAdapter::RequestManager::OpenHandler::open()");
1065 if (m_outstanding_open) {
1066 return m_shared_future;
1068 std::lock_guard<std::recursive_mutex> sentry(m_mutex);
1069 std::promise<std::shared_ptr<Source>> new_promise;
1070 m_promise.swap(new_promise);
1071 m_shared_future = m_promise.get_future().share();
1075 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Trying to open URL: " << new_name;
1077 m_outstanding_open =
true;
1081 m_outstanding_open =
false;
1085 XrdCl::XRootDStatus
status;
1088 ex <<
"XrdCl::File::Open(name='" << new_name <<
"', flags=0x" << std::hex << manager.
m_flags <<
", permissions=0"
1090 <<
", code=" <<
status.code <<
")";
1091 ex.
addContext(
"Calling XrdAdaptor::RequestManager::OpenHandler::open()");
1095 exit_guard.release();
1098 return m_shared_future;