7 #include "XrdCl/XrdClFile.hh"
8 #include "XrdCl/XrdClDefaultEnv.hh"
18 #include "Utilities/XrdAdaptor/src/XrdHostHandler.hh"
20 #define XRD_CL_MAX_CHUNK 512*1024
22 #define XRD_ADAPTOR_SHORT_OPEN_DELAY 5
24 #ifdef XRD_FAKE_OPEN_PROBE
25 #define XRD_ADAPTOR_OPEN_PROBE_PERCENT 100
26 #define XRD_ADAPTOR_LONG_OPEN_DELAY 20
28 #define XRD_ADAPTOR_SOURCE_QUALITY_FUDGE 0
30 #define XRD_ADAPTOR_OPEN_PROBE_PERCENT 10
31 #define XRD_ADAPTOR_LONG_OPEN_DELAY 2*60
32 #define XRD_ADAPTOR_SOURCE_QUALITY_FUDGE 100
36 #include <mach/clock.h>
37 #include <mach/mach.h>
38 #define GET_CLOCK_MONOTONIC(ts) \
40 clock_serv_t cclock; \
41 mach_timespec_t mts; \
42 host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &cclock); \
43 clock_get_time(cclock, &mts); \
44 mach_port_deallocate(mach_task_self(), cclock); \
45 ts.tv_sec = mts.tv_sec; \
46 ts.tv_nsec = mts.tv_nsec; \
49 #define GET_CLOCK_MONOTONIC(ts) \
50 clock_gettime(CLOCK_MONOTONIC, &ts);
53 using namespace XrdAdaptor;
57 long long diff = (a.tv_sec - b.tv_sec) * 1000;
58 diff += (a.tv_nsec - b.tv_nsec) / 1e6;
72 XrdCl::Buffer *buffer =
nullptr;
73 response->Get(buffer);
93 file.GetProperty(
"LastURL", lastUrl);
94 if (jobId && lastUrl.size())
96 XrdCl::FileSystem fs = XrdCl::FileSystem(XrdCl::URL(lastUrl));
98 edm::LogInfo(
"XrdAdaptorInternal") <<
"Set monitoring ID to " << jobId <<
".";
104 : m_timeout(XRD_DEFAULT_TIMEOUT),
105 m_nextInitialSourceToggle(
false),
109 m_distribution(0,100),
110 m_excluded_active_count(0)
120 XrdCl::Env *
env = XrdCl::DefaultEnv::GetEnv();
121 if (env) {env->GetInt(
"StreamErrorWindow",
m_timeout);}
133 std::unique_ptr<XrdCl::File>
file;
135 bool validFile =
false;
136 const int retries = 5;
143 SyncHostResponseHandler handler;
144 XrdCl::XRootDStatus openStatus = file->Open(new_filename,
m_flags,
m_perms, &handler);
145 if (!openStatus.IsOK())
153 ex <<
"XrdCl::File::Open(name='" <<
m_name
154 <<
"', flags=0x" << std::hex <<
m_flags
156 <<
") => error '" << openStatus.ToStr()
157 <<
"' (errno=" << openStatus.errNo <<
", code=" << openStatus.code <<
")";
159 ex.
addAdditionalInfo(
"Remote server already encountered a fatal error; no redirections were performed.");
162 handler.WaitForResponse();
163 std::unique_ptr<XrdCl::XRootDStatus>
status = handler.GetStatus();
164 std::unique_ptr<XrdCl::HostList> hostList = handler.GetHosts();
177 ex <<
"XrdCl::File::Open(name='" <<
m_name
178 <<
"', flags=0x" << std::hex <<
m_flags
180 <<
") => error '" << status->ToStr()
181 <<
"' (errno=" << status->errNo <<
", code=" << status->code <<
")";
185 file->GetProperty(
"DataServer", dataServer);
186 file->GetProperty(
"LastURL", lastUrl);
187 if (dataServer.size())
194 edm::LogWarning(
"XrdAdaptorInternal") <<
"Failed to open file at URL " << lastUrl <<
".";
198 ex <<
". No additional data servers were found.";
201 if (dataServer.size())
207 if (lastUrl == new_filename)
209 edm::LogWarning(
"XrdAdaptorInternal") << lastUrl <<
", " << new_filename;
243 if (siteB.size() && (siteB != siteA)) {siteList = siteA +
", " + siteB;}
244 if (orig_site.size() && (orig_site != siteList))
246 edm::LogWarning(
"XrdAdaptor") <<
"Data is served from " << siteList <<
" instead of original site " << orig_site;
252 m_activeSites = siteList;
284 bool findNewSource =
false;
289 <<
m_activeSources[
a]->PrettyID() <<
" from active sources due to poor quality ("
291 if (
m_activeSources[a]->getLastDowngrade().tv_sec != 0) {findNewSource =
true;}
297 return findNewSource;
305 bool findNewSource =
false;
308 findNewSource =
true;
318 std::vector<std::shared_ptr<Source> > eligibleInactiveSources; eligibleInactiveSources.reserve(
m_inactiveSources.size());
323 std::vector<std::shared_ptr<Source> >::iterator bestInactiveSource = std::min_element(eligibleInactiveSources.begin(), eligibleInactiveSources.end(),
324 [](
const std::shared_ptr<Source> &s1,
const std::shared_ptr<Source> &
s2) {
return s1->getQuality() <
s2->getQuality();});
326 [](
const std::shared_ptr<Source> &s1,
const std::shared_ptr<Source> &
s2) {
return s1->getQuality() <
s2->getQuality();});
327 if (bestInactiveSource != eligibleInactiveSources.end() && bestInactiveSource->get())
329 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Best inactive source: " <<(*bestInactiveSource)->PrettyID()
330 <<
", quality " << (*bestInactiveSource)->getQuality();
332 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Worst active source: " <<(*worstActiveSource)->PrettyID()
333 <<
", quality " << (*worstActiveSource)->getQuality();
336 if ((bestInactiveSource != eligibleInactiveSources.end()) &&
m_activeSources.size() == 1 && ((*bestInactiveSource)->getQuality() < 4*
m_activeSources[0]->getQuality()))
340 for (
auto it = m_inactiveSources.begin(); it != m_inactiveSources.end(); it++)
if (it->get() == bestInactiveSource->get()) {m_inactiveSources.erase(it);
break;}
342 else while ((bestInactiveSource != eligibleInactiveSources.end()) && (*worstActiveSource)->getQuality() > (*bestInactiveSource)->getQuality()+
XRD_ADAPTOR_SOURCE_QUALITY_FUDGE)
344 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Removing " << (*worstActiveSource)->PrettyID()
345 <<
" from active sources due to quality (" << (*worstActiveSource)->getQuality()
346 <<
") and promoting " << (*bestInactiveSource)->PrettyID() <<
" (quality: "
347 << (*bestInactiveSource)->getQuality() <<
")" << std::endl;
348 (*worstActiveSource)->setLastDowngrade(now);
349 for (
auto it = m_inactiveSources.begin(); it != m_inactiveSources.end(); it++)
if (it->get() == bestInactiveSource->get()) {m_inactiveSources.erase(it);
break;}
350 m_inactiveSources.emplace_back(
std::move(*worstActiveSource));
354 eligibleInactiveSources.clear();
356 bestInactiveSource = std::min_element(eligibleInactiveSources.begin(), eligibleInactiveSources.end(),
357 [](
const std::shared_ptr<Source> &s1,
const std::shared_ptr<Source> &
s2) {
return s1->getQuality() <
s2->getQuality();});
359 [](
const std::shared_ptr<Source> &s1,
const std::shared_ptr<Source> &
s2) {
return s1->getQuality() <
s2->getQuality();});
366 findNewSource =
true;
388 std::shared_ptr<XrdCl::File>
401 sources.push_back(
source->ID());
411 sources.push_back(
source->PrettyID());
421 sources.push_back(
source);
428 std::vector<std::string> sources;
430 for (
auto const&
source : sources)
436 for (
auto const&
source : sources)
442 std::shared_ptr<Source>
445 std::shared_ptr<Source>
source =
nullptr;
478 source->handle(c_ptr);
479 return c_ptr->get_future();
486 std::stringstream
ss;
492 ss << it->ExcludeID().substr(0, it->ExcludeID().find(
":")) <<
",";
497 ss << it->ExcludeID().substr(0, it->ExcludeID().find(
":")) <<
",";
502 ss << it.substr(0, it.find(
":")) <<
",";
507 return tmp_str.substr(0, tmp_str.size()-1);
515 std::lock_guard<std::recursive_mutex> sentry(m_source_mutex);
518 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Successfully opened new source: " << source->PrettyID() << std::endl;
519 for (
const auto &
s : m_activeSources)
521 if (source->ID() ==
s->ID())
523 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Xrootd server returned excluded source " << source->PrettyID()
524 <<
"; ignoring" << std::endl;
525 unsigned returned_count = ++m_excluded_active_count;
531 for (
const auto &
s : m_inactiveSources)
533 if (source->ID() ==
s->ID())
535 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Xrootd server returned excluded inactive source " << source->PrettyID()
536 <<
"; ignoring" << std::endl;
541 if (m_activeSources.size() < 2)
543 m_activeSources.push_back(source);
548 m_inactiveSources.push_back(source);
553 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Got failure when trying to open a new source" << std::endl;
561 std::lock_guard<std::recursive_mutex> sentry(m_source_mutex);
569 assert(m_activeSources.size());
570 if (m_activeSources.size() == 1)
573 checkSources(now, c_ptr->getSize());
574 m_activeSources[0]->handle(c_ptr);
575 return c_ptr->get_future();
579 std::shared_ptr<std::vector<IOPosBuffer> > req1(
new std::vector<IOPosBuffer>);
580 std::shared_ptr<std::vector<IOPosBuffer> > req2(
new std::vector<IOPosBuffer>);
581 splitClientRequest(*iolist, *req1, *req2);
583 checkSources(now, req1->size() + req2->size());
585 if (m_activeSources.size() == 1)
588 m_activeSources[0]->handle(c_ptr);
589 return c_ptr->get_future();
592 std::shared_ptr<XrdAdaptor::ClientRequest> c_ptr1, c_ptr2;
593 std::future<IOSize> future1, future2;
597 m_activeSources[0]->handle(c_ptr1);
598 future1 = c_ptr1->get_future();
603 m_activeSources[1]->handle(c_ptr2);
604 future2 = c_ptr2->get_future();
606 if (req1->size() && req2->size())
608 std::future<IOSize> task = std::async(std::launch::deferred,
609 [](std::future<IOSize>
a, std::future<IOSize>
b){
622 return b.get() + a.get();
630 else if (req1->size()) {
return future1; }
631 else if (req2->size()) {
return future2; }
634 std::promise<IOSize>
p; p.set_value(0);
635 return p.get_future();
643 std::shared_ptr<Source> source_ptr = c_ptr->getCurrentSource();
646 if (c_status.code == XrdCl::errInvalidResponse)
648 edm::LogWarning(
"XrdAdaptorInternal") <<
"Invalid response when reading from " << source_ptr->PrettyID();
650 ex <<
"XrdAdaptor::RequestManager::requestFailure readv(name='" <<
m_name
651 <<
"', flags=0x" << std::hex <<
m_flags
653 <<
", old source=" << source_ptr->PrettyID()
654 <<
") => Invalid ReadV response from server";
655 ex.
addContext(
"In XrdAdaptor::RequestManager::requestFailure()");
659 edm::LogWarning(
"XrdAdaptorInternal") <<
"Request failure when reading from " << source_ptr->PrettyID();
678 std::shared_ptr<Source> new_source;
681 std::shared_future<std::shared_ptr<Source> > future =
m_open_handler->open();
690 if (status == std::future_status::timeout)
693 ex <<
"XrdAdaptor::RequestManager::requestFailure Open(name='" <<
m_name
694 <<
"', flags=0x" << std::hex <<
m_flags
696 <<
", old source=" << source_ptr->PrettyID()
698 <<
") => timeout when waiting for file open";
699 ex.
addContext(
"In XrdAdaptor::RequestManager::requestFailure()");
707 new_source = future.get();
711 ex.
addContext(
"Handling XrdAdaptor::RequestManager::requestFailure()");
722 ex <<
"XrdAdaptor::RequestManager::requestFailure Open(name='" <<
m_name
723 <<
"', flags=0x" << std::hex <<
m_flags
725 <<
", old source=" << source_ptr->PrettyID()
726 <<
", new source=" << new_source->PrettyID() <<
") => Xrootd server returned an excluded source";
727 ex.
addContext(
"In XrdAdaptor::RequestManager::requestFailure()");
738 new_source->handle(c_ptr);
744 while ((chunksize > 0) && (front < input.size()))
748 if (io.
size() > chunksize)
760 consumed = chunksize;
766 consumed = chunksize;
769 chunksize -= consumed;
772 void* newdata =
static_cast<char*
>(io.
data()) + consumed;
777 else if (io.
size() == 0)
783 output.push_back(io);
784 chunksize -= io.
size();
793 while ((chunksize > 0) && (front < input.size()))
797 if (io.
size() > chunksize)
809 consumed = chunksize;
815 consumed = chunksize;
818 chunksize -= consumed;
821 void* newdata =
static_cast<char*
>(io.
data()) + consumed;
826 else if (io.
size() == 0)
832 output.push_back(io);
833 chunksize -= io.
size();
842 off_t last_offset = -1;
843 for (
const auto & it : req)
846 assert(it.offset() > last_offset);
847 last_offset = it.offset();
849 assert(it.offset() < 0x1ffffffffff);
851 assert(req.size() <= 1024);
858 if (iolist.size() == 0)
return;
859 std::vector<IOPosBuffer> tmp_iolist(iolist.begin(), iolist.end());
860 req1.reserve(iolist.size()/2+1);
861 req2.reserve(iolist.size()/2+1);
865 float q1 =
static_cast<float>(m_activeSources[0]->getQuality())+5;
866 float q2 =
static_cast<float>(m_activeSources[1]->getQuality())+5;
872 for (
const auto & it : iolist) size_orig += it.size();
874 while (tmp_iolist.size()-front > 0)
876 if ((req1.size() >= 1000) && (req2.size() >= 1000))
883 ex <<
"XrdAdaptor::RequestManager::splitClientRequest(name='" << m_name
884 <<
"', flags=0x" << std::hex << m_flags
885 <<
", permissions=0" << std::oct << m_perms <<
std::dec
886 <<
") => Unable to split request between active servers. This is an unexpected internal error and should be reported to CMSSW developers.";
887 ex.
addContext(
"In XrdAdaptor::RequestManager::requestFailure()");
889 std::stringstream
ss; ss <<
"Original request size " << iolist.size() <<
"(" << size_orig <<
" bytes)";
891 std::stringstream ss2; ss2 <<
"Quality source 1 " << q1-5 <<
", quality source 2: " << q2-5;
896 if (req2.size() < 1000) {
consumeChunkBack(front, tmp_iolist, req2, chunk2);}
904 assert(size_orig == size1 + size2);
906 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Original request size " << iolist.size() <<
" (" << size_orig <<
" bytes) split into requests size " << req1.size() <<
" (" << size1 <<
" bytes) and " << req2.size() <<
" (" << size2 <<
" bytes)" << std::endl;
925 std::shared_ptr<Source>
source;
926 std::unique_ptr<XrdCl::XRootDStatus>
status(status_ptr);
927 std::unique_ptr<XrdCl::HostList> hostList(hostList_ptr);
930 std::shared_ptr<OpenHandler>
self = m_self;
933 auto manager = m_manager.lock();
942 std::unique_ptr<XrdCl::File> releaseFile;
944 std::lock_guard<std::recursive_mutex> sentry(m_mutex);
956 m_promise.set_value(source);
962 ex <<
"XrdCl::File::Open(name='" << manager->m_name
963 <<
"', flags=0x" << std::hex << manager->m_flags
964 <<
", permissions=0" << std::oct << manager->m_perms <<
std::dec
965 <<
") => error '" << status->ToStr()
966 <<
"' (errno=" << status->errNo <<
", code=" << status->code <<
")";
967 ex.
addContext(
"In XrdAdaptor::RequestManager::OpenHandler::HandleResponseWithHosts()");
968 manager->addConnections(ex);
970 m_promise.set_exception(std::make_exception_ptr(ex));
973 manager->handleOpen(*status, source);
979 std::lock_guard<std::recursive_mutex> sentry(m_mutex);
983 return "(no open in progress)";
986 m_file->GetProperty(
"DataServer", dataServer);
987 if (!dataServer.size()) {
return "(unknown source)"; }
991 std::shared_future<std::shared_ptr<Source> >
994 std::lock_guard<std::recursive_mutex> sentry(m_mutex);
995 auto manager_ptr = m_manager.lock();
999 ex <<
"XrdCl::File::Open() =>"
1000 <<
" error: OpenHandler called within an invalid RequestManager context."
1001 <<
" This is a logic error and should be reported to the CMSSW developers.";
1002 ex.
addContext(
"Calling XrdAdaptor::RequestManager::OpenHandler::open()");
1006 auto self_ptr = m_self_weak.lock();
1010 ex <<
"XrdCl::File::Open() => error: "
1011 <<
"OpenHandler called after it was deleted. This is a logic error "
1012 <<
"and should be reported to the CMSSW developers.";
1013 ex.
addContext(
"Calling XrdAdapter::RequestManager::OpenHandler::open()");
1019 return m_shared_future;
1021 std::promise<std::shared_ptr<Source> > new_promise;
1022 m_promise.
swap(new_promise);
1023 m_shared_future = m_promise.get_future().share();
1027 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Trying to open URL: " << new_name;
1029 XrdCl::XRootDStatus
status;
1030 if (!(status = m_file->Open(new_name, manager.
m_flags, manager.
m_perms,
this)).IsOK())
1033 ex <<
"XrdCl::File::Open(name='" << new_name
1034 <<
"', flags=0x" << std::hex << manager.
m_flags
1036 <<
") => error '" << status.ToStr()
1037 <<
"' (errno=" << status.errNo <<
", code=" << status.code <<
")";
1038 ex.
addContext(
"Calling XrdAdaptor::RequestManager::OpenHandler::open()");
1044 return m_shared_future;
std::shared_future< std::shared_ptr< Source > > open()
RequestManager(const std::string &filename, XrdCl::OpenFlags::Flags flags, XrdCl::Access::Mode perms)
#define GET_CLOCK_MONOTONIC(ts)
std::uniform_real_distribution< float > m_distribution
static void determineHostExcludeString(XrdCl::File &file, const XrdCl::HostList *hostList, std::string &exclude)
std::vector< Variable::Flags > flags
void updateSiteInfo(std::string orig_site="")
std::set< std::string > m_disabledSourceStrings
OpenHandler(std::weak_ptr< RequestManager > manager)
virtual void handleOpen(XrdCl::XRootDStatus &status, std::shared_ptr< Source >)
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
SendMonitoringInfoHandler nullHandler
static std::string const input
bool compareSources(const timespec &now, unsigned a, unsigned b)
#define XRD_ADAPTOR_SOURCE_QUALITY_FUDGE
static void SendMonitoringInfo(XrdCl::File &file)
void set_data(void *new_buffer)
void set_size(IOSize new_size)
void swap(Exception &other)
void addConnections(cms::Exception &)
long long timeDiffMS(const timespec &a, const timespec &b)
std::set< std::shared_ptr< Source > > m_disabledSources
void set_offset(IOOffset new_offset)
static bool getDomain(const std::string &host, std::string &domain)
std::vector< std::shared_ptr< Source > > m_inactiveSources
static bool isDCachePool(XrdCl::File &file, const XrdCl::HostList *hostList=nullptr)
#define XRD_ADAPTOR_OPEN_PROBE_PERCENT
void addAdditionalInfo(std::string const &info)
timespec m_nextActiveSourceCheck
std::set< std::string > m_disabledExcludeStrings
std::shared_ptr< XrdCl::File > getActiveFile()
std::string current_source()
std::shared_ptr< OpenHandler > m_open_handler
IOOffset offset(void) const
void requestFailure(std::shared_ptr< XrdAdaptor::ClientRequest > c_ptr, XrdCl::Status &c_status)
virtual void HandleResponseWithHosts(XrdCl::XRootDStatus *status, XrdCl::AnyObject *response, XrdCl::HostList *hostList) override
std::string prepareOpaqueString()
XrdCl::OpenFlags::Flags m_flags
virtual void HandleResponse(XrdCl::XRootDStatus *status, XrdCl::AnyObject *response) override
static std::shared_ptr< OpenHandler > getInstance(std::weak_ptr< RequestManager > manager)
static void consumeChunkFront(size_t &front, std::vector< IOPosBuffer > &input, std::vector< IOPosBuffer > &output, IOSize chunksize)
tuple idx
DEBUGGING if hasattr(process,"trackMonIterativeTracking2012"): print "trackMonIterativeTracking2012 D...
void getActiveSourceNames(std::vector< std::string > &sources)
void checkSources(timespec &now, IOSize requestSize)
std::string m_activeSites
#define XRD_ADAPTOR_LONG_OPEN_DELAY
std::vector< std::shared_ptr< Source > > m_activeSources
XrdCl::Access::Mode m_perms
void addContext(std::string const &context)
void checkSourcesImpl(timespec &now, IOSize requestSize)
static const char * getJobID()
std::shared_ptr< Source > pickSingleSource()
static bool getXrootdSiteFromURL(std::string url, std::string &site)
void getDisabledSourceNames(std::vector< std::string > &sources)
static IOSize validateList(const std::vector< IOPosBuffer > req)
void getPrettyActiveSourceNames(std::vector< std::string > &sources)
#define XRD_ADAPTOR_SHORT_OPEN_DELAY
volatile std::atomic< bool > shutdown_flag false
static void consumeChunkBack(size_t front, std::vector< IOPosBuffer > &input, std::vector< IOPosBuffer > &output, IOSize chunksize)
void splitClientRequest(const std::vector< IOPosBuffer > &iolist, std::vector< IOPosBuffer > &req1, std::vector< IOPosBuffer > &req2)
std::future< IOSize > handle(void *into, IOSize size, IOOffset off)
static bool getHostname(const std::string &id, std::string &hostname)
static std::string const source
void clearAdditionalInfo()
std::recursive_mutex m_source_mutex
bool m_nextInitialSourceToggle
timespec m_lastSourceCheck
void initialize(std::weak_ptr< RequestManager > selfref)