7 #include "XrdCl/XrdClFile.hh"
8 #include "XrdCl/XrdClDefaultEnv.hh"
18 #include "Utilities/XrdAdaptor/src/XrdHostHandler.hh"
// Base chunk size (512 * 1024). The request-splitting code scales this value
// by the relative quality of the two active sources to size each source's share.
// The replacement list is parenthesized so the macro expands as a single value
// inside larger expressions (e.g. `n / XRD_CL_MAX_CHUNK`).
#define XRD_CL_MAX_CHUNK (512 * 1024)
// Short open delay.
// NOTE(review): the use site is not visible in this excerpt; presumably a
// number of seconds between attempts to open an additional source -- confirm.
#define XRD_ADAPTOR_SHORT_OPEN_DELAY 5
24 #ifdef XRD_FAKE_OPEN_PROBE
// Values defined directly after the `#ifdef XRD_FAKE_OPEN_PROBE` directive.
// NOTE(review): the use sites of the probe percentage and the long delay are
// not visible in this excerpt; units are presumably percent and seconds -- confirm.
25 #define XRD_ADAPTOR_OPEN_PROBE_PERCENT 100
26 #define XRD_ADAPTOR_LONG_OPEN_DELAY 20
// Margin added to the best inactive source's quality value when it is compared
// against the worst active source's quality to decide on a swap; 0 means no margin.
28 #define XRD_ADAPTOR_SOURCE_QUALITY_FUDGE 0
// NOTE(review): these look like the regular defaults, i.e. the values used when
// XRD_FAKE_OPEN_PROBE is not defined -- the enclosing conditional is not fully
// visible in this excerpt, so confirm.
#define XRD_ADAPTOR_OPEN_PROBE_PERCENT 10
// Parenthesized so the macro expands as a single value in any expression
// (e.g. `t / XRD_ADAPTOR_LONG_OPEN_DELAY`).
#define XRD_ADAPTOR_LONG_OPEN_DELAY (2 * 60)
// Margin added to the best inactive source's quality value when it is compared
// against the worst active source's quality to decide whether to swap them.
#define XRD_ADAPTOR_SOURCE_QUALITY_FUDGE 100
// NOTE(review): the use site is not visible in this excerpt; the name suggests
// a threshold applied to chunk/request counts -- confirm before relying on it.
35 #define XRD_ADAPTOR_CHUNK_THRESHOLD 1000
39 #include <mach/clock.h>
40 #include <mach/mach.h>
41 #define GET_CLOCK_MONOTONIC(ts) \
43 clock_serv_t cclock; \
44 mach_timespec_t mts; \
45 host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, &cclock); \
46 clock_get_time(cclock, &mts); \
47 mach_port_deallocate(mach_task_self(), cclock); \
48 ts.tv_sec = mts.tv_sec; \
49 ts.tv_nsec = mts.tv_nsec; \
// Fills `ts` by calling clock_gettime() with CLOCK_MONOTONIC.
// The expansion already ends in a semicolon, so `GET_CLOCK_MONOTONIC(x);`
// produces an extra empty statement at the call site.
52 #define GET_CLOCK_MONOTONIC(ts) \
53 clock_gettime(CLOCK_MONOTONIC, &ts);
56 using namespace XrdAdaptor;
60 long long diff = (a.tv_sec - b.tv_sec) * 1000;
61 diff += (a.tv_nsec - b.tv_nsec) / 1e6;
75 XrdCl::Buffer *buffer =
nullptr;
76 response->Get(buffer);
96 file.GetProperty(
"LastURL", lastUrl);
97 if (jobId && lastUrl.size())
99 XrdCl::FileSystem fs = XrdCl::FileSystem(XrdCl::URL(lastUrl));
101 edm::LogInfo(
"XrdAdaptorInternal") <<
"Set monitoring ID to " << jobId <<
".";
107 : m_timeout(XRD_DEFAULT_TIMEOUT),
108 m_nextInitialSourceToggle(
false),
112 m_distribution(0,100),
113 m_excluded_active_count(0)
123 XrdCl::Env *
env = XrdCl::DefaultEnv::GetEnv();
124 if (env) {env->GetInt(
"StreamErrorWindow",
m_timeout);}
136 std::unique_ptr<XrdCl::File>
file;
138 bool validFile =
false;
139 const int retries = 5;
146 SyncHostResponseHandler handler;
147 XrdCl::XRootDStatus openStatus = file->Open(new_filename,
m_flags,
m_perms, &handler);
148 if (!openStatus.IsOK())
156 ex <<
"XrdCl::File::Open(name='" <<
m_name
157 <<
"', flags=0x" << std::hex <<
m_flags
159 <<
") => error '" << openStatus.ToStr()
160 <<
"' (errno=" << openStatus.errNo <<
", code=" << openStatus.code <<
")";
162 ex.
addAdditionalInfo(
"Remote server already encountered a fatal error; no redirections were performed.");
165 handler.WaitForResponse();
166 std::unique_ptr<XrdCl::XRootDStatus>
status = handler.GetStatus();
167 std::unique_ptr<XrdCl::HostList> hostList = handler.GetHosts();
180 ex <<
"XrdCl::File::Open(name='" <<
m_name
181 <<
"', flags=0x" << std::hex <<
m_flags
183 <<
") => error '" << status->ToStr()
184 <<
"' (errno=" << status->errNo <<
", code=" << status->code <<
")";
188 file->GetProperty(
"DataServer", dataServer);
189 file->GetProperty(
"LastURL", lastUrl);
190 if (dataServer.size())
197 edm::LogWarning(
"XrdAdaptorInternal") <<
"Failed to open file at URL " << lastUrl <<
".";
201 ex <<
". No additional data servers were found.";
204 if (dataServer.size())
210 if (lastUrl == new_filename)
212 edm::LogWarning(
"XrdAdaptorInternal") << lastUrl <<
", " << new_filename;
226 std::shared_ptr<Source>
source(
new Source(ts, std::move(file), excludeString));
246 if (siteB.size() && (siteB != siteA)) {siteList = siteA +
", " + siteB;}
247 if (orig_site.size() && (orig_site != siteList))
249 edm::LogWarning(
"XrdAdaptor") <<
"Data is served from " << siteList <<
" instead of original site " << orig_site;
255 m_activeSites = siteList;
287 bool findNewSource =
false;
292 <<
m_activeSources[
a]->PrettyID() <<
" from active sources due to poor quality ("
294 if (
m_activeSources[a]->getLastDowngrade().tv_sec != 0) {findNewSource =
true;}
300 return findNewSource;
308 bool findNewSource =
false;
311 findNewSource =
true;
321 std::vector<std::shared_ptr<Source> > eligibleInactiveSources; eligibleInactiveSources.reserve(
m_inactiveSources.size());
326 std::vector<std::shared_ptr<Source> >::iterator bestInactiveSource = std::min_element(eligibleInactiveSources.begin(), eligibleInactiveSources.end(),
327 [](
const std::shared_ptr<Source> &s1,
const std::shared_ptr<Source> &
s2) {
return s1->getQuality() <
s2->getQuality();});
329 [](
const std::shared_ptr<Source> &s1,
const std::shared_ptr<Source> &
s2) {
return s1->getQuality() <
s2->getQuality();});
330 if (bestInactiveSource != eligibleInactiveSources.end() && bestInactiveSource->get())
332 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Best inactive source: " <<(*bestInactiveSource)->PrettyID()
333 <<
", quality " << (*bestInactiveSource)->getQuality();
335 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Worst active source: " <<(*worstActiveSource)->PrettyID()
336 <<
", quality " << (*worstActiveSource)->getQuality();
339 if ((bestInactiveSource != eligibleInactiveSources.end()) &&
m_activeSources.size() == 1 && ((*bestInactiveSource)->getQuality() < 4*
m_activeSources[0]->getQuality()))
343 for (
auto it = m_inactiveSources.begin(); it != m_inactiveSources.end(); it++)
if (it->get() == bestInactiveSource->get()) {m_inactiveSources.erase(it);
break;}
345 else while ((bestInactiveSource != eligibleInactiveSources.end()) && (*worstActiveSource)->getQuality() > (*bestInactiveSource)->getQuality()+
XRD_ADAPTOR_SOURCE_QUALITY_FUDGE)
347 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Removing " << (*worstActiveSource)->PrettyID()
348 <<
" from active sources due to quality (" << (*worstActiveSource)->getQuality()
349 <<
") and promoting " << (*bestInactiveSource)->PrettyID() <<
" (quality: "
350 << (*bestInactiveSource)->getQuality() <<
")" << std::endl;
351 (*worstActiveSource)->setLastDowngrade(now);
352 for (
auto it = m_inactiveSources.begin(); it != m_inactiveSources.end(); it++)
if (it->get() == bestInactiveSource->get()) {m_inactiveSources.erase(it);
break;}
353 m_inactiveSources.emplace_back(std::move(*worstActiveSource));
357 eligibleInactiveSources.clear();
359 bestInactiveSource = std::min_element(eligibleInactiveSources.begin(), eligibleInactiveSources.end(),
360 [](
const std::shared_ptr<Source> &s1,
const std::shared_ptr<Source> &
s2) {
return s1->getQuality() <
s2->getQuality();});
362 [](
const std::shared_ptr<Source> &s1,
const std::shared_ptr<Source> &
s2) {
return s1->getQuality() <
s2->getQuality();});
369 findNewSource =
true;
391 std::shared_ptr<XrdCl::File>
404 sources.push_back(
source->ID());
414 sources.push_back(
source->PrettyID());
424 sources.push_back(
source);
431 std::vector<std::string> sources;
433 for (
auto const&
source : sources)
439 for (
auto const&
source : sources)
445 std::shared_ptr<Source>
448 std::shared_ptr<Source>
source =
nullptr;
481 source->handle(c_ptr);
482 return c_ptr->get_future();
489 std::stringstream
ss;
495 ss << it->ExcludeID().substr(0, it->ExcludeID().find(
":")) <<
",";
500 ss << it->ExcludeID().substr(0, it->ExcludeID().find(
":")) <<
",";
505 ss << it.substr(0, it.find(
":")) <<
",";
510 return tmp_str.substr(0, tmp_str.size()-1);
518 std::lock_guard<std::recursive_mutex> sentry(m_source_mutex);
521 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Successfully opened new source: " << source->PrettyID() << std::endl;
522 for (
const auto &
s : m_activeSources)
524 if (source->ID() ==
s->ID())
526 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Xrootd server returned excluded source " << source->PrettyID()
527 <<
"; ignoring" << std::endl;
528 unsigned returned_count = ++m_excluded_active_count;
534 for (
const auto &
s : m_inactiveSources)
536 if (source->ID() ==
s->ID())
538 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Xrootd server returned excluded inactive source " << source->PrettyID()
539 <<
"; ignoring" << std::endl;
544 if (m_activeSources.size() < 2)
546 m_activeSources.push_back(source);
551 m_inactiveSources.push_back(source);
556 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Got failure when trying to open a new source" << std::endl;
564 std::lock_guard<std::recursive_mutex> sentry(m_source_mutex);
572 assert(m_activeSources.size());
573 if (m_activeSources.size() == 1)
576 checkSources(now, c_ptr->getSize());
577 m_activeSources[0]->handle(c_ptr);
578 return c_ptr->get_future();
582 std::shared_ptr<std::vector<IOPosBuffer> > req1(
new std::vector<IOPosBuffer>);
583 std::shared_ptr<std::vector<IOPosBuffer> > req2(
new std::vector<IOPosBuffer>);
584 splitClientRequest(*iolist, *req1, *req2);
586 checkSources(now, req1->size() + req2->size());
588 if (m_activeSources.size() == 1)
591 m_activeSources[0]->handle(c_ptr);
592 return c_ptr->get_future();
595 std::shared_ptr<XrdAdaptor::ClientRequest> c_ptr1, c_ptr2;
596 std::future<IOSize> future1, future2;
600 m_activeSources[0]->handle(c_ptr1);
601 future1 = c_ptr1->get_future();
606 m_activeSources[1]->handle(c_ptr2);
607 future2 = c_ptr2->get_future();
609 if (req1->size() && req2->size())
611 std::future<IOSize> task = std::async(std::launch::deferred,
612 [](std::future<IOSize>
a, std::future<IOSize>
b){
625 return b.get() + a.get();
633 else if (req1->size()) {
return future1; }
634 else if (req2->size()) {
return future2; }
637 std::promise<IOSize>
p; p.set_value(0);
638 return p.get_future();
646 std::shared_ptr<Source> source_ptr = c_ptr->getCurrentSource();
649 if (c_status.code == XrdCl::errInvalidResponse)
651 edm::LogWarning(
"XrdAdaptorInternal") <<
"Invalid response when reading from " << source_ptr->PrettyID();
653 ex <<
"XrdAdaptor::RequestManager::requestFailure readv(name='" <<
m_name
654 <<
"', flags=0x" << std::hex <<
m_flags
656 <<
", old source=" << source_ptr->PrettyID()
657 <<
") => Invalid ReadV response from server";
658 ex.
addContext(
"In XrdAdaptor::RequestManager::requestFailure()");
662 edm::LogWarning(
"XrdAdaptorInternal") <<
"Request failure when reading from " << source_ptr->PrettyID();
681 std::shared_ptr<Source> new_source;
684 std::shared_future<std::shared_ptr<Source> > future =
m_open_handler->open();
693 if (status == std::future_status::timeout)
696 ex <<
"XrdAdaptor::RequestManager::requestFailure Open(name='" <<
m_name
697 <<
"', flags=0x" << std::hex <<
m_flags
699 <<
", old source=" << source_ptr->PrettyID()
700 <<
") => timeout when waiting for file open";
701 ex.
addContext(
"In XrdAdaptor::RequestManager::requestFailure()");
709 new_source = future.get();
713 ex.
addContext(
"Handling XrdAdaptor::RequestManager::requestFailure()");
724 ex <<
"XrdAdaptor::RequestManager::requestFailure Open(name='" <<
m_name
725 <<
"', flags=0x" << std::hex <<
m_flags
727 <<
", old source=" << source_ptr->PrettyID()
728 <<
", new source=" << new_source->PrettyID() <<
") => Xrootd server returned an excluded source";
729 ex.
addContext(
"In XrdAdaptor::RequestManager::requestFailure()");
740 new_source->handle(c_ptr);
750 if (io.
size() > chunksize)
762 consumed = chunksize;
768 consumed = chunksize;
771 chunksize -= consumed;
774 void* newdata =
static_cast<char*
>(io.
data()) + consumed;
779 else if (io.
size() == 0)
785 output.push_back(io);
786 chunksize -= io.
size();
799 if (io.
size() > chunksize)
811 consumed = chunksize;
817 consumed = chunksize;
820 chunksize -= consumed;
823 void* newdata =
static_cast<char*
>(io.
data()) + consumed;
828 else if (io.
size() == 0)
834 output.push_back(io);
835 chunksize -= io.
size();
844 off_t last_offset = -1;
845 for (
const auto & it : req)
848 assert(it.offset() > last_offset);
849 last_offset = it.offset();
851 assert(it.offset() < 0x1ffffffffff);
853 assert(req.size() <= 1024);
860 if (iolist.size() == 0)
return;
861 std::vector<IOPosBuffer> tmp_iolist(iolist.begin(), iolist.end());
862 req1.reserve(iolist.size()/2+1);
863 req2.reserve(iolist.size()/2+1);
867 float q1 =
static_cast<float>(m_activeSources[0]->getQuality())+5;
868 float q2 =
static_cast<float>(m_activeSources[1]->getQuality())+5;
871 chunk1 =
std::max(static_cast<IOSize>(static_cast<float>(
XRD_CL_MAX_CHUNK)*(q2*q2/(q1*q1+q2*q2))), static_cast<IOSize>(1024));
872 chunk2 =
std::max(static_cast<IOSize>(static_cast<float>(
XRD_CL_MAX_CHUNK)*(q1*q1/(q1*q1+q2*q2))), static_cast<IOSize>(1024));
875 for (
const auto & it : iolist) size_orig += it.size();
877 while (tmp_iolist.size()-front > 0)
886 ex <<
"XrdAdaptor::RequestManager::splitClientRequest(name='" << m_name
887 <<
"', flags=0x" << std::hex << m_flags
888 <<
", permissions=0" << std::oct << m_perms <<
std::dec
889 <<
") => Unable to split request between active servers. This is an unexpected internal error and should be reported to CMSSW developers.";
890 ex.
addContext(
"In XrdAdaptor::RequestManager::requestFailure()");
892 std::stringstream
ss; ss <<
"Original request size " << iolist.size() <<
"(" << size_orig <<
" bytes)";
894 std::stringstream ss2; ss2 <<
"Quality source 1 " << q1-5 <<
", quality source 2: " << q2-5;
907 assert(size_orig == size1 + size2);
909 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Original request size " << iolist.size() <<
" (" << size_orig <<
" bytes) split into requests size " << req1.size() <<
" (" << size1 <<
" bytes) and " << req2.size() <<
" (" << size2 <<
" bytes)" << std::endl;
928 std::shared_ptr<Source>
source;
929 std::unique_ptr<XrdCl::XRootDStatus>
status(status_ptr);
930 std::unique_ptr<XrdCl::HostList> hostList(hostList_ptr);
933 std::shared_ptr<OpenHandler>
self = m_self;
936 auto manager = m_manager.lock();
945 std::unique_ptr<XrdCl::File> releaseFile;
947 std::lock_guard<std::recursive_mutex> sentry(m_mutex);
958 source.reset(
new Source(now, std::move(m_file), excludeString));
959 m_promise.set_value(source);
963 releaseFile = std::move(m_file);
965 ex <<
"XrdCl::File::Open(name='" << manager->m_name
966 <<
"', flags=0x" << std::hex << manager->m_flags
967 <<
", permissions=0" << std::oct << manager->m_perms <<
std::dec
968 <<
") => error '" << status->ToStr()
969 <<
"' (errno=" << status->errNo <<
", code=" << status->code <<
")";
970 ex.
addContext(
"In XrdAdaptor::RequestManager::OpenHandler::HandleResponseWithHosts()");
971 manager->addConnections(ex);
973 m_promise.set_exception(std::make_exception_ptr(ex));
976 manager->handleOpen(*status, source);
982 std::lock_guard<std::recursive_mutex> sentry(m_mutex);
986 return "(no open in progress)";
989 m_file->GetProperty(
"DataServer", dataServer);
990 if (!dataServer.size()) {
return "(unknown source)"; }
994 std::shared_future<std::shared_ptr<Source> >
997 auto manager_ptr = m_manager.lock();
1001 ex <<
"XrdCl::File::Open() =>"
1002 <<
" error: OpenHandler called within an invalid RequestManager context."
1003 <<
" This is a logic error and should be reported to the CMSSW developers.";
1004 ex.
addContext(
"Calling XrdAdaptor::RequestManager::OpenHandler::open()");
1008 auto self_ptr = m_self_weak.lock();
1012 ex <<
"XrdCl::File::Open() => error: "
1013 <<
"OpenHandler called after it was deleted. This is a logic error "
1014 <<
"and should be reported to the CMSSW developers.";
1015 ex.
addContext(
"Calling XrdAdapter::RequestManager::OpenHandler::open()");
1030 return m_shared_future;
1032 std::lock_guard<std::recursive_mutex> sentry(m_mutex);
1033 std::promise<std::shared_ptr<Source> > new_promise;
1034 m_promise.swap(new_promise);
1035 m_shared_future = m_promise.get_future().share();
1039 edm::LogVerbatim(
"XrdAdaptorInternal") <<
"Trying to open URL: " << new_name;
1041 XrdCl::XRootDStatus
status;
1042 if (!(status = m_file->Open(new_name, manager.
m_flags, manager.
m_perms,
this)).IsOK())
1045 ex <<
"XrdCl::File::Open(name='" << new_name
1046 <<
"', flags=0x" << std::hex << manager.
m_flags
1048 <<
") => error '" << status.ToStr()
1049 <<
"' (errno=" << status.errNo <<
", code=" << status.code <<
")";
1050 ex.
addContext(
"Calling XrdAdaptor::RequestManager::OpenHandler::open()");
1056 return m_shared_future;
std::shared_future< std::shared_ptr< Source > > open()
RequestManager(const std::string &filename, XrdCl::OpenFlags::Flags flags, XrdCl::Access::Mode perms)
#define GET_CLOCK_MONOTONIC(ts)
std::uniform_real_distribution< float > m_distribution
static void determineHostExcludeString(XrdCl::File &file, const XrdCl::HostList *hostList, std::string &exclude)
std::vector< Variable::Flags > flags
void updateSiteInfo(std::string orig_site="")
std::set< std::string > m_disabledSourceStrings
OpenHandler(std::weak_ptr< RequestManager > manager)
virtual void handleOpen(XrdCl::XRootDStatus &status, std::shared_ptr< Source >)
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
SendMonitoringInfoHandler nullHandler
static std::string const input
bool compareSources(const timespec &now, unsigned a, unsigned b)
#define XRD_ADAPTOR_CHUNK_THRESHOLD
#define XRD_ADAPTOR_SOURCE_QUALITY_FUDGE
static void SendMonitoringInfo(XrdCl::File &file)
void set_data(void *new_buffer)
void set_size(IOSize new_size)
void addConnections(cms::Exception &)
long long timeDiffMS(const timespec &a, const timespec &b)
std::set< std::shared_ptr< Source > > m_disabledSources
void set_offset(IOOffset new_offset)
static bool getDomain(const std::string &host, std::string &domain)
std::vector< std::shared_ptr< Source > > m_inactiveSources
static bool isDCachePool(XrdCl::File &file, const XrdCl::HostList *hostList=nullptr)
#define XRD_ADAPTOR_OPEN_PROBE_PERCENT
void addAdditionalInfo(std::string const &info)
timespec m_nextActiveSourceCheck
std::set< std::string > m_disabledExcludeStrings
std::shared_ptr< XrdCl::File > getActiveFile()
std::string current_source()
std::shared_ptr< OpenHandler > m_open_handler
IOOffset offset(void) const
void requestFailure(std::shared_ptr< XrdAdaptor::ClientRequest > c_ptr, XrdCl::Status &c_status)
virtual void HandleResponseWithHosts(XrdCl::XRootDStatus *status, XrdCl::AnyObject *response, XrdCl::HostList *hostList) override
std::string prepareOpaqueString()
XrdCl::OpenFlags::Flags m_flags
virtual void HandleResponse(XrdCl::XRootDStatus *status, XrdCl::AnyObject *response) override
static std::shared_ptr< OpenHandler > getInstance(std::weak_ptr< RequestManager > manager)
static void consumeChunkFront(size_t &front, std::vector< IOPosBuffer > &input, std::vector< IOPosBuffer > &output, IOSize chunksize)
tuple idx
DEBUGGING if hasattr(process,"trackMonIterativeTracking2012"): print "trackMonIterativeTracking2012 D...
void getActiveSourceNames(std::vector< std::string > &sources)
void checkSources(timespec &now, IOSize requestSize)
std::string m_activeSites
#define XRD_ADAPTOR_LONG_OPEN_DELAY
std::vector< std::shared_ptr< Source > > m_activeSources
XrdCl::Access::Mode m_perms
void addContext(std::string const &context)
void checkSourcesImpl(timespec &now, IOSize requestSize)
static const char * getJobID()
std::shared_ptr< Source > pickSingleSource()
static bool getXrootdSiteFromURL(std::string url, std::string &site)
void getDisabledSourceNames(std::vector< std::string > &sources)
static IOSize validateList(const std::vector< IOPosBuffer > req)
void getPrettyActiveSourceNames(std::vector< std::string > &sources)
#define XRD_ADAPTOR_SHORT_OPEN_DELAY
volatile std::atomic< bool > shutdown_flag false
static void consumeChunkBack(size_t front, std::vector< IOPosBuffer > &input, std::vector< IOPosBuffer > &output, IOSize chunksize)
void splitClientRequest(const std::vector< IOPosBuffer > &iolist, std::vector< IOPosBuffer > &req1, std::vector< IOPosBuffer > &req2)
std::future< IOSize > handle(void *into, IOSize size, IOOffset off)
static bool getHostname(const std::string &id, std::string &hostname)
static std::string const source
void clearAdditionalInfo()
std::recursive_mutex m_source_mutex
bool m_nextInitialSourceToggle
timespec m_lastSourceCheck
void initialize(std::weak_ptr< RequestManager > selfref)