CMS 3D CMS Logo

XrdSource.cc
Go to the documentation of this file.
1 
2 // See http://stackoverflow.com/questions/12523122/what-is-glibcxx-use-nanosleep-all-about
3 #define _GLIBCXX_USE_NANOSLEEP
4 #include <memory>
5 
6 #include <thread>
7 #include <chrono>
8 #include <atomic>
9 #include <iostream>
10 #include <cassert>
11 #include <netdb.h>
12 
13 #include "XrdCl/XrdClFile.hh"
14 
17 
18 #include "XrdSource.h"
19 #include "XrdRequest.h"
20 #include "QualityMetric.h"
21 #include "XrdStatistics.h"
22 
// Size limits used by this file (presumably byte counts -- XRD_CL_MAX_CHUNK is compared
// against a chunk length in validateList). The expansions are parenthesized so the macros
// stay correct when used inside a larger expression such as `x / MAX_REQUEST`.
#define MAX_REQUEST (256 * 1024)
#define XRD_CL_MAX_CHUNK (512 * 1024)
25 
// Debug aid. When XRD_FAKE_SLOW is defined, the Source constructor marks a source as slow
// whenever the incremented g_delayCount is a multiple of XRD_SLOW_RATE, and Source::handle
// sleeps XRD_DELAY milliseconds before issuing a read on a slow source.
// g_delayCount is defined identically in both branches of the #ifdef.
26 #ifdef XRD_FAKE_SLOW
27 //#define XRD_DELAY 5140
28 #define XRD_DELAY 1000
29 #define XRD_SLOW_RATE 2
30 std::atomic<int> g_delayCount{0};
31 #else
32 std::atomic<int> g_delayCount{0};
33 #endif
34 
35 using namespace XrdAdaptor;
36 
37 // File::Close() can take awhile - slow servers (which are probably
38 // inactive anyway!) can even timeout. Rather than wait around for
39 // a few minutes in the main thread, this class asynchronously closes
40 // and deletes the XrdCl::File
// Response handler that issues an asynchronous Close() on the file handle it is given and
// deletes itself from the close callback. Copying is disabled.
// The member declarations (m_fh, m_id, m_site) are not shown in this listing.
41 class DelayedClose : public XrdCl::ResponseHandler {
42 public:
43  DelayedClose(const DelayedClose &) = delete;
44  DelayedClose &operator=(const DelayedClose &) = delete;
45 
// Takes shared ownership of fh; id and site are kept only for the warning message below.
// If fh is non-null and open, an asynchronous Close is requested with this object as handler.
// NOTE(review): when the Close request is rejected the object deletes itself inside its own
// constructor, so the pointer produced by the caller's `new` expression dangles -- confirm
// callers never keep or use it.
// NOTE(review): when fh is null or not open, no Close is requested and nothing here deletes
// the object -- confirm the caller handles that case.
46  DelayedClose(std::shared_ptr<XrdCl::File> fh, const std::string &id, const std::string &site)
47  : m_fh(std::move(fh)), m_id(id), m_site(site) {
48  if (m_fh && m_fh->IsOpen()) {
49  if (!m_fh->Close(this).IsOK()) {
50  delete this;
51  }
52  }
53  }
54 
55  ~DelayedClose() override = default;
56 
// Close callback: logs a warning if the close failed, frees status and hostList, then
// destroys this handler. `response` is deliberately left alone (see the NOTE below).
57  void HandleResponseWithHosts(XrdCl::XRootDStatus *status,
58  XrdCl::AnyObject *response,
59  XrdCl::HostList *hostList) override {
60  if (status && !status->IsOK()) {
61  edm::LogWarning("XrdFileWarning") << "Source delayed close failed with error '" << status->ToStr()
62  << "' (errno=" << status->errNo << ", code=" << status->code
63  << ", server=" << m_id << ", site=" << m_site << ")";
64  }
65  delete status;
66  delete hostList;
67  // NOTE: we do not delete response (copying behavior from XrdCl).
// Must be the last statement: no member may be touched after this point.
68  delete this;
69  }
70 
71 private:
75 };
76 
// Response handler used to run a FileSystem::Query(Config, attr) with a caller-side timeout.
// query() creates a handler plus a shared QueryAttrState, hands the handler to XrdCl and waits
// on the state's condition variable; HandleResponse() fills the state in and deletes the handler.
// The handler holds only a weak_ptr to the state, so a late callback after the waiter has
// returned finds the state gone and does nothing.
// Several declarations (the last parameter of query(), the constructor and m_mutex) are not
// shown in this listing.
82 class QueryAttrHandler : public XrdCl::ResponseHandler {
83  friend std::unique_ptr<QueryAttrHandler> std::make_unique<QueryAttrHandler>();
84 
85 public:
86  ~QueryAttrHandler() override = default;
87  QueryAttrHandler(const QueryAttrHandler &) = delete;
88  QueryAttrHandler &operator=(const QueryAttrHandler &) = delete;
89 
// Sends the query for `attr` on `fs` and waits up to `timeout` for the callback.
// Returns the submission status if the request could not be sent, the server's status if a
// callback arrived in time, or an errSocketTimeout error status otherwise. On an OK status the
// response buffer is converted to a string and stored in `result`.
90  static XrdCl::XRootDStatus query(XrdCl::FileSystem &fs,
91  const std::string &attr,
92  std::chrono::milliseconds timeout,
94  auto handler = std::make_unique<QueryAttrHandler>();
95  auto l_state = std::make_shared<QueryAttrState>();
96  handler->m_state = l_state;
97  XrdCl::Buffer arg(attr.size());
98  arg.FromString(attr);
99 
100  XrdCl::XRootDStatus st = fs.Query(XrdCl::QueryCode::Config, arg, handler.get());
101  if (!st.IsOK()) {
// The request was not sent; `handler` is still owned here and is freed on return.
102  return st;
103  }
104 
105  // Successfully registered the callback; it will always delete itself, so we shouldn't.
106  handler.release();
107 
108  std::unique_lock<std::mutex> guard(l_state->m_mutex);
109  // Wait until some status is available or a timeout.
110  l_state->m_condvar.wait_for(guard, timeout, [&] { return l_state->m_status.get(); });
111 
112  if (l_state->m_status) {
113  if (l_state->m_status->IsOK()) {
114  result = l_state->m_response->ToString();
115  }
116  return *(l_state->m_status);
117  } else { // We had a timeout; construct a reasonable message.
118  return XrdCl::XRootDStatus(
119  XrdCl::stError, XrdCl::errSocketTimeout, 1, "Timeout when waiting for query callback.");
120  }
121  }
122 
123 private:
125 
// Callback invoked with the query result. Takes ownership of `response`, deletes the handler,
// and -- if the waiter's state is still alive -- stores the status (and, on success, the
// response buffer) in it.
126  void HandleResponse(XrdCl::XRootDStatus *status, XrdCl::AnyObject *response) override {
127  // NOTE: we own the status and response pointers.
128  std::unique_ptr<XrdCl::AnyObject> response_mgr;
129  response_mgr.reset(response);
130 
131  // Lock our state information then dispose of our object.
132  auto l_state = m_state.lock();
// From here on only locals may be used; no member access after this line.
133  delete this;
134  if (!l_state) {
// NOTE(review): `status` is not freed on this path -- confirm whether that leaks.
135  return;
136  }
137 
// NOTE(review): std::unique_ptr invokes its deleter only when the held pointer is non-null.
// Both guards below hold nullptr, so their lambdas look like they never run: the waiter is
// never notified (it would only see m_status once wait_for times out or wakes spuriously),
// and the fallback errInternal status is never installed. Confirm and fix.
// NOTE(review): m_status and m_response are written below without taking m_mutex, although
// the comment on QueryAttrState says m_mutex protects m_status -- confirm.
138  // On function exit, notify any waiting threads.
139  std::unique_ptr<char, std::function<void(char *)>> notify_guard(nullptr,
140  [&](char *) { l_state->m_condvar.notify_all(); });
141 
142  {
143  // On exit from the block, make sure m_status is set; it needs to be set before we notify threads.
144  std::unique_ptr<char, std::function<void(char *)>> exit_guard(nullptr, [&](char *) {
145  if (!l_state->m_status)
146  l_state->m_status = std::make_unique<XrdCl::XRootDStatus>(XrdCl::stError, XrdCl::errInternal);
147  });
148  if (!status) {
149  return;
150  }
151  if (status->IsOK()) {
152  if (!response) {
// NOTE(review): `status` is neither stored nor freed on this path -- confirm.
153  return;
154  }
// Move the Buffer out of the AnyObject so that response_mgr's destructor does not free it;
// ownership passes to l_state->m_response.
155  XrdCl::Buffer *buf_ptr;
156  response->Get(buf_ptr);
157  // AnyObject::Set lacks specialization for nullptr
158  response->Set(static_cast<int *>(nullptr));
159  l_state->m_response.reset(buf_ptr);
160  }
// The state takes ownership of `status`, whether it reports success or failure.
161  l_state->m_status.reset(status);
162  }
163  }
164 
165  // Represents the current state of the callback. The parent class only manages a weak_ptr
166  // to the state. If the asynchronous callback cannot lock the weak_ptr, then it assumes the
167  // main thread has given up and doesn't touch any of the state variables.
168  struct QueryAttrState {
169  // Synchronize between the callback thread and the main thread; condvar predicate
170  // is having m_status set. m_mutex protects m_status.
172  std::condition_variable m_condvar;
173 
174  // Results from the server
175  std::unique_ptr<XrdCl::XRootDStatus> m_status;
176  std::unique_ptr<XrdCl::Buffer> m_response;
177  };
178  std::weak_ptr<QueryAttrState> m_state;
179 };
180 
// Builds a Source around an already-created file handle.
//  now     - timestamp passed to QualityMetricFactory::get.
//  fh      - file handle; ownership moves into m_fh.
//  exclude - exclude string for this source; if empty it is replaced by the server id.
// The constructor reads the "DataServer" property into m_id, derives a provisional site name
// from the server's domain, then calls setXrootdSite(), which may overwrite m_site/m_prettyid.
181 Source::Source(timespec now, std::unique_ptr<XrdCl::File> fh, const std::string &exclude)
182  : m_lastDowngrade({0, 0}),
183  m_id("(unknown)"),
184  m_exclude(exclude),
185  m_fh(std::move(fh)),
186  m_stats(nullptr)
187 #ifdef XRD_FAKE_SLOW
188  ,
// With XRD_SLOW_RATE == 2 this marks every second constructed Source as slow.
189  m_slow(++g_delayCount % XRD_SLOW_RATE == 0)
190 //, m_slow(++g_delayCount >= XRD_SLOW_RATE)
191 //, m_slow(true)
192 #endif
193 {
194  if (m_fh.get()) {
195  if (!m_fh->GetProperty("DataServer", m_id)) {
196  edm::LogWarning("XrdFileWarning") << "Source::Source() failed to determine data server name.'";
197  }
198  if (m_exclude.empty()) {
199  m_exclude = m_id;
200  }
201  }
202  m_qm = QualityMetricFactory::get(now, m_id);
203  m_prettyid = m_id + " (unknown site)";
204  std::string domain_id;
205  if (getDomain(m_id, domain_id)) {
206  m_site = domain_id;
207  } else {
208  m_site = "Unknown (" + m_id + ")";
209  }
// NOTE(review): setXrootdSite() dereferences m_fh, so the assert(m_fh.get()) below runs only
// after a null handle would already have been dereferenced -- confirm the intended order.
210  setXrootdSite();
211  assert(m_qm.get());
212  assert(m_fh.get());
// The declaration of statsService is not shown in this listing; presumably it comes from
// XrdSiteStatisticsInformation::getInstance() -- confirm.
214  if (statsService) {
215  m_stats = statsService->getStatisticsForSite(m_site);
216  }
217 }
218 
219 bool Source::getHostname(const std::string &id, std::string &hostname) {
220  size_t pos = id.find(':');
221  hostname = id;
222  if ((pos != std::string::npos) && (pos > 0)) {
223  hostname = id.substr(0, pos);
224  }
225 
226  bool retval = true;
227  if (!hostname.empty() && ((hostname[0] == '[') || isdigit(hostname[0]))) {
228  retval = false;
229  struct addrinfo hints;
230  memset(&hints, 0, sizeof(struct addrinfo));
231  hints.ai_family = AF_UNSPEC;
232  struct addrinfo *result;
233  if (!getaddrinfo(hostname.c_str(), nullptr, &hints, &result)) {
234  std::vector<char> host;
235  host.reserve(256);
236  if (!getnameinfo(result->ai_addr, result->ai_addrlen, &host[0], 255, nullptr, 0, NI_NAMEREQD)) {
237  hostname = &host[0];
238  retval = true;
239  }
240  freeaddrinfo(result);
241  }
242  }
243  return retval;
244 }
245 
// Body of Source::getDomain(host, domain); the signature line is not shown in this listing.
// Fills `domain` with the host name produced by getHostname, minus everything up to and
// including its first '.'. Returns true when the result is non-empty.
// The boolean returned by getHostname is ignored here.
247  getHostname(host, domain);
248  size_t pos = domain.find('.');
// find() returns an index below size() whenever it is not npos, so the second test is redundant.
249  if (pos != std::string::npos && (pos < domain.size())) {
250  domain = domain.substr(pos + 1);
251  }
252 
253  return !domain.empty();
254 }
255 
256 bool Source::isDCachePool(XrdCl::File &file, const XrdCl::HostList *hostList) {
257  // WORKAROUND: On open-file recovery in the Xrootd client, it'll carry around the
258  // dCache opaque information to other sites, causing isDCachePool to erroneously return
259  // true. We are working with the upstream developers to solve this.
260  //
261  // For now, we see if the previous server also looks like a dCache pool - something that
262  // wouldn't happen at a real site, as the previous server should look like a dCache door.
263  std::string lastUrl;
264  file.GetProperty("LastURL", lastUrl);
265  if (!lastUrl.empty()) {
266  bool result = isDCachePool(lastUrl);
267  if (result && hostList && (hostList->size() > 1)) {
268  if (isDCachePool((*hostList)[hostList->size() - 2].url.GetURL())) {
269  return false;
270  }
271  return true;
272  }
273  return result;
274  }
275  return false;
276 }
277 
278 bool Source::isDCachePool(const std::string &lastUrl) {
279  XrdCl::URL url(lastUrl);
280  XrdCl::URL::ParamsMap map = url.GetParams();
281  // dCache pools always utilize this opaque identifier.
282  if (map.find("org.dcache.uuid") != map.end()) {
283  return true;
284  }
285  return false;
286 }
287 
288 void Source::determineHostExcludeString(XrdCl::File &file, const XrdCl::HostList *hostList, std::string &exclude) {
289  // Detect a dCache pool and, if we are in the federation context, give a custom
290  // exclude parameter.
291  // We assume this is a federation context if there's at least a regional, dCache door,
292  // and dCache pool server (so, more than 2 servers!).
293 
294  exclude = "";
295  if (hostList && (hostList->size() > 3) && isDCachePool(file, hostList)) {
296  const XrdCl::HostInfo &info = (*hostList)[hostList->size() - 3];
297  exclude = info.url.GetHostName();
298  std::string lastUrl;
299  file.GetProperty("LastURL", lastUrl);
300  edm::LogVerbatim("XrdAdaptorInternal") << "Changing exclude list for URL " << lastUrl << " to " << exclude;
301  }
302 }
303 
// Body of Source::getXrootdSite(file, site); the signature line and the declarations of
// `server` and `id` are not shown in this listing.
// Determines the site name for the server behind the file handle. When the handle has no
// "LastURL", or that URL looks like a dCache pool, `site` falls back to the server's domain
// (or "Unknown") and false is returned; otherwise the server is asked via getXrootdSiteFromURL.
305  std::string lastUrl;
306  fh.GetProperty("LastURL", lastUrl);
307  if (lastUrl.empty() || isDCachePool(lastUrl)) {
309  if (!fh.GetProperty("DataServer", server)) {
310  id = "(unknown)";
311  } else {
312  id = server;
313  }
// `id` is used only for this warning.
314  if (lastUrl.empty()) {
315  edm::LogWarning("XrdFileWarning") << "Unable to determine the URL associated with server " << id;
316  }
// Default that getDomain overwrites below when a server name is known.
317  site = "Unknown";
318  if (!server.empty()) {
319  getDomain(server, site);
320  }
321  return false;
322  }
323  return getXrootdSiteFromURL(lastUrl, site);
324 }
325 
// Body of Source::getXrootdSiteFromURL(url, site); the signature line is not shown in this
// listing.
// Queries the server at `url` for its "sitename" configuration value with a one-second
// timeout. On success `site` receives the reported name and true is returned. If the query
// fails, or the server merely echoes "sitename" back, `site` is set from the URL host's domain
// and false is returned.
// NOTE(review): `attr`, `response` and `arg` are never used by the query below; `response`
// stays nullptr, so the `delete response` is a no-op. They look like leftovers -- confirm
// and remove.
327  const std::string attr = "sitename";
328  XrdCl::Buffer *response = nullptr;
329  XrdCl::Buffer arg(attr.size());
330  arg.FromString(attr);
331 
332  XrdCl::FileSystem fs(url);
333  std::string rsite;
334  XrdCl::XRootDStatus st = QueryAttrHandler::query(fs, "sitename", std::chrono::seconds(1), rsite);
335  if (!st.IsOK()) {
336  XrdCl::URL xurl(url);
337  getDomain(xurl.GetHostName(), site);
338  delete response;
339  return false;
340  }
// Drop a single trailing newline from the reply.
341  if (!rsite.empty() && (rsite[rsite.size() - 1] == '\n')) {
342  rsite = rsite.substr(0, rsite.size() - 1);
343  }
// A reply equal to the attribute name is treated as "no site name configured".
344  if (rsite == "sitename") {
345  XrdCl::URL xurl(url);
346  getDomain(xurl.GetHostName(), site);
347  return false;
348  }
349  site = rsite;
350  return true;
351 }
352 
// Body of Source::setXrootdSite(); the signature line is not shown in this listing.
// Asks getXrootdSite for the site name of the server behind m_fh. On success m_site and
// m_prettyid are updated; on failure they are left unchanged and an info message is logged.
// The "Reading from new server" message is logged in both cases.
354  std::string site;
// m_fh is dereferenced without a null check here.
355  bool goodSitename = getXrootdSite(*m_fh, site);
356  if (!goodSitename) {
357  edm::LogInfo("XrdAdaptorInternal") << "Xrootd server at " << m_id
358  << " did not provide a sitename. Monitoring may be incomplete.";
359  } else {
360  m_site = site;
361  m_prettyid = m_id + " (site " + m_site + ")";
362  }
363  edm::LogInfo("XrdAdaptorInternal") << "Reading from new server " << m_id << " at site " << m_site;
364 }
365 
367 
// Returns the file handle by forwarding to fh().
368 std::shared_ptr<XrdCl::File> Source::getFileHandle() { return fh(); }
369 
370 static void validateList(const XrdCl::ChunkList &cl) {
371  off_t last_offset = -1;
372  for (const auto &ci : cl) {
373  assert(static_cast<off_t>(ci.offset) > last_offset);
374  last_offset = ci.offset;
375  assert(ci.length <= XRD_CL_MAX_CHUNK);
376  assert(ci.offset < 0x1ffffffffff);
377  assert(ci.offset > 0);
378  }
379  assert(cl.size() <= 1024);
380 }
381 
// Submits a client read request to this source's file handle.
// The request is tagged with this source, its quality-metric watch is started and, when site
// statistics are available, a read-statistics record is attached. A request with a destination
// buffer (m_into) becomes a single Read; otherwise its I/O list becomes one VectorRead.
// The request object itself is passed to XrdCl as the response handler.
// Throws the exception object `ex` when XrdCl rejects the request; the declaration of `ex` is
// not shown in this listing (presumably an edm::Exception with FileReadError -- confirm).
382 void Source::handle(std::shared_ptr<ClientRequest> c) {
383  edm::LogVerbatim("XrdAdaptorInternal") << "Reading from " << ID() << ", quality " << m_qm->get() << std::endl;
384  c->m_source = shared_from_this();
// The request stores a shared_ptr to itself; presumably this keeps it alive until the
// callback fires -- see the ClientRequest definition to confirm.
385  c->m_self_reference = c;
386  m_qm->startWatch(c->m_qmw);
387  if (m_stats) {
388  std::shared_ptr<XrdReadStatistics> readStats = XrdSiteStatistics::startRead(stats(), c);
389  c->setStatistics(readStats);
390  }
391 #ifdef XRD_FAKE_SLOW
// Debug aid: artificially delay reads on sources marked slow.
392  if (m_slow)
393  std::this_thread::sleep_for(std::chrono::milliseconds(XRD_DELAY));
394 #endif
395 
396  XrdCl::XRootDStatus status;
397  if (c->m_into) {
398  // See notes in ClientRequest definition to understand this voodoo.
399  status = m_fh->Read(c->m_off, c->m_size, c->m_into, c.get());
400  } else {
// Translate the request's I/O list into an XrdCl chunk list, one chunk per entry.
401  XrdCl::ChunkList cl;
402  cl.reserve(c->m_iolist->size());
403  for (const auto &it : *c->m_iolist) {
404  cl.emplace_back(it.offset(), it.size(), it.data());
405  }
406  validateList(cl);
407  status = m_fh->VectorRead(cl, nullptr, c.get());
408  }
409 
410  if (!status.IsOK()) {
412  ex << "XrdFile::Read or XrdFile::VectorRead failed with error: '" << status.ToStr() << "' (errNo = " << status.errNo
413  << ")";
414  ex.addContext("Calling Source::handle");
415  throw ex;
416  }
417 }
XrdAdaptor::XrdSiteStatisticsInformation
Definition: XrdStatistics.h:55
XrdAdaptor::XrdSiteStatisticsInformation::getInstance
static XrdSiteStatisticsInformation * getInstance()
Definition: XrdStatistics.cc:86
XrdAdaptor::Source::m_id
std::string m_id
Definition: XrdSource.h:77
XrdAdaptor::Source::Source
Source(const Source &)=delete
g_delayCount
std::atomic< int > g_delayCount
Definition: XrdSource.cc:32
XrdAdaptor::Source::stats
std::shared_ptr< XrdSiteStatistics const > stats() const
Definition: XrdSource.h:73
QueryAttrHandler::QueryAttrHandler
QueryAttrHandler()
Definition: XrdSource.cc:124
XrdAdaptor::Source::determineHostExcludeString
static void determineHostExcludeString(XrdCl::File &file, const XrdCl::HostList *hostList, std::string &exclude)
Definition: XrdSource.cc:288
DelayedClose::DelayedClose
DelayedClose(std::shared_ptr< XrdCl::File > fh, const std::string &id, const std::string &site)
Definition: XrdSource.cc:46
relmon_authenticated_wget.url
url
Definition: relmon_authenticated_wget.py:22
MessageLogger.h
XrdStatistics.h
XrdAdaptor::Source::m_site
std::string m_site
Definition: XrdSource.h:79
cms::Exception::addContext
void addContext(std::string const &context)
Definition: Exception.cc:165
QueryAttrHandler::QueryAttrState::m_response
std::unique_ptr< XrdCl::Buffer > m_response
Definition: XrdSource.cc:176
XrdAdaptor::Source::m_qm
edm::propagate_const< std::unique_ptr< QualityMetricSource > > m_qm
Definition: XrdSource.h:83
submitPVValidationJobs.now
now
Definition: submitPVValidationJobs.py:639
mps_update.status
status
Definition: mps_update.py:68
QueryAttrHandler::QueryAttrState
Definition: XrdSource.cc:168
XrdAdaptor::Source::fh
std::shared_ptr< XrdCl::File const > fh() const
Definition: XrdSource.h:71
pos
Definition: PixelAliasList.h:18
XrdSource.h
validateList
static void validateList(const XrdCl::ChunkList &cl)
Definition: XrdSource.cc:370
cms::cuda::assert
assert(be >=bs)
info
static const TGPicture * info(bool iBackgroundIsBlack)
Definition: FWCollectionSummaryWidget.cc:153
XrdAdaptor::Source::m_stats
edm::propagate_const< std::shared_ptr< XrdSiteStatistics > > m_stats
Definition: XrdSource.h:84
XrdAdaptor::Source::handle
void handle(std::shared_ptr< ClientRequest >)
Definition: XrdSource.cc:382
edm::LogInfo
Log< level::Info, false > LogInfo
Definition: MessageLogger.h:125
query.host
string host
Definition: query.py:115
edm::LogWarning
Log< level::Warning, false > LogWarning
Definition: MessageLogger.h:122
edm::Exception
Definition: EDMException.h:77
EDMException.h
GetRecoTauVFromDQM_MC_cff.cl
cl
Definition: GetRecoTauVFromDQM_MC_cff.py:38
edm::propagate_const
Definition: propagate_const.h:32
XrdAdaptor::Source::m_fh
edm::propagate_const< std::shared_ptr< XrdCl::File > > m_fh
Definition: XrdSource.h:81
XrdAdaptor::XrdSiteStatistics::startRead
static std::shared_ptr< XrdReadStatistics > startRead(std::shared_ptr< XrdSiteStatistics > parent, std::shared_ptr< ClientRequest > req)
Definition: XrdStatistics.cc:110
seconds
double seconds()
QueryAttrHandler::query
static XrdCl::XRootDStatus query(XrdCl::FileSystem &fs, const std::string &attr, std::chrono::milliseconds timeout, std::string &result)
Definition: XrdSource.cc:90
XrdAdaptor::Source::getDomain
static bool getDomain(const std::string &host, std::string &domain)
Definition: XrdSource.cc:246
XRD_CL_MAX_CHUNK
#define XRD_CL_MAX_CHUNK
Definition: XrdSource.cc:24
DelayedClose::m_fh
edm::propagate_const< std::shared_ptr< XrdCl::File > > m_fh
Definition: XrdSource.cc:72
geometryDiff.file
file
Definition: geometryDiff.py:13
XrdAdaptor::Source::isDCachePool
static bool isDCachePool(XrdCl::File &file, const XrdCl::HostList *hostList=nullptr)
Definition: XrdSource.cc:256
XrdRequest.h
DelayedClose::m_id
std::string m_id
Definition: XrdSource.cc:73
contentValuesFiles.server
server
Definition: contentValuesFiles.py:37
XrdAdaptor::Source::getXrootdSiteFromURL
static bool getXrootdSiteFromURL(std::string url, std::string &site)
Definition: XrdSource.cc:326
mutex
static std::mutex mutex
Definition: Proxy.cc:8
QueryAttrHandler::HandleResponse
void HandleResponse(XrdCl::XRootDStatus *status, XrdCl::AnyObject *response) override
Definition: XrdSource.cc:126
QueryAttrHandler::QueryAttrState::m_condvar
std::condition_variable m_condvar
Definition: XrdSource.cc:172
timeout
Definition: timeout.py:1
XrdAdaptor::Source::m_prettyid
std::string m_prettyid
Definition: XrdSource.h:78
AlCaHLTBitMon_QueryRunRegistry.string
string string
Definition: AlCaHLTBitMon_QueryRunRegistry.py:256
DelayedClose::m_site
std::string m_site
Definition: XrdSource.cc:74
QualityMetric.h
cms::cuda::device::unique_ptr
std::unique_ptr< T, impl::DeviceDeleter > unique_ptr
Definition: device_unique_ptr.h:33
VtxSmearedBeamProfile_cfi.File
File
Definition: VtxSmearedBeamProfile_cfi.py:30
eostools.move
def move(src, dest)
Definition: eostools.py:511
XrdAdaptor::QualityMetricFactory::get
static std::unique_ptr< QualityMetricSource > get(timespec now, const std::string &id)
Definition: QualityMetric.cc:134
std
Definition: JetResolutionObject.h:76
edm::LogVerbatim
Log< level::Info, true > LogVerbatim
Definition: MessageLogger.h:128
triggerObjects_cff.id
id
Definition: triggerObjects_cff.py:29
XrdAdaptor::Source::getXrootdSite
static bool getXrootdSite(XrdCl::File &file, std::string &site)
Definition: XrdSource.cc:304
HiBiasedCentrality_cfi.function
function
Definition: HiBiasedCentrality_cfi.py:4
XrdAdaptor::Source::getFileHandle
std::shared_ptr< XrdCl::File > getFileHandle()
Definition: XrdSource.cc:368
funct::void
TEMPL(T2) struct Divides void
Definition: Factorize.h:24
DelayedClose
Definition: XrdSource.cc:41
funct::arg
A arg
Definition: Factorize.h:31
hcal_runs.URL
URL
Definition: hcal_runs.py:4
XrdAdaptor::Source::setXrootdSite
void setXrootdSite()
Definition: XrdSource.cc:353
mps_fire.result
result
Definition: mps_fire.py:311
genParticles_cff.map
map
Definition: genParticles_cff.py:11
XrdAdaptor::Source::ID
const std::string & ID() const
Definition: XrdSource.h:38
c
auto & c
Definition: CAHitNtupletGeneratorKernelsImpl.h:56
QueryAttrHandler::m_state
std::weak_ptr< QueryAttrState > m_state
Definition: XrdSource.cc:178
edm::errors::FileReadError
Definition: EDMException.h:50
DelayedClose::HandleResponseWithHosts
void HandleResponseWithHosts(XrdCl::XRootDStatus *status, XrdCl::AnyObject *response, XrdCl::HostList *hostList) override
Definition: XrdSource.cc:57
QueryAttrHandler
Definition: XrdSource.cc:82
XrdAdaptor::Source::getHostname
static bool getHostname(const std::string &id, std::string &hostname)
Definition: XrdSource.cc:219
XrdAdaptor
Definition: QualityMetric.h:14
QueryAttrHandler::QueryAttrState::m_status
std::unique_ptr< XrdCl::XRootDStatus > m_status
Definition: XrdSource.cc:175
helper.Config
Config
Definition: helper.py:10
QueryAttrHandler::QueryAttrState::m_mutex
std::mutex m_mutex
Definition: XrdSource.cc:171
XrdAdaptor::Source::~Source
~Source()
Definition: XrdSource.cc:366