CMS 3D CMS Logo

FileLocator.cc
Go to the documentation of this file.
4 
5 #include <boost/algorithm/string.hpp>
6 #include <boost/algorithm/string/replace.hpp>
7 #include <boost/property_tree/json_parser.hpp>
8 
9 #include <filesystem>
10 #include <cstdlib>
11 #include <stdexcept>
12 #include <fstream>
13 #include <sstream>
14 
15 namespace pt = boost::property_tree;
16 
17 namespace {
18 
/// Expands `$N` placeholders in `outputFormat` using the capture groups of `matches`.
///
/// The format string is scanned exactly once, left to right:
///  - `$N` (N written without a leading zero, 1 <= N < matches.size()) is replaced by the
///    text of capture group N. When several digit runs would be valid, the longest one
///    wins, so `$10` refers to group 10 whenever that group exists and is only read as
///    group 1 followed by a literal `0` when it does not.
///  - A placeholder whose group matched the empty string is left in the output untouched.
///  - Text inserted from a capture group is never rescanned, so a `$` inside matched input
///    cannot be mistaken for another placeholder.
///
/// @param matches       Result of a regex match; group 0 (the whole match) is not addressable.
/// @param outputFormat  Template containing zero or more `$N` placeholders.
/// @return The expanded string; `outputFormat` itself when `matches` holds no groups.
std::string replaceWithRegexp(std::smatch const& matches, std::string const& outputFormat) {
  std::string result;
  result.reserve(outputFormat.size());

  size_t const formatSize = outputFormat.size();
  size_t const groupCount = matches.size();

  size_t pos = 0;
  while (pos < formatSize) {
    char const current = outputFormat[pos];
    if (current != '$') {
      result += current;
      ++pos;
      continue;
    }

    // Look for the longest digit run after '$' that names an existing capture group.
    size_t group = 0;
    size_t groupEnd = pos;  // stays equal to pos while no valid group number was found
    size_t value = 0;
    for (size_t j = pos + 1; j < formatSize; ++j) {
      char const digit = outputFormat[j];
      if (digit < '0' || digit > '9')
        break;
      if (j == pos + 1 && digit == '0')
        break;  // placeholders never start with a zero
      value = value * 10 + static_cast<size_t>(digit - '0');
      if (value >= groupCount)
        break;  // longer runs can only be larger, so stop here (also bounds `value`)
      group = value;
      groupEnd = j + 1;
    }

    if (groupEnd != pos && matches[group].length() > 0) {
      result.append(matches[group].first, matches[group].second);
      pos = groupEnd;
    } else {
      // Not a usable placeholder: keep the '$' literally and carry on after it.
      result += '$';
      ++pos;
    }
  }
  return result;
}
32 
// Shared empty C string, used wherever a missing value must degrade to "".
constexpr char const* const kEmptyString = "";
// Prefix that every logical file name handled by this file has to start with.
constexpr char const* const kLFNPrefix = "/store/";

// Returns iCheck unchanged unless it is null, in which case the shared empty string is
// returned, so the result can always be used to build a std::string.
const char* safe(const char* iCheck) { return iCheck != nullptr ? iCheck : kEmptyString; }
42 
43 } // namespace
44 
45 namespace pt = boost::property_tree;
46 
47 namespace edm {
48 
49  FileLocator::FileLocator(std::string const& catUrl, unsigned iCatalog) : m_destination("any") {
50  init_trivialCatalog(catUrl, iCatalog);
51  }
52 
54  unsigned iCatalog,
55  std::string const& storageDescriptionPath)
56  : m_destination("any") {
57  init(catAttr, iCatalog, storageDescriptionPath);
58  }
59 
61 
63  if (catType == edm::CatalogType::TrivialCatalog)
64  return convert(ilfn, m_directRules_trivialCatalog, true);
65  return convert(ilfn, m_directRules, true);
66  }
67 
69  std::string out = "";
70  //check if input is an authentic LFN
71  if (input.compare(0, 7, kLFNPrefix) != 0)
72  return out;
73  for (size_t pi = 0, pe = m_protocols.size(); pi != pe; ++pi) {
75  if (!out.empty()) {
76  return out;
77  }
78  }
79  return out;
80  }
81 
82  void FileLocator::parseRuleTrivialCatalog(tinyxml2::XMLElement* ruleElement, ProtocolRules& rules) {
83  if (!ruleElement) {
84  throw cms::Exception("TrivialFileCatalog", std::string("TrivialFileCatalog::connect: Malformed trivial catalog"));
85  }
86 
87  auto const protocol = safe(ruleElement->Attribute("protocol"));
88  auto destinationMatchRegexp = ruleElement->Attribute("destination-match");
89  if (destinationMatchRegexp == nullptr or destinationMatchRegexp[0] == 0) {
90  destinationMatchRegexp = ".*";
91  }
92 
93  auto const pathMatchRegexp = safe(ruleElement->Attribute("path-match"));
94  auto const result = safe(ruleElement->Attribute("result"));
95  auto const chain = safe(ruleElement->Attribute("chain"));
96 
97  Rule rule;
98  rule.pathMatch.assign(pathMatchRegexp);
99  rule.destinationMatch.assign(destinationMatchRegexp);
100  rule.result = result;
101  rule.chain = chain;
102  rules[protocol].emplace_back(std::move(rule));
103  }
104 
106  std::string const& protocol,
107  ProtocolRules& rules) {
108  if (storageRule.second.empty()) {
109  throw cms::Exception("RucioFileCatalog", "edm::FileLocator::parseRule Malformed storage rule");
110  }
111  auto const pathMatchRegexp = storageRule.second.get<std::string>("lfn");
112  auto const result = storageRule.second.get<std::string>("pfn");
113  auto const chain = storageRule.second.get("chain", kEmptyString);
114  Rule rule;
115  rule.pathMatch.assign(pathMatchRegexp);
116  rule.destinationMatch.assign(".*");
117  rule.result = result;
118  rule.chain = chain;
119  rules[protocol].emplace_back(std::move(rule));
120  }
121 
122  void FileLocator::init_trivialCatalog(std::string const& catUrl, unsigned iCatalog) {
123  std::string url = catUrl;
124  if (url.empty()) {
125  Service<SiteLocalConfig> localconfservice;
126  if (!localconfservice.isAvailable())
127  throw cms::Exception("TrivialFileCatalog", "edm::SiteLocalConfigService is not available");
128  if (iCatalog >= localconfservice->trivialDataCatalogs().size())
129  throw cms::Exception("TrivialFileCatalog", "edm::FileLocator: Request nonexistence data catalog");
130  url = localconfservice->trivialDataCatalogs()[iCatalog];
131  }
132 
133  if (url.find("file:") == std::string::npos) {
134  throw cms::Exception("TrivialFileCatalog",
135  "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
136  }
137 
138  url = url.erase(0, url.find(':') + 1);
139 
140  std::vector<std::string> tokens;
141  boost::algorithm::split(tokens, url, boost::is_any_of(std::string("?")));
142  m_filename = tokens[0];
143 
144  if (tokens.size() == 2) {
145  std::string const options = tokens[1];
146  std::vector<std::string> optionTokens;
147  boost::algorithm::split(optionTokens, options, boost::is_any_of(std::string("&")));
148 
149  std::string const equalSign("=");
150  std::string const comma(",");
151 
152  for (size_t oi = 0, oe = optionTokens.size(); oi != oe; ++oi) {
153  std::string const option = optionTokens[oi];
154  std::vector<std::string> argTokens;
155  boost::algorithm::split(argTokens, option, boost::is_any_of(equalSign));
156 
157  if (argTokens.size() != 2) {
158  throw cms::Exception("TrivialFileCatalog",
159  "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
160  }
161 
162  if (argTokens[0] == "protocol") {
163  boost::algorithm::split(m_protocols, argTokens[1], boost::is_any_of(comma));
164  } else if (argTokens[0] == "destination") {
165  m_destination = argTokens[1];
166  }
167  }
168  }
169 
170  if (m_protocols.empty()) {
171  throw cms::Exception("TrivialFileCatalog",
172  "TrivialFileCatalog::connect: protocol was not supplied in the contact string");
173  }
174 
175  std::ifstream configFile;
176  configFile.open(m_filename.c_str());
177 
178  if (!configFile.good() || !configFile.is_open()) {
179  throw cms::Exception("TrivialFileCatalog",
180  "TrivialFileCatalog::connect: Unable to open trivial file catalog " + m_filename);
181  }
182 
183  configFile.close();
184 
185  tinyxml2::XMLDocument doc;
186  auto loadErr = doc.LoadFile(m_filename.c_str());
187  if (loadErr != tinyxml2::XML_SUCCESS) {
188  throw cms::Exception("TrivialFileCatalog")
189  << "tinyxml file load failed with error : " << doc.ErrorStr() << std::endl;
190  }
191  /* trivialFileCatalog matches the following xml schema
192  FIXME: write a proper DTD
193  <storage-mapping>
194  <lfn-to-pfn protocol="direct" destination-match=".*"
195  path-match="lfn/guid match regular expression"
196  result="/castor/cern.ch/cms/$1"/>
197  <pfn-to-lfn protocol="srm"
198  path-match="lfn/guid match regular expression"
199  result="$1"/>
200  </storage-mapping>
201  */
202  auto rootElement = doc.RootElement();
203  /*first of all do the lfn-to-pfn bit*/
204  for (auto el = rootElement->FirstChildElement("lfn-to-pfn"); el != nullptr;
205  el = el->NextSiblingElement("lfn-to-pfn")) {
207  }
208 
209  /*Then we handle the pfn-to-lfn bit*/
210  for (auto el = rootElement->FirstChildElement("pfn-to-lfn"); el != nullptr;
211  el = el->NextSiblingElement("pfn-to-lfn")) {
213  }
214  }
215 
216  void FileLocator::init(edm::CatalogAttributes const& input_dataCatalog,
217  unsigned iCatalog,
218  std::string const& storageDescriptionPath) {
219  Service<SiteLocalConfig> localconfservice;
220  edm::CatalogAttributes aCatalog = input_dataCatalog;
221  if (input_dataCatalog.empty()) {
222  if (!localconfservice.isAvailable()) {
223  cms::Exception ex("FileCatalog");
224  ex << "edm::SiteLocalConfigService is not available";
225  ex.addContext("Calling edm::FileLocator::init()");
226  throw ex;
227  }
228  if (iCatalog >= localconfservice->dataCatalogs().size()) {
229  cms::Exception ex("FileCatalog");
230  ex << "Request nonexistence data catalog";
231  ex.addContext("Calling edm::FileLocator::init()");
232  throw ex;
233  }
234  aCatalog = localconfservice->dataCatalogs()[iCatalog];
235  }
236 
237  std::filesystem::path filename_storage = localconfservice->storageDescriptionPath(aCatalog);
238 
239  //use path to storage description from input parameter
240  if (!storageDescriptionPath.empty())
241  filename_storage = storageDescriptionPath;
242 
243  //now read json
244  pt::ptree json;
245  try {
246  boost::property_tree::read_json(filename_storage.string(), json);
247  } catch (std::exception& e) {
248  cms::Exception ex("FileCatalog");
249  ex << "Can not open storage.json (" << filename_storage.string()
250  << "). Check SITECONFIG_PATH and site-local-config.xml <data-access>";
251  ex.addContext("edm::FileLocator:init()");
252  throw ex;
253  }
254  auto found_site = std::find_if(json.begin(), json.end(), [&](pt::ptree::value_type const& site) {
255  //get site name
256  std::string siteName = site.second.get("site", kEmptyString);
257  //get volume name
258  std::string volName = site.second.get("volume", kEmptyString);
259  return aCatalog.storageSite == siteName && aCatalog.volume == volName;
260  });
261 
262  //let enforce that site-local-config.xml and storage.json contains valid catalogs in <data-access>, in which site defined in site-local-config.xml <data-access> should be found in storage.json
263  if (found_site == json.end()) {
264  cms::Exception ex("FileCatalog");
265  ex << "Can not find storage site \"" << aCatalog.storageSite << "\" and volume \"" << aCatalog.volume
266  << "\" in storage.json. Check site-local-config.xml <data-access> and storage.json";
267  ex.addContext("edm::FileLocator:init()");
268  throw ex;
269  }
270 
271  const pt::ptree& protocols = found_site->second.find("protocols")->second;
272  auto found_protocol = std::find_if(protocols.begin(), protocols.end(), [&](pt::ptree::value_type const& protocol) {
273  std::string protName = protocol.second.get("protocol", kEmptyString);
274  return aCatalog.protocol == protName;
275  });
276 
277  //let enforce that site-local-config.xml and storage.json contains valid catalogs, in which protocol defined in site-local-config.xml <data-access> should be found in storage.json
278  if (found_protocol == protocols.end()) {
279  cms::Exception ex("FileCatalog");
280  ex << "Can not find protocol \"" << aCatalog.protocol << "\" for the storage site \"" << aCatalog.storageSite
281  << "\" and volume \"" << aCatalog.volume
282  << "\" in storage.json. Check site-local-config.xml <data-access> and storage.json";
283  ex.addContext("edm::FileLocator:init()");
284  throw ex;
285  }
286 
287  std::string protName = found_protocol->second.get("protocol", kEmptyString);
288  m_protocols.push_back(protName);
289 
290  //store all prefixes and rules to m_directRules. We need to do this so that "applyRules" can find the rule in case chaining is used
291  //loop over protocols
292  for (pt::ptree::value_type const& protocol : protocols) {
293  std::string protName = protocol.second.get("protocol", kEmptyString);
294  //loop over rules
295  std::string prefixTmp = protocol.second.get("prefix", kEmptyString);
296  if (prefixTmp == kEmptyString) {
297  const pt::ptree& rules = protocol.second.find("rules")->second;
298  for (pt::ptree::value_type const& storageRule : rules) {
299  parseRule(storageRule, protName, m_directRules);
300  }
301  }
302  //now convert prefix to a rule and save it
303  else {
304  Rule rule;
305  rule.pathMatch.assign("/?(.*)");
306  rule.destinationMatch.assign(".*");
307  rule.result = prefixTmp + "/$1";
308  rule.chain = kEmptyString;
309  m_directRules[protName].emplace_back(std::move(rule));
310  }
311  }
312  }
313 
315  std::string const& protocol,
316  std::string const& destination,
317  bool direct,
318  std::string name) const {
319  ProtocolRules::const_iterator const rulesIterator = protocolRules.find(protocol);
320  if (rulesIterator == protocolRules.end()) {
321  return "";
322  }
323 
324  Rules const& rules = (*(rulesIterator)).second;
325 
326  std::smatch destinationMatches;
327  std::smatch nameMatches;
328 
329  /* Look up for a matching rule*/
330  for (Rules::const_iterator i = rules.begin(); i != rules.end(); ++i) {
331  if (!std::regex_match(destination, destinationMatches, i->destinationMatch)) {
332  continue;
333  }
334 
335  if (!std::regex_match(name, i->pathMatch)) {
336  continue;
337  }
338 
339  // std::cerr << "Rule " << i->pathMatch << "matched! " << std::endl;
340 
341  std::string const chain = i->chain;
342  if ((direct == true) && (!chain.empty())) {
343  name = applyRules(protocolRules, chain, destination, direct, name);
344  if (name.empty()) {
345  return "";
346  }
347  }
348 
349  std::regex_match(name, nameMatches, i->pathMatch);
350  name = replaceWithRegexp(nameMatches, i->result);
351 
352  if ((direct == false) && (!chain.empty())) {
353  name = applyRules(protocolRules, chain, destination, direct, name);
354  }
355  return name;
356  }
357  return "";
358  }
359 } // namespace edm
ProtocolRules m_directRules_trivialCatalog
Definition: FileLocator.h:64
std::map< std::string, Rules > ProtocolRules
Definition: FileLocator.h:42
FileLocator(edm::CatalogAttributes const &catAttr, unsigned iCatalog=0, std::string const &storageDescriptionPath=std::string())
Definition: FileLocator.cc:53
std::string m_destination
Definition: FileLocator.h:76
std::string pfn(std::string const &ilfn, edm::CatalogType catType) const
Definition: FileLocator.cc:62
nlohmann::json json
static std::string const input
Definition: EdmProvDump.cc:50
const Double_t pi
U second(std::pair< T, U > const &p)
ProtocolRules m_inverseRules
Definition: FileLocator.h:66
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::string m_filename
Definition: FileLocator.h:71
void init_trivialCatalog(std::string const &catUrl, unsigned iCatalog)
Definition: FileLocator.cc:122
void init(edm::CatalogAttributes const &input_dataCatalog, unsigned iCatalog, std::string const &storageDescriptionPath)
Definition: FileLocator.cc:216
void addContext(std::string const &context)
Definition: Exception.cc:169
ProtocolRules m_directRules
Definition: FileLocator.h:68
static const char kEmptyString[1]
std::vector< Rule > Rules
Definition: FileLocator.h:41
HLT enums.
std::vector< std::string > m_protocols
Definition: FileLocator.h:75
void parseRuleTrivialCatalog(tinyxml2::XMLElement *ruleNode, ProtocolRules &rules)
Definition: FileLocator.cc:82
void parseRule(boost::property_tree::ptree::value_type const &storageRule, std::string const &protocol, ProtocolRules &rules)
Definition: FileLocator.cc:105
std::string convert(std::string const &input, ProtocolRules const &rules, bool direct) const
Definition: FileLocator.cc:68
bool isAvailable() const
Definition: Service.h:40
std::string applyRules(ProtocolRules const &protocolRules, std::string const &protocol, std::string const &destination, bool direct, std::string name) const
Definition: FileLocator.cc:314
std::regex destinationMatch
Definition: FileLocator.h:36
#define str(s)
def move(src, dest)
Definition: eostools.py:511