CMS 3D CMS Logo

FileLocator.cc
Go to the documentation of this file.
4 
5 #include <boost/algorithm/string.hpp>
6 #include <boost/algorithm/string/replace.hpp>
7 #include <boost/property_tree/json_parser.hpp>
8 
9 #include <filesystem>
10 #include <cstdlib>
11 #include <stdexcept>
12 #include <fstream>
13 #include <sstream>
14 
15 namespace pt = boost::property_tree;
16 
17 namespace {
18 
19  std::string replaceWithRegexp(std::smatch const& matches, std::string const& outputFormat) {
20  std::string result = outputFormat;
21  std::stringstream str;
22 
23  for (size_t i = 1; i < matches.size(); ++i) {
24  str.str("");
25  str << "$" << i;
26  std::string const matchedString(matches[i].first, matches[i].second);
27  if (!matchedString.empty())
28  boost::algorithm::replace_all(result, str.str(), matchedString);
29  }
30  return result;
31  }
32 
33  constexpr char const* const kEmptyString = "";
34  constexpr char const* const kLFNPrefix = "/store/";
35 
36  const char* safe(const char* iCheck) {
37  if (iCheck == nullptr) {
38  return kEmptyString;
39  }
40  return iCheck;
41  }
42 
43 } // namespace
44 
45 namespace pt = boost::property_tree;
46 
47 namespace edm {
48 
49  FileLocator::FileLocator(std::string const& catUrl, unsigned iCatalog) : m_destination("any") {
50  init_trivialCatalog(catUrl, iCatalog);
51  }
52 
54  unsigned iCatalog,
55  std::string const& storageDescriptionPath)
56  : m_destination("any") {
57  init(catAttr, iCatalog, storageDescriptionPath);
58  }
59 
61 
63  if (catType == edm::CatalogType::TrivialCatalog)
64  return convert(ilfn, m_directRules_trivialCatalog, true);
65  return convert(ilfn, m_directRules, true);
66  }
67 
69  std::string out = "";
70  //check if input is an authentic LFN
71  if (input.compare(0, 7, kLFNPrefix) != 0)
72  return out;
73  //use prefix in the protocol
74  if (!m_prefix.empty()) {
75  out = m_prefix + "/" + input;
76  if (input[0] == '/')
77  out = m_prefix + input;
78  return out;
79  }
80  //no prefix in the protocol, use rule
81  for (size_t pi = 0, pe = m_protocols.size(); pi != pe; ++pi) {
83  if (!out.empty()) {
84  return out;
85  }
86  }
87  return out;
88  }
89 
90  void FileLocator::parseRuleTrivialCatalog(tinyxml2::XMLElement* ruleElement, ProtocolRules& rules) {
91  if (!ruleElement) {
92  throw cms::Exception("TrivialFileCatalog", std::string("TrivialFileCatalog::connect: Malformed trivial catalog"));
93  }
94 
95  auto const protocol = safe(ruleElement->Attribute("protocol"));
96  auto destinationMatchRegexp = ruleElement->Attribute("destination-match");
97  if (destinationMatchRegexp == nullptr or destinationMatchRegexp[0] == 0) {
98  destinationMatchRegexp = ".*";
99  }
100 
101  auto const pathMatchRegexp = safe(ruleElement->Attribute("path-match"));
102  auto const result = safe(ruleElement->Attribute("result"));
103  auto const chain = safe(ruleElement->Attribute("chain"));
104 
105  Rule rule;
106  rule.pathMatch.assign(pathMatchRegexp);
107  rule.destinationMatch.assign(destinationMatchRegexp);
108  rule.result = result;
109  rule.chain = chain;
110  rules[protocol].emplace_back(std::move(rule));
111  }
112 
114  std::string const& protocol,
115  ProtocolRules& rules) {
116  if (storageRule.second.empty()) {
117  throw cms::Exception("RucioFileCatalog", "edm::FileLocator::parseRule Malformed storage rule");
118  }
119  auto const pathMatchRegexp = storageRule.second.get<std::string>("lfn");
120  auto const result = storageRule.second.get<std::string>("pfn");
121  Rule rule;
122  rule.pathMatch.assign(pathMatchRegexp);
123  rule.destinationMatch.assign(".*");
124  rule.result = result;
125  rule.chain = "";
126  rules[protocol].emplace_back(std::move(rule));
127  }
128 
129  void FileLocator::init_trivialCatalog(std::string const& catUrl, unsigned iCatalog) {
130  std::string url = catUrl;
131  if (url.empty()) {
132  Service<SiteLocalConfig> localconfservice;
133  if (!localconfservice.isAvailable())
134  throw cms::Exception("TrivialFileCatalog", "edm::SiteLocalConfigService is not available");
135  if (iCatalog >= localconfservice->trivialDataCatalogs().size())
136  throw cms::Exception("TrivialFileCatalog", "edm::FileLocator: Request nonexistence data catalog");
137  url = localconfservice->trivialDataCatalogs()[iCatalog];
138  }
139 
140  if (url.find("file:") == std::string::npos) {
141  throw cms::Exception("TrivialFileCatalog",
142  "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
143  }
144 
145  url = url.erase(0, url.find(':') + 1);
146 
147  std::vector<std::string> tokens;
148  boost::algorithm::split(tokens, url, boost::is_any_of(std::string("?")));
149  m_filename = tokens[0];
150 
151  if (tokens.size() == 2) {
152  std::string const options = tokens[1];
153  std::vector<std::string> optionTokens;
154  boost::algorithm::split(optionTokens, options, boost::is_any_of(std::string("&")));
155 
156  std::string const equalSign("=");
157  std::string const comma(",");
158 
159  for (size_t oi = 0, oe = optionTokens.size(); oi != oe; ++oi) {
160  std::string const option = optionTokens[oi];
161  std::vector<std::string> argTokens;
162  boost::algorithm::split(argTokens, option, boost::is_any_of(equalSign));
163 
164  if (argTokens.size() != 2) {
165  throw cms::Exception("TrivialFileCatalog",
166  "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
167  }
168 
169  if (argTokens[0] == "protocol") {
170  boost::algorithm::split(m_protocols, argTokens[1], boost::is_any_of(comma));
171  } else if (argTokens[0] == "destination") {
172  m_destination = argTokens[1];
173  }
174  }
175  }
176 
177  if (m_protocols.empty()) {
178  throw cms::Exception("TrivialFileCatalog",
179  "TrivialFileCatalog::connect: protocol was not supplied in the contact string");
180  }
181 
182  std::ifstream configFile;
183  configFile.open(m_filename.c_str());
184 
185  if (!configFile.good() || !configFile.is_open()) {
186  throw cms::Exception("TrivialFileCatalog",
187  "TrivialFileCatalog::connect: Unable to open trivial file catalog " + m_filename);
188  }
189 
190  configFile.close();
191 
192  tinyxml2::XMLDocument doc;
193  auto loadErr = doc.LoadFile(m_filename.c_str());
194  if (loadErr != tinyxml2::XML_SUCCESS) {
195  throw cms::Exception("TrivialFileCatalog")
196  << "tinyxml file load failed with error : " << doc.ErrorStr() << std::endl;
197  }
198  /* trivialFileCatalog matches the following xml schema
199  FIXME: write a proper DTD
200  <storage-mapping>
201  <lfn-to-pfn protocol="direct" destination-match=".*"
202  path-match="lfn/guid match regular expression"
203  result="/castor/cern.ch/cms/$1"/>
204  <pfn-to-lfn protocol="srm"
205  path-match="lfn/guid match regular expression"
206  result="$1"/>
207  </storage-mapping>
208  */
209  auto rootElement = doc.RootElement();
210  /*first of all do the lfn-to-pfn bit*/
211  for (auto el = rootElement->FirstChildElement("lfn-to-pfn"); el != nullptr;
212  el = el->NextSiblingElement("lfn-to-pfn")) {
214  }
215 
216  /*Then we handle the pfn-to-lfn bit*/
217  for (auto el = rootElement->FirstChildElement("pfn-to-lfn"); el != nullptr;
218  el = el->NextSiblingElement("pfn-to-lfn")) {
220  }
221  }
222 
223  void FileLocator::init(edm::CatalogAttributes const& input_dataCatalog,
224  unsigned iCatalog,
225  std::string const& storageDescriptionPath) {
226  Service<SiteLocalConfig> localconfservice;
227  edm::CatalogAttributes aCatalog = input_dataCatalog;
228  if (input_dataCatalog.empty()) {
229  if (!localconfservice.isAvailable()) {
230  cms::Exception ex("FileCatalog");
231  ex << "edm::SiteLocalConfigService is not available";
232  ex.addContext("Calling edm::FileLocator::init()");
233  throw ex;
234  }
235  if (iCatalog >= localconfservice->dataCatalogs().size()) {
236  cms::Exception ex("FileCatalog");
237  ex << "Request nonexistence data catalog";
238  ex.addContext("Calling edm::FileLocator::init()");
239  throw ex;
240  }
241  aCatalog = localconfservice->dataCatalogs()[iCatalog];
242  }
243 
244  std::filesystem::path filename_storage = localconfservice->storageDescriptionPath(aCatalog);
245 
246  //use path to storage description from input parameter
247  if (!storageDescriptionPath.empty())
248  filename_storage = storageDescriptionPath;
249 
250  //now read json
251  pt::ptree json;
252  try {
253  boost::property_tree::read_json(filename_storage.string(), json);
254  } catch (std::exception& e) {
255  cms::Exception ex("FileCatalog");
256  ex << "Can not open storage.json (" << filename_storage.string()
257  << "). Check SITECONFIG_PATH and site-local-config.xml <data-access>";
258  ex.addContext("edm::FileLocator:init()");
259  throw ex;
260  }
261  auto found_site = std::find_if(json.begin(), json.end(), [&](pt::ptree::value_type const& site) {
262  //get site name
263  std::string siteName = site.second.get("site", kEmptyString);
264  //get volume name
265  std::string volName = site.second.get("volume", kEmptyString);
266  return aCatalog.storageSite == siteName && aCatalog.volume == volName;
267  });
268 
269  //let enforce that site-local-config.xml and storage.json contains valid catalogs in <data-access>, in which site defined in site-local-config.xml <data-access> should be found in storage.json
270  if (found_site == json.end()) {
271  cms::Exception ex("FileCatalog");
272  ex << "Can not find storage site \"" << aCatalog.storageSite << "\" and volume \"" << aCatalog.volume
273  << "\" in storage.json. Check site-local-config.xml <data-access> and storage.json";
274  ex.addContext("edm::FileLocator:init()");
275  throw ex;
276  }
277 
278  const pt::ptree& protocols = found_site->second.find("protocols")->second;
279  auto found_protocol = std::find_if(protocols.begin(), protocols.end(), [&](pt::ptree::value_type const& protocol) {
280  std::string protName = protocol.second.get("protocol", kEmptyString);
281  return aCatalog.protocol == protName;
282  });
283 
284  //let enforce that site-local-config.xml and storage.json contains valid catalogs, in which protocol defined in site-local-config.xml <data-access> should be found in storage.json
285  if (found_protocol == protocols.end()) {
286  cms::Exception ex("FileCatalog");
287  ex << "Can not find protocol \"" << aCatalog.protocol << "\" for the storage site \"" << aCatalog.storageSite
288  << "\" and volume \"" << aCatalog.volume
289  << "\" in storage.json. Check site-local-config.xml <data-access> and storage.json";
290  ex.addContext("edm::FileLocator:init()");
291  throw ex;
292  }
293 
294  std::string protName = found_protocol->second.get("protocol", kEmptyString);
295  m_protocols.push_back(protName);
296  m_prefix = found_protocol->second.get("prefix", kEmptyString);
297  if (m_prefix == kEmptyString) {
298  //get rules
299  if (found_protocol->second.find("rules") == found_protocol->second.not_found()) {
300  cms::Exception ex("FileCatalog");
301  ex << "protocol must contain either a prefix or rules, "
302  << "neither found for protocol \"" << aCatalog.protocol << "\" for the storage site \""
303  << aCatalog.storageSite << "\" and volume \"" << aCatalog.volume
304  << "\" in storage.json. Check site-local-config.xml <data-access> and storage.json";
305  ex.addContext("edm::FileLocator:init()");
306  throw ex;
307  }
308  const pt::ptree& rules = found_protocol->second.find("rules")->second;
309  //loop over rules
310  for (pt::ptree::value_type const& storageRule : rules) {
311  parseRule(storageRule, protName, m_directRules);
312  }
313  }
314  }
315 
317  std::string const& protocol,
318  std::string const& destination,
319  bool direct,
320  std::string name) const {
321  ProtocolRules::const_iterator const rulesIterator = protocolRules.find(protocol);
322  if (rulesIterator == protocolRules.end()) {
323  return "";
324  }
325 
326  Rules const& rules = (*(rulesIterator)).second;
327 
328  std::smatch destinationMatches;
329  std::smatch nameMatches;
330 
331  /* Look up for a matching rule*/
332  for (Rules::const_iterator i = rules.begin(); i != rules.end(); ++i) {
333  if (!std::regex_match(destination, destinationMatches, i->destinationMatch)) {
334  continue;
335  }
336 
337  if (!std::regex_match(name, i->pathMatch)) {
338  continue;
339  }
340 
341  // std::cerr << "Rule " << i->pathMatch << "matched! " << std::endl;
342 
343  std::string const chain = i->chain;
344  if ((direct == true) && (!chain.empty())) {
345  name = applyRules(protocolRules, chain, destination, direct, name);
346  if (name.empty()) {
347  return "";
348  }
349  }
350 
351  std::regex_match(name, nameMatches, i->pathMatch);
352  name = replaceWithRegexp(nameMatches, i->result);
353 
354  if ((direct == false) && (!chain.empty())) {
355  name = applyRules(protocolRules, chain, destination, direct, name);
356  }
357  return name;
358  }
359  return "";
360  }
361 } // namespace edm
ProtocolRules m_directRules_trivialCatalog
Definition: FileLocator.h:63
std::map< std::string, Rules > ProtocolRules
Definition: FileLocator.h:42
FileLocator(edm::CatalogAttributes const &catAttr, unsigned iCatalog=0, std::string const &storageDescriptionPath=std::string())
Definition: FileLocator.cc:53
nlohmann::json json
std::string m_destination
Definition: FileLocator.h:72
std::string pfn(std::string const &ilfn, edm::CatalogType catType) const
Definition: FileLocator.cc:62
static std::string const input
Definition: EdmProvDump.cc:50
const Double_t pi
U second(std::pair< T, U > const &p)
ProtocolRules m_inverseRules
Definition: FileLocator.h:65
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::string m_filename
Definition: FileLocator.h:70
std::string m_prefix
Definition: FileLocator.h:73
void init_trivialCatalog(std::string const &catUrl, unsigned iCatalog)
Definition: FileLocator.cc:129
void init(edm::CatalogAttributes const &input_dataCatalog, unsigned iCatalog, std::string const &storageDescriptionPath)
Definition: FileLocator.cc:223
void addContext(std::string const &context)
Definition: Exception.cc:169
ProtocolRules m_directRules
Definition: FileLocator.h:67
static const char kEmptyString[1]
std::vector< Rule > Rules
Definition: FileLocator.h:41
HLT enums.
std::vector< std::string > m_protocols
Definition: FileLocator.h:71
void parseRuleTrivialCatalog(tinyxml2::XMLElement *ruleNode, ProtocolRules &rules)
Definition: FileLocator.cc:90
void parseRule(boost::property_tree::ptree::value_type const &storageRule, std::string const &protocol, ProtocolRules &rules)
Definition: FileLocator.cc:113
std::string convert(std::string const &input, ProtocolRules const &rules, bool direct) const
Definition: FileLocator.cc:68
bool isAvailable() const
Definition: Service.h:40
std::string applyRules(ProtocolRules const &protocolRules, std::string const &protocol, std::string const &destination, bool direct, std::string name) const
Definition: FileLocator.cc:316
#define str(s)
def move(src, dest)
Definition: eostools.py:511