CMS 3D CMS Logo

FileLocator.cc
Go to the documentation of this file.
3 
4 #include <boost/algorithm/string.hpp>
5 #include <boost/algorithm/string/replace.hpp>
6 #include <boost/property_tree/json_parser.hpp>
7 
8 #include <filesystem>
9 #include <cstdlib>
10 #include <stdexcept>
11 #include <fstream>
12 #include <sstream>
13 
14 namespace pt = boost::property_tree;
15 
16 namespace {
17 
// Expands "$N" placeholders in outputFormat with the text of capture group N
// taken from matches, and returns the expanded string.
//
// The format is scanned once, left to right:
//  - N is the longest run of digits after '$' that names an existing capture
//    group (1 <= N < matches.size()), so "$10" means group 10 when it exists
//    and group 1 followed by a literal '0' otherwise.
//  - Text inserted for a group is never re-scanned, so a captured string that
//    itself contains "$2" is copied verbatim.
//  - A placeholder whose group is empty (or did not participate in the match)
//    is left in the output unchanged.
//  - "$0", "$" followed by a non-digit, and references to groups that do not
//    exist are copied literally.
std::string replaceWithRegexp(std::smatch const& matches, std::string const& outputFormat) {
  std::string result;
  result.reserve(outputFormat.size());

  std::size_t const length = outputFormat.size();
  std::size_t pos = 0;
  while (pos < length) {
    char const current = outputFormat[pos];
    // A placeholder needs '$' followed by a non-zero digit.
    bool const startsPlaceholder =
        current == '$' && pos + 1 < length && outputFormat[pos + 1] >= '1' && outputFormat[pos + 1] <= '9';
    if (!startsPlaceholder) {
      result += current;
      ++pos;
      continue;
    }

    // Extend the group number digit by digit while it still names a valid group.
    std::size_t groupIndex = 0;
    std::size_t tokenEnd = pos + 1;
    std::size_t candidate = 0;
    for (std::size_t cursor = pos + 1; cursor < length; ++cursor) {
      char const digit = outputFormat[cursor];
      if (digit < '0' || digit > '9') {
        break;
      }
      candidate = candidate * 10 + static_cast<std::size_t>(digit - '0');
      if (candidate >= matches.size()) {
        break;  // also bounds candidate, so it cannot overflow
      }
      groupIndex = candidate;
      tokenEnd = cursor + 1;
    }

    if (groupIndex == 0) {
      // No such capture group: keep the '$' and carry on after it.
      result += current;
      ++pos;
      continue;
    }

    if (matches[groupIndex].length() == 0) {
      // Empty group: keep the placeholder text as it was written.
      result.append(outputFormat, pos, tokenEnd - pos);
    } else {
      result.append(matches[groupIndex].first, matches[groupIndex].second);
    }
    pos = tokenEnd;
  }
  return result;
}
31 
// Shared empty C string handed out by safe() in place of a null pointer.
constexpr char const* const kEmptyString = "";
// Prefix that every genuine logical file name starts with.
constexpr char const* const kLFNPrefix = "/store/";

// Returns iCheck unchanged when it is non-null, otherwise kEmptyString, so
// callers can always treat the result as a valid C string.
const char* safe(const char* iCheck) { return iCheck != nullptr ? iCheck : kEmptyString; }
41 
42 } // namespace
43 
44 namespace pt = boost::property_tree;
45 
46 namespace edm {
47 
48  FileLocator::FileLocator(std::string const& catUrl, unsigned iCatalog) : m_destination("any") {
49  init_trivialCatalog(catUrl, iCatalog);
50  }
51 
53  unsigned iCatalog,
54  std::string const& storageDescriptionPath)
55  : m_destination("any") {
56  init(catAttr, iCatalog, storageDescriptionPath);
57  }
58 
60 
62  if (catType == edm::CatalogType::TrivialCatalog)
63  return convert(ilfn, m_directRules_trivialCatalog, true);
64  return convert(ilfn, m_directRules, true);
65  }
66 
68  std::string out = "";
69  //check if input is an authentic LFN
70  if (input.compare(0, 7, kLFNPrefix) != 0)
71  return out;
72  //use prefix in the protocol
73  if (!m_prefix.empty()) {
74  out = m_prefix + "/" + input;
75  if (input[0] == '/')
76  out = m_prefix + input;
77  return out;
78  }
79  //no prefix in the protocol, use rule
80  for (size_t pi = 0, pe = m_protocols.size(); pi != pe; ++pi) {
82  if (!out.empty()) {
83  return out;
84  }
85  }
86  return out;
87  }
88 
89  void FileLocator::parseRuleTrivialCatalog(tinyxml2::XMLElement* ruleElement, ProtocolRules& rules) {
90  if (!ruleElement) {
91  throw cms::Exception("TrivialFileCatalog", std::string("TrivialFileCatalog::connect: Malformed trivial catalog"));
92  }
93 
94  auto const protocol = safe(ruleElement->Attribute("protocol"));
95  auto destinationMatchRegexp = ruleElement->Attribute("destination-match");
96  if (destinationMatchRegexp == nullptr or destinationMatchRegexp[0] == 0) {
97  destinationMatchRegexp = ".*";
98  }
99 
100  auto const pathMatchRegexp = safe(ruleElement->Attribute("path-match"));
101  auto const result = safe(ruleElement->Attribute("result"));
102  auto const chain = safe(ruleElement->Attribute("chain"));
103 
104  Rule rule;
105  rule.pathMatch.assign(pathMatchRegexp);
106  rule.destinationMatch.assign(destinationMatchRegexp);
107  rule.result = result;
108  rule.chain = chain;
109  rules[protocol].emplace_back(std::move(rule));
110  }
111 
113  std::string const& protocol,
114  ProtocolRules& rules) {
115  if (storageRule.second.empty()) {
116  throw cms::Exception("RucioFileCatalog", "edm::FileLocator::parseRule Malformed storage rule");
117  }
118  auto const pathMatchRegexp = storageRule.second.get<std::string>("lfn");
119  auto const result = storageRule.second.get<std::string>("pfn");
120  Rule rule;
121  rule.pathMatch.assign(pathMatchRegexp);
122  rule.destinationMatch.assign(".*");
123  rule.result = result;
124  rule.chain = "";
125  rules[protocol].emplace_back(std::move(rule));
126  }
127 
128  void FileLocator::init_trivialCatalog(std::string const& catUrl, unsigned iCatalog) {
129  std::string url = catUrl;
130  if (url.empty()) {
131  Service<SiteLocalConfig> localconfservice;
132  if (!localconfservice.isAvailable())
133  throw cms::Exception("TrivialFileCatalog", "edm::SiteLocalConfigService is not available");
134  if (iCatalog >= localconfservice->trivialDataCatalogs().size())
135  throw cms::Exception("TrivialFileCatalog", "edm::FileLocator: Request nonexistence data catalog");
136  url = localconfservice->trivialDataCatalogs()[iCatalog];
137  }
138 
139  if (url.find("file:") == std::string::npos) {
140  throw cms::Exception("TrivialFileCatalog",
141  "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
142  }
143 
144  url = url.erase(0, url.find(':') + 1);
145 
146  std::vector<std::string> tokens;
147  boost::algorithm::split(tokens, url, boost::is_any_of(std::string("?")));
148  m_filename = tokens[0];
149 
150  if (tokens.size() == 2) {
151  std::string const options = tokens[1];
152  std::vector<std::string> optionTokens;
153  boost::algorithm::split(optionTokens, options, boost::is_any_of(std::string("&")));
154 
155  std::string const equalSign("=");
156  std::string const comma(",");
157 
158  for (size_t oi = 0, oe = optionTokens.size(); oi != oe; ++oi) {
159  std::string const option = optionTokens[oi];
160  std::vector<std::string> argTokens;
161  boost::algorithm::split(argTokens, option, boost::is_any_of(equalSign));
162 
163  if (argTokens.size() != 2) {
164  throw cms::Exception("TrivialFileCatalog",
165  "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
166  }
167 
168  if (argTokens[0] == "protocol") {
169  boost::algorithm::split(m_protocols, argTokens[1], boost::is_any_of(comma));
170  } else if (argTokens[0] == "destination") {
171  m_destination = argTokens[1];
172  }
173  }
174  }
175 
176  if (m_protocols.empty()) {
177  throw cms::Exception("TrivialFileCatalog",
178  "TrivialFileCatalog::connect: protocol was not supplied in the contact string");
179  }
180 
181  std::ifstream configFile;
182  configFile.open(m_filename.c_str());
183 
184  if (!configFile.good() || !configFile.is_open()) {
185  throw cms::Exception("TrivialFileCatalog",
186  "TrivialFileCatalog::connect: Unable to open trivial file catalog " + m_filename);
187  }
188 
189  configFile.close();
190 
191  tinyxml2::XMLDocument doc;
192  auto loadErr = doc.LoadFile(m_filename.c_str());
193  if (loadErr != tinyxml2::XML_SUCCESS) {
194  throw cms::Exception("TrivialFileCatalog")
195  << "tinyxml file load failed with error : " << doc.ErrorStr() << std::endl;
196  }
197  /* trivialFileCatalog matches the following xml schema
198  FIXME: write a proper DTD
199  <storage-mapping>
200  <lfn-to-pfn protocol="direct" destination-match=".*"
201  path-match="lfn/guid match regular expression"
202  result="/castor/cern.ch/cms/$1"/>
203  <pfn-to-lfn protocol="srm"
204  path-match="lfn/guid match regular expression"
205  result="$1"/>
206  </storage-mapping>
207  */
208  auto rootElement = doc.RootElement();
209  /*first of all do the lfn-to-pfn bit*/
210  for (auto el = rootElement->FirstChildElement("lfn-to-pfn"); el != nullptr;
211  el = el->NextSiblingElement("lfn-to-pfn")) {
213  }
214 
215  /*Then we handle the pfn-to-lfn bit*/
216  for (auto el = rootElement->FirstChildElement("pfn-to-lfn"); el != nullptr;
217  el = el->NextSiblingElement("pfn-to-lfn")) {
219  }
220  }
221 
222  void FileLocator::init(edm::CatalogAttributes const& input_dataCatalog,
223  unsigned iCatalog,
224  std::string const& storageDescriptionPath) {
225  Service<SiteLocalConfig> localconfservice;
226  edm::CatalogAttributes aCatalog = input_dataCatalog;
227  if (input_dataCatalog.empty()) {
228  if (!localconfservice.isAvailable()) {
229  cms::Exception ex("FileCatalog");
230  ex << "edm::SiteLocalConfigService is not available";
231  ex.addContext("Calling edm::FileLocator::init()");
232  throw ex;
233  }
234  if (iCatalog >= localconfservice->dataCatalogs().size()) {
235  cms::Exception ex("FileCatalog");
236  ex << "Request nonexistence data catalog";
237  ex.addContext("Calling edm::FileLocator::init()");
238  throw ex;
239  }
240  aCatalog = localconfservice->dataCatalogs()[iCatalog];
241  }
242 
243  std::filesystem::path filename_storage = localconfservice->storageDescriptionPath(aCatalog);
244 
245  //use path to storage description from input parameter
246  if (!storageDescriptionPath.empty())
247  filename_storage = storageDescriptionPath;
248 
249  //now read json
250  pt::ptree json;
251  try {
252  boost::property_tree::read_json(filename_storage.string(), json);
253  } catch (std::exception& e) {
254  cms::Exception ex("FileCatalog");
255  ex << "Can not open storage.json (" << filename_storage.string()
256  << "). Check SITECONFIG_PATH and site-local-config.xml <data-access>";
257  ex.addContext("edm::FileLocator:init()");
258  throw ex;
259  }
260  auto found_site = std::find_if(json.begin(), json.end(), [&](pt::ptree::value_type const& site) {
261  //get site name
262  std::string siteName = site.second.get("site", kEmptyString);
263  //get volume name
264  std::string volName = site.second.get("volume", kEmptyString);
265  return aCatalog.storageSite == siteName && aCatalog.volume == volName;
266  });
267 
268  //let enforce that site-local-config.xml and storage.json contains valid catalogs in <data-access>, in which site defined in site-local-config.xml <data-access> should be found in storage.json
269  if (found_site == json.end()) {
270  cms::Exception ex("FileCatalog");
271  ex << "Can not find storage site \"" << aCatalog.storageSite << "\" and volume \"" << aCatalog.volume
272  << "\" in storage.json. Check site-local-config.xml <data-access> and storage.json";
273  ex.addContext("edm::FileLocator:init()");
274  throw ex;
275  }
276 
277  const pt::ptree& protocols = found_site->second.find("protocols")->second;
278  auto found_protocol = std::find_if(protocols.begin(), protocols.end(), [&](pt::ptree::value_type const& protocol) {
279  std::string protName = protocol.second.get("protocol", kEmptyString);
280  return aCatalog.protocol == protName;
281  });
282 
283  //let enforce that site-local-config.xml and storage.json contains valid catalogs, in which protocol defined in site-local-config.xml <data-access> should be found in storage.json
284  if (found_protocol == protocols.end()) {
285  cms::Exception ex("FileCatalog");
286  ex << "Can not find protocol \"" << aCatalog.protocol << "\" for the storage site \"" << aCatalog.storageSite
287  << "\" and volume \"" << aCatalog.volume
288  << "\" in storage.json. Check site-local-config.xml <data-access> and storage.json";
289  ex.addContext("edm::FileLocator:init()");
290  throw ex;
291  }
292 
293  std::string protName = found_protocol->second.get("protocol", kEmptyString);
294  m_protocols.push_back(protName);
295  m_prefix = found_protocol->second.get("prefix", kEmptyString);
296  if (m_prefix == kEmptyString) {
297  //get rules
298  const pt::ptree& rules = found_protocol->second.find("rules")->second;
299  //loop over rules
300  for (pt::ptree::value_type const& storageRule : rules) {
301  parseRule(storageRule, protName, m_directRules);
302  }
303  }
304  }
305 
307  std::string const& protocol,
308  std::string const& destination,
309  bool direct,
310  std::string name) const {
311  ProtocolRules::const_iterator const rulesIterator = protocolRules.find(protocol);
312  if (rulesIterator == protocolRules.end()) {
313  return "";
314  }
315 
316  Rules const& rules = (*(rulesIterator)).second;
317 
318  std::smatch destinationMatches;
319  std::smatch nameMatches;
320 
321  /* Look up for a matching rule*/
322  for (Rules::const_iterator i = rules.begin(); i != rules.end(); ++i) {
323  if (!std::regex_match(destination, destinationMatches, i->destinationMatch)) {
324  continue;
325  }
326 
327  if (!std::regex_match(name, i->pathMatch)) {
328  continue;
329  }
330 
331  // std::cerr << "Rule " << i->pathMatch << "matched! " << std::endl;
332 
333  std::string const chain = i->chain;
334  if ((direct == true) && (!chain.empty())) {
335  name = applyRules(protocolRules, chain, destination, direct, name);
336  if (name.empty()) {
337  return "";
338  }
339  }
340 
341  std::regex_match(name, nameMatches, i->pathMatch);
342  name = replaceWithRegexp(nameMatches, i->result);
343 
344  if ((direct == false) && (!chain.empty())) {
345  name = applyRules(protocolRules, chain, destination, direct, name);
346  }
347  return name;
348  }
349  return "";
350  }
351 } // namespace edm
ProtocolRules m_directRules_trivialCatalog
Definition: FileLocator.h:63
std::map< std::string, Rules > ProtocolRules
Definition: FileLocator.h:42
FileLocator(edm::CatalogAttributes const &catAttr, unsigned iCatalog=0, std::string const &storageDescriptionPath=std::string())
Definition: FileLocator.cc:52
nlohmann::json json
std::string m_destination
Definition: FileLocator.h:72
std::string pfn(std::string const &ilfn, edm::CatalogType catType) const
Definition: FileLocator.cc:61
static std::string const input
Definition: EdmProvDump.cc:50
const Double_t pi
U second(std::pair< T, U > const &p)
ProtocolRules m_inverseRules
Definition: FileLocator.h:65
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::string m_filename
Definition: FileLocator.h:70
std::string m_prefix
Definition: FileLocator.h:73
void init_trivialCatalog(std::string const &catUrl, unsigned iCatalog)
Definition: FileLocator.cc:128
void init(edm::CatalogAttributes const &input_dataCatalog, unsigned iCatalog, std::string const &storageDescriptionPath)
Definition: FileLocator.cc:222
void addContext(std::string const &context)
Definition: Exception.cc:165
ProtocolRules m_directRules
Definition: FileLocator.h:67
static const char kEmptyString[1]
std::vector< Rule > Rules
Definition: FileLocator.h:41
HLT enums.
std::vector< std::string > m_protocols
Definition: FileLocator.h:71
void parseRuleTrivialCatalog(tinyxml2::XMLElement *ruleNode, ProtocolRules &rules)
Definition: FileLocator.cc:89
void parseRule(boost::property_tree::ptree::value_type const &storageRule, std::string const &protocol, ProtocolRules &rules)
Definition: FileLocator.cc:112
std::string convert(std::string const &input, ProtocolRules const &rules, bool direct) const
Definition: FileLocator.cc:67
bool isAvailable() const
Definition: Service.h:40
std::string applyRules(ProtocolRules const &protocolRules, std::string const &protocol, std::string const &destination, bool direct, std::string name) const
Definition: FileLocator.cc:306
#define str(s)
def move(src, dest)
Definition: eostools.py:511