CMS 3D CMS Logo

FileLocator.cc
Go to the documentation of this file.
3 
4 #include <boost/algorithm/string.hpp>
5 #include <boost/algorithm/string/replace.hpp>
6 #include <boost/property_tree/json_parser.hpp>
7 
8 #include <filesystem>
9 #include <cstdlib>
10 #include <stdexcept>
11 #include <fstream>
12 #include <sstream>
13 
14 namespace pt = boost::property_tree;
15 
16 namespace {
17 
18  std::string replaceWithRegexp(std::smatch const& matches, std::string const& outputFormat) {
19  std::string result = outputFormat;
20  std::stringstream str;
21 
22  for (size_t i = 1; i < matches.size(); ++i) {
23  str.str("");
24  str << "$" << i;
25  std::string const matchedString(matches[i].first, matches[i].second);
26  if (!matchedString.empty())
27  boost::algorithm::replace_all(result, str.str(), matchedString);
28  }
29  return result;
30  }
31 
// Shared empty C string: returned by safe() for null input and passed as the
// default value to the property-tree get() calls in this file.
32  constexpr char const* const kEmptyString = "";
// Prefix an input must start with to be treated as an LFN; its 7 characters
// are what convert() compares against.
33  constexpr char const* const kLFNPrefix = "/store/";
34 
35  const char* safe(const char* iCheck) {
36  if (iCheck == nullptr) {
37  return kEmptyString;
38  }
39  return iCheck;
40  }
41 
42 } // namespace
43 
44 namespace pt = boost::property_tree;
45 
46 namespace edm {
47 
48  FileLocator::FileLocator(std::string const& catUrl, unsigned iCatalog) : m_destination("any") {
49  init_trivialCatalog(catUrl, iCatalog);
50  }
51 
53  unsigned iCatalog,
54  std::string const& storageDescriptionPath)
55  : m_destination("any") {
56  init(catAttr, iCatalog, storageDescriptionPath);
57  }
58 
60 
62  if (catType == edm::CatalogType::TrivialCatalog)
63  return convert(ilfn, m_directRules_trivialCatalog, true);
64  return convert(ilfn, m_directRules, true);
65  }
66 
68  std::string out = "";
69  //check if input is an authentic LFN
70  if (input.compare(0, 7, kLFNPrefix) != 0)
71  return out;
72  //use prefix in the protocol
73  if (!m_prefix.empty()) {
74  out = m_prefix + "/" + input;
75  if (input[0] == '/')
76  out = m_prefix + input;
77  return out;
78  }
79  //no prefix in the protocol, use rule
80  for (size_t pi = 0, pe = m_protocols.size(); pi != pe; ++pi) {
82  if (!out.empty()) {
83  return out;
84  }
85  }
86  return out;
87  }
88 
89  void FileLocator::parseRuleTrivialCatalog(tinyxml2::XMLElement* ruleElement, ProtocolRules& rules) {
90  if (!ruleElement) {
91  throw cms::Exception("TrivialFileCatalog", std::string("TrivialFileCatalog::connect: Malformed trivial catalog"));
92  }
93 
94  auto const protocol = safe(ruleElement->Attribute("protocol"));
95  auto destinationMatchRegexp = ruleElement->Attribute("destination-match");
96  if (destinationMatchRegexp == nullptr or destinationMatchRegexp[0] == 0) {
97  destinationMatchRegexp = ".*";
98  }
99 
100  auto const pathMatchRegexp = safe(ruleElement->Attribute("path-match"));
101  auto const result = safe(ruleElement->Attribute("result"));
102  auto const chain = safe(ruleElement->Attribute("chain"));
103 
104  Rule rule;
105  rule.pathMatch.assign(pathMatchRegexp);
106  rule.destinationMatch.assign(destinationMatchRegexp);
107  rule.result = result;
108  rule.chain = chain;
109  rules[protocol].emplace_back(std::move(rule));
110  }
111 
113  std::string const& protocol,
114  ProtocolRules& rules) {
115  if (storageRule.second.empty()) {
116  throw cms::Exception("RucioFileCatalog", "edm::FileLocator::parseRule Malformed storage rule");
117  }
118  auto const pathMatchRegexp = storageRule.second.get<std::string>("lfn");
119  auto const result = storageRule.second.get<std::string>("pfn");
120  Rule rule;
121  rule.pathMatch.assign(pathMatchRegexp);
122  rule.destinationMatch.assign(".*");
123  rule.result = result;
124  rule.chain = "";
125  rules[protocol].emplace_back(std::move(rule));
126  }
127 
128  void FileLocator::init_trivialCatalog(std::string const& catUrl, unsigned iCatalog) {
129  std::string url = catUrl;
130  if (url.empty()) {
131  Service<SiteLocalConfig> localconfservice;
132  if (!localconfservice.isAvailable())
133  throw cms::Exception("TrivialFileCatalog", "edm::SiteLocalConfigService is not available");
134  if (iCatalog >= localconfservice->trivialDataCatalogs().size())
135  throw cms::Exception("TrivialFileCatalog", "edm::FileLocator: Request nonexistence data catalog");
136  url = localconfservice->trivialDataCatalogs()[iCatalog];
137  }
138 
139  if (url.find("file:") == std::string::npos) {
140  throw cms::Exception("TrivialFileCatalog",
141  "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
142  }
143 
144  url = url.erase(0, url.find(':') + 1);
145 
146  std::vector<std::string> tokens;
147  boost::algorithm::split(tokens, url, boost::is_any_of(std::string("?")));
148  m_filename = tokens[0];
149 
150  if (tokens.size() == 2) {
151  std::string const options = tokens[1];
152  std::vector<std::string> optionTokens;
153  boost::algorithm::split(optionTokens, options, boost::is_any_of(std::string("&")));
154 
155  std::string const equalSign("=");
156  std::string const comma(",");
157 
158  for (size_t oi = 0, oe = optionTokens.size(); oi != oe; ++oi) {
159  std::string const option = optionTokens[oi];
160  std::vector<std::string> argTokens;
161  boost::algorithm::split(argTokens, option, boost::is_any_of(equalSign));
162 
163  if (argTokens.size() != 2) {
164  throw cms::Exception("TrivialFileCatalog",
165  "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
166  }
167 
168  if (argTokens[0] == "protocol") {
169  boost::algorithm::split(m_protocols, argTokens[1], boost::is_any_of(comma));
170  } else if (argTokens[0] == "destination") {
171  m_destination = argTokens[1];
172  }
173  }
174  }
175 
176  if (m_protocols.empty()) {
177  throw cms::Exception("TrivialFileCatalog",
178  "TrivialFileCatalog::connect: protocol was not supplied in the contact string");
179  }
180 
181  std::ifstream configFile;
182  configFile.open(m_filename.c_str());
183 
184  if (!configFile.good() || !configFile.is_open()) {
185  throw cms::Exception("TrivialFileCatalog",
186  "TrivialFileCatalog::connect: Unable to open trivial file catalog " + m_filename);
187  }
188 
189  configFile.close();
190 
191  tinyxml2::XMLDocument doc;
192  auto loadErr = doc.LoadFile(m_filename.c_str());
193  if (loadErr != tinyxml2::XML_SUCCESS) {
194  throw cms::Exception("TrivialFileCatalog")
195  << "tinyxml file load failed with error : " << doc.ErrorStr() << std::endl;
196  }
197  /* trivialFileCatalog matches the following xml schema
198  FIXME: write a proper DTD
199  <storage-mapping>
200  <lfn-to-pfn protocol="direct" destination-match=".*"
201  path-match="lfn/guid match regular expression"
202  result="/castor/cern.ch/cms/$1"/>
203  <pfn-to-lfn protocol="srm"
204  path-match="lfn/guid match regular expression"
205  result="$1"/>
206  </storage-mapping>
207  */
208  auto rootElement = doc.RootElement();
209  /*first of all do the lfn-to-pfn bit*/
210  for (auto el = rootElement->FirstChildElement("lfn-to-pfn"); el != nullptr;
211  el = el->NextSiblingElement("lfn-to-pfn")) {
213  }
214 
215  /*Then we handle the pfn-to-lfn bit*/
216  for (auto el = rootElement->FirstChildElement("pfn-to-lfn"); el != nullptr;
217  el = el->NextSiblingElement("pfn-to-lfn")) {
219  }
220  }
221 
222  void FileLocator::init(edm::CatalogAttributes const& input_dataCatalog,
223  unsigned iCatalog,
224  std::string const& storageDescriptionPath) {
225  Service<SiteLocalConfig> localconfservice;
226  edm::CatalogAttributes aCatalog = input_dataCatalog;
227  if (input_dataCatalog.empty()) {
228  if (!localconfservice.isAvailable()) {
229  cms::Exception ex("FileCatalog");
230  ex << "edm::SiteLocalConfigService is not available";
231  ex.addContext("Calling edm::FileLocator::init()");
232  throw ex;
233  }
234  if (iCatalog >= localconfservice->dataCatalogs().size()) {
235  cms::Exception ex("FileCatalog");
236  ex << "Request nonexistence data catalog";
237  ex.addContext("Calling edm::FileLocator::init()");
238  throw ex;
239  }
240  aCatalog = localconfservice->dataCatalogs()[iCatalog];
241  }
242 
243  std::filesystem::path filename_storage = localconfservice->storageDescriptionPath(aCatalog);
244 
245  //use path to storage description from input parameter
246  if (!storageDescriptionPath.empty())
247  filename_storage = storageDescriptionPath;
248 
249  //now read json
250  pt::ptree json;
251  try {
252  boost::property_tree::read_json(filename_storage.string(), json);
253  } catch (std::exception& e) {
254  cms::Exception ex("FileCatalog");
255  ex << "Can not open storage.json (" << filename_storage.string()
256  << "). Check SITECONFIG_PATH and site-local-config.xml <data-access>";
257  ex.addContext("edm::FileLocator:init()");
258  throw ex;
259  }
260  auto found_site = std::find_if(json.begin(), json.end(), [&](pt::ptree::value_type const& site) {
261  //get site name
262  std::string siteName = site.second.get("site", kEmptyString);
263  //get volume name
264  std::string volName = site.second.get("volume", kEmptyString);
265  return aCatalog.storageSite == siteName && aCatalog.volume == volName;
266  });
267 
268  //let enforce that site-local-config.xml and storage.json contains valid catalogs in <data-access>, in which site defined in site-local-config.xml <data-access> should be found in storage.json
269  if (found_site == json.end()) {
270  cms::Exception ex("FileCatalog");
271  ex << "Can not find site and volume " << aCatalog.site << ", " << aCatalog.volume
272  << " in storage.json. Check site-local-config.xml <data-access> and storage.json";
273  ex.addContext("edm::FileLocator:init()");
274  throw ex;
275  }
276 
277  const pt::ptree& protocols = found_site->second.find("protocols")->second;
278  auto found_protocol = std::find_if(protocols.begin(), protocols.end(), [&](pt::ptree::value_type const& protocol) {
279  std::string protName = protocol.second.get("protocol", kEmptyString);
280  return aCatalog.protocol == protName;
281  });
282 
283  //let enforce that site-local-config.xml and storage.json contains valid catalogs, in which protocol defined in site-local-config.xml <data-access> should be found in storage.json
284  if (found_protocol == protocols.end()) {
285  cms::Exception ex("FileCatalog");
286  ex << "Can not find protocol " << aCatalog.protocol
287  << " in storage.json. Check site-local-config.xml <data-access> and storage.json";
288  ex.addContext("edm::FileLocator:init()");
289  throw ex;
290  }
291 
292  std::string protName = found_protocol->second.get("protocol", kEmptyString);
293  m_protocols.push_back(protName);
294  m_prefix = found_protocol->second.get("prefix", kEmptyString);
295  if (m_prefix == kEmptyString) {
296  //get rules
297  const pt::ptree& rules = found_protocol->second.find("rules")->second;
298  //loop over rules
299  for (pt::ptree::value_type const& storageRule : rules) {
300  parseRule(storageRule, protName, m_directRules);
301  }
302  }
303  }
304 
306  std::string const& protocol,
307  std::string const& destination,
308  bool direct,
309  std::string name) const {
310  ProtocolRules::const_iterator const rulesIterator = protocolRules.find(protocol);
311  if (rulesIterator == protocolRules.end()) {
312  return "";
313  }
314 
315  Rules const& rules = (*(rulesIterator)).second;
316 
317  std::smatch destinationMatches;
318  std::smatch nameMatches;
319 
320  /* Look up for a matching rule*/
321  for (Rules::const_iterator i = rules.begin(); i != rules.end(); ++i) {
322  if (!std::regex_match(destination, destinationMatches, i->destinationMatch)) {
323  continue;
324  }
325 
326  if (!std::regex_match(name, i->pathMatch)) {
327  continue;
328  }
329 
330  // std::cerr << "Rule " << i->pathMatch << "matched! " << std::endl;
331 
332  std::string const chain = i->chain;
333  if ((direct == true) && (!chain.empty())) {
334  name = applyRules(protocolRules, chain, destination, direct, name);
335  if (name.empty()) {
336  return "";
337  }
338  }
339 
340  std::regex_match(name, nameMatches, i->pathMatch);
341  name = replaceWithRegexp(nameMatches, i->result);
342 
343  if ((direct == false) && (!chain.empty())) {
344  name = applyRules(protocolRules, chain, destination, direct, name);
345  }
346  return name;
347  }
348  return "";
349  }
350 } // namespace edm
ProtocolRules m_directRules_trivialCatalog
Definition: FileLocator.h:63
std::map< std::string, Rules > ProtocolRules
Definition: FileLocator.h:42
FileLocator(edm::CatalogAttributes const &catAttr, unsigned iCatalog=0, std::string const &storageDescriptionPath=std::string())
Definition: FileLocator.cc:52
nlohmann::json json
std::string m_destination
Definition: FileLocator.h:72
std::string pfn(std::string const &ilfn, edm::CatalogType catType) const
Definition: FileLocator.cc:61
static std::string const input
Definition: EdmProvDump.cc:50
const Double_t pi
U second(std::pair< T, U > const &p)
ProtocolRules m_inverseRules
Definition: FileLocator.h:65
The Signals That Services Can Subscribe To This is based on ActivityRegistry and is current per Services can connect to the signals distributed by the ActivityRegistry in order to monitor the activity of the application Each possible callback has some defined which we here list in angle e< void, edm::EventID const &, edm::Timestamp const & > We also list in braces which AR_WATCH_USING_METHOD_ is used for those or
Definition: Activities.doc:12
std::string m_filename
Definition: FileLocator.h:70
std::string m_prefix
Definition: FileLocator.h:73
void init_trivialCatalog(std::string const &catUrl, unsigned iCatalog)
Definition: FileLocator.cc:128
void init(edm::CatalogAttributes const &input_dataCatalog, unsigned iCatalog, std::string const &storageDescriptionPath)
Definition: FileLocator.cc:222
void addContext(std::string const &context)
Definition: Exception.cc:165
ProtocolRules m_directRules
Definition: FileLocator.h:67
static const char kEmptyString[1]
std::vector< Rule > Rules
Definition: FileLocator.h:41
HLT enums.
std::vector< std::string > m_protocols
Definition: FileLocator.h:71
void parseRuleTrivialCatalog(tinyxml2::XMLElement *ruleNode, ProtocolRules &rules)
Definition: FileLocator.cc:89
void parseRule(boost::property_tree::ptree::value_type const &storageRule, std::string const &protocol, ProtocolRules &rules)
Definition: FileLocator.cc:112
std::string convert(std::string const &input, ProtocolRules const &rules, bool direct) const
Definition: FileLocator.cc:67
bool isAvailable() const
Definition: Service.h:40
std::string applyRules(ProtocolRules const &protocolRules, std::string const &protocol, std::string const &destination, bool direct, std::string name) const
Definition: FileLocator.cc:305
#define str(s)
def move(src, dest)
Definition: eostools.py:511