CMS 3D CMS Logo

FileLocator.cc
Go to the documentation of this file.
4 
5 #include <xercesc/parsers/XercesDOMParser.hpp>
8 
9 #include <boost/algorithm/string.hpp>
10 #include <boost/algorithm/string/replace.hpp>
11 
12 #include <cstdlib>
13 #include <stdexcept>
14 #include <fstream>
15 #include <iostream>
16 #include <sstream>
17 
18 using namespace xercesc;
19 using namespace cms::xerces;
20 
namespace {

  // Expand `$N` placeholders in `outputFormat` with the N-th capture group of
  // `matches` (N >= 1) and return the expanded string.
  //
  // The format is scanned once, left to right, so:
  //  - `$10` refers to group 10 when that group exists and is non-empty,
  //    instead of being read as group 1 followed by a literal `0`;
  //  - text inserted from a capture group is never re-scanned, so a captured
  //    string that happens to contain `$2` is copied verbatim.
  //
  // For every `$`, the longest run of following digits that names an existing,
  // non-empty capture group is used. Placeholders that name no such group
  // (index out of range, empty or unmatched group, leading zero such as `$01`)
  // are left in the output unchanged. If `matches` is empty the format is
  // returned as is.
  std::string replaceWithRegexp(std::smatch const& matches, std::string const& outputFormat) {
    std::string result;
    result.reserve(outputFormat.size());

    std::size_t const formatSize = outputFormat.size();
    std::size_t pos = 0;
    while (pos < formatSize) {
      char const current = outputFormat[pos];
      if (current != '$') {
        result += current;
        ++pos;
        continue;
      }

      // Look for the longest usable group index following the '$'.
      std::size_t group = 0;
      std::size_t bestGroup = 0;  // 0 means "no usable placeholder found"
      std::size_t bestEnd = pos;
      for (std::size_t cur = pos + 1; cur < formatSize; ++cur) {
        char const digit = outputFormat[cur];
        if (digit < '0' || digit > '9') {
          break;
        }
        if (digit == '0' && cur == pos + 1) {
          break;  // "$0..." never names a capture group here
        }
        group = group * 10 + static_cast<std::size_t>(digit - '0');
        if (group >= matches.size()) {
          break;  // any longer index would be out of range as well
        }
        if (matches[group].length() > 0) {
          bestGroup = group;
          bestEnd = cur + 1;
        }
      }

      if (bestGroup == 0) {
        // Not a usable placeholder: keep the '$' literally.
        result += '$';
        ++pos;
      } else {
        result.append(matches[bestGroup].first, matches[bestGroup].second);
        pos = bestEnd;
      }
    }
    return result;
  }
}  // namespace
40 
41 namespace edm {
42 
// Definition of the static instance counter; the constructor checks it
// against zero before initializing Xerces-C.
int FileLocator::s_numberOfInstances = 0;
44 
45  FileLocator::FileLocator(std::string const& catUrl, bool fallback) : m_destination("any") {
46  try {
47  // << "Xerces-c initialization Number "
48  // << s_numberOfInstances <<
49  if (s_numberOfInstances == 0) {
51  }
52  } catch (XMLException const& e) {
53  // << "Xerces-c error in initialization \n"
54  // << "Exception message is: \n"
55  // << toString(e.getMessage()) <<
56  throw cms::Exception("TrivialFileCatalog",
57  std::string("Fatal Error on edm::FileLocator:") + toString(e.getMessage()));
58  }
60 
61  init(catUrl, fallback);
62 
63  // std::cout << m_protocols.size() << " protocols" << std::endl;
64  // std::cout << m_directRules[m_protocols[0]].size() << " rules" << std::endl;
65  }
66 
68 
69  std::string FileLocator::pfn(std::string const& ilfn) const { return convert(ilfn, m_directRules, true); }
70 
71  std::string FileLocator::lfn(std::string const& ipfn) const { return convert(ipfn, m_inverseRules, false); }
72 
74  std::string out = "";
75 
76  for (size_t pi = 0, pe = m_protocols.size(); pi != pe; ++pi) {
77  out = applyRules(rules, m_protocols[pi], m_destination, direct, input);
78  if (!out.empty())
79  return out;
80  }
81  return out;
82  }
83 
84  void FileLocator::parseRule(DOMNode* ruleNode, ProtocolRules& rules) {
85  if (!ruleNode) {
86  throw cms::Exception("TrivialFileCatalog", std::string("TrivialFileCatalog::connect: Malformed trivial catalog"));
87  }
88 
89  // ruleNode is actually always a DOMElement because it's the result of
90  // a `getElementsByTagName()` in the calling method.
91  DOMElement* ruleElement = static_cast<DOMElement*>(ruleNode);
92 
93  std::string const protocol = toString(ruleElement->getAttribute(uStr("protocol").ptr()));
94  std::string destinationMatchRegexp = toString(ruleElement->getAttribute(uStr("destination-match").ptr()));
95 
96  if (destinationMatchRegexp.empty()) {
97  destinationMatchRegexp = ".*";
98  }
99 
100  std::string const pathMatchRegexp = toString(ruleElement->getAttribute(uStr("path-match").ptr()));
101  std::string const result = toString(ruleElement->getAttribute(uStr("result").ptr()));
102  std::string const chain = toString(ruleElement->getAttribute(uStr("chain").ptr()));
103 
104  Rule rule;
105  rule.pathMatch.assign(pathMatchRegexp);
106  rule.destinationMatch.assign(destinationMatchRegexp);
107  rule.result = result;
108  rule.chain = chain;
109  rules[protocol].push_back(rule);
110  }
111 
112  void FileLocator::init(std::string const& catUrl, bool fallback) {
113  std::string m_url = catUrl;
114 
115  if (m_url.empty()) {
116  Service<SiteLocalConfig> localconfservice;
117  if (!localconfservice.isAvailable())
118  throw cms::Exception("TrivialFileCatalog", "edm::SiteLocalConfigService is not available");
119 
120  m_url = (fallback ? localconfservice->fallbackDataCatalog() : localconfservice->dataCatalog());
121  }
122 
123  // std::cout << "Connecting to the catalog " << m_url << std::endl;
124 
125  if (m_url.find("file:") == std::string::npos) {
126  throw cms::Exception("TrivialFileCatalog",
127  "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
128  }
129 
130  m_url = m_url.erase(0, m_url.find(":") + 1);
131 
132  std::vector<std::string> tokens;
133  boost::algorithm::split(tokens, m_url, boost::is_any_of(std::string("?")));
134  m_filename = tokens[0];
135 
136  if (tokens.size() == 2) {
137  std::string const options = tokens[1];
138  std::vector<std::string> optionTokens;
139  boost::algorithm::split(optionTokens, options, boost::is_any_of(std::string("&")));
140 
141  std::string const equalSign("=");
142  std::string const comma(",");
143 
144  for (size_t oi = 0, oe = optionTokens.size(); oi != oe; ++oi) {
145  std::string const option = optionTokens[oi];
146  std::vector<std::string> argTokens;
147  boost::algorithm::split(argTokens, option, boost::is_any_of(equalSign));
148 
149  if (argTokens.size() != 2) {
150  throw cms::Exception("TrivialFileCatalog",
151  "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
152  }
153 
154  if (argTokens[0] == "protocol") {
155  boost::algorithm::split(m_protocols, argTokens[1], boost::is_any_of(comma));
156  } else if (argTokens[0] == "destination") {
157  m_destination = argTokens[1];
158  }
159  }
160  }
161 
162  if (m_protocols.empty()) {
163  throw cms::Exception("TrivialFileCatalog",
164  "TrivialFileCatalog::connect: protocol was not supplied in the contact string");
165  }
166 
167  std::ifstream configFile;
168  configFile.open(m_filename.c_str());
169 
170  //
171  // std::cout << "Using catalog configuration " << m_filename << std::endl;
172 
173  if (!configFile.good() || !configFile.is_open()) {
174  throw cms::Exception("TrivialFileCatalog",
175  "TrivialFileCatalog::connect: Unable to open trivial file catalog " + m_filename);
176  }
177 
178  configFile.close();
179 
180  auto parser = std::make_unique<XercesDOMParser>();
181  try {
182  parser->setValidationScheme(XercesDOMParser::Val_Auto);
183  parser->setDoNamespaces(false);
184  parser->parse(m_filename.c_str());
185  DOMDocument* doc = parser->getDocument();
186  assert(doc);
187 
188  /* trivialFileCatalog matches the following xml schema
189  FIXME: write a proper DTD
190  <storage-mapping>
191  <lfn-to-pfn protocol="direct" destination-match=".*"
192  path-match="lfn/guid match regular expression"
193  result="/castor/cern.ch/cms/$1"/>
194  <pfn-to-lfn protocol="srm"
195  path-match="lfn/guid match regular expression"
196  result="$1"/>
197  </storage-mapping>
198  */
199 
200  /*first of all do the lfn-to-pfn bit*/
201  {
202  DOMNodeList* rules = doc->getElementsByTagName(uStr("lfn-to-pfn").ptr());
203  XMLSize_t const ruleTagsNum = rules->getLength();
204 
205  // FIXME: we should probably use a DTD for checking validity
206 
207  for (XMLSize_t i = 0; i < ruleTagsNum; ++i) {
208  DOMNode* ruleNode = rules->item(i);
209  parseRule(ruleNode, m_directRules);
210  }
211  }
212  /*Then we handle the pfn-to-lfn bit*/
213  {
214  DOMNodeList* rules = doc->getElementsByTagName(uStr("pfn-to-lfn").ptr());
215  XMLSize_t ruleTagsNum = rules->getLength();
216 
217  for (XMLSize_t i = 0; i < ruleTagsNum; ++i) {
218  DOMNode* ruleNode = rules->item(i);
219  parseRule(ruleNode, m_inverseRules);
220  }
221  }
222  } catch (xercesc::DOMException const& e) {
223  throw cms::Exception("TrivialFileCatalog")
224  << "Xerces XML parser threw this exception: " << cStr(e.getMessage()).ptr() << std::endl;
225  }
226  }
227 
229  std::string const& protocol,
230  std::string const& destination,
231  bool direct,
232  std::string name) const {
233  // std::cerr << "Calling apply rules with protocol: " << protocol << "\n destination: " << destination << "\n " << " on name " << name << std::endl;
234 
235  ProtocolRules::const_iterator const rulesIterator = protocolRules.find(protocol);
236  if (rulesIterator == protocolRules.end()) {
237  return "";
238  }
239 
240  Rules const& rules = (*(rulesIterator)).second;
241 
242  std::smatch destinationMatches;
243  std::smatch nameMatches;
244 
245  /* Look up for a matching rule*/
246  for (Rules::const_iterator i = rules.begin(); i != rules.end(); ++i) {
247  if (!std::regex_match(destination, destinationMatches, i->destinationMatch)) {
248  continue;
249  }
250 
251  if (!std::regex_match(name, i->pathMatch)) {
252  continue;
253  }
254 
255  // std::cerr << "Rule " << i->pathMatch << "matched! " << std::endl;
256 
257  std::string const chain = i->chain;
258  if ((direct == true) && (!chain.empty())) {
259  name = applyRules(protocolRules, chain, destination, direct, name);
260  if (name.empty()) {
261  return "";
262  }
263  }
264 
265  std::regex_match(name, nameMatches, i->pathMatch);
266  name = replaceWithRegexp(nameMatches, i->result);
267 
268  if ((direct == false) && (!chain.empty())) {
269  name = applyRules(protocolRules, chain, destination, direct, name);
270  }
271  return name;
272  }
273  return "";
274  }
275 } // namespace edm
std::map< std::string, Rules > ProtocolRules
Definition: FileLocator.h:35
Definition: chain.py:1
static int s_numberOfInstances
Definition: FileLocator.h:25
std::string lfn(std::string const &ipfn) const
Definition: FileLocator.cc:71
std::string m_destination
Definition: FileLocator.h:57
void xercesInitialize()
Definition: Xerces.cc:18
static std::string const input
Definition: EdmProvDump.cc:48
const Double_t pi
U second(std::pair< T, U > const &p)
ProtocolRules m_inverseRules
Definition: FileLocator.h:52
ZStr< char > cStr(XMLCh const *str)
bool isAvailable() const
Definition: Service.h:40
std::string convert(std::string const &input, ProtocolRules const &rules, bool direct) const
Definition: FileLocator.cc:73
ZStr< XMLCh > uStr(char const *str)
std::string pfn(std::string const &ilfn) const
Definition: FileLocator.cc:69
std::string m_filename
Definition: FileLocator.h:55
std::string toString(const std::pair< T, T > &aT)
Definition: CaloEllipse.h:72
ProtocolRules m_directRules
Definition: FileLocator.h:50
std::vector< Rule > Rules
Definition: FileLocator.h:34
void parseRule(xercesc::DOMNode *ruleNode, ProtocolRules &rules)
Definition: FileLocator.cc:84
HLT enums.
std::vector< std::string > m_protocols
Definition: FileLocator.h:56
std::string applyRules(ProtocolRules const &protocolRules, std::string const &protocol, std::string const &destination, bool direct, std::string name) const
Definition: FileLocator.cc:228
#define str(s)
double split
Definition: MVATrainer.cc:139
void init(std::string const &catUrl, bool fallback)
Definition: FileLocator.cc:112