CMS 3D CMS Logo

FileLocator.cc
Go to the documentation of this file.
4 
5 #include <xercesc/parsers/XercesDOMParser.hpp>
8 
9 #include <boost/algorithm/string.hpp>
10 #include <boost/algorithm/string/replace.hpp>
11 
12 #include <cstdlib>
13 #include <stdexcept>
14 #include <fstream>
15 #include <iostream>
16 #include <sstream>
17 
18 using namespace xercesc;
19 using namespace cms::xerces;
20 
namespace {

  // Expands "$N" placeholders in `outputFormat` with the text captured by
  // group N of `matches` and returns the expanded string.
  //
  // Parameters:
  //   matches      - result of a regex match; group 0 (the whole match) is
  //                  never substituted, groups 1..size()-1 are.
  //   outputFormat - template string containing "$1", "$2", ... placeholders.
  //
  // Rules:
  //   * A placeholder is left untouched when its group is out of range, did
  //     not participate in the match, or matched the empty string.
  //   * When several digit prefixes name a group (e.g. "$12" with 12+ groups),
  //     the longest prefix that names a non-empty group wins, so "$10" is no
  //     longer mangled into "<group 1>0".
  //   * The format is scanned once, left to right; text that was inserted for
  //     one placeholder is never rescanned for further placeholders.
  //   * A '$' that is not followed by a usable group number is copied as is.
  std::string
  replaceWithRegexp(std::smatch const& matches,
                    std::string const& outputFormat) {
    std::string result;
    result.reserve(outputFormat.size());

    std::size_t const formatSize = outputFormat.size();
    std::size_t pos = 0;
    while (pos < formatSize) {
      char const current = outputFormat[pos];
      if (current != '$') {
        result += current;
        ++pos;
        continue;
      }

      // Read the digits following '$', remembering the longest prefix that
      // names a group with non-empty content.
      std::size_t group = 0;
      std::size_t bestGroup = 0;
      std::size_t bestEnd = pos;
      for (std::size_t cursor = pos + 1; cursor < formatSize; ++cursor) {
        char const digit = outputFormat[cursor];
        if (digit < '0' || digit > '9') {
          break;
        }
        group = group * 10 + static_cast<std::size_t>(digit - '0');
        // A leading zero never names a group, and once the number is out of
        // range every longer prefix is out of range as well.
        if (group == 0 || group >= matches.size()) {
          break;
        }
        if (matches[group].length() > 0) {
          bestGroup = group;
          bestEnd = cursor + 1;
        }
      }

      if (bestGroup != 0) {
        result.append(matches[bestGroup].first, matches[bestGroup].second);
        pos = bestEnd;
      } else {
        // No usable group: keep the '$' literally and continue after it.
        result += current;
        ++pos;
      }
    }
    return result;
  }
}
42 
43 namespace edm {
44 
// Out-of-class definition of the static member FileLocator::s_numberOfInstances,
// initialised to zero. The constructor below compares it against 0.
45  int FileLocator::s_numberOfInstances = 0;
46 
// Constructs a FileLocator: sets m_destination to "any", performs a
// first-instance-only step guarded by s_numberOfInstances == 0, and then
// delegates all catalog loading to init(catUrl, fallback).
//
// Parameters:
//   catUrl   - catalog contact string, forwarded unchanged to init().
//   fallback - forwarded unchanged to init().
//
// Throws cms::Exception("TrivialFileCatalog") when the guarded step raises an
// XMLException; anything thrown by init() propagates.
//
// NOTE(review): listing lines 53 and 63 are missing from this extract. Line 53
// is the body of the `== 0` branch (presumably the Xerces initialisation call,
// given the XMLException handler); line 63 presumably updates the instance
// counter. Confirm both against the real file.
47  FileLocator::FileLocator(std::string const& catUrl, bool fallback)
48  : m_destination("any") {
49  try {
50  // << "Xerces-c initialization Number "
51  // << s_numberOfInstances <<
    // Only the first instance enters this branch.
52  if (s_numberOfInstances == 0) {
54  }
55  }
56  catch (XMLException const& e) {
57  // << "Xerces-c error in initialization \n"
58  // << "Exception message is: \n"
59  // << toString(e.getMessage()) <<
    // Re-raise as a cms::Exception carrying the Xerces message text.
60  throw
61  cms::Exception("TrivialFileCatalog", std::string("Fatal Error on edm::FileLocator:")+ toString(e.getMessage()));
62  }
64 
    // Resolve the catalog URL and load the rule tables.
65  init(catUrl, fallback);
66 
67  // std::cout << m_protocols.size() << " protocols" << std::endl;
68  // std::cout << m_directRules[m_protocols[0]].size() << " rules" << std::endl;
69  }
70 
// Empty function body.
// NOTE(review): the declarator that belongs on listing line 71 is missing from
// this extract; given its position directly after the constructor this is
// presumably the FileLocator destructor - confirm against the header.
72  {}
73 
// Translates `ilfn` using the lfn-to-pfn rule table (m_directRules) by calling
// convert() with direct = true, and returns convert()'s result unchanged.
// NOTE(review): the return-type line (listing line 74) is missing from this
// extract; the cross-reference index at the end of the page gives std::string.
75  FileLocator::pfn(std::string const& ilfn) const {
76  return convert(ilfn, m_directRules, true);
77  }
78 
// Translates `ipfn` using the pfn-to-lfn rule table (m_inverseRules) by calling
// convert() with direct = false, and returns convert()'s result unchanged.
// NOTE(review): the return-type line (listing line 79) is missing from this
// extract; the cross-reference index at the end of the page gives std::string.
80  FileLocator::lfn(std::string const& ipfn) const {
81  return convert(ipfn, m_inverseRules, false);
82  }
83 
// Applies `rules` to `input` once per entry of m_protocols, in order, and
// returns the first non-empty result from applyRules().
//
// Parameters:
//   input  - the name to translate.
//   rules  - rule table to use (callers in this file pass m_directRules or
//            m_inverseRules).
//   direct - forwarded to applyRules(); it selects where chained rules run.
//
// Returns an empty string when no protocol yields a non-empty result, which
// includes the case where m_protocols is empty.
// NOTE(review): the return-type line (listing line 84) is missing from this
// extract; the cross-reference index at the end of the page gives std::string.
85  FileLocator::convert(std::string const& input, ProtocolRules const& rules, bool direct) const {
86  std::string out = "";
87 
88  for (size_t pi = 0, pe = m_protocols.size(); pi != pe; ++pi) {
89  out = applyRules(rules, m_protocols[pi], m_destination, direct, input);
    // The first protocol that produces something wins.
90  if (!out.empty())
91  return out;
92  }
93  return out;
94  }
95 
// Converts one rule element of the catalog XML into a Rule and appends it to
// rules[protocol].
//
// Attributes read from the element: "protocol", "destination-match",
// "path-match", "result" and "chain". An empty "destination-match" is replaced
// by ".*".
//
// Throws cms::Exception("TrivialFileCatalog") when ruleNode is null.
//
// NOTE(review): the declarator line (listing line 97) is missing from this
// extract; the cross-reference index at the end of the page gives
// `void parseRule(xercesc::DOMNode *ruleNode, ProtocolRules &rules)`.
96  void
98  if (!ruleNode) {
99  throw cms::Exception("TrivialFileCatalog", std::string("TrivialFileCatalog::connect: Malformed trivial catalog"));
100  }
101 
102  // ruleNode is actually always a DOMElement because it's the result of
103  // a `getElementsByTagName()` in the calling method.
104  DOMElement* ruleElement = static_cast<DOMElement *>(ruleNode);
105 
106  std::string const protocol = toString(ruleElement->getAttribute(uStr("protocol").ptr()));
107  std::string destinationMatchRegexp = toString(ruleElement->getAttribute(uStr("destination-match").ptr()));
108 
    // No destination pattern given: fall back to ".*".
109  if (destinationMatchRegexp.empty()) {
110  destinationMatchRegexp = ".*";
111  }
112 
113  std::string const pathMatchRegexp
114  = toString(ruleElement->getAttribute(uStr("path-match").ptr()));
115  std::string const result
116  = toString(ruleElement->getAttribute(uStr("result").ptr()));
117  std::string const chain
118  = toString(ruleElement->getAttribute(uStr("chain").ptr()));
119 
    // Build the rule and append it after any rules already stored for this
    // protocol.
120  Rule rule;
121  rule.pathMatch.assign(pathMatchRegexp);
122  rule.destinationMatch.assign(destinationMatchRegexp);
123  rule.result = result;
124  rule.chain = chain;
125  rules[protocol].push_back(rule);
126  }
127 
// Resolves the catalog contact string, extracts the file name and options from
// it, and loads the rule tables from the XML catalog file.
//
// Parameters:
//   catUrl   - contact string; when empty, the string is taken from the
//              SiteLocalConfig service instead.
//   fallback - only used when catUrl is empty: true selects
//              fallbackDataCatalog(), false selects dataCatalog().
//
// Steps, in order:
//   1. Pick the contact string (catUrl or the service value).
//   2. Require that it contains "file:" and drop everything up to and
//      including its first ':'.
//   3. Split the remainder on '?': the first token becomes m_filename; if there
//      are exactly two tokens the second is parsed as '&'-separated key=value
//      options ("protocol" fills m_protocols from a comma-separated list,
//      "destination" sets m_destination, other keys are ignored).
//   4. Require at least one protocol.
//   5. Check that the file can be opened, then parse it with XercesDOMParser
//      and feed every "lfn-to-pfn" element into m_directRules and every
//      "pfn-to-lfn" element into m_inverseRules via parseRule().
//
// Throws cms::Exception("TrivialFileCatalog") for: unavailable SiteLocalConfig
// service, missing "file:", an option without exactly one '=', no protocol,
// unopenable file, and xercesc::DOMException raised while parsing.
128  void
129  FileLocator::init(std::string const& catUrl, bool fallback) {
    // Local working copy of the contact string (a local despite the m_ prefix).
130  std::string m_url = catUrl;
131 
132  if (m_url.empty()) {
133  Service<SiteLocalConfig> localconfservice;
134  if (!localconfservice.isAvailable())
135  throw cms::Exception("TrivialFileCatalog", "edm::SiteLocalConfigService is not available");
136 
137  m_url = (fallback ? localconfservice->fallbackDataCatalog() : localconfservice->dataCatalog());
138  }
139 
140  // std::cout << "Connecting to the catalog " << m_url << std::endl;
141 
    // "file:" may appear anywhere in the string, not only at the start.
142  if (m_url.find("file:") == std::string::npos) {
143  throw cms::Exception("TrivialFileCatalog", "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
144  }
145 
    // Strip everything up to and including the first ':'.
146  m_url = m_url.erase(0, m_url.find(":") + 1);
147 
148  std::vector<std::string> tokens;
149  boost::algorithm::split(tokens, m_url, boost::is_any_of(std::string("?")));
150  m_filename = tokens[0];
151 
    // Options are parsed only when there is exactly one '?' separator.
152  if (tokens.size() == 2) {
153  std::string const options = tokens[1];
154  std::vector<std::string> optionTokens;
155  boost::algorithm::split(optionTokens, options, boost::is_any_of(std::string("&")));
156 
157  std::string const equalSign("=");
158  std::string const comma(",");
159 
160  for (size_t oi = 0, oe = optionTokens.size(); oi != oe; ++oi) {
161  std::string const option = optionTokens[oi];
162  std::vector<std::string> argTokens;
163  boost::algorithm::split(argTokens, option, boost::is_any_of(equalSign));
164 
    // Each option must be exactly key=value.
165  if (argTokens.size() != 2) {
166  throw cms::Exception("TrivialFileCatalog", "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
167  }
168 
169  if (argTokens[0] == "protocol") {
170  boost::algorithm::split(m_protocols, argTokens[1], boost::is_any_of(comma));
171  } else if (argTokens[0] == "destination") {
172  m_destination = argTokens[1];
173  }
174  }
175  }
176 
177  if (m_protocols.empty()) {
178  throw cms::Exception("TrivialFileCatalog", "TrivialFileCatalog::connect: protocol was not supplied in the contact string");
179  }
180 
    // Open the file only to verify that it is readable; the stream is closed
    // again before the XML parser is given the same path.
181  std::ifstream configFile;
182  configFile.open(m_filename.c_str());
183 
184  //
185  // std::cout << "Using catalog configuration " << m_filename << std::endl;
186 
187  if (!configFile.good() || !configFile.is_open()) {
188  throw cms::Exception("TrivialFileCatalog", "TrivialFileCatalog::connect: Unable to open trivial file catalog " + m_filename);
189  }
190 
191  configFile.close();
192 
193  auto parser = std::make_unique<XercesDOMParser>();
194  try {
195  parser->setValidationScheme(XercesDOMParser::Val_Auto);
196  parser->setDoNamespaces(false);
197  parser->parse(m_filename.c_str());
198  DOMDocument* doc = parser->getDocument();
    // NOTE(review): a null document is only caught by this assert, which is
    // compiled out when NDEBUG is defined.
199  assert(doc);
200 
201  /* trivialFileCatalog matches the following xml schema
202  FIXME: write a proper DTD
203  <storage-mapping>
204  <lfn-to-pfn protocol="direct" destination-match=".*"
205  path-match="lfn/guid match regular expression"
206  result="/castor/cern.ch/cms/$1"/>
207  <pfn-to-lfn protocol="srm"
208  path-match="lfn/guid match regular expression"
209  result="$1"/>
210  </storage-mapping>
211  */
212 
213  /*first of all do the lfn-to-pfn bit*/
214  {
215  DOMNodeList* rules = doc->getElementsByTagName(uStr("lfn-to-pfn").ptr());
216  XMLSize_t const ruleTagsNum = rules->getLength();
217 
218  // FIXME: we should probably use a DTD for checking validity
219 
220  for (XMLSize_t i = 0; i < ruleTagsNum; ++i) {
221  DOMNode* ruleNode = rules->item(i);
222  parseRule(ruleNode, m_directRules);
223  }
224  }
225  /*Then we handle the pfn-to-lfn bit*/
226  {
227  DOMNodeList* rules = doc->getElementsByTagName(uStr("pfn-to-lfn").ptr());
228  XMLSize_t ruleTagsNum = rules->getLength();
229 
230  for (XMLSize_t i = 0; i < ruleTagsNum; ++i) {
231  DOMNode* ruleNode = rules->item(i);
232  parseRule(ruleNode, m_inverseRules);
233  }
234  }
235  }
    // Only xercesc::DOMException is translated here; other exception types
    // raised inside the try block propagate unchanged.
236  catch (xercesc::DOMException const& e) {
237  throw cms::Exception("TrivialFileCatalog") << "Xerces XML parser threw this exception: " << cStr(e.getMessage()).ptr() << std::endl;
238  }
239  }
240 
// Translates `name` with the first rule of `protocol` whose patterns match, and
// returns the translated name, or "" when nothing applies.
//
// Parameters:
//   protocolRules - rule table keyed by protocol name (declared on the missing
//                   first signature line, see the note below).
//   protocol      - key looked up in protocolRules; unknown key returns "".
//   destination   - must fully match a rule's destinationMatch regex.
//   direct        - true: a rule's chain is applied BEFORE its own
//                   substitution; false: the chain is applied AFTER it.
//   name          - taken by value because it is rewritten in place.
//
// Rules are tried in stored order and the first one whose destinationMatch and
// pathMatch both match decides the outcome; later rules are never consulted.
// Chaining recurses into applyRules() with the rule's `chain` string as the
// protocol.
//
// NOTE(review): the first signature line (listing lines 241-242) is missing
// from this extract; the cross-reference index at the end of the page gives
// `std::string applyRules(ProtocolRules const &protocolRules, ...) const`.
243  std::string const& protocol,
244  std::string const& destination,
245  bool direct,
246  std::string name) const {
247 
248  // std::cerr << "Calling apply rules with protocol: " << protocol << "\n destination: " << destination << "\n " << " on name " << name << std::endl;
249 
250  ProtocolRules::const_iterator const rulesIterator = protocolRules.find(protocol);
251  if (rulesIterator == protocolRules.end()) {
252  return "";
253  }
254 
255  Rules const& rules = (*(rulesIterator)).second;
256 
257  std::smatch destinationMatches;
258  std::smatch nameMatches;
259 
260  /* Look up for a matching rule*/
261  for (Rules::const_iterator i = rules.begin(); i != rules.end(); ++i) {
262 
263  if (!std::regex_match(destination, destinationMatches, i->destinationMatch)) {
264  continue;
265  }
266 
    // Applicability test only; capture groups are collected further down.
267  if (!std::regex_match(name, i->pathMatch)) {
268  continue;
269  }
270 
271  // std::cerr << "Rule " << i->pathMatch << "matched! " << std::endl;
272 
273  std::string const chain = i->chain;
    // Direct translation: run the chained protocol first; an empty result from
    // the chain aborts the whole lookup.
274  if ((direct == true) && (chain != "")) {
275  name = applyRules(protocolRules, chain, destination, direct, name);
276  if (name.empty()) {
277  return "";
278  }
279  }
280 
    // Match again to fill nameMatches; `name` may have been rewritten by the
    // chain above.
    // NOTE(review): the result of this regex_match is not checked.
281  std::regex_match(name, nameMatches, i->pathMatch);
282  name = replaceWithRegexp(nameMatches, i->result);
283 
    // Inverse translation: run the chained protocol on the substituted name.
284  if ((direct == false) && (chain != "")) {
285  name = applyRules(protocolRules, chain, destination, direct, name);
286  }
287  return name;
288  }
289  return "";
290  }
291 }
std::map< std::string, Rules > ProtocolRules
Definition: FileLocator.h:36
Definition: chain.py:1
static int s_numberOfInstances
Definition: FileLocator.h:26
std::string lfn(std::string const &ipfn) const
Definition: FileLocator.cc:80
std::string m_destination
Definition: FileLocator.h:59
void xercesInitialize()
Definition: Xerces.cc:18
static std::string const input
Definition: EdmProvDump.cc:45
const Double_t pi
U second(std::pair< T, U > const &p)
ProtocolRules m_inverseRules
Definition: FileLocator.h:54
ZStr< char > cStr(XMLCh const *str)
std::string toString(const char *format,...)
Definition: xdaq_compat.cc:4
bool isAvailable() const
Definition: Service.h:46
std::string convert(std::string const &input, ProtocolRules const &rules, bool direct) const
Definition: FileLocator.cc:85
ZStr< XMLCh > uStr(char const *str)
std::string pfn(std::string const &ilfn) const
Definition: FileLocator.cc:75
std::string m_filename
Definition: FileLocator.h:57
ProtocolRules m_directRules
Definition: FileLocator.h:52
std::vector< Rule > Rules
Definition: FileLocator.h:35
void parseRule(xercesc::DOMNode *ruleNode, ProtocolRules &rules)
Definition: FileLocator.cc:97
HLT enums.
std::vector< std::string > m_protocols
Definition: FileLocator.h:58
std::string applyRules(ProtocolRules const &protocolRules, std::string const &protocol, std::string const &destination, bool direct, std::string name) const
Definition: FileLocator.cc:242
#define str(s)
double split
Definition: MVATrainer.cc:139
void init(std::string const &catUrl, bool fallback)
Definition: FileLocator.cc:129