CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
FileLocator.cc
Go to the documentation of this file.
4 
5 #include <xercesc/parsers/XercesDOMParser.hpp>
7 
8 #include <boost/algorithm/string.hpp>
9 #include <boost/algorithm/string/replace.hpp>
10 
11 #include <cstdlib>
12 #include <stdexcept>
13 #include <fstream>
14 #include <iostream>
15 #include <sstream>
16 
17 using namespace xercesc;
18 
19 namespace {
20 
21  inline std::string _toString(XMLCh const* toTranscode) {
22  std::string tmp(XMLString::transcode(toTranscode));
23  return tmp;
24  }
25 
26  inline XMLCh* _toDOMS(std::string temp) {
27  XMLCh* buff = XMLString::transcode(temp.c_str());
28  return buff;
29  }
30 
32  replaceWithRegexp(boost::smatch const& matches,
33  std::string const& outputFormat) {
34  std::string result = outputFormat;
35  std::stringstream str;
36 
37  // std::cerr << "Output format: "<< outputFormat << std::endl;
38  for (size_t i = 1; i < matches.size(); ++i) {
39  str.str("");
40  str << "$" << i;
41  // std::cerr << "Current match: " << matches[i] << std::endl;
42  std::string const matchedString(matches[i].first, matches[i].second);
43  if (!matchedString.empty())
44  boost::algorithm::replace_all(result, str.str(), matchedString);
45  }
46  // std::cerr << "Final string: " << result << std::endl;
47  return result;
48  }
49 }
50 
51 namespace edm {
52 
53  int FileLocator::s_numberOfInstances = 0;
54 
// Constructs a locator for the catalog named by catUrl. m_destination starts
// out as "any"; the catalog itself is loaded by init(catUrl, fallback).
// A XMLException raised inside the try block is rethrown as a
// cms::Exception of category "TrivialFileCatalog" carrying the Xerces message.
55  FileLocator::FileLocator(std::string const& catUrl, bool fallback)
56  : m_destination("any") {
57  try {
58  // << "Xerces-c initialization Number "
59  // << s_numberOfInstances <<
60  if (s_numberOfInstances == 0) {
// NOTE(review): listing line 61 (the body of this `if`) is missing from this
// extract -- presumably the one-time Xerces-C start-up call; confirm against
// the original file.
62  }
63  }
64  catch (XMLException const& e) {
65  // << "Xerces-c error in initialization \n"
66  // << "Exception message is: \n"
67  // << _toString(e.getMessage()) <<
68  throw
69  cms::Exception("TrivialFileCatalog", std::string("Fatal Error on edm::FileLocator:")+ _toString(e.getMessage()));
70  }
// NOTE(review): listing line 71 is missing from this extract -- confirm what
// statement sits between the catch block and the init() call.
72 
// Resolve the catalog URL and load the lfn-to-pfn / pfn-to-lfn rules.
73  init(catUrl, fallback);
74 
75  // std::cout << m_protocols.size() << " protocols" << std::endl;
76  // std::cout << m_directRules[m_protocols[0]].size() << " rules" << std::endl;
77  }
78 
// NOTE(review): the signature line (listing line 79) is missing from this
// extract; this empty body presumably belongs to the destructor -- confirm.
80  {}
81 
82 
84  FileLocator::pfn(std::string const& ilfn) const {
85  return convert(ilfn, m_directRules, true);
86  }
87 
89  FileLocator::lfn(std::string const& ipfn) const {
90  return convert(ipfn, m_inverseRules, false);
91  }
92 
94  FileLocator::convert(std::string const& input, ProtocolRules const& rules, bool direct) const {
95  std::string out = "";
96 
97  for (size_t pi = 0, pe = m_protocols.size(); pi != pe; ++pi) {
98  out = applyRules(rules, m_protocols[pi], m_destination, direct, input);
99  if (!out.empty())
100  return out;
101  }
102  return out;
103  }
104 
105  void
107  if (!ruleNode) {
108  throw cms::Exception("TrivialFileCatalog", std::string("TrivialFileCatalog::connect: Malformed trivial catalog"));
109  }
110 
111  // ruleNode is actually always a DOMElement because it's the result of
112  // a `getElementsByTagName()` in the calling method.
113  DOMElement* ruleElement = static_cast<DOMElement *>(ruleNode);
114 
115  std::string const protocol = _toString(ruleElement->getAttribute(_toDOMS("protocol")));
116  std::string destinationMatchRegexp = _toString(ruleElement->getAttribute(_toDOMS("destination-match")));
117 
118  if (destinationMatchRegexp.empty()) {
119  destinationMatchRegexp = ".*";
120  }
121 
122  std::string const pathMatchRegexp
123  = _toString(ruleElement->getAttribute(_toDOMS("path-match")));
124  std::string const result
125  = _toString(ruleElement->getAttribute(_toDOMS("result")));
126  std::string const chain
127  = _toString(ruleElement->getAttribute(_toDOMS("chain")));
128 
129  Rule rule;
130  rule.pathMatch.assign(pathMatchRegexp);
131  rule.destinationMatch.assign(destinationMatchRegexp);
132  rule.result = result;
133  rule.chain = chain;
134  rules[protocol].push_back(rule);
135  }
136 
137  void
138  FileLocator::init(std::string const& catUrl, bool fallback) {
139  std::string m_url = catUrl;
140 
141  if (m_url.empty()) {
142  Service<SiteLocalConfig> localconfservice;
143  if (!localconfservice.isAvailable())
144  throw cms::Exception("TrivialFileCatalog", "edm::SiteLocalConfigService is not available");
145 
146  m_url = (fallback ? localconfservice->fallbackDataCatalog() : localconfservice->dataCatalog());
147  }
148 
149  // std::cout << "Connecting to the catalog " << m_url << std::endl;
150 
151  if (m_url.find("file:") == std::string::npos) {
152  throw cms::Exception("TrivialFileCatalog", "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
153  }
154 
155  m_url = m_url.erase(0, m_url.find(":") + 1);
156 
157  std::vector<std::string> tokens;
158  boost::algorithm::split(tokens, m_url, boost::is_any_of(std::string("?")));
159  m_filename = tokens[0];
160 
161  if (tokens.size() == 2) {
162  std::string const options = tokens[1];
163  std::vector<std::string> optionTokens;
164  boost::algorithm::split(optionTokens, options, boost::is_any_of(std::string("&")));
165 
166  std::string const equalSign("=");
167  std::string const comma(",");
168 
169  for (size_t oi = 0, oe = optionTokens.size(); oi != oe; ++oi) {
170  std::string const option = optionTokens[oi];
171  std::vector<std::string> argTokens;
172  boost::algorithm::split(argTokens, option, boost::is_any_of(equalSign));
173 
174  if (argTokens.size() != 2) {
175  throw cms::Exception("TrivialFileCatalog", "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
176  }
177 
178  if (argTokens[0] == "protocol") {
179  boost::algorithm::split(m_protocols, argTokens[1], boost::is_any_of(comma));
180  } else if (argTokens[0] == "destination") {
181  m_destination = argTokens[1];
182  }
183  }
184  }
185 
186  if (m_protocols.empty()) {
187  throw cms::Exception("TrivialFileCatalog", "TrivialFileCatalog::connect: protocol was not supplied in the contact string");
188  }
189 
190  std::ifstream configFile;
191  configFile.open(m_filename.c_str());
192 
193  //
194  // std::cout << "Using catalog configuration " << m_filename << std::endl;
195 
196  if (!configFile.good() || !configFile.is_open()) {
197  throw cms::Exception("TrivialFileCatalog", "TrivialFileCatalog::connect: Unable to open trivial file catalog " + m_filename);
198  }
199 
200  configFile.close();
201 
202  XercesDOMParser* parser = new XercesDOMParser;
203  parser->setValidationScheme(XercesDOMParser::Val_Auto);
204  parser->setDoNamespaces(false);
205  parser->parse(m_filename.c_str());
206  DOMDocument* doc = parser->getDocument();
207  assert(doc);
208 
209  /* trivialFileCatalog matches the following xml schema
210  FIXME: write a proper DTD
211  <storage-mapping>
212  <lfn-to-pfn protocol="direct" destination-match=".*"
213  path-match="lfn/guid match regular expression"
214  result="/castor/cern.ch/cms/$1"/>
215  <pfn-to-lfn protocol="srm"
216  path-match="lfn/guid match regular expression"
217  result="$1"/>
218  </storage-mapping>
219  */
220 
221  /*first of all do the lfn-to-pfn bit*/
222  {
223  DOMNodeList* rules = doc->getElementsByTagName(_toDOMS("lfn-to-pfn"));
224  unsigned int const ruleTagsNum = rules->getLength();
225 
226  // FIXME: we should probably use a DTD for checking validity
227 
228  for (unsigned int i = 0; i < ruleTagsNum; ++i) {
229  DOMNode* ruleNode = rules->item(i);
230  parseRule(ruleNode, m_directRules);
231  }
232  }
233  /*Then we handle the pfn-to-lfn bit*/
234  {
235  DOMNodeList* rules = doc->getElementsByTagName(_toDOMS("pfn-to-lfn"));
236  unsigned int ruleTagsNum = rules->getLength();
237 
238  for (unsigned int i = 0; i < ruleTagsNum; ++i) {
239  DOMNode* ruleNode = rules->item(i);
240  parseRule(ruleNode, m_inverseRules);
241  }
242  }
243  }
244 
247  std::string const& protocol,
248  std::string const& destination,
249  bool direct,
250  std::string name) const {
251 
252  // std::cerr << "Calling apply rules with protocol: " << protocol << "\n destination: " << destination << "\n " << " on name " << name << std::endl;
253 
254  ProtocolRules::const_iterator const rulesIterator = protocolRules.find(protocol);
255  if (rulesIterator == protocolRules.end()) {
256  return "";
257  }
258 
259  Rules const& rules = (*(rulesIterator)).second;
260 
261  boost::smatch destinationMatches;
262  boost::smatch nameMatches;
263 
264  /* Look up for a matching rule*/
265  for (Rules::const_iterator i = rules.begin(); i != rules.end(); ++i) {
266 
267  if (!boost::regex_match(destination, destinationMatches, i->destinationMatch)) {
268  continue;
269  }
270 
271  if (!boost::regex_match(name, i->pathMatch)) {
272  continue;
273  }
274 
275  // std::cerr << "Rule " << i->pathMatch << "matched! " << std::endl;
276 
277  std::string const chain = i->chain;
278  if ((direct == true) && (chain != "")) {
279  name = applyRules(protocolRules, chain, destination, direct, name);
280  if (name.empty()) {
281  return "";
282  }
283  }
284 
285  boost::regex_match(name, nameMatches, i->pathMatch);
286  name = replaceWithRegexp(nameMatches, i->result);
287 
288  if ((direct == false) && (chain != "")) {
289  name = applyRules(protocolRules, chain, destination, direct, name);
290  }
291  return name;
292  }
293  return "";
294  }
295 }
int i
Definition: DBlmapReader.cc:9
std::map< std::string, Rules > ProtocolRules
Definition: FileLocator.h:36
static int s_numberOfInstances
Definition: FileLocator.h:26
std::string lfn(std::string const &ipfn) const
Definition: FileLocator.cc:89
std::string m_destination
Definition: FileLocator.h:59
string destination
assert(m_qm.get())
std::string _toString(const XMLCh *toTranscode)
void xercesInitialize()
Definition: Xerces.cc:17
static std::string const input
Definition: EdmProvDump.cc:43
const Double_t pi
U second(std::pair< T, U > const &p)
ProtocolRules m_inverseRules
Definition: FileLocator.h:54
tuple result
Definition: query.py:137
bool isAvailable() const
Definition: Service.h:46
std::string convert(std::string const &input, ProtocolRules const &rules, bool direct) const
Definition: FileLocator.cc:94
std::string pfn(std::string const &ilfn) const
Definition: FileLocator.cc:84
std::string m_filename
Definition: FileLocator.h:57
tuple doc
Definition: asciidump.py:381
tuple out
Definition: dbtoconf.py:99
ProtocolRules m_directRules
Definition: FileLocator.h:52
std::vector< Rule > Rules
Definition: FileLocator.h:35
void parseRule(xercesc::DOMNode *ruleNode, ProtocolRules &rules)
Definition: FileLocator.cc:106
std::vector< std::vector< double > > tmp
Definition: MVATrainer.cc:100
std::vector< std::string > m_protocols
Definition: FileLocator.h:58
XMLCh * _toDOMS(std::string temp)
boost::regex pathMatch
Definition: FileLocator.h:29
std::string applyRules(ProtocolRules const &protocolRules, std::string const &protocol, std::string const &destination, bool direct, std::string name) const
Definition: FileLocator.cc:246
double split
Definition: MVATrainer.cc:139
void init(std::string const &catUrl, bool fallback)
Definition: FileLocator.cc:138