CMS 3D CMS Logo

/afs/cern.ch/work/a/aaltunda/public/www/CMSSW_5_3_13_patch3/src/FWCore/Catalog/src/FileLocator.cc

Go to the documentation of this file.
00001 #include "FWCore/Catalog/interface/FileLocator.h"
00002 #include "FWCore/Catalog/interface/SiteLocalConfig.h"
00003 #include "FWCore/ServiceRegistry/interface/Service.h"
00004 
00005 #include <xercesc/parsers/XercesDOMParser.hpp>
00006 #include <xercesc/util/PlatformUtils.hpp>
00007 
00008 #include <boost/algorithm/string.hpp>
00009 #include <boost/algorithm/string/replace.hpp>
00010 
00011 #include <cstdlib>
00012 #include <stdexcept>
00013 #include <fstream>
00014 #include <iostream>
00015 #include <sstream>
00016 
00017 using namespace xercesc;
00018 
00019 namespace {
00020 
00021   inline std::string _toString(XMLCh const* toTranscode) {
00022     std::string tmp(XMLString::transcode(toTranscode));
00023     return tmp;
00024   }
00025 
00026   inline XMLCh*  _toDOMS(std::string temp) {
00027     XMLCh* buff = XMLString::transcode(temp.c_str());
00028     return  buff;
00029   }
00030 
00031   std::string
00032   replaceWithRegexp(boost::smatch const& matches,
00033                     std::string const& outputFormat) {
00034     std::string result = outputFormat;
00035     std::stringstream str;
00036 
00037     // std::cerr << "Output format: "<< outputFormat << std::endl;
00038     for (size_t i = 1; i < matches.size(); ++i) {
00039       str.str("");
00040       str << "$" << i;
00041       // std::cerr << "Current match: " << matches[i] << std::endl;
00042       std::string const matchedString(matches[i].first, matches[i].second);
00043       if (!matchedString.empty())
00044         boost::algorithm::replace_all(result, str.str(), matchedString);
00045     }
00046     // std::cerr << "Final string: " << result << std::endl;
00047     return result;
00048   }
00049 }
00050 
00051 namespace edm {
00052 
00053   int FileLocator::s_numberOfInstances = 0;
00054 
00055   FileLocator::FileLocator(std::string const& catUrl, bool fallback)
00056     : m_destination("any") {
00057     try {
00058       //  << "Xerces-c initialization Number "
00059       //   << s_numberOfInstances <<
00060       if (s_numberOfInstances == 0) {
00061         XMLPlatformUtils::Initialize();
00062       }
00063     }
00064     catch (XMLException const& e) {
00065       // << "Xerces-c error in initialization \n"
00066       //      << "Exception message is:  \n"
00067       //      << _toString(e.getMessage()) <<
00068       throw
00069         cms::Exception("TrivialFileCatalog", std::string("Fatal Error on edm::FileLocator:")+ _toString(e.getMessage()));
00070     }
00071     ++s_numberOfInstances;
00072 
00073     init(catUrl, fallback);
00074 
00075     // std::cout << m_protocols.size() << " protocols" << std::endl;
00076     // std::cout << m_directRules[m_protocols[0]].size() << " rules" << std::endl;
00077   }
00078 
00079   FileLocator::~FileLocator()
00080   {}
00081 
00082 
00083   std::string
00084   FileLocator::pfn(std::string const& ilfn) const {
00085     return convert(ilfn, m_directRules, true);
00086   }
00087 
00088   std::string
00089   FileLocator::lfn(std::string const& ipfn) const {
00090     return convert(ipfn, m_inverseRules, false);
00091   }
00092 
00093   std::string
00094   FileLocator::convert(std::string const& input, ProtocolRules const& rules, bool direct) const {
00095     std::string out = "";
00096 
00097     for (size_t pi = 0, pe = m_protocols.size(); pi != pe; ++pi) {
00098       out = applyRules(rules, m_protocols[pi], m_destination, direct, input);
00099       if (!out.empty())
00100         return out;
00101     }
00102     return out;
00103   }
00104 
00105   void
00106   FileLocator::parseRule(DOMNode* ruleNode, ProtocolRules& rules) {
00107     if (!ruleNode) {
00108       throw cms::Exception("TrivialFileCatalog", std::string("TrivialFileCatalog::connect: Malformed trivial catalog"));
00109     }
00110 
00111     // ruleNode is actually always a DOMElement because it's the result of
00112     // a `getElementsByTagName()` in the calling method.
00113     DOMElement* ruleElement = static_cast<DOMElement *>(ruleNode);
00114 
00115     std::string const protocol = _toString(ruleElement->getAttribute(_toDOMS("protocol")));
00116     std::string destinationMatchRegexp = _toString(ruleElement->getAttribute(_toDOMS("destination-match")));
00117 
00118     if (destinationMatchRegexp.empty()) {
00119       destinationMatchRegexp = ".*";
00120     }
00121 
00122     std::string const pathMatchRegexp
00123       = _toString(ruleElement->getAttribute(_toDOMS("path-match")));
00124     std::string const result
00125       = _toString(ruleElement->getAttribute(_toDOMS("result")));
00126     std::string const chain
00127       = _toString(ruleElement->getAttribute(_toDOMS("chain")));
00128 
00129     Rule rule;
00130     rule.pathMatch.assign(pathMatchRegexp);
00131     rule.destinationMatch.assign(destinationMatchRegexp);
00132     rule.result = result;
00133     rule.chain = chain;
00134     rules[protocol].push_back(rule);
00135   }
00136 
00137   void
00138   FileLocator::init(std::string const& catUrl, bool fallback) {
00139     std::string m_url = catUrl;
00140 
00141     if (m_url.empty()) {
00142       Service<SiteLocalConfig> localconfservice;
00143       if (!localconfservice.isAvailable())
00144               throw cms::Exception("TrivialFileCatalog", "edm::SiteLocalConfigService is not available");
00145 
00146       m_url = (fallback ? localconfservice->fallbackDataCatalog() : localconfservice->dataCatalog());
00147     }
00148 
00149     // std::cout << "Connecting to the catalog " << m_url << std::endl;
00150 
00151     if (m_url.find("file:") == std::string::npos) {
00152       throw cms::Exception("TrivialFileCatalog", "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
00153     }
00154 
00155     m_url = m_url.erase(0, m_url.find(":") + 1);
00156 
00157     std::vector<std::string> tokens;
00158     boost::algorithm::split(tokens, m_url, boost::is_any_of(std::string("?")));
00159     m_filename = tokens[0];
00160 
00161     if (tokens.size() == 2) {
00162       std::string const options = tokens[1];
00163       std::vector<std::string> optionTokens;
00164       boost::algorithm::split(optionTokens, options, boost::is_any_of(std::string("&")));
00165 
00166       std::string const equalSign("=");
00167       std::string const comma(",");
00168 
00169       for (size_t oi = 0, oe = optionTokens.size(); oi != oe; ++oi) {
00170         std::string const option = optionTokens[oi];
00171         std::vector<std::string> argTokens;
00172         boost::algorithm::split(argTokens, option, boost::is_any_of(equalSign));
00173 
00174         if (argTokens.size() != 2) {
00175           throw  cms::Exception("TrivialFileCatalog", "TrivialFileCatalog::connect: Malformed url for file catalog configuration");
00176         }
00177 
00178         if (argTokens[0] == "protocol") {
00179           boost::algorithm::split(m_protocols, argTokens[1], boost::is_any_of(comma));
00180         } else if (argTokens[0] == "destination") {
00181           m_destination = argTokens[1];
00182         }
00183       }
00184     }
00185 
00186     if (m_protocols.empty()) {
00187       throw cms::Exception("TrivialFileCatalog", "TrivialFileCatalog::connect: protocol was not supplied in the contact string");
00188     }
00189 
00190     std::ifstream configFile;
00191     configFile.open(m_filename.c_str());
00192 
00193     //
00194     // std::cout << "Using catalog configuration " << m_filename << std::endl;
00195 
00196     if (!configFile.good() || !configFile.is_open()) {
00197       throw cms::Exception("TrivialFileCatalog", "TrivialFileCatalog::connect: Unable to open trivial file catalog " + m_filename);
00198     }
00199 
00200     configFile.close();
00201 
00202     XercesDOMParser* parser = new XercesDOMParser;
00203     parser->setValidationScheme(XercesDOMParser::Val_Auto);
00204     parser->setDoNamespaces(false);
00205     parser->parse(m_filename.c_str());
00206     DOMDocument* doc = parser->getDocument();
00207     assert(doc);
00208 
00209     /* trivialFileCatalog matches the following xml schema
00210        FIXME: write a proper DTD
00211        <storage-mapping>
00212        <lfn-to-pfn protocol="direct" destination-match=".*"
00213        path-match="lfn/guid match regular expression"
00214        result="/castor/cern.ch/cms/$1"/>
00215        <pfn-to-lfn protocol="srm"
00216        path-match="lfn/guid match regular expression"
00217        result="$1"/>
00218        </storage-mapping>
00219     */
00220 
00221     /*first of all do the lfn-to-pfn bit*/
00222     {
00223       DOMNodeList* rules = doc->getElementsByTagName(_toDOMS("lfn-to-pfn"));
00224       unsigned int const ruleTagsNum = rules->getLength();
00225 
00226       // FIXME: we should probably use a DTD for checking validity
00227 
00228       for (unsigned int i = 0; i < ruleTagsNum; ++i) {
00229         DOMNode* ruleNode = rules->item(i);
00230         parseRule(ruleNode, m_directRules);
00231       }
00232     }
00233     /*Then we handle the pfn-to-lfn bit*/
00234     {
00235       DOMNodeList* rules = doc->getElementsByTagName(_toDOMS("pfn-to-lfn"));
00236       unsigned int ruleTagsNum = rules->getLength();
00237 
00238       for (unsigned int i = 0; i < ruleTagsNum; ++i) {
00239         DOMNode* ruleNode = rules->item(i);
00240         parseRule(ruleNode, m_inverseRules);
00241       }
00242     }
00243   }
00244 
00245   std::string
00246   FileLocator::applyRules(ProtocolRules const& protocolRules,
00247                           std::string const& protocol,
00248                           std::string const& destination,
00249                           bool direct,
00250                           std::string name) const {
00251 
00252     // std::cerr << "Calling apply rules with protocol: " << protocol << "\n destination: " << destination << "\n " << " on name " << name << std::endl;
00253 
00254     ProtocolRules::const_iterator const rulesIterator = protocolRules.find(protocol);
00255     if (rulesIterator == protocolRules.end()) {
00256       return "";
00257     }
00258 
00259     Rules const& rules = (*(rulesIterator)).second;
00260 
00261     boost::smatch destinationMatches;
00262     boost::smatch nameMatches;
00263 
00264     /* Look up for a matching rule*/
00265     for (Rules::const_iterator i = rules.begin(); i != rules.end(); ++i) {
00266 
00267       if (!boost::regex_match(destination, destinationMatches, i->destinationMatch)) {
00268         continue;
00269       }
00270 
00271       if (!boost::regex_match(name, i->pathMatch)) {
00272         continue;
00273       }
00274 
00275       // std::cerr << "Rule " << i->pathMatch << "matched! " << std::endl;
00276 
00277       std::string const chain = i->chain;
00278       if ((direct == true) && (chain != "")) {
00279         name = applyRules(protocolRules, chain, destination, direct, name);
00280         if (name.empty()) {
00281           return "";
00282         }
00283       }
00284 
00285       boost::regex_match(name, nameMatches, i->pathMatch);
00286       name = replaceWithRegexp(nameMatches, i->result);
00287 
00288       if ((direct == false) && (chain != "")) {
00289         name = applyRules(protocolRules, chain, destination, direct, name);
00290       }
00291       return name;
00292     }
00293     return "";
00294   }
00295 }