00001 #include "FWCore/Catalog/interface/FileLocator.h"
00002 #include "FWCore/Catalog/interface/SiteLocalConfig.h"
00003 #include "FWCore/ServiceRegistry/interface/Service.h"
00004
00005 #include <xercesc/parsers/XercesDOMParser.hpp>
00006 #include <xercesc/util/PlatformUtils.hpp>
00007
00008 #include <boost/algorithm/string.hpp>
00009 #include <boost/algorithm/string/replace.hpp>
00010
00011 #include <cstdlib>
00012 #include <stdexcept>
00013 #include <fstream>
00014 #include <iostream>
00015 #include <sstream>
00016
00017 using namespace xercesc;
00018
00019 namespace {
00020
00021 inline std::string _toString(XMLCh const* toTranscode) {
00022 std::string tmp(XMLString::transcode(toTranscode));
00023 return tmp;
00024 }
00025
00026 inline XMLCh* _toDOMS(std::string temp) {
00027 XMLCh* buff = XMLString::transcode(temp.c_str());
00028 return buff;
00029 }
00030
00031 std::string
00032 replaceWithRegexp(boost::smatch const& matches,
00033 std::string const& outputFormat) {
00034 std::string result = outputFormat;
00035 std::stringstream str;
00036
00037
00038 for (size_t i = 1; i < matches.size(); ++i) {
00039 str.str("");
00040 str << "$" << i;
00041
00042 std::string const matchedString(matches[i].first, matches[i].second);
00043 if (!matchedString.empty())
00044 boost::algorithm::replace_all(result, str.str(), matchedString);
00045 }
00046
00047 return result;
00048 }
00049 }
00050
00051 namespace edm {
00052
00053 int FileLocator::s_numberOfInstances = 0;
00054
00055 FileLocator::FileLocator(std::string const& catUrl, bool fallback)
00056 : m_destination("any") {
00057 try {
00058
00059
00060 if (s_numberOfInstances == 0) {
00061 XMLPlatformUtils::Initialize();
00062 }
00063 }
00064 catch (XMLException const& e) {
00065
00066
00067
00068 throw
00069 cms::Exception(std::string("Fatal Error on edm::FileLocator:")+ _toString(e.getMessage()));
00070 }
00071 ++s_numberOfInstances;
00072
00073 init(catUrl, fallback);
00074
00075
00076
00077 }
00078
00079 FileLocator::~FileLocator()
00080 {}
00081
00082
00083 std::string
00084 FileLocator::pfn(std::string const& ilfn) const {
00085 return convert(ilfn, m_directRules, true);
00086 }
00087
00088 std::string
00089 FileLocator::lfn(std::string const& ipfn) const {
00090 return convert(ipfn, m_inverseRules, false);
00091 }
00092
00093 std::string
00094 FileLocator::convert(std::string const& input, ProtocolRules const& rules, bool direct) const {
00095 std::string out = "";
00096
00097 for (size_t pi = 0, pe = m_protocols.size(); pi != pe; ++pi) {
00098 out = applyRules(rules, m_protocols[pi], m_destination, direct, input);
00099 if (!out.empty())
00100 return out;
00101 }
00102 return out;
00103 }
00104
00105 void
00106 FileLocator::parseRule(DOMNode* ruleNode, ProtocolRules& rules) {
00107 if (!ruleNode) {
00108 throw cms::Exception(std::string("TrivialFileCatalog::connect: Malformed trivial catalog"));
00109 }
00110
00111
00112
00113 DOMElement* ruleElement = static_cast<DOMElement *>(ruleNode);
00114
00115 std::string const protocol = _toString(ruleElement->getAttribute(_toDOMS("protocol")));
00116 std::string destinationMatchRegexp = _toString(ruleElement->getAttribute(_toDOMS("destination-match")));
00117
00118 if (destinationMatchRegexp.empty()) {
00119 destinationMatchRegexp = ".*";
00120 }
00121
00122 std::string const pathMatchRegexp
00123 = _toString(ruleElement->getAttribute(_toDOMS("path-match")));
00124 std::string const result
00125 = _toString(ruleElement->getAttribute(_toDOMS("result")));
00126 std::string const chain
00127 = _toString(ruleElement->getAttribute(_toDOMS("chain")));
00128
00129 Rule rule;
00130 rule.pathMatch.assign(pathMatchRegexp);
00131 rule.destinationMatch.assign(destinationMatchRegexp);
00132 rule.result = result;
00133 rule.chain = chain;
00134 rules[protocol].push_back(rule);
00135 }
00136
00137 void
00138 FileLocator::init(std::string const& catUrl, bool fallback) {
00139 std::string m_url = catUrl;
00140
00141 if (m_url.empty()) {
00142 Service<SiteLocalConfig> localconfservice;
00143 if (!localconfservice.isAvailable())
00144 throw cms::Exception("edm::SiteLocalConfigService is not available");
00145
00146 m_url = (fallback ? localconfservice->fallbackDataCatalog() : localconfservice->dataCatalog());
00147 }
00148
00149
00150
00151 if (m_url.find("file:") == std::string::npos) {
00152 throw cms::Exception("TrivialFileCatalog::connect: Malformed url for file catalog configuration");
00153 }
00154
00155 m_url = m_url.erase(0, m_url.find(":") + 1);
00156
00157 std::vector<std::string> tokens;
00158 boost::algorithm::split(tokens, m_url, boost::is_any_of(std::string("?")));
00159 m_filename = tokens[0];
00160
00161 if (tokens.size() == 2) {
00162 std::string const options = tokens[1];
00163 std::vector<std::string> optionTokens;
00164 boost::algorithm::split(optionTokens, options, boost::is_any_of(std::string("&")));
00165
00166 std::string const equalSign("=");
00167 std::string const comma(",");
00168
00169 for (size_t oi = 0, oe = optionTokens.size(); oi != oe; ++oi) {
00170 std::string const option = optionTokens[oi];
00171 std::vector<std::string> argTokens;
00172 boost::algorithm::split(argTokens, option, boost::is_any_of(equalSign));
00173
00174 if (argTokens.size() != 2) {
00175 throw cms::Exception("TrivialFileCatalog::connect: Malformed url for file catalog configuration");
00176 }
00177
00178 if (argTokens[0] == "protocol") {
00179 boost::algorithm::split(m_protocols, argTokens[1], boost::is_any_of(comma));
00180 } else if (argTokens[0] == "destination") {
00181 m_destination = argTokens[1];
00182 }
00183 }
00184 }
00185
00186 if (m_protocols.empty()) {
00187 throw cms::Exception("TrivialFileCatalog::connect: protocol was not supplied in the contact string");
00188 }
00189
00190 std::ifstream configFile;
00191 configFile.open(m_filename.c_str());
00192
00193
00194
00195
00196 if (!configFile.good() || !configFile.is_open()) {
00197 throw cms::Exception("TrivialFileCatalog::connect: Unable to open trivial file catalog " + m_filename);
00198 }
00199
00200 configFile.close();
00201
00202 XercesDOMParser* parser = new XercesDOMParser;
00203 parser->setValidationScheme(XercesDOMParser::Val_Auto);
00204 parser->setDoNamespaces(false);
00205 parser->parse(m_filename.c_str());
00206 DOMDocument* doc = parser->getDocument();
00207 assert(doc);
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222 {
00223 DOMNodeList* rules = doc->getElementsByTagName(_toDOMS("lfn-to-pfn"));
00224 unsigned int const ruleTagsNum = rules->getLength();
00225
00226
00227
00228 for (unsigned int i = 0; i < ruleTagsNum; ++i) {
00229 DOMNode* ruleNode = rules->item(i);
00230 parseRule(ruleNode, m_directRules);
00231 }
00232 }
00233
00234 {
00235 DOMNodeList* rules = doc->getElementsByTagName(_toDOMS("pfn-to-lfn"));
00236 unsigned int ruleTagsNum = rules->getLength();
00237
00238 for (unsigned int i = 0; i < ruleTagsNum; ++i) {
00239 DOMNode* ruleNode = rules->item(i);
00240 parseRule(ruleNode, m_inverseRules);
00241 }
00242 }
00243 }
00244
00245 std::string
00246 FileLocator::applyRules(ProtocolRules const& protocolRules,
00247 std::string const& protocol,
00248 std::string const& destination,
00249 bool direct,
00250 std::string name) const {
00251
00252
00253
00254 ProtocolRules::const_iterator const rulesIterator = protocolRules.find(protocol);
00255 if (rulesIterator == protocolRules.end()) {
00256 return "";
00257 }
00258
00259 Rules const& rules = (*(rulesIterator)).second;
00260
00261 boost::smatch destinationMatches;
00262 boost::smatch nameMatches;
00263
00264
00265 for (Rules::const_iterator i = rules.begin(); i != rules.end(); ++i) {
00266
00267 if (!boost::regex_match(destination, destinationMatches, i->destinationMatch)) {
00268 continue;
00269 }
00270
00271 if (!boost::regex_match(name, i->pathMatch)) {
00272 continue;
00273 }
00274
00275
00276
00277 std::string const chain = i->chain;
00278 if ((direct == true) && (chain != "")) {
00279 name = applyRules(protocolRules, chain, destination, direct, name);
00280 if (name.empty()) {
00281 return "";
00282 }
00283 }
00284
00285 boost::regex_match(name, nameMatches, i->pathMatch);
00286 name = replaceWithRegexp(nameMatches, i->result);
00287
00288 if ((direct == false) && (chain != "")) {
00289 name = applyRules(protocolRules, chain, destination, direct, name);
00290 }
00291 return name;
00292 }
00293 return "";
00294 }
00295 }