CMS 3D CMS Logo

/data/refman/pasoursint/CMSSW_5_3_3/src/FWCore/Framework/src/GroupSelectorRules.cc

Go to the documentation of this file.
00001 #include <algorithm>
00002 #include <iterator>
00003 #include <ostream>
00004 #include <cctype>
00005 
00006 #include "boost/algorithm/string.hpp"
00007 
00008 #include "DataFormats/Provenance/interface/BranchDescription.h"
00009 #include "FWCore/Framework/interface/GroupSelectorRules.h"
00010 #include "FWCore/ParameterSet/interface/ParameterSet.h"
00011 #include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
00012 #include "FWCore/Utilities/interface/EDMException.h"
00013 
00014 namespace edm {
00015 // The following typedef is used only in this implementation file, in
00016 // order to shorten several lines of code.
00017 typedef std::vector<edm::BranchDescription const*> VCBDP;
00018 
00019   namespace 
00020   {
00021   
00022     //--------------------------------------------------
00023     // function partial_match is a helper for Rule. It encodes the
00024     // matching of std::strings, and knows about wildcarding rules.
00025     inline
00026     bool
00027     partial_match(const boost::regex& regularExpression,
00028                   const std::string& branchstring)
00029     {
00030       if (regularExpression.empty()) {
00031         if (branchstring == "") return true;
00032         else return false;
00033       }
00034       return boost::regex_match(branchstring, regularExpression);
00035     }
00036   }
00037 
00038   //--------------------------------------------------  
00039   // Class Rule is used to determine whether or not a given branch
00040   // (really a Group, as described by the BranchDescription object
00041   // that specifies that Group) matches a 'rule' specified by the
00042   // configuration. Each Rule is configured with a single std::string from
00043   // the configuration file.
00044   //
00045   // The configuration std::string is of the form:
00046   //
00047   //   'keep <spec>'            ** or **
00048   //   'drop <spec>'
00049   //
00050   // where '<spec>' is of the form:
00051   //
00052   //   <product type>_<module label>_<instance name>_<process name>
00053   //
00054   // The 3 underscores must always be present.  The four fields can
00055   // be empty or composed of alphanumeric characters.  "*" is an
00056   // allowed wildcard that will match 0 or more of any characters.
00057   // "?" is the other allowed wilcard that will match exactly one
00058   // character.  There is one exception to this, the entire '<spec>'
00059   // can be one single "*" without any underscores and this is
00060   // interpreted as "*_*_*_*".  Anything else will lead to an exception
00061   // being thrown.
00062   //
00063   // This class has much room for optimization. This should be
00064   // revisited as soon as profiling data are available.
00065 
00066   GroupSelectorRules::Rule::Rule(std::string const& s, std::string const& parameterName, std::string const& owner) :
00067     selectflag_(),
00068     productType_(),
00069     moduleLabel_(),
00070     instanceName_(),
00071     processName_()
00072   {
00073     if (s.size() < 6)
00074       throw edm::Exception(edm::errors::Configuration)
00075         << "Invalid statement in configuration file\n"
00076         << "In " << owner << " parameter named '" << parameterName << "'\n"
00077         << "Rule must have at least 6 characters because it must\n"
00078         << "specify 'keep ' or 'drop ' and also supply a pattern.\n"
00079         << "This is the invalid output configuration rule:\n" 
00080         << "    " << s << "\n"
00081         << "Exception thrown from GroupSelectorRules::Rule\n";
00082 
00083     if (s.substr(0,4) == "keep")
00084       selectflag_ = true;
00085     else if (s.substr(0,4) == "drop")
00086       selectflag_ = false;
00087     else
00088       throw edm::Exception(edm::errors::Configuration)
00089         << "Invalid statement in configuration file\n"
00090         << "In " << owner << " parameter named '" << parameterName << "'\n"
00091         << "Rule must specify 'keep ' or 'drop ' and also supply a pattern.\n"
00092         << "This is the invalid output configuration rule:\n" 
00093         << "    " << s << "\n"
00094         << "Exception thrown from GroupSelectorRules::Rule\n";
00095 
00096     if ( !std::isspace(s[4]) ) {
00097 
00098       throw edm::Exception(edm::errors::Configuration)
00099         << "Invalid statement in configuration file\n"
00100         << "In " << owner << " parameter named '" << parameterName << "'\n"
00101         << "In each rule, 'keep' or 'drop' must be followed by a space\n"
00102         << "This is the invalid output configuration rule:\n" 
00103         << "    " << s << "\n"
00104         << "Exception thrown from GroupSelectorRules::Rule\n";
00105     }
00106 
00107     // Now pull apart the std::string to get at the bits and pieces of the
00108     // specification...
00109     
00110     // Grab from after 'keep/drop ' (note the space!) to the end of
00111     // the std::string...
00112     std::string spec(s.begin()+5, s.end());
00113 
00114     // Trim any leading and trailing whitespace from spec
00115     boost::trim(spec);
00116 
00117     if (spec == "*") // special case for wildcard
00118     {
00119       productType_  = ".*";
00120       moduleLabel_  = ".*";
00121       instanceName_ = ".*";
00122       processName_  = ".*";
00123       return;
00124     }
00125     else
00126     {
00127       std::vector<std::string> parts;
00128       boost::split(parts, spec, boost::is_any_of("_"));
00129 
00130       // The std::vector must contain at least 4 parts
00131       // and none may be empty.
00132       bool good = (parts.size() == 4);
00133 
00134       // Require all the std::strings to contain only alphanumberic
00135       // characters or "*" or "?"
00136       if (good) 
00137       {
00138         for (int i = 0; i < 4; ++i) {
00139           std::string& field = parts[i];
00140           int size = field.size();
00141           for (int j = 0; j < size; ++j) {
00142             if ( !(isalnum(field[j]) || field[j] == '*' || field[j] == '?') ) {
00143               good = false;
00144             }
00145           }
00146 
00147           // We are using the boost regex library to deal with the wildcards.
00148           // The configuration file uses a syntax that accepts "*" and "?"
00149           // as wildcards so we need to convert these to the syntax used in
00150           // regular expressions.
00151           boost::replace_all(parts[i], "*", ".*");
00152           boost::replace_all(parts[i], "?", ".");
00153         }
00154       }
00155 
00156       if (!good)
00157       {
00158       throw edm::Exception(edm::errors::Configuration)
00159         << "Invalid statement in configuration file\n"
00160         << "In " << owner << " parameter named '" << parameterName << "'\n"
00161         << "In each rule, after 'keep ' or 'drop ' there must\n"
00162         << "be a branch specification of the form 'type_label_instance_process'\n"
00163         << "There must be 4 fields separated by underscores\n"
00164         << "The fields can only contain alphanumeric characters and the wildcards * or ?\n"
00165         << "Alternately, a single * is also allowed for the branch specification\n"
00166         << "This is the invalid output configuration rule:\n" 
00167         << "    " << s << "\n"
00168         << "Exception thrown from GroupSelectorRules::Rule\n";
00169       }
00170 
00171       // Assign the std::strings to the regex (regular expression) objects
00172       // If the std::string is empty we skip the assignment and leave
00173       // the regular expression also empty.
00174 
00175       if (parts[0] != "") productType_  = parts[0];
00176       if (parts[1] != "") moduleLabel_  = parts[1];
00177       if (parts[2] != "") instanceName_ = parts[2];
00178       if (parts[3] != "") processName_  = parts[3];
00179     }
00180   }
00181 
00182   void
00183   GroupSelectorRules::Rule::applyToAll(std::vector<BranchSelectState>& branchstates) const {
00184     std::vector<BranchSelectState>::iterator it = branchstates.begin();
00185     std::vector<BranchSelectState>::iterator end = branchstates.end();
00186     for (; it != end; ++it) applyToOne(it->desc, it->selectMe);
00187   }
00188 
00189   void
00190   GroupSelectorRules::applyToAll(std::vector<BranchSelectState>& branchstates) const {
00191     std::vector<Rule>::const_iterator it = rules_.begin();
00192     std::vector<Rule>::const_iterator end = rules_.end();
00193     for (; it != end; ++it) it->applyToAll(branchstates);
00194   }
00195 
00196 //   bool
00197 //   Rule::applyToOne(edm::BranchDescription const* branch) const
00198 //   {
00199 //     bool match = 
00200 //       partial_match(productType_, branch->friendlyClassName()) && 
00201 //       partial_match(moduleLabel_, branch->moduleLabel()) &&
00202 //       partial_match(instanceName_, branch->productInstanceName()) &&
00203 //       partial_match(processName_, branch->processName());
00204 
00205 //     return match ? selectflag_ : !selectflag_;      
00206 //   }
00207 
00208   void
00209   GroupSelectorRules::Rule::applyToOne(edm::BranchDescription const* branch,
00210                    bool& result) const
00211   {
00212     if (this->appliesTo(branch)) result = selectflag_;    
00213   }
00214 
00215   bool
00216   GroupSelectorRules::Rule::appliesTo(edm::BranchDescription const* branch) const
00217   {
00218     return
00219       partial_match(productType_, branch->friendlyClassName()) && 
00220       partial_match(moduleLabel_, branch->moduleLabel()) &&
00221       partial_match(instanceName_, branch->productInstanceName()) &&
00222       partial_match(processName_, branch->processName());
00223   }
00224 
00225   void
00226   GroupSelectorRules::fillDescription(ParameterSetDescription& desc, char const* parameterName) {
00227     std::vector<std::string> defaultStrings(1U, std::string("keep *"));
00228     desc.addUntracked<std::vector<std::string> >(parameterName, defaultStrings)
00229         ->setComment("Specifies which branches are kept or dropped.");
00230   }
00231 
00232   GroupSelectorRules::GroupSelectorRules(ParameterSet const& pset,
00233                                std::string const& parameterName,
00234                                std::string const& parameterOwnerName) :
00235   rules_(),
00236   parameterName_(parameterName),
00237   parameterOwnerName_(parameterOwnerName)
00238   {
00239     // Fill the rules.
00240     // If there is no parameter whose name is parameterName_ in the
00241     // ParameterSet we are given, we use the following default.
00242     std::vector<std::string> defaultCommands(1U, std::string("keep *"));
00243 
00244     std::vector<std::string> commands = 
00245       pset.getUntrackedParameter<std::vector<std::string> >(parameterName,
00246                                                     defaultCommands);
00247     if (commands.empty()) {
00248       commands.push_back(defaultCommands[0]);
00249     }
00250     rules_.reserve(commands.size());
00251     for(std::vector<std::string>::const_iterator it = commands.begin(), end = commands.end();
00252         it != end; ++it) {
00253       rules_.push_back(Rule(*it, parameterName, parameterOwnerName));
00254     }
00255     keepAll_ = commands.size() == 1 && commands[0] == defaultCommands[0];
00256   }
00257 
00258 
00259 }