CMS 3D CMS Logo

GroupSelectorRules.cc

Go to the documentation of this file.
00001 // $Id: GroupSelectorRules.cc,v 1.2 2008/06/20 23:17:59 wmtan Exp $
00002 
00003 #include <algorithm>
00004 #include <iterator>
00005 #include <ostream>
00006 #include <cctype>
00007 
00008 #include "boost/algorithm/string.hpp"
00009 
00010 #include "DataFormats/Provenance/interface/BranchDescription.h"
00011 #include "FWCore/Framework/interface/GroupSelectorRules.h"
00012 #include "FWCore/ParameterSet/interface/ParameterSet.h"
00013 #include "FWCore/Utilities/interface/EDMException.h"
00014 #include "FWCore/Utilities/interface/Algorithms.h"
00015 
00016 
00017 namespace edm {
00018 // The following typedef is used only in this implementation file, in
00019 // order to shorten several lines of code.
00020 typedef std::vector<edm::BranchDescription const*> VCBDP;
00021 
00022   namespace 
00023   {
00024   
00025     //--------------------------------------------------
00026     // function partial_match is a helper for Rule. It encodes the
00027     // matching of std::strings, and knows about wildcarding rules.
00028     inline
00029     bool
00030     partial_match(const boost::regex& regularExpression,
00031                   const std::string& branchstring)
00032     {
00033       if (regularExpression.empty()) {
00034         if (branchstring == "") return true;
00035         else return false;
00036       }
00037       return boost::regex_match(branchstring, regularExpression);
00038     }
00039   }
00040 
00041   //--------------------------------------------------  
00042   // Class Rule is used to determine whether or not a given branch
00043   // (really a Group, as described by the BranchDescription object
00044   // that specifies that Group) matches a 'rule' specified by the
00045   // configuration. Each Rule is configured with a single std::string from
00046   // the configuration file.
00047   //
00048   // The configuration std::string is of the form:
00049   //
00050   //   'keep <spec>'            ** or **
00051   //   'drop <spec>'
00052   //
00053   // where '<spec>' is of the form:
00054   //
00055   //   <product type>_<module label>_<instance name>_<process name>
00056   //
00057   // The 3 underscores must always be present.  The four fields can
00058   // be empty or composed of alphanumeric characters.  "*" is an
00059   // allowed wildcard that will match 0 or more of any characters.
00060   // "?" is the other allowed wilcard that will match exactly one
00061   // character.  There is one exception to this, the entire '<spec>'
00062   // can be one single "*" without any underscores and this is
00063   // interpreted as "*_*_*_*".  Anything else will lead to an exception
00064   // being thrown.
00065   //
00066   // This class has much room for optimization. This should be
00067   // revisited as soon as profiling data are available.
00068 
00069   GroupSelectorRules::Rule::Rule(std::string const& s, std::string const& parameterName, std::string const& owner) :
00070     selectflag_(),
00071     productType_(),
00072     moduleLabel_(),
00073     instanceName_(),
00074     processName_()
00075   {
00076     if (s.size() < 6)
00077       throw edm::Exception(edm::errors::Configuration)
00078         << "Invalid statement in configuration file\n"
00079         << "In " << owner << " parameter named '" << parameterName << "'\n"
00080         << "Rule must have at least 6 characters because it must\n"
00081         << "specify 'keep ' or 'drop ' and also supply a pattern.\n"
00082         << "This is the invalid output configuration rule:\n" 
00083         << "    " << s << "\n"
00084         << "Exception thrown from GroupSelectorRules::Rule\n";
00085 
00086     if (s.substr(0,4) == "keep")
00087       selectflag_ = true;
00088     else if (s.substr(0,4) == "drop")
00089       selectflag_ = false;
00090     else
00091       throw edm::Exception(edm::errors::Configuration)
00092         << "Invalid statement in configuration file\n"
00093         << "In " << owner << " parameter named '" << parameterName << "'\n"
00094         << "Rule must specify 'keep ' or 'drop ' and also supply a pattern.\n"
00095         << "This is the invalid output configuration rule:\n" 
00096         << "    " << s << "\n"
00097         << "Exception thrown from GroupSelectorRules::Rule\n";
00098 
00099     if ( !std::isspace(s[4]) ) {
00100 
00101       throw edm::Exception(edm::errors::Configuration)
00102         << "Invalid statement in configuration file\n"
00103         << "In " << owner << " parameter named '" << parameterName << "'\n"
00104         << "In each rule, 'keep' or 'drop' must be followed by a space\n"
00105         << "This is the invalid output configuration rule:\n" 
00106         << "    " << s << "\n"
00107         << "Exception thrown from GroupSelectorRules::Rule\n";
00108     }
00109 
00110     // Now pull apart the std::string to get at the bits and pieces of the
00111     // specification...
00112     
00113     // Grab from after 'keep/drop ' (note the space!) to the end of
00114     // the std::string...
00115     std::string spec(s.begin()+5, s.end());
00116 
00117     // Trim any leading and trailing whitespace from spec
00118     boost::trim(spec);
00119 
00120     if (spec == "*") // special case for wildcard
00121     {
00122       productType_  = ".*";
00123       moduleLabel_  = ".*";
00124       instanceName_ = ".*";
00125       processName_  = ".*";
00126       return;
00127     }
00128     else
00129     {
00130       std::vector<std::string> parts;
00131       boost::split(parts, spec, boost::is_any_of("_"));
00132 
00133       // The std::vector must contain at least 4 parts
00134       // and none may be empty.
00135       bool good = (parts.size() == 4);
00136 
00137       // Require all the std::strings to contain only alphanumberic
00138       // characters or "*" or "?"
00139       if (good) 
00140       {
00141         for (int i = 0; i < 4; ++i) {
00142           std::string& field = parts[i];
00143           int size = field.size();
00144           for (int j = 0; j < size; ++j) {
00145             if ( !(isalnum(field[j]) || field[j] == '*' || field[j] == '?') ) {
00146               good = false;
00147             }
00148           }
00149 
00150           // We are using the boost regex library to deal with the wildcards.
00151           // The configuration file uses a syntax that accepts "*" and "?"
00152           // as wildcards so we need to convert these to the syntax used in
00153           // regular expressions.
00154           boost::replace_all(parts[i], "*", ".*");
00155           boost::replace_all(parts[i], "?", ".");
00156         }
00157       }
00158 
00159       if (!good)
00160       {
00161       throw edm::Exception(edm::errors::Configuration)
00162         << "Invalid statement in configuration file\n"
00163         << "In " << owner << " parameter named '" << parameterName << "'\n"
00164         << "In each rule, after 'keep ' or 'drop ' there must\n"
00165         << "be a branch specification of the form 'type_label_instance_process'\n"
00166         << "There must be 4 fields separated by underscores\n"
00167         << "The fields can only contain alphanumeric characters and the wildcards * or ?\n"
00168         << "Alternately, a single * is also allowed for the branch specification\n"
00169         << "This is the invalid output configuration rule:\n" 
00170         << "    " << s << "\n"
00171         << "Exception thrown from GroupSelectorRules::Rule\n";
00172       }
00173 
00174       // Assign the std::strings to the regex (regular expression) objects
00175       // If the std::string is empty we skip the assignment and leave
00176       // the regular expression also empty.
00177 
00178       if (parts[0] != "") productType_  = parts[0];
00179       if (parts[1] != "") moduleLabel_  = parts[1];
00180       if (parts[2] != "") instanceName_ = parts[2];
00181       if (parts[3] != "") processName_  = parts[3];
00182     }
00183   }
00184 
00185   void
00186   GroupSelectorRules::Rule::applyToAll(std::vector<BranchSelectState>& branchstates) const {
00187     std::vector<BranchSelectState>::iterator it = branchstates.begin();
00188     std::vector<BranchSelectState>::iterator end = branchstates.end();
00189     for (; it != end; ++it) applyToOne(it->desc, it->selectMe);
00190   }
00191 
00192   void
00193   GroupSelectorRules::applyToAll(std::vector<BranchSelectState>& branchstates) const {
00194     std::vector<Rule>::const_iterator it = rules_.begin();
00195     std::vector<Rule>::const_iterator end = rules_.end();
00196     for (; it != end; ++it) it->applyToAll(branchstates);
00197   }
00198 
00199 //   bool
00200 //   Rule::applyToOne(edm::BranchDescription const* branch) const
00201 //   {
00202 //     bool match = 
00203 //       partial_match(productType_, branch->friendlyClassName()) && 
00204 //       partial_match(moduleLabel_, branch->moduleLabel()) &&
00205 //       partial_match(instanceName_, branch->productInstanceName()) &&
00206 //       partial_match(processName_, branch->processName());
00207 
00208 //     return match ? selectflag_ : !selectflag_;      
00209 //   }
00210 
00211   void
00212   GroupSelectorRules::Rule::applyToOne(edm::BranchDescription const* branch,
00213                    bool& result) const
00214   {
00215     if (this->appliesTo(branch)) result = selectflag_;    
00216   }
00217 
00218   bool
00219   GroupSelectorRules::Rule::appliesTo(edm::BranchDescription const* branch) const
00220   {
00221     return
00222       partial_match(productType_, branch->friendlyClassName()) && 
00223       partial_match(moduleLabel_, branch->moduleLabel()) &&
00224       partial_match(instanceName_, branch->productInstanceName()) &&
00225       partial_match(processName_, branch->processName());
00226   }
00227 
00228   GroupSelectorRules::GroupSelectorRules(ParameterSet const& pset,
00229                                std::string const& parameterName,
00230                                std::string const& parameterOwnerName) :
00231   rules_(),
00232   parameterName_(parameterName),
00233   parameterOwnerName_(parameterOwnerName)
00234   {
00235     // Fill the rules.
00236     // If there is no parameter whose name is parameterName_ in the
00237     // ParameterSet we are given, we use the following default.
00238     std::vector<std::string> defaultCommands(1U, std::string("keep *"));
00239 
00240     std::vector<std::string> commands = 
00241       pset.getUntrackedParameter<std::vector<std::string> >(parameterName,
00242                                                     defaultCommands);
00243     rules_.reserve(commands.size());
00244     for(std::vector<std::string>::const_iterator it = commands.begin(), end = commands.end();
00245         it != end; ++it) {
00246       rules_.push_back(Rule(*it, parameterName, parameterOwnerName));
00247     }
00248     keepAll_ = commands.size() == 1 && commands[0] == defaultCommands[0];
00249   }
00250 }

Generated on Tue Jun 9 17:36:10 2009 for CMSSW by  doxygen 1.5.4