CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
Classes | Functions | Variables
cmsHarvester Namespace Reference

Classes

class  CMSHarvester
 CMSHarvester class. More...
 
class  CMSHarvesterHelpFormatter
 Helper class: CMSHarvesterHelpFormatter. More...
 
class  DBSXMLHandler
 Helper class: DBSXMLHandler. More...
 
class  Error
 Helper class: Error exception. More...
 
class  Usage
 Helper class: Usage exception. More...
 

Functions

def build_dataset_ignore_list
 
def build_dataset_list
 def dbs_check_dataset_num_events(self, dataset_name): """Figure out the number of events in each run of this dataset. More...
 
def build_dataset_use_list
 
def build_datasets_information
 
def build_runs_ignore_list
 
def build_runs_list
 
def build_runs_use_list
 
def check_cmssw
 
def check_dataset_list
 
def check_dbs
 
def check_globaltag
 
def check_globaltag_contains_ref_hist_key
 
def check_globaltag_exists
 
def check_input_status
 
def check_ref_hist_mappings
 
def check_ref_hist_tag
 
def create_and_check_castor_dir
 
def create_and_check_castor_dirs
 
def create_castor_path_name_common
 
def create_castor_path_name_special
 
def create_config_file_name
 
def create_crab_config
 
def create_es_prefer_snippet
 
def create_harvesting_config
 
def create_harvesting_config_file_name
 
def create_harvesting_output_file_name
 
def create_me_extraction_config
 
More...
 
def create_me_summary_config_file_name
 
def create_me_summary_output_file_name
 
def create_multicrab_block_name
 
def create_multicrab_config
 
def create_output_file_name
 
def dbs_check_dataset_spread
 def dbs_resolve_dataset_number_of_sites(self, dataset_name): """Ask DBS across how many sites this dataset has been spread out. More...
 
def dbs_resolve_cmssw_version
 
def dbs_resolve_dataset_name
 
def dbs_resolve_datatype
 
def dbs_resolve_globaltag
 
def dbs_resolve_number_of_events
 
def dbs_resolve_runs
 def dbs_resolve_dataset_number_of_events(self, dataset_name): """Ask DBS across how many events this dataset has been spread out. More...
 
def escape_dataset_name
 
def load_ref_hist_mappings
 
def option_handler_caf_access
 
def option_handler_castor_dir
 def option_handler_dataset_name(self, option, opt_str, value, parser): """Specify the name(s) of the dataset(s) to be processed. More...
 
def option_handler_crab_submission
 
def option_handler_list_types
 
def option_handler_no_t1access
 
def option_handler_preferred_site
 
def option_handler_saveByLumiSection
 
def option_handler_sites
 
def parse_cmd_line_options
 
def pick_a_site
 
def process_dataset_ignore_list
 
def process_runs_use_and_ignore_lists
 
def ref_hist_mappings_needed
 
def run
 
def setup_dbs
 

Now we try to do a very simple DBS search.

More...
 
def setup_harvesting_info
 
def show_exit_message
 
def singlify_datasets
 
def write_crab_config
 def create_harvesting_config(self, dataset_name): """Create the Python harvesting configuration for a given job. More...
 
def write_harvesting_config
 
def write_me_extraction_config
 
def write_multicrab_config
 

Variables

string __author__ = "Jeroen Hegeman (jeroen.hegeman@cern.ch),"
 
string __version__ = "3.8.2p1"
 File : cmsHarvest.py Authors : Jeroen Hegeman (jeroen.hegeman@cern.ch), Niklas Pietsch (niklas.pietsch@desy.de), Francesco Costanza (francesco.costanza@desy.de) Last change: 20100308. More...
 
string action = "callback"
 
list all_file_names = files_info[run_number]
 
list all_t1
 
 caf_access
 
 callback = self.option_handler_input_Jsonrunfile,
 
 castor_base_dir
 
list castor_dir = self.datasets_information[dataset_name]
 

CRAB

More...
 
tuple castor_path_common = self.create_castor_path_name_common(dataset_name)
 

DEBUG DEBUG DEBUG

This is probably only useful to make sure we don't muck

things up, right?

Figure out across how many sites this sample has been spread.

More...
 
tuple castor_paths
 
 castor_prefix = self.castor_prefix
 
string cmd = "rfstat %s"
 self.logger.debug("Path is now `%s'" % \ path) More...
 
list cmssw_version = self.datasets_information[dataset_name]
 
list complete_sites
 site_names_ref = set(files_info[run_number].values()[0][1]) for site_names_tmp in files_info[run_number].values()[1:]: if set(site_names_tmp[1]) != site_names_ref: mirrored = False break More...
 
tuple config_builder = ConfigBuilder(config_options, with_input=True)
 
 config_contents = config_builder.pythonCfgCode
 

In case this file is the second step (the real harvesting

step) of the two-step harvesting we have to tell it to use

our local files.

More...
 
tuple config_file_name = self.create_me_summary_config_file_name(dataset_name)
 

Only add the alarming piece to the file name if this is

a spread-out dataset.

More...
 
list connect_name = self.frontier_connection_name["globaltag"]
 
dictionary country_codes
 
string crab_config = "\n"
 

CRAB

More...
 
 crab_submission
 
list customisations = [""]
 
tuple dataset_name_escaped = self.escape_dataset_name(dataset_name)
 
tuple dataset_names = self.datasets_to_use.keys()
 
 dataset_names_after_checks = dataset_names_after_checks_tmp
 
tuple dataset_names_after_checks_tmp = copy.deepcopy(dataset_names_after_checks)
 
 datasets_information
 
 datasets_to_ignore
 
 datasets_to_use
 
list datatype = self.datasets_information[dataset_name]
 
 dbs_api
 
tuple empty_runs = dict(tmp)
 
tuple es_prefer_snippet = self.create_es_prefer_snippet(dataset_name)
 
int exit_code = 1
 
list file_name = handler.results["file.name"]
 
list files_at_site
 
dictionary files_info = {}
 
list files_without_sites
 
list globaltag = self.datasets_information[dataset_name]
 
 harvesting_info
 
 harvesting_mode
 
 harvesting_type
 
string help = "Jsonfile containing dictionary of run/lumisections pairs. "
 
string index = "site_%02d"
 
 Jsonfilename
 
 Jsonlumi
 
int loop = 0
 

CMSSW

More...
 
string marker = "\n"
 
list marker_lines = []
 
string metavar = "JSONRUNFILE"
 
 mirrored = None
 
string msg = "Could not create directory `%s'"
 class Handler(xml.sax.handler.ContentHandler): def startElement(self, name, attrs): if name == "result": site_name = str(attrs["STORAGEELEMENT_SENAME"])

TODO TODO TODO

Ugly hack to get around cases like this:

$ dbs search --query="find dataset, site, file.count where dataset=/RelValQCD_Pt_3000_3500/CMSSW_3_3_0_pre1-STARTUP31X_V4-v1/GEN-SIM-RECO"

Using DBS instance at: http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet

Processing ...

More...
 
tuple multicrab_block_name
 
string multicrab_config = "\n"
 
list multicrab_config_lines = []
 
tuple nevents = int(handler.results["file.numevents"][index])
 
 non_t1access
 
 nr_max_sites
 
dictionary num_events_catalog = {}
 
tuple num_events_dataset = sum(tmp)
 
tuple num_sites
 
               if self.datasets_information[dataset_name]["num_events"][run_number] != 0:
                   pdb.set_trace()

DEBUG DEBUG DEBUG end More...

 
int number_max_sites = self.nr_max_sites+1
 
 option_parser
 
 output_file_name = self.\
 
tuple path = os.path.join(path, piece)
 else:

Piece not in the list, fine.

More...
 
tuple permissions = extract_permissions(output)
 
list permissions_new = []
 
string permissions_target = "775"
 
 preferred_site
 
 ref_hist_mappings_file_name
 
tuple run_number = int(handler.results["run.number"][index])
 
list runs = self.datasets_to_use[dataset_name]
 
 runs_to_ignore
 
 runs_to_use
 
 saveByLumiSection
 
tuple se_name = choice(t1_sites)
 
string sep = "#"
 
 site_name = None
 
tuple site_names = list(set([j for i in files_info[run_number].values() for j in i[1]]))
 
list sites = [self.preferred_site]
 
list sites_forbidden = []
 
list sites_with_complete_copies = []
 
 skip_this_path_piece = True
 self.logger.debug("Checking CASTOR path piece `%s'" % \ piece) More...
 
list t1_sites = []
 
list tmp
 

TODO TODO TODO

Need to think about where this should go, but

somewhere we have to move over the fact that we want

to process all runs for each dataset that we're

considering.

More...
 
tuple traceback_string = traceback.format_exc()
 
string twiki_url = "https://twiki.cern.ch/twiki/bin/view/CMS/CmsHarvester"
 
string type = "string"
 
tuple use_es_prefer = (self.harvesting_type == "RelVal")
 
 use_refs = use_es_prefer or \
 
 UserName = output
 
 workflow_name = dataset_name
 

Function Documentation

def cmsHarvester.build_dataset_ignore_list (   self)
Build a list of datasets to ignore.

NOTE: We should always have a list of datasets to process, but
it may be that we don't have a list of datasets to ignore.

Definition at line 3445 of file cmsHarvester.py.

3446  def build_dataset_ignore_list(self):
3447  """Build a list of datasets to ignore.
3448 
3449  NOTE: We should always have a list of datasets to process, but
3450  it may be that we don't have a list of datasets to ignore.
3451 
3452  """
3453 
3454  self.logger.info("Building list of datasets to ignore...")
3455 
3456  input_method = self.input_method["datasets"]["ignore"]
3457  input_name = self.input_name["datasets"]["ignore"]
3458  dataset_names = self.build_dataset_list(input_method,
3459  input_name)
3460  self.datasets_to_ignore = dict(list(zip(dataset_names,
3461  [None] * len(dataset_names))))
3462 
3463  self.logger.info(" found %d dataset(s) to ignore:" % \
3464  len(dataset_names))
3465  for dataset in dataset_names:
3466  self.logger.info(" `%s'" % dataset)
3467 
3468  # End of build_dataset_ignore_list.
def build_dataset_ignore_list
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run
def cmsHarvester.build_dataset_list (   self,
  input_method,
  input_name 
)

def dbs_check_dataset_num_events(self, dataset_name): """Figure out the number of events in each run of this dataset.

     This is a more efficient way of doing this than calling
    dbs_resolve_number_of_events for each run.         # BUG BUG BUG

This might very well not work at all for spread-out samples. (?)

BUG BUG BUG end

""" # DEBUG DEBUG DEBUG

If we get here DBS should have been set up already.

assert not self.dbs_api is None

DEBUG DEBUG DEBUG end

api = self.dbs_api dbs_query = "find run.number, file.name, file.numevents where dataset = %s " \ "and dataset.status = VALID" % \ dataset_name try: api_result = api.executeQuery(dbs_query) except DbsApiException: msg = "ERROR: Could not execute DBS query" self.logger.fatal(msg) raise Error(msg) try: files_info = {} class Handler(xml.sax.handler.ContentHandler): def startElement(self, name, attrs): if name == "result": run_number = int(attrs["RUNS_RUNNUMBER"]) file_name = str(attrs["FILES_LOGICALFILENAME"]) nevents = int(attrs["FILES_NUMBEROFEVENTS"]) try: files_info[run_number][file_name] = nevents except KeyError: files_info[run_number] = {file_name: nevents} xml.sax.parseString(api_result, Handler()) except SAXParseException: msg = "ERROR: Could not parse DBS server output" self.logger.fatal(msg) raise Error(msg) num_events_catalog = {} for run_number in files_info.keys(): num_events_catalog[run_number] = sum(files_info[run_number].values()) # End of dbs_check_dataset_num_events. return num_events_catalog End of old version.

Build a list of all datasets to be processed.

Definition at line 3358 of file cmsHarvester.py.

References dbs_resolve_dataset_name(), and list().

3359  def build_dataset_list(self, input_method, input_name):
3360  """Build a list of all datasets to be processed.
3361 
3362  """
3363 
3364  dataset_names = []
3365 
3366  # It may be, but only for the list of datasets to ignore, that
3367  # the input method and name are None because nothing was
3368  # specified. In that case just an empty list is returned.
3369  if input_method is None:
3370  pass
3371  elif input_method == "dataset":
3372  # Input comes from a dataset name directly on the command
3373  # line. But, this can also contain wildcards so we need
3374  # DBS to translate it conclusively into a list of explicit
3375  # dataset names.
3376  self.logger.info("Asking DBS for dataset names")
3377  dataset_names = self.dbs_resolve_dataset_name(input_name)
3378  elif input_method == "datasetfile":
3379  # In this case a file containing a list of dataset names
3380  # is specified. Still, each line may contain wildcards so
3381  # this step also needs help from DBS.
3382  # NOTE: Lines starting with a `#' are ignored.
3383  self.logger.info("Reading input from list file `%s'" % \
3384  input_name)
3385  try:
3386  listfile = open("/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/harvesting/bin/%s" %input_name, "r")
3387  print "open listfile"
3388  for dataset in listfile:
3389  # Skip empty lines.
3390  dataset_stripped = dataset.strip()
3391  if len(dataset_stripped) < 1:
3392  continue
3393  # Skip lines starting with a `#'.
3394  if dataset_stripped[0] != "#":
3395  dataset_names.extend(self. \
3396  dbs_resolve_dataset_name(dataset_stripped))
3397  listfile.close()
3398  except IOError:
3399  msg = "ERROR: Could not open input list file `%s'" % \
3400  input_name
3401  self.logger.fatal(msg)
3402  raise Error(msg)
3403  else:
3404  # DEBUG DEBUG DEBUG
3405  # We should never get here.
3406  assert False, "Unknown input method `%s'" % input_method
3407  # DEBUG DEBUG DEBUG end
3408 
3409  # Remove duplicates from the dataset list.
3410  # NOTE: There should not be any duplicates in any list coming
3411  # from DBS, but maybe the user provided a list file with less
3412  # care.
3413  dataset_names = list(set(dataset_names))
3414 
3415  # Store for later use.
3416  dataset_names.sort()
3417 
3418  # End of build_dataset_list.
3419  return dataset_names
Helper class: Error exception.
def build_dataset_list
def dbs_check_dataset_num_events(self, dataset_name): """Figure out the number of events in each run ...
def dbs_resolve_dataset_name
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run
def cmsHarvester.build_dataset_use_list (   self)
Build a list of datasets to process.

Definition at line 3422 of file cmsHarvester.py.

3423  def build_dataset_use_list(self):
3424  """Build a list of datasets to process.
3425 
3426  """
3427 
3428  self.logger.info("Building list of datasets to consider...")
3429 
3430  input_method = self.input_method["datasets"]["use"]
3431  input_name = self.input_name["datasets"]["use"]
3432  dataset_names = self.build_dataset_list(input_method,
3433  input_name)
3434  self.datasets_to_use = dict(list(zip(dataset_names,
3435  [None] * len(dataset_names))))
3436 
3437  self.logger.info(" found %d dataset(s) to process:" % \
3438  len(dataset_names))
3439  for dataset in dataset_names:
3440  self.logger.info(" `%s'" % dataset)
3441 
3442  # End of build_dataset_use_list.
def build_dataset_use_list
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run
def cmsHarvester.build_datasets_information (   self)
Obtain all information on the datasets that we need to run.

Use DBS to figure out all required information on our
datasets, like the run numbers and the GlobalTag. All
information is stored in the datasets_information member
variable.

Definition at line 5323 of file cmsHarvester.py.

5324  def build_datasets_information(self):
5325  """Obtain all information on the datasets that we need to run.
5326 
5327  Use DBS to figure out all required information on our
5328  datasets, like the run numbers and the GlobalTag. All
5329  information is stored in the datasets_information member
5330  variable.
5331 
5332  """
5333 
5334  # Get a list of runs in the dataset.
5335  # NOTE: The harvesting has to be done run-by-run, so we
5336  # split up datasets based on the run numbers. Strictly
5337  # speaking this is not (yet?) necessary for Monte Carlo
5338  # since all those samples use run number 1. Still, this
5339  # general approach should work for all samples.
5340 
5341  # Now loop over all datasets in the list and process them.
5342  # NOTE: This processing has been split into several loops
5343  # to be easier to follow, sacrificing a bit of efficiency.
5344  self.datasets_information = {}
5345  self.logger.info("Collecting information for all datasets to process")
5346  dataset_names = self.datasets_to_use.keys()
5347  dataset_names.sort()
5348  for dataset_name in dataset_names:
5349 
5350  # Tell the user which dataset: nice with many datasets.
5351  sep_line = "-" * 30
5352  self.logger.info(sep_line)
5353  self.logger.info(" `%s'" % dataset_name)
5354  self.logger.info(sep_line)
5355 
5356  runs = self.dbs_resolve_runs(dataset_name)
5357  self.logger.info(" found %d run(s)" % len(runs))
5358  if len(runs) > 0:
5359  self.logger.debug(" run number(s): %s" % \
5360  ", ".join([str(i) for i in runs]))
5361  else:
5362  # DEBUG DEBUG DEBUG
5363  # This should never happen after the DBS checks.
5364  self.logger.warning(" --> skipping dataset "
5365  "without any runs")
5366  assert False, "Panic: found a dataset without runs " \
5367  "after DBS checks!"
5368  # DEBUG DEBUG DEBUG end
5369 
5370  cmssw_version = self.dbs_resolve_cmssw_version(dataset_name)
5371  self.logger.info(" found CMSSW version `%s'" % cmssw_version)
5372 
5373  # Figure out if this is data or MC.
5374  datatype = self.dbs_resolve_datatype(dataset_name)
5375  self.logger.info(" sample is data or MC? --> %s" % \
5376  datatype)
5377 
5378  ###
5379 
5380  # Try and figure out the GlobalTag to be used.
5381  if self.globaltag is None:
5382  globaltag = self.dbs_resolve_globaltag(dataset_name)
5383  else:
5384  globaltag = self.globaltag
5385 
5386  self.logger.info(" found GlobalTag `%s'" % globaltag)
5387 
5388  # DEBUG DEBUG DEBUG
5389  if globaltag == "":
5390  # Actually we should not even reach this point, after
5391  # our dataset sanity checks.
5392  assert datatype == "data", \
5393  "ERROR Empty GlobalTag for MC dataset!!!"
5394  # DEBUG DEBUG DEBUG end
5395 
5396  ###
5397 
5398  # DEBUG DEBUG DEBUG
5399  #tmp = self.dbs_check_dataset_spread_old(dataset_name)
5400  # DEBUG DEBUG DEBUG end
5401  sites_catalog = self.dbs_check_dataset_spread(dataset_name)
5402 
5403  # Extract the total event counts.
5404  num_events = {}
5405  for run_number in sites_catalog.keys():
5406  num_events[run_number] = sites_catalog \
5407  [run_number]["all_sites"]
5408  del sites_catalog[run_number]["all_sites"]
5409 
5410  # Extract the information about whether or not datasets
5411  # are mirrored.
5412  mirror_catalog = {}
5413  for run_number in sites_catalog.keys():
5414  mirror_catalog[run_number] = sites_catalog \
5415  [run_number]["mirrored"]
5416  del sites_catalog[run_number]["mirrored"]
5417 
5418  # BUG BUG BUG
5419  # I think I could now get rid of that and just fill the
5420  # "sites" entry with the `inverse' of this
5421  # num_events_catalog(?).
5422  #num_sites = self.dbs_resolve_dataset_number_of_sites(dataset_name)
5423  #sites_catalog = self.dbs_check_dataset_spread(dataset_name)
5424  #sites_catalog = dict(zip(num_events_catalog.keys(),
5425  # [[j for i in num_events_catalog.values() for j in i.keys()]]))
5426  # BUG BUG BUG end
def build_datasets_information
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def cmsHarvester.build_runs_ignore_list (   self)
Build a list of runs to ignore.

NOTE: We should always have a list of runs to process, but
it may be that we don't have a list of runs to ignore.

Definition at line 3543 of file cmsHarvester.py.

3544  def build_runs_ignore_list(self):
3545  """Build a list of runs to ignore.
3546 
3547  NOTE: We should always have a list of runs to process, but
3548  it may be that we don't have a list of runs to ignore.
3549 
3550  """
3551 
3552  self.logger.info("Building list of runs to ignore...")
3553 
3554  input_method = self.input_method["runs"]["ignore"]
3555  input_name = self.input_name["runs"]["ignore"]
3556  runs = self.build_runs_list(input_method, input_name)
3557  self.runs_to_ignore = dict(list(zip(runs, [None] * len(runs))))
3558 
3559  self.logger.info(" found %d run(s) to ignore:" % \
3560  len(runs))
3561  if len(runs) > 0:
3562  self.logger.info(" %s" % ", ".join([str(i) for i in runs]))
3563 
3564  # End of build_runs_ignore_list().
def build_runs_ignore_list
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run
def cmsHarvester.build_runs_list (   self,
  input_method,
  input_name 
)

Definition at line 3471 of file cmsHarvester.py.

References list().

3472  def build_runs_list(self, input_method, input_name):
3473 
3474  runs = []
3475 
3476  # A list of runs (either to use or to ignore) is not
3477  # required. This protects against `empty cases.'
3478  if input_method is None:
3479  pass
3480  elif input_method == "runs":
3481  # A list of runs was specified directly from the command
3482  # line.
3483  self.logger.info("Reading list of runs from the " \
3484  "command line")
3485  runs.extend([int(i.strip()) \
3486  for i in input_name.split(",") \
3487  if len(i.strip()) > 0])
3488  elif input_method == "runslistfile":
3489  # We were passed a file containing a list of runs.
3490  self.logger.info("Reading list of runs from file `%s'" % \
3491  input_name)
3492  try:
3493  listfile = open(input_name, "r")
3494  for run in listfile:
3495  # Skip empty lines.
3496  run_stripped = run.strip()
3497  if len(run_stripped) < 1:
3498  continue
3499  # Skip lines starting with a `#'.
3500  if run_stripped[0] != "#":
3501  runs.append(int(run_stripped))
3502  listfile.close()
3503  except IOError:
3504  msg = "ERROR: Could not open input list file `%s'" % \
3505  input_name
3506  self.logger.fatal(msg)
3507  raise Error(msg)
3508 
3509  else:
3510  # DEBUG DEBUG DEBUG
3511  # We should never get here.
3512  assert False, "Unknown input method `%s'" % input_method
3513  # DEBUG DEBUG DEBUG end
3514 
3515  # Remove duplicates, sort and done.
3516  runs = list(set(runs))
3517 
3518  # End of build_runs_list().
3519  return runs
Helper class: Error exception.
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run
def cmsHarvester.build_runs_use_list (   self)
Build a list of runs to process.

Definition at line 3522 of file cmsHarvester.py.

3523  def build_runs_use_list(self):
3524  """Build a list of runs to process.
3525 
3526  """
3527 
3528  self.logger.info("Building list of runs to consider...")
3529 
3530  input_method = self.input_method["runs"]["use"]
3531  input_name = self.input_name["runs"]["use"]
3532  runs = self.build_runs_list(input_method, input_name)
3533  self.runs_to_use = dict(list(zip(runs, [None] * len(runs))))
3534 
3535  self.logger.info(" found %d run(s) to process:" % \
3536  len(runs))
3537  if len(runs) > 0:
3538  self.logger.info(" %s" % ", ".join([str(i) for i in runs]))
3539 
3540  # End of build_runs_list().
def build_runs_use_list
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run
def cmsHarvester.check_cmssw (   self)
Check if CMSSW is setup.

Definition at line 2333 of file cmsHarvester.py.

2334  def check_cmssw(self):
2335  """Check if CMSSW is setup.
2336 
2337  """
2338 
2339  # Try to access the CMSSW_VERSION environment variable. If
2340  # it's something useful we consider CMSSW to be set up
2341  # properly. Otherwise we raise an error.
2342  cmssw_version = os.getenv("CMSSW_VERSION")
2343  if cmssw_version is None:
2344  self.logger.fatal("It seems CMSSW is not setup...")
2345  self.logger.fatal("($CMSSW_VERSION is empty)")
2346  raise Error("ERROR: CMSSW needs to be setup first!")
2347 
2348  self.cmssw_version = cmssw_version
2349  self.logger.info("Found CMSSW version %s properly set up" % \
2350  self.cmssw_version)
2351 
2352  # End of check_cmsssw.
2353  return True
Helper class: Error exception.
def cmsHarvester.check_dataset_list (   self)
Check list of dataset names for impossible ones.

Two kinds of checks are done:
- Checks for things that do not make sense. These lead to
  errors and skipped datasets.
- Sanity checks. For these warnings are issued but the user is
  considered to be the authoritative expert.

Checks performed:
- The CMSSW version encoded in the dataset name should match
  self.cmssw_version. This is critical.
- There should be some events in the dataset/run. This is
  critical in the sense that CRAB refuses to create jobs for
  zero events. And yes, this does happen in practice. E.g. the
  reprocessed CRAFT08 datasets contain runs with zero events.
- A cursory check is performed to see if the harvesting type
  makes sense for the data type. This should prevent the user
  from inadvertently running RelVal for data.
- It is not possible to run single-step harvesting jobs on
  samples that are not fully contained at a single site.
- Each dataset/run has to be available at at least one site.

Definition at line 3796 of file cmsHarvester.py.

3797  def check_dataset_list(self):
3798  """Check list of dataset names for impossible ones.
3799 
3800  Two kinds of checks are done:
3801  - Checks for things that do not make sense. These lead to
3802  errors and skipped datasets.
3803  - Sanity checks. For these warnings are issued but the user is
3804  considered to be the authoritative expert.
3805 
3806  Checks performed:
3807  - The CMSSW version encoded in the dataset name should match
3808  self.cmssw_version. This is critical.
3809  - There should be some events in the dataset/run. This is
3810  critical in the sense that CRAB refuses to create jobs for
3811  zero events. And yes, this does happen in practice. E.g. the
3812  reprocessed CRAFT08 datasets contain runs with zero events.
3813  - A cursory check is performed to see if the harvesting type
3814  makes sense for the data type. This should prevent the user
3815  from inadvertently running RelVal for data.
3816  - It is not possible to run single-step harvesting jobs on
3817  samples that are not fully contained at a single site.
3818  - Each dataset/run has to be available at at least one site.
3819 
3820  """
3821 
3822  self.logger.info("Performing sanity checks on dataset list...")
3823 
3824  dataset_names_after_checks = copy.deepcopy(self.datasets_to_use)
3825 
3826  for dataset_name in self.datasets_to_use.keys():
3827 
3828  # Check CMSSW version.
3829  version_from_dataset = self.datasets_information[dataset_name] \
3830  ["cmssw_version"]
3831  if version_from_dataset != self.cmssw_version:
3832  msg = " CMSSW version mismatch for dataset `%s' " \
3833  "(%s vs. %s)" % \
3834  (dataset_name,
3835  self.cmssw_version, version_from_dataset)
3836  if self.force_running:
3837  # Expert mode: just warn, then continue.
3838  self.logger.warning("%s " \
3839  "--> `force mode' active: " \
3840  "run anyway" % msg)
3841  else:
3842  del dataset_names_after_checks[dataset_name]
3843  self.logger.warning("%s " \
3844  "--> skipping" % msg)
3845  continue
3846 
3847  ###
3848 
3849  # Check that the harvesting type makes sense for the
3850  # sample. E.g. normally one would not run the DQMOffline
3851  # harvesting on Monte Carlo.
3852  # TODO TODO TODO
3853  # This should be further refined.
3854  suspicious = False
3855  datatype = self.datasets_information[dataset_name]["datatype"]
3856  if datatype == "data":
3857  # Normally only DQM harvesting is run on data.
3858  if self.harvesting_type != "DQMOffline":
3859  suspicious = True
3860  elif datatype == "mc":
3861  if self.harvesting_type == "DQMOffline":
3862  suspicious = True
3863  else:
3864  # Doh!
3865  assert False, "ERROR Impossible data type `%s' " \
3866  "for dataset `%s'" % \
3867  (datatype, dataset_name)
3868  if suspicious:
3869  msg = " Normally one does not run `%s' harvesting " \
3870  "on %s samples, are you sure?" % \
3871  (self.harvesting_type, datatype)
3872  if self.force_running:
3873  self.logger.warning("%s " \
3874  "--> `force mode' active: " \
3875  "run anyway" % msg)
3876  else:
3877  del dataset_names_after_checks[dataset_name]
3878  self.logger.warning("%s " \
3879  "--> skipping" % msg)
3880  continue
3881 
3882  # TODO TODO TODO end
3883 
3884  ###
3885 
3886  # BUG BUG BUG
3887  # For the moment, due to a problem with DBS, I cannot
3888  # figure out the GlobalTag for data by myself. (For MC
3889  # it's no problem.) This means that unless a GlobalTag was
3890  # specified from the command line, we will have to skip
3891  # any data datasets.
3892 
3893  if datatype == "data":
3894  if self.globaltag is None:
3895  msg = "For data datasets (like `%s') " \
3896  "we need a GlobalTag" % \
3897  dataset_name
3898  del dataset_names_after_checks[dataset_name]
3899  self.logger.warning("%s " \
3900  "--> skipping" % msg)
3901  continue
3902 
3903  # BUG BUG BUG end
3904 
3905  ###
3906 
3907  # Check if the GlobalTag exists and (if we're using
3908  # reference histograms) if it's ready to be used with
3909  # reference histograms.
3910  globaltag = self.datasets_information[dataset_name]["globaltag"]
3911  if not globaltag in self.globaltag_check_cache:
3912  if self.check_globaltag(globaltag):
3913  self.globaltag_check_cache.append(globaltag)
3914  else:
3915  msg = "Something is wrong with GlobalTag `%s' " \
3916  "used by dataset `%s'!" % \
3917  (globaltag, dataset_name)
3918  if self.use_ref_hists:
3919  msg += "\n(Either it does not exist or it " \
3920  "does not contain the required key to " \
3921  "be used with reference histograms.)"
3922  else:
3923  msg += "\n(It probably just does not exist.)"
3924  self.logger.fatal(msg)
3925  raise Usage(msg)
3926 
3927  ###
3928 
3929  # Require that each run is available at least somewhere.
3930  runs_without_sites = [i for (i, j) in \
3931  self.datasets_information[dataset_name] \
3932  ["sites"].items() \
3933  if len(j) < 1 and \
3934  i in self.datasets_to_use[dataset_name]]
3935  if len(runs_without_sites) > 0:
3936  for run_without_sites in runs_without_sites:
3937  try:
3938  dataset_names_after_checks[dataset_name].remove(run_without_sites)
3939  except KeyError:
3940  pass
3941  self.logger.warning(" removed %d unavailable run(s) " \
3942  "from dataset `%s'" % \
3943  (len(runs_without_sites), dataset_name))
3944  self.logger.debug(" (%s)" % \
3945  ", ".join([str(i) for i in \
3946  runs_without_sites]))
3947 
3948  ###
3949 
3950  # Unless we're running two-step harvesting: only allow
3951  # samples located on a single site.
3952  if not self.harvesting_mode == "two-step":
3953  for run_number in self.datasets_to_use[dataset_name]:
# DEBUG DEBUG DEBUG
def check_dataset_list
Helper class: Usage exception.
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def cmsHarvester.check_dbs (   self)
Check if DBS is setup.

Definition at line 2356 of file cmsHarvester.py.

def check_dbs(self):
    """Check if DBS is set up.

    The DBSCMD_HOME environment variable is used as the indicator.

    Raises:
        Error: If DBSCMD_HOME is unset or empty.

    """

    # os.getenv() yields None for an unset variable and "" for one
    # that is exported but blank. Both mean there is no usable DBS
    # installation to point at, so both are rejected.
    dbs_home = os.getenv("DBSCMD_HOME")
    if not dbs_home:
        self.logger.fatal("It seems DBS is not setup...")
        self.logger.fatal(" $DBSCMD_HOME is empty")
        raise Error("ERROR: DBS needs to be setup first!")
Helper class: Error exception.
def cmsHarvester.check_globaltag (   self,
  globaltag = None 
)
Check if globaltag exists.

Check if globaltag exists as GlobalTag in the database given
by self.frontier_connection_name['globaltag']. If globaltag is
None, self.globaltag is used instead.

If we're going to use reference histograms this method also
checks for the existence of the required key in the GlobalTag.

Definition at line 4503 of file cmsHarvester.py.

def check_globaltag(self, globaltag=None):
    """Verify that a GlobalTag can be used.

    The tag is looked up in the database named by
    self.frontier_connection_name['globaltag']. When no globaltag
    argument is given, self.globaltag is checked instead.

    If reference histograms are in use (self.use_ref_hists) the tag
    must exist and must also contain the key they require.

    """

    tag_name = self.globaltag if globaltag is None else globaltag

    # The `::All' suffix is not part of the name that gets queried.
    suffix = "::All"
    if tag_name.endswith(suffix):
        tag_name = tag_name[:-len(suffix)]

    # BUG BUG BUG
    # cmscond_tagtree_list needs the squid to connect to spelled out
    # explicitly. As the cmsHarvester can only be run from the CERN
    # network anyway, cmsfrontier:8000 is hard-coded here.
    database = self.frontier_connection_name["globaltag"].replace(
        "frontier://", "frontier://cmsfrontier:8000/")
    # BUG BUG BUG end
    database += self.db_account_name_cms_cond_globaltag()

    found = self.check_globaltag_exists(tag_name, database)

    # The key lookup only makes sense for a tag that exists.
    if self.use_ref_hists and found:
        return self.check_globaltag_contains_ref_hist_key(tag_name,
                                                          database)

    # End of check_globaltag.
    return found
def cmsHarvester.check_globaltag_contains_ref_hist_key (   self,
  globaltag,
  connect_name 
)
Check if globaltag contains the required RefHistos key.

Definition at line 4600 of file cmsHarvester.py.

def check_globaltag_contains_ref_hist_key(self, globaltag, connect_name):
    """Check if globaltag contains the required RefHistos key.

    Runs `cmscond_tagtree_list' for the `RefHistos' node of globaltag
    against the database connect_name. Empty command output is taken
    to mean that the key is missing.

    Returns:
        True if the command produced any output, False otherwise.

    Raises:
        Error: If the command exits with a non-zero status or its
            output contains the word `error'.

    """

    # Check for the key required to use reference histograms.
    ref_hist_key = "RefHistos"
    self.logger.info("Checking existence of reference " \
                     "histogram key `%s' in GlobalTag `%s'" % \
                     (ref_hist_key, globaltag))
    self.logger.debug(" (Using database connection `%s')" % \
                      connect_name)
    cmd = "cmscond_tagtree_list -c %s -T %s -n %s" % \
          (connect_name, globaltag, ref_hist_key)
    (status, output) = commands.getstatusoutput(cmd)
    if status != 0 or "error" in output:
        # The message needs one placeholder per argument. With a
        # single `%s' the formatting itself raised a TypeError and
        # the real problem was never reported.
        msg = "Could not check existence of key `%s' in `%s'" % \
              (ref_hist_key, connect_name)
        self.logger.fatal(msg)
        self.logger.debug("Command used:")
        self.logger.debug(" %s" % cmd)
        self.logger.debug("Output received:")
        self.logger.debug(" %s" % output)
        raise Error(msg)

    tag_contains_key = len(output) >= 1
    if not tag_contains_key:
        self.logger.debug("Required key for use of reference " \
                          "histograms `%s' does not exist " \
                          "in GlobalTag `%s':" % \
                          (ref_hist_key, globaltag))
        self.logger.debug("Output received:")
        self.logger.debug(output)

    self.logger.info(" GlobalTag contains `%s' key? -> %s" % \
                     (ref_hist_key, tag_contains_key))

    # End of check_globaltag_contains_ref_hist_key.
    return tag_contains_key
Helper class: Error exception.
def check_globaltag_contains_ref_hist_key
def cmsHarvester.check_globaltag_exists (   self,
  globaltag,
  connect_name 
)
Check if globaltag exists.

Definition at line 4558 of file cmsHarvester.py.

References split.

def check_globaltag_exists(self, globaltag, connect_name):
    """Ask the conditions database whether globaltag exists.

    Runs `cmscond_tagtree_list' against connect_name. The tag counts
    as missing when the output contains `does not exist'. A non-zero
    exit status, or output containing `error', raises Error.

    """

    self.logger.info("Checking existence of GlobalTag `%s'" % \
                     globaltag)
    self.logger.debug(" (Using database connection `%s')" % \
                      connect_name)

    cmd = "cmscond_tagtree_list -c %s -T %s" % \
          (connect_name, globaltag)
    (status, output) = commands.getstatusoutput(cmd)

    command_failed = (status != 0) or ("error" in output)
    if command_failed:
        msg = "Could not check existence of GlobalTag `%s' in `%s'" % \
              (globaltag, connect_name)
        if ".ALL_TABLES not found" in output:
            # The account name is the last word in front of the
            # `.ALL_TABLES' marker in the output.
            account = output.split(".ALL_TABLES")[0].split()[-1]
            msg = "%s\n" \
                  "Missing database account `%s'" % \
                  (msg, account)
        self.logger.fatal(msg)
        self.logger.debug("Command used:")
        self.logger.debug(" %s" % cmd)
        self.logger.debug("Output received:")
        self.logger.debug(output)
        raise Error(msg)

    tag_exists = "does not exist" not in output
    if not tag_exists:
        self.logger.debug("GlobalTag `%s' does not exist in `%s':" % \
                          (globaltag, connect_name))
        self.logger.debug("Output received:")
        self.logger.debug(output)
    self.logger.info(" GlobalTag exists? -> %s" % tag_exists)

    # End of check_globaltag_exists.
    return tag_exists
def check_globaltag_exists
Helper class: Error exception.
double split
Definition: MVATrainer.cc:139
def cmsHarvester.check_input_status (   self)
Check completeness and correctness of input information.

Check that all required information has been specified and
that, at least as far as can be easily checked, it makes
sense.

NOTE: This is also where any default values are applied.

Definition at line 2192 of file cmsHarvester.py.

References join().

def check_input_status(self):
    """Validate the user input and fill in defaults.

    Makes sure everything that is required has been specified and,
    as far as can be checked cheaply, is sensible.

    NOTE: Defaults for the harvesting mode, the reference histogram
    mapping file and the CASTOR base dir are applied here.

    """

    log = self.logger
    log.info("Checking completeness/correctness of input...")

    # The cmsHarvester does not understand positional arguments, so
    # any that are left over are an error.
    if len(self.args) > 0:
        msg = "Sorry but I don't understand `%s'" % \
              (" ".join(self.args))
        log.fatal(msg)
        raise Usage(msg)

    # BUG BUG BUG
    # Two-step mode stays disabled until the outstanding bugs are
    # fixed.
    if self.harvesting_mode == "two-step":
        msg = "--------------------\n" \
              " Sorry, but for the moment (well, till it works)" \
              " the two-step mode has been disabled.\n" \
              "--------------------\n"
        log.fatal(msg)
        raise Error(msg)
    # BUG BUG BUG end

    # A harvesting type is mandatory...
    if self.harvesting_type is None:
        msg = "Please specify a harvesting type"
        log.fatal(msg)
        raise Usage(msg)
    # ...whereas the harvesting mode has a default.
    if self.harvesting_mode is None:
        self.harvesting_mode = self.harvesting_mode_default
        log.warning("No harvesting mode specified --> using default `%s'" % \
                    self.harvesting_mode)

    ###

    # Without an input method there is no way to find the dataset
    # name(s).
    if self.input_method["datasets"]["use"] is None:
        msg = "Please specify an input dataset name " \
              "or a list file name"
        log.fatal(msg)
        raise Usage(msg)

    # DEBUG DEBUG DEBUG
    # An input method implies an input name.
    assert not self.input_name["datasets"]["use"] is None
    # DEBUG DEBUG DEBUG end

    ###

    # The reference histogram mapping file is only needed when
    # references are used.
    if self.use_ref_hists and self.ref_hist_mappings_file_name is None:
        self.ref_hist_mappings_file_name = \
            self.ref_hist_mappings_file_name_default
        log.warning("No reference histogram mapping file specified --> " \
                    "using default `%s'" % \
                    self.ref_hist_mappings_file_name)

    ###

    # Where on CASTOR the results should go.
    if self.castor_base_dir is None:
        self.castor_base_dir = self.castor_base_dir_default
        log.warning("No CASTOR area specified -> using default `%s'" % \
                    self.castor_base_dir)

    # Only the CERN CASTOR area is supported.
    castor_area = self.castor_base_dir
    if not castor_area.startswith(self.castor_prefix):
        msg = "CASTOR area does not start with `%s'" % \
              self.castor_prefix
        log.fatal(msg)
        if "castor" in castor_area and "cern.ch" not in castor_area:
            log.fatal("Only CERN CASTOR is supported")
        raise Usage(msg)

    ###

    # TODO TODO TODO
    # To be removed once the config file used to create a dataset
    # can be retrieved from DBS.

    # Data needs a GlobalTag. (For MC it can be figured out.)
    if self.globaltag is None:
        for line in ("No GlobalTag specified. This means I cannot",
                     "run on data, only on MC.",
                     "I will skip all data datasets."):
            log.warning(line)
    # TODO TODO TODO end
    elif not self.globaltag.endswith("::All"):
        # Make sure the GlobalTag ends with `::All'.
        log.warning("Specified GlobalTag `%s' does " \
                    "not end in `::All' --> " \
                    "appending this missing piece" % \
                    self.globaltag)
        self.globaltag = "%s::All" % self.globaltag

    ###

    # Dump some info about the Frontier connections used.
    connection_labels = {
        "globaltag": "the GlobalTag",
        "refhists": "the reference histograms",
    }
    for (key, value) in self.frontier_connection_name.iteritems():
        frontier_type_str = connection_labels.get(key, "unknown")
        if self.frontier_connection_overridden[key] == True:
            non_str = "non-"
        else:
            non_str = ""
        log.info("Using %sdefault Frontier " \
                 "connection for %s: `%s'" % \
                 (non_str, frontier_type_str, value))

    ###

    # End of check_input_status.
def check_input_status
Helper class: Error exception.
Helper class: Usage exception.
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def cmsHarvester.check_ref_hist_mappings (   self)
Make sure all necessary reference histograms exist.

Check that for each of the datasets to be processed a
reference histogram is specified and that that histogram
exists in the database.

NOTE: There's a little complication here. Since this whole
thing was designed to allow (in principle) harvesting of both
data and MC datasets in one go, we need to be careful to check
the availability of reference mappings only for those
datasets that need it.

Definition at line 5283 of file cmsHarvester.py.

def check_ref_hist_mappings(self):
    """Make sure all necessary reference histograms exist.

    Every dataset in self.datasets_to_use must have an entry in
    self.ref_hist_mappings, and the tag named by that entry must
    pass self.check_ref_hist_tag().

    NOTE: Since data and MC datasets can (in principle) be
    harvested in one go, the availability of reference mappings
    should only be checked for the datasets that need it.

    """

    self.logger.info("Checking reference histogram mappings")

    for dataset_name in self.datasets_to_use:
        try:
            tag_name = self.ref_hist_mappings[dataset_name]
        except KeyError:
            msg = "ERROR: No reference histogram mapping found " \
                  "for dataset `%s'" % \
                  dataset_name
            self.logger.fatal(msg)
            raise Error(msg)
        else:
            tag_ok = self.check_ref_hist_tag(tag_name)

        if tag_ok:
            continue

        msg = "Reference histogram tag `%s' " \
              "(used for dataset `%s') does not exist!" % \
              (tag_name, dataset_name)
        self.logger.fatal(msg)
        raise Usage(msg)

    self.logger.info(" Done checking reference histogram mappings.")

    # End of check_ref_hist_mappings.
Helper class: Error exception.
Helper class: Usage exception.
def check_ref_hist_mappings
def cmsHarvester.check_ref_hist_tag (   self,
  tag_name 
)
Check the existence of tag_name in database connect_name.

Check if tag_name exists as a reference histogram tag in the
database given by self.frontier_connection_name['refhists'].

Definition at line 4645 of file cmsHarvester.py.

References join().

def check_ref_hist_tag(self, tag_name):
    """Check whether tag_name is a known reference histogram tag.

    The tags are listed with `cmscond_list_iov' from the database
    given by self.frontier_connection_name['refhists']. The tag
    exists if it is one of the whitespace-separated words in the
    command output. A non-zero exit status raises Error.

    """

    connect_name = self.frontier_connection_name["refhists"] + \
                   self.db_account_name_cms_cond_dqm_summary()

    self.logger.debug("Checking existence of reference " \
                      "histogram tag `%s'" % \
                      tag_name)
    self.logger.debug(" (Using database connection `%s')" % \
                      connect_name)

    cmd = "cmscond_list_iov -c %s" % \
          connect_name
    (status, output) = commands.getstatusoutput(cmd)
    if status != 0:
        msg = "Could not check existence of tag `%s' in `%s'" % \
              (tag_name, connect_name)
        self.logger.fatal(msg)
        self.logger.debug("Command used:")
        self.logger.debug(" %s" % cmd)
        self.logger.debug("Output received:")
        self.logger.debug(output)
        raise Error(msg)

    known_tags = output.split()
    tag_exists = tag_name in known_tags
    if not tag_exists:
        self.logger.debug("Reference histogram tag `%s' " \
                          "does not exist in `%s'" % \
                          (tag_name, connect_name))
        self.logger.debug(" Existing tags: `%s'" % \
                          "', `".join(known_tags))
    self.logger.debug(" Reference histogram tag exists? " \
                      "-> %s" % tag_exists)

    # End of check_ref_hist_tag.
    return tag_exists
Helper class: Error exception.
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def check_ref_hist_tag
def cmsHarvester.create_and_check_castor_dir (   self,
  castor_dir 
)
Check existence of the given CASTOR dir, if necessary create
it.

Some special care has to be taken with several things like
setting the correct permissions such that CRAB can store the
output results. Of course this means that things like
/castor/cern.ch/ and user/j/ have to be recognised and treated
properly.

NOTE: Only CERN CASTOR area (/castor/cern.ch/) supported for
the moment.

NOTE: This method uses some slightly tricky caching to make
sure we don't keep over and over checking the same base paths.

Definition at line 1489 of file cmsHarvester.py.

References spr.find().

1490  def create_and_check_castor_dir(self, castor_dir):
1491  """Check existence of the give CASTOR dir, if necessary create
1492  it.
1493 
1494  Some special care has to be taken with several things like
1495  setting the correct permissions such that CRAB can store the
1496  output results. Of course this means that things like
1497  /castor/cern.ch/ and user/j/ have to be recognised and treated
1498  properly.
1499 
1500  NOTE: Only CERN CASTOR area (/castor/cern.ch/) supported for
1501  the moment.
1502 
1503  NOTE: This method uses some slightly tricky caching to make
1504  sure we don't keep over and over checking the same base paths.
1505 
1506  """
1507 
1508  ###
1509 
1510  # Local helper function to fully split a path into pieces.
1511  def split_completely(path):
1512  (parent_path, name) = os.path.split(path)
1513  if name == "":
1514  return (parent_path, )
1515  else:
1516  return split_completely(parent_path) + (name, )
1517 
1518  ###
1519 
1520  # Local helper function to check rfio (i.e. CASTOR)
1521  # directories.
1522  def extract_permissions(rfstat_output):
1523  """Parse the output from rfstat and return the
1524  5-digit permissions string."""
1525 
1526  permissions_line = [i for i in output.split("\n") \
1527  if i.lower().find("protection") > -1]
1528  regexp = re.compile(".*\(([0123456789]{5})\).*")
1529  match = regexp.search(rfstat_output)
1530  if not match or len(match.groups()) != 1:
1531  msg = "Could not extract permissions " \
1532  "from output: %s" % rfstat_output
1533  self.logger.fatal(msg)
1534  raise Error(msg)
1535  permissions = match.group(1)
1536 
1537  # End of extract_permissions.
1538  return permissions
1539 
1540  ###
1541 
1542  # These are the pieces of CASTOR directories that we do not
1543  # want to touch when modifying permissions.
1544 
1545  # NOTE: This is all a bit involved, basically driven by the
1546  # fact that one wants to treat the `j' directory of
1547  # `/castor/cern.ch/user/j/jhegeman/' specially.
1548  # BUG BUG BUG
1549  # This should be simplified, for example by comparing to the
1550  # CASTOR prefix or something like that.
1551  # BUG BUG BUG end
1552  castor_paths_dont_touch = {
1553  0: ["/", "castor", "cern.ch", "cms", "store", "temp",
1554  "dqm", "offline", "user"],
1555  -1: ["user", "store"]
1556  }
1557 
1558  self.logger.debug("Checking CASTOR path `%s'" % castor_dir)
1559 
1560  ###
1561 
1562  # First we take the full CASTOR path apart.
1563  castor_path_pieces = split_completely(castor_dir)
1564 
1565  # Now slowly rebuild the CASTOR path and see if a) all
1566  # permissions are set correctly and b) the final destination
1567  # exists.
1568  path = ""
1569  check_sizes = castor_paths_dont_touch.keys()
1570  check_sizes.sort()
1571  len_castor_path_pieces = len(castor_path_pieces)
1572  for piece_index in xrange (len_castor_path_pieces):
1573  skip_this_path_piece = False
piece = castor_path_pieces[piece_index]
void find(edm::Handle< EcalRecHitCollection > &hits, DetId thisDet, std::vector< EcalRecHitCollection::const_iterator > &hit, bool debug=false)
Definition: FindCaloHit.cc:7
Helper class: Error exception.
def create_and_check_castor_dir
def cmsHarvester.create_and_check_castor_dirs (   self)
Make sure all required CASTOR output dirs exist.

This checks the CASTOR base dir specified by the user as well
as all the subdirs required by the current set of jobs.

Definition at line 1429 of file cmsHarvester.py.

References list(), and bookConverter.max.

def create_and_check_castor_dirs(self):
    """Make sure all required CASTOR output dirs exist.

    The user-specified CASTOR base dir is handled first, followed by
    the output dir of every dataset/run in self.datasets_to_use.
    A warning is logged for each output dir that is not empty.

    """

    self.logger.info("Checking (and if necessary creating) CASTOR " \
                     "output area(s)...")

    # The base dir goes through the real checker method first.
    self.create_and_check_castor_dir(self.castor_base_dir)

    # Collect the (unique) per-run output dirs in sorted order.
    castor_dirs_unique = sorted(set(
        self.datasets_information[dataset_name]["castor_path"][run]
        for (dataset_name, runs) in self.datasets_to_use.iteritems()
        for run in runs))

    # This can take some time. E.g. CRAFT08 has > 300 runs, each of
    # which gets its own directory. So (rough) progress is shown
    # roughly every tenth of the way.
    ndirs = len(castor_dirs_unique)
    step = max(ndirs // 10, 1)
    for (count, castor_dir) in enumerate(castor_dirs_unique, 1):
        if count % step == 0 or count == ndirs:
            self.logger.info(" %d/%d" % \
                             (count, ndirs))
        self.create_and_check_castor_dir(castor_dir)

        # If (an old version of) the output file already exists CRAB
        # will run new jobs but never copy the results back. The
        # user is assumed to know what they are doing, so a
        # non-empty directory only triggers a warning.
        self.logger.debug("Checking if path `%s' is empty" % \
                          castor_dir)
        cmd = "rfdir %s" % castor_dir
        (status, output) = commands.getstatusoutput(cmd)
        if status != 0:
            msg = "Could not access directory `%s'" \
                  " !!! This is bad since I should have just" \
                  " created it !!!" % castor_dir
            self.logger.fatal(msg)
            raise Error(msg)
        if len(output) > 0:
            self.logger.warning("Output directory `%s' is not empty:" \
                                " new jobs will fail to" \
                                " copy back output" % \
                                castor_dir)

    # End of create_and_check_castor_dirs.
def create_and_check_castor_dirs
Helper class: Error exception.
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION[A criterion thatmatches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list("!*","!HLTx*"if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL.It will reject the event if any of the triggers are PASS or EXCEPTION(this matches the behavior of"!*"before the partial wildcard feature was incorporated).Triggers which are in the READY state are completely ignored.(READY should never be returned since the trigger paths have been run
def cmsHarvester.create_castor_path_name_common (   self,
  dataset_name 
)
Build the common part of the output path to be used on
CASTOR.

This consists of the CASTOR area base path specified by the
user and a piece depending on the data type (data vs. MC), the
harvesting type and the dataset name followed by a piece
containing the run number and event count. (See comments in
create_castor_path_name_special for details.) This method
creates the common part, without run number and event count.

Definition at line 1325 of file cmsHarvester.py.

References create_castor_path_name_special(), and python.rootplot.root2matplotlib.replace().

def create_castor_path_name_common(self, dataset_name):
    """Build the common part of the CASTOR output path.

    The path is made up of, in this order: the CASTOR base dir, the
    data type of the dataset, the harvesting type, the CMSSW release
    and the escaped dataset name. The run number and event count
    are not part of it; see create_castor_path_name_special.

    """

    base_dir = self.castor_base_dir

    # The data type (data vs. mc) and the harvesting type, both in
    # lower case.
    datatype = self.datasets_information[dataset_name]["datatype"].lower()
    harvesting_type = self.harvesting_type.lower()

    # The CMSSW release version (only the `digits'). This is the
    # version used for harvesting, not the one from the dataset.
    # That makes results slightly harder to find, but avoids
    # ambiguous path names when a dataset is re-harvested with a
    # different CMSSW version.
    release_version = self.cmssw_version.lower()
    release_version = release_version.replace("cmssw", "").strip("_")

    dataset_name_escaped = self.escape_dataset_name(dataset_name)

    castor_path = os.path.join(base_dir,
                               datatype,
                               harvesting_type,
                               release_version,
                               dataset_name_escaped)

    # End of create_castor_path_name_common.
    return os.path.normpath(castor_path)
def create_castor_path_name_common
def cmsHarvester.create_castor_path_name_special (   self,
  dataset_name,
  run_number,
  castor_path_common 
)
Create the specialised part of the CASTOR output dir name.

NOTE: To avoid clashes with `incremental harvesting'
(re-harvesting when a dataset grows) we have to include the
event count in the path name. The underlying `problem' is that
CRAB does not overwrite existing output files so if the output
file already exists CRAB will fail to copy back the output.

NOTE: It's not possible to create different kinds of
harvesting jobs in a single call to this tool. However, in
principle it could be possible to create both data and MC jobs
in a single go.

NOTE: The number of events used in the path name is the
_total_ number of events in the dataset/run at the time of
harvesting. If we're doing partial harvesting the final
results will reflect lower statistics. This is a) the easiest
to code and b) the least likely to lead to confusion if
someone ever decides to swap/copy around file blocks between
sites.

Definition at line 1381 of file cmsHarvester.py.

Referenced by create_castor_path_name_common().

1382  castor_path_common):
1383  """Create the specialised part of the CASTOR output dir name.
1384 
1385  NOTE: To avoid clashes with `incremental harvesting'
1386  (re-harvesting when a dataset grows) we have to include the
1387  event count in the path name. The underlying `problem' is that
1388  CRAB does not overwrite existing output files so if the output
1389  file already exists CRAB will fail to copy back the output.
1390 
1391  NOTE: It's not possible to create different kinds of
1392  harvesting jobs in a single call to this tool. However, in
1393  principle it could be possible to create both data and MC jobs
1394  in a single go.
1395 
1396  NOTE: The number of events used in the path name is the
1397  _total_ number of events in the dataset/run at the time of
1398  harvesting. If we're doing partial harvesting the final
1399  results will reflect lower statistics. This is a) the easiest
1400  to code and b) the least likely to lead to confusion if
1401  someone ever decides to swap/copy around file blocks between
1402  sites.
1403 
1404  """
1405 
1406  castor_path = castor_path_common
1407 
1408  ###
1409 
1410  # The run number part.
1411  castor_path = os.path.join(castor_path, "run_%d" % run_number)
1412 
1413  ###
1414 
1415  # The event count (i.e. the number of events we currently see
1416  # for this dataset).
1417  #nevents = self.datasets_information[dataset_name] \
1418  # ["num_events"][run_number]
1419  castor_path = os.path.join(castor_path, "nevents")
1420 
1421  ###
1422 
1423  castor_path = os.path.normpath(castor_path)
1424 
1425  # End of create_castor_path_name_special.
1426  return castor_path
def cmsHarvester.create_config_file_name (   self,
  dataset_name,
  run_number 
)
Generate the name of the configuration file to be run by
CRAB.

Depending on the harvesting mode (single-step or two-step)
this is the name of the real harvesting configuration or the
name of the first-step ME summary extraction configuration.

Definition at line 4066 of file cmsHarvester.py.

4067  def create_config_file_name(self, dataset_name, run_number):
4068  """Generate the name of the configuration file to be run by
4069  CRAB.
4070 
4071  Depending on the harvesting mode (single-step or two-step)
4072  this is the name of the real harvesting configuration or the
4073  name of the first-step ME summary extraction configuration.
4074 
4075  """
4076 
4077  if self.harvesting_mode == "single-step":
4078  config_file_name = self.create_harvesting_config_file_name(dataset_name)
4079  elif self.harvesting_mode == "single-step-allow-partial":
config_file_name = self.create_harvesting_config_file_name(dataset_name)
def create_config_file_name
def cmsHarvester.create_crab_config (   self)
Create a CRAB configuration for a given job.

NOTE: This is _not_ a complete (as in: submittable) CRAB
configuration. It is used to store the common settings for the
multicrab configuration.

NOTE: Only the CERN CASTOR area (/castor/cern.ch/) is supported.

NOTE: According to CRAB, you `Must define exactly two of
total_number_of_events, events_per_job, or
number_of_jobs.' For single-step harvesting we force one job,
for the rest we don't really care.

# BUG BUG BUG
# With the current version of CRAB (2.6.1), in which Daniele
# fixed the behaviour of no_block_boundary for me, one _has to
# specify_ the total_number_of_events and one single site in
# the se_white_list.
# BUG BUG BUG end

Definition at line 4234 of file cmsHarvester.py.

4235  def create_crab_config(self):
4236  """Create a CRAB configuration for a given job.
4237 
4238  NOTE: This is _not_ a complete (as in: submittable) CRAB
4239  configuration. It is used to store the common settings for the
4240  multicrab configuration.
4241 
4242  NOTE: Only CERN CASTOR area (/castor/cern.ch/) is supported.
4243 
4244  NOTE: According to CRAB, you `Must define exactly two of
4245  total_number_of_events, events_per_job, or
4246  number_of_jobs.'. For single-step harvesting we force one job,
4247  for the rest we don't really care.
4248 
4249  # BUG BUG BUG
4250  # With the current version of CRAB (2.6.1), in which Daniele
4251  # fixed the behaviour of no_block_boundary for me, one _has to
4252  # specify_ the total_number_of_events and one single site in
4253  # the se_white_list.
4254  # BUG BUG BUG end
4255 
4256  """
4257 
4258  tmp = []
4259 
4260  # This is the stuff we will need to fill in.
4261  castor_prefix = self.castor_prefix
4262 
4263  tmp.append(self.config_file_header())
4264  tmp.append("")
def create_crab_config
def cmsHarvester.create_es_prefer_snippet (   self,
  dataset_name 
)
Build the es_prefer snippet for the reference histograms.

The building of the snippet is wrapped in some care-taking
code that figures out the name of the reference histogram set
and makes sure the corresponding tag exists.

Definition at line 4691 of file cmsHarvester.py.

References join().

4692  def create_es_prefer_snippet(self, dataset_name):
4693  """Build the es_prefer snippet for the reference histograms.
4694 
4695  The building of the snippet is wrapped in some care-taking
4696  code that figures out the name of the reference histogram set
4697  and makes sure the corresponding tag exists.
4698 
4699  """
4700 
4701  # Figure out the name of the reference histograms tag.
4702  # NOTE: The existence of these tags has already been checked.
4703  ref_hist_tag_name = self.ref_hist_mappings[dataset_name]
4704 
4705  connect_name = self.frontier_connection_name["refhists"]
4706  connect_name += self.db_account_name_cms_cond_dqm_summary()
4707  record_name = "DQMReferenceHistogramRootFileRcd"
4708 
4709  # Build up the code snippet.
4710  code_lines = []
4711  code_lines.append("from CondCore.DBCommon.CondDBSetup_cfi import *")
4712  code_lines.append("process.ref_hist_source = cms.ESSource(\"PoolDBESSource\", CondDBSetup,")
4713  code_lines.append(" connect = cms.string(\"%s\")," % connect_name)
4714  code_lines.append(" toGet = cms.VPSet(cms.PSet(record = cms.string(\"%s\")," % record_name)
4715  code_lines.append(" tag = cms.string(\"%s\"))," % ref_hist_tag_name)
4716  code_lines.append(" )")
4717  code_lines.append(" )")
4718  code_lines.append("process.es_prefer_ref_hist_source = cms.ESPrefer(\"PoolDBESSource\", \"ref_hist_source\")")
4719 
4720  snippet = "\n".join(code_lines)
4721 
4722  # End of create_es_prefer_snippet.
4723  return snippet
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def create_es_prefer_snippet
def cmsHarvester.create_harvesting_config (   self,
  dataset_name 
)
Create the Python harvesting configuration for harvesting.

The basic configuration is created by
Configuration.PyReleaseValidation.ConfigBuilder. (This mimics
what cmsDriver.py does.) After that we add some specials
ourselves.

NOTE: On one hand it may not be nice to circumvent
cmsDriver.py, on the other hand cmsDriver.py does not really
do anything itself. All the real work is done by the
ConfigBuilder so there is not much risk that we miss out on
essential developments of cmsDriver in the future.

Definition at line 4726 of file cmsHarvester.py.

4727  def create_harvesting_config(self, dataset_name):
4728  """Create the Python harvesting configuration for harvesting.
4729 
4730  The basic configuration is created by
4731  Configuration.PyReleaseValidation.ConfigBuilder. (This mimics
4732  what cmsDriver.py does.) After that we add some specials
4733  ourselves.
4734 
4735  NOTE: On one hand it may not be nice to circumvent
4736  cmsDriver.py, on the other hand cmsDriver.py does not really
4737  do anything itself. All the real work is done by the
4738  ConfigBuilder so there is not much risk that we miss out on
4739  essential developments of cmsDriver in the future.
4740 
4741  """
4742 
4743  # Setup some options needed by the ConfigBuilder.
4744  config_options = defaultOptions
4745 
4746  # These are fixed for all kinds of harvesting jobs. Some of
4747  # them are not needed for the harvesting config, but to keep
4748  # the ConfigBuilder happy.
4749  config_options.name = "harvesting"
4750  config_options.scenario = "pp"
4751  config_options.number = 1
4752  config_options.arguments = self.ident_string()
4753  config_options.evt_type = config_options.name
4754  config_options.customisation_file = None
4755  config_options.filein = "dummy_value"
4756  config_options.filetype = "EDM"
4757  # This seems to be new in CMSSW 3.3.X, no clue what it does.
4758  config_options.gflash = "dummy_value"
4759  # This seems to be new in CMSSW 3.3.0.pre6, no clue what it
4760  # does.
#config_options.himix = "dummy_value"
def create_harvesting_config
def cmsHarvester.create_harvesting_config_file_name (   self,
  dataset_name 
)

Definition at line 4098 of file cmsHarvester.py.

Referenced by write_harvesting_config().

4099  def create_harvesting_config_file_name(self, dataset_name):
4100  "Generate the name to be used for the harvesting config file."
4101 
4102  file_name_base = "harvesting.py"
4103  dataset_name_escaped = self.escape_dataset_name(dataset_name)
4104  config_file_name = file_name_base.replace(".py",
4105  "_%s.py" % \
4106  dataset_name_escaped)
4107 
4108  # End of create_harvesting_config_file_name.
4109  return config_file_name
def create_harvesting_config_file_name
def cmsHarvester.create_harvesting_output_file_name (   self,
  dataset_name,
  run_number 
)
Generate the name to be used for the harvesting output file.

This harvesting output file is the _final_ ROOT output file
containing the harvesting results. In case of two-step
harvesting there is an intermediate ME output file as well.

Definition at line 4170 of file cmsHarvester.py.

4171  def create_harvesting_output_file_name(self, dataset_name, run_number):
4172  """Generate the name to be used for the harvesting output file.
4173 
4174  This harvesting output file is the _final_ ROOT output file
4175  containing the harvesting results. In case of two-step
4176  harvesting there is an intermediate ME output file as well.
4177 
4178  """
4179 
4180  dataset_name_escaped = self.escape_dataset_name(dataset_name)
4181 
4182  # Hmmm, looking at the code for the DQMFileSaver this might
4183  # actually be the place where the first part of this file
4184  # naming scheme comes from.
4185  # NOTE: It looks like the `V0001' comes from the DQM
4186  # version. This is something that cannot be looked up from
4187  # here, so let's hope it does not change too often.
4188  output_file_name = "DQM_V0001_R%09d__%s.root" % \
4189  (run_number, dataset_name_escaped)
4190  if self.harvesting_mode.find("partial") > -1:
4191  # Only add the alarming piece to the file name if this is
4192  # a spread-out dataset.
4193  if self.datasets_information[dataset_name] \
4194  ["mirrored"][run_number] == False:
4195  output_file_name = output_file_name.replace(".root", \
4196  "_partial.root")
4197 
4198  # End of create_harvesting_output_file_name.
4199  return output_file_name
def create_harvesting_output_file_name
def cmsHarvester.create_me_extraction_config (   self,
  dataset_name 
)

NOTE: The following appears to be commented-out legacy code from the
source file that was picked up as this function's description:

    def create_harvesting_config_two_step(self, dataset_name):
        """Create the Python harvesting configuration for two-step
        harvesting.

        """

        # BUG BUG BUG
        config_contents = self.create_harvesting_config_single_step(dataset_name)
        # BUG BUG BUG end

        # End of create_harvesting_config_two_step.
        return config_contents

 

Definition at line 4952 of file cmsHarvester.py.

References create_output_file_name(), and join().

4953  def create_me_extraction_config(self, dataset_name):
4954  """
4955 
4956  """
4957 
4958  # Big chunk of hard-coded Python. Not such a big deal since
4959  # this does not do much and is not likely to break.
4960  tmp = []
4961  tmp.append(self.config_file_header())
4962  tmp.append("")
4963  tmp.append("import FWCore.ParameterSet.Config as cms")
4964  tmp.append("")
4965  tmp.append("process = cms.Process(\"ME2EDM\")")
4966  tmp.append("")
4967  tmp.append("# Import of standard configurations")
4968  tmp.append("process.load(\"Configuration/EventContent/EventContent_cff\")")
4969  tmp.append("")
4970  tmp.append("# We don't really process any events, just keep this set to one to")
4971  tmp.append("# make sure things work.")
4972  tmp.append("process.maxEvents = cms.untracked.PSet(")
4973  tmp.append(" input = cms.untracked.int32(1)")
4974  tmp.append(" )")
4975  tmp.append("")
4976  tmp.append("process.options = cms.untracked.PSet(")
4977  tmp.append(" Rethrow = cms.untracked.vstring(\"ProductNotFound\")")
4978  tmp.append(" )")
4979  tmp.append("")
4980  tmp.append("process.source = cms.Source(\"PoolSource\",")
4981  tmp.append(" processingMode = \\")
4982  tmp.append(" cms.untracked.string(\"RunsAndLumis\"),")
4983  tmp.append(" fileNames = \\")
4984  tmp.append(" cms.untracked.vstring(\"no_file_specified\")")
4985  tmp.append(" )")
4986  tmp.append("")
4987  tmp.append("# Output definition: drop everything except for the monitoring.")
4988  tmp.append("process.output = cms.OutputModule(")
4989  tmp.append(" \"PoolOutputModule\",")
4990  tmp.append(" outputCommands = \\")
4991  tmp.append(" cms.untracked.vstring(\"drop *\", \\")
4992  tmp.append(" \"keep *_MEtoEDMConverter_*_*\"),")
4993  output_file_name = self. \
4994  create_output_file_name(dataset_name)
4995  tmp.append(" fileName = \\")
4996  tmp.append(" cms.untracked.string(\"%s\")," % output_file_name)
4997  tmp.append(" dataset = cms.untracked.PSet(")
4998  tmp.append(" dataTier = cms.untracked.string(\"RECO\"),")
4999  tmp.append(" filterName = cms.untracked.string(\"\")")
5000  tmp.append(" )")
5001  tmp.append(" )")
5002  tmp.append("")
5003  tmp.append("# Additional output definition")
5004  tmp.append("process.out_step = cms.EndPath(process.output)")
5005  tmp.append("")
5006  tmp.append("# Schedule definition")
5007  tmp.append("process.schedule = cms.Schedule(process.out_step)")
5008  tmp.append("")
5009 
5010  config_contents = "\n".join(tmp)
5011 
5012  # End of create_me_extraction_config.
5013  return config_contents
def create_me_extraction_config
def create_output_file_name
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def cmsHarvester.create_me_summary_config_file_name (   self,
  dataset_name 
)

Definition at line 4112 of file cmsHarvester.py.

Referenced by write_me_extraction_config().

4113  def create_me_summary_config_file_name(self, dataset_name):
4114  "Generate the name of the ME summary extraction config file."
4115 
4116  file_name_base = "me_extraction.py"
4117  dataset_name_escaped = self.escape_dataset_name(dataset_name)
4118  config_file_name = file_name_base.replace(".py",
4119  "_%s.py" % \
4120  dataset_name_escaped)
4121 
4122  # End of create_me_summary_config_file_name.
4123  return config_file_name
def create_me_summary_config_file_name
def cmsHarvester.create_me_summary_output_file_name (   self,
  dataset_name 
)
Generate the name of the intermediate ME file to be
used in two-step harvesting.

Definition at line 4202 of file cmsHarvester.py.

4203  def create_me_summary_output_file_name(self, dataset_name):
4204  """Generate the name of the intermediate ME file name to be
4205  used in two-step harvesting.
4206 
4207  """
4208 
4209  dataset_name_escaped = self.escape_dataset_name(dataset_name)
4210  output_file_name = "me_summary_%s.root" % \
4211  dataset_name_escaped
4212 
4213  # End of create_me_summary_output_file_name.
4214  return output_file_name
def create_me_summary_output_file_name
def cmsHarvester.create_multicrab_block_name (   self,
  dataset_name,
  run_number,
  index 
)
Create the block name to use for this dataset/run number.

This is what appears in the brackets `[]' in multicrab.cfg. It
is used as the name of the job and to create output
directories.

Definition at line 4217 of file cmsHarvester.py.

4218  def create_multicrab_block_name(self, dataset_name, run_number, index):
4219  """Create the block name to use for this dataset/run number.
4220 
4221  This is what appears in the brackets `[]' in multicrab.cfg. It
4222  is used as the name of the job and to create output
4223  directories.
4224 
4225  """
4226 
4227  dataset_name_escaped = self.escape_dataset_name(dataset_name)
4228  block_name = "%s_%09d_%s" % (dataset_name_escaped, run_number, index)
4229 
4230  # End of create_multicrab_block_name.
4231  return block_name
def create_multicrab_block_name
def cmsHarvester.create_multicrab_config (   self)
Create a multicrab.cfg file for all samples.

This creates the contents for a multicrab.cfg file that uses
the crab.cfg file (generated elsewhere) for the basic settings
and contains blocks for each run of each dataset.

# BUG BUG BUG
# The fact that it's necessary to specify the se_white_list
# and the total_number_of_events is due to our use of CRAB
# version 2.6.1. This should no longer be necessary in the
# future.
# BUG BUG BUG end

Definition at line 4314 of file cmsHarvester.py.

4315  def create_multicrab_config(self):
4316  """Create a multicrab.cfg file for all samples.
4317 
4318  This creates the contents for a multicrab.cfg file that uses
4319  the crab.cfg file (generated elsewhere) for the basic settings
4320  and contains blocks for each run of each dataset.
4321 
4322  # BUG BUG BUG
4323  # The fact that it's necessary to specify the se_white_list
4324  # and the total_number_of_events is due to our use of CRAB
4325  # version 2.6.1. This should no longer be necessary in the
4326  # future.
4327  # BUG BUG BUG end
4328 
4329  """
def create_multicrab_config
def cmsHarvester.create_output_file_name (   self,
  dataset_name,
  run_number = None 
)
Create the name of the output file to be used.

This is the name of the output file of the `first step'. In
the case of single-step harvesting this is already the final
harvesting output ROOT file. In the case of two-step
harvesting it is the name of the intermediary ME summary
file.

Definition at line 4126 of file cmsHarvester.py.

Referenced by create_me_extraction_config().

4127  def create_output_file_name(self, dataset_name, run_number=None):
4128  """Create the name of the output file name to be used.
4129 
4130  This is the name of the output file of the `first step'. In
4131  the case of single-step harvesting this is already the final
4132  harvesting output ROOT file. In the case of two-step
4133  harvesting it is the name of the intermediary ME summary
4134  file.
4135 
4136  """
4137 
4138  # BUG BUG BUG
4139  # This method has become a bit of a mess. Originally it was
4140  # nice to have one entry point for both single- and two-step
4141  # output file names. However, now the former needs the run
4142  # number, while the latter does not even know about run
4143  # numbers. This should be fixed up a bit.
4144  # BUG BUG BUG end
4145 
4146  if self.harvesting_mode == "single-step":
4147  # DEBUG DEBUG DEBUG
4148  assert not run_number is None
4149  # DEBUG DEBUG DEBUG end
4150  output_file_name = self.create_harvesting_output_file_name(dataset_name, run_number)
4151  elif self.harvesting_mode == "single-step-allow-partial":
4152  # DEBUG DEBUG DEBUG
4153  assert not run_number is None
4154  # DEBUG DEBUG DEBUG end
4155  output_file_name = self.create_harvesting_output_file_name(dataset_name, run_number)
4156  elif self.harvesting_mode == "two-step":
4157  # DEBUG DEBUG DEBUG
4158  assert run_number is None
4159  # DEBUG DEBUG DEBUG end
4160  output_file_name = self.create_me_summary_output_file_name(dataset_name)
4161  else:
4162  # This should not be possible, but hey...
4163  assert False, "ERROR Unknown harvesting mode `%s'" % \
4164  self.harvesting_mode
4165 
4166  # End of create_harvesting_output_file_name.
4167  return output_file_name
def create_output_file_name
def cmsHarvester.dbs_check_dataset_spread (   self,
  dataset_name 
)

def dbs_resolve_dataset_number_of_sites(self, dataset_name): """Ask DBS across how many sites this dataset has been spread out.

     This is especially useful to check that we do not submit a job
    supposed to run on a complete sample that is not contained at
    a single site.         """         # DEBUG DEBUG DEBUG

If we get here DBS should have been set up already.

assert not self.dbs_api is None

DEBUG DEBUG DEBUG end

api = self.dbs_api dbs_query = "find count(site) where dataset = %s " \ "and dataset.status = VALID" % \ dataset_name try: api_result = api.executeQuery(dbs_query) except DbsApiException: raise Error("ERROR: Could not execute DBS query") try: num_sites = [] class Handler(xml.sax.handler.ContentHandler): def startElement(self, name, attrs): if name == "result": num_sites.append(str(attrs["COUNT_STORAGEELEMENT"])) xml.sax.parseString(api_result, Handler()) except SAXParseException: raise Error("ERROR: Could not parse DBS server output") # DEBUG DEBUG DEBUG assert len(num_sites) == 1

DEBUG DEBUG DEBUG end

num_sites = int(num_sites[0]) # End of dbs_resolve_dataset_number_of_sites. return num_sites def dbs_check_dataset_spread(self, dataset_name): """Figure out across how many sites this dataset is spread. NOTE: This is something we need to figure out per run, since we want to submit harvesting jobs per run. Basically three things can happen with a given dataset:

assert not self.dbs_api is None

DEBUG DEBUG DEBUG end

api = self.dbs_api dbs_query = "find run, run.numevents, site, file.count " \ "where dataset = %s " \ "and dataset.status = VALID" % \ dataset_name try: api_result = api.executeQuery(dbs_query) except DbsApiException: msg = "ERROR: Could not execute DBS query" self.logger.fatal(msg) raise Error(msg) # Index things by run number. No cross-check is done to make

sure we get results for each and every run in the

dataset. I'm not sure this would make sense since we'd be

cross-checking DBS info with DBS info anyway. Note that we

use the file count per site to see if we're dealing with an

incomplete vs. a mirrored dataset.

sample_info = {} try: class Handler(xml.sax.handler.ContentHandler): def startElement(self, name, attrs): if name == "result": run_number = int(attrs["RUNS_RUNNUMBER"]) site_name = str(attrs["STORAGEELEMENT_SENAME"]) file_count = int(attrs["COUNT_FILES"])

BUG BUG BUG

Doh! For some reason DBS never returns any other

event count than zero.

event_count = int(attrs["RUNS_NUMBEROFEVENTS"])

BUG BUG BUG end

info = (site_name, file_count, event_count) try: sample_info[run_number].append(info) except KeyError: sample_info[run_number] = [info] xml.sax.parseString(api_result, Handler()) except SAXParseException: msg = "ERROR: Could not parse DBS server output" self.logger.fatal(msg) raise Error(msg) # Now translate this into a slightly more usable mapping. sites = {} for (run_number, site_info) in sample_info.iteritems():

Quick-n-dirty trick to see if all file counts are the

same.

unique_file_counts = set([i[1] for i in site_info]) if len(unique_file_counts) == 1:

Okay, so this must be a mirrored dataset.

We have to pick one but we have to be careful. We

cannot submit to things like a T0, a T1, or CAF.

site_names = [self.pick_a_site([i[0] for i in site_info])] nevents = [site_info[0][2]] else:

Looks like this is a spread-out sample.

site_names = [i[0] for i in site_info] nevents = [i[2] for i in site_info] sites[run_number] = zip(site_names, nevents) self.logger.debug("Sample `%s' spread is:" % dataset_name) run_numbers = sites.keys() run_numbers.sort() for run_number in run_numbers: self.logger.debug(" run # %6d: %d sites (%s)" % \ (run_number, len(sites[run_number]), ", ".join([i[0] for i in sites[run_number]]))) # End of dbs_check_dataset_spread. return sites # DEBUG DEBUG DEBUG

Just kept for debugging now.

def dbs_check_dataset_spread_old(self, dataset_name): """Figure out across how many sites this dataset is spread. NOTE: This is something we need to figure out per run, since we want to submit harvesting jobs per run. Basically three things can happen with a given dataset:

assert not self.dbs_api is None

DEBUG DEBUG DEBUG end

api = self.dbs_api dbs_query = "find run, run.numevents, site, file.count " \ "where dataset = %s " \ "and dataset.status = VALID" % \ dataset_name try: api_result = api.executeQuery(dbs_query) except DbsApiException: msg = "ERROR: Could not execute DBS query" self.logger.fatal(msg) raise Error(msg) # Index things by run number. No cross-check is done to make

sure we get results for each and every run in the

dataset. I'm not sure this would make sense since we'd be

cross-checking DBS info with DBS info anyway. Note that we

use the file count per site to see if we're dealing with an

incomplete vs. a mirrored dataset.

sample_info = {} try: class Handler(xml.sax.handler.ContentHandler): def startElement(self, name, attrs): if name == "result": run_number = int(attrs["RUNS_RUNNUMBER"]) site_name = str(attrs["STORAGEELEMENT_SENAME"]) file_count = int(attrs["COUNT_FILES"])

BUG BUG BUG

Doh! For some reason DBS never returns any other

event count than zero.

event_count = int(attrs["RUNS_NUMBEROFEVENTS"])

BUG BUG BUG end

info = (site_name, file_count, event_count) try: sample_info[run_number].append(info) except KeyError: sample_info[run_number] = [info] xml.sax.parseString(api_result, Handler()) except SAXParseException: msg = "ERROR: Could not parse DBS server output" self.logger.fatal(msg) raise Error(msg) # Now translate this into a slightly more usable mapping. sites = {} for (run_number, site_info) in sample_info.iteritems():

Quick-n-dirty trick to see if all file counts are the

same.

unique_file_counts = set([i[1] for i in site_info]) if len(unique_file_counts) == 1:

Okay, so this must be a mirrored dataset.

We have to pick one but we have to be careful. We

cannot submit to things like a T0, a T1, or CAF.

site_names = [self.pick_a_site([i[0] for i in site_info])] nevents = [site_info[0][2]] else:

Looks like this is a spread-out sample.

site_names = [i[0] for i in site_info] nevents = [i[2] for i in site_info] sites[run_number] = zip(site_names, nevents) self.logger.debug("Sample `%s' spread is:" % dataset_name) run_numbers = sites.keys() run_numbers.sort() for run_number in run_numbers: self.logger.debug(" run # %6d: %d site(s) (%s)" % \ (run_number, len(sites[run_number]), ", ".join([i[0] for i in sites[run_number]]))) # End of dbs_check_dataset_spread_old. return sites

DEBUG DEBUG DEBUG end

Figure out the number of events in each run of this dataset.

This is a more efficient way of doing this than calling
dbs_resolve_number_of_events for each run.

Definition at line 3077 of file cmsHarvester.py.

References assert().

3078  def dbs_check_dataset_spread(self, dataset_name):
3079  """Figure out the number of events in each run of this dataset.
3080 
3081  This is a more efficient way of doing this than calling
3082  dbs_resolve_number_of_events for each run.
3083 
3084  """
3085 
3086  self.logger.debug("Checking spread of dataset `%s'" % dataset_name)
3087 
3088  # DEBUG DEBUG DEBUG
3089  # If we get here DBS should have been set up already.
3090  assert not self.dbs_api is None
3091  # DEBUG DEBUG DEBUG end
3092 
3093  api = self.dbs_api
3094  dbs_query = "find run.number, site, file.name, file.numevents " \
3095  "where dataset = %s " \
3096  "and dataset.status = VALID" % \
3097  dataset_name
3098  try:
3099  api_result = api.executeQuery(dbs_query)
3100  except DBSAPI.dbsApiException.DbsApiException:
3101  msg = "ERROR: Could not execute DBS query"
3102  self.logger.fatal(msg)
3103  raise Error(msg)
3104 
3105  handler = DBSXMLHandler(["run.number", "site", "file.name", "file.numevents"])
3106  parser = xml.sax.make_parser()
3107  parser.setContentHandler(handler)
3108 
3109  try:
# OBSOLETE OBSOLETE OBSOLETE
Helper class: Error exception.
Helper class: DBSXMLHandler.
def dbs_check_dataset_spread
def dbs_resolve_dataset_number_of_sites(self, dataset_name): """Ask DBS across how many sites this da...
def cmsHarvester.dbs_resolve_cmssw_version (   self,
  dataset_name 
)
Ask DBS for the CMSSW version used to create this dataset.

Definition at line 2475 of file cmsHarvester.py.

References assert().

2476  def dbs_resolve_cmssw_version(self, dataset_name):
2477  """Ask DBS for the CMSSW version used to create this dataset.
2478 
2479  """
2480 
2481  # DEBUG DEBUG DEBUG
2482  # If we get here DBS should have been set up already.
2483  assert not self.dbs_api is None
2484  # DEBUG DEBUG DEBUG end
2485 
2486  api = self.dbs_api
2487  dbs_query = "find algo.version where dataset = %s " \
2488  "and dataset.status = VALID" % \
2489  dataset_name
2490  try:
2491  api_result = api.executeQuery(dbs_query)
2492  except DBSAPI.dbsApiException.DbsApiException:
2493  msg = "ERROR: Could not execute DBS query"
2494  self.logger.fatal(msg)
2495  raise Error(msg)
2496 
2497  handler = DBSXMLHandler(["algo.version"])
2498  parser = xml.sax.make_parser()
2499  parser.setContentHandler(handler)
2500 
2501  try:
2502  xml.sax.parseString(api_result, handler)
2503  except SAXParseException:
2504  msg = "ERROR: Could not parse DBS server output"
2505  self.logger.fatal(msg)
2506  raise Error(msg)
2507 
2508  # DEBUG DEBUG DEBUG
2509  assert(handler.check_results_validity()), "ERROR The DBSXMLHandler screwed something up!"
2510  # DEBUG DEBUG DEBUG end
2511 
2512  cmssw_version = handler.results.values()[0]
2513 
2514  # DEBUG DEBUG DEBUG
2515  assert len(cmssw_version) == 1
2516  # DEBUG DEBUG DEBUG end
2517 
2518  cmssw_version = cmssw_version[0]
2519 
2520  # End of dbs_resolve_cmssw_version.
2521  return cmssw_version
assert(m_qm.get())
Helper class: Error exception.
Helper class: DBSXMLHandler.
def dbs_resolve_cmssw_version
def cmsHarvester.dbs_resolve_dataset_name (   self,
  dataset_name 
)
Use DBS to resolve a wildcarded dataset name.

Definition at line 2419 of file cmsHarvester.py.

References assert().

Referenced by build_dataset_list().

2420  def dbs_resolve_dataset_name(self, dataset_name):
2421  """Use DBS to resolve a wildcarded dataset name.
2422 
2423  """
2424 
2425  # DEBUG DEBUG DEBUG
2426  # If we get here DBS should have been set up already.
2427  assert not self.dbs_api is None
2428  # DEBUG DEBUG DEBUG end
2429 
2430  # Some minor checking to make sure that whatever we've been
2431  # given as dataset name actually sounds like a dataset name.
2432  if not (dataset_name.startswith("/") and \
2433  dataset_name.endswith("RECO")):
2434  self.logger.warning("Dataset name `%s' does not sound " \
2435  "like a valid dataset name!" % \
2436  dataset_name)
2437 
2438  #----------
2439 
2440  api = self.dbs_api
2441  dbs_query = "find dataset where dataset like %s " \
2442  "and dataset.status = VALID" % \
2443  dataset_name
2444  try:
2445  api_result = api.executeQuery(dbs_query)
2446  except DBSAPI.dbsApiException.DbsApiException:
2447  msg = "ERROR: Could not execute DBS query"
2448  self.logger.fatal(msg)
2449  raise Error(msg)
2450 
2451  # Setup parsing.
2452  handler = DBSXMLHandler(["dataset"])
2453  parser = xml.sax.make_parser()
2454  parser.setContentHandler(handler)
2455 
2456  # Parse.
2457  try:
2458  xml.sax.parseString(api_result, handler)
2459  except SAXParseException:
2460  msg = "ERROR: Could not parse DBS server output"
2461  self.logger.fatal(msg)
2462  raise Error(msg)
2463 
2464  # DEBUG DEBUG DEBUG
2465  assert(handler.check_results_validity()), "ERROR The DBSXMLHandler screwed something up!"
2466  # DEBUG DEBUG DEBUG end
2467 
2468  # Extract the results.
2469  datasets = handler.results.values()[0]
2470 
2471  # End of dbs_resolve_dataset_name.
2472  return datasets
assert(m_qm.get())
Helper class: Error exception.
Helper class: DBSXMLHandler.
def dbs_resolve_dataset_name
def cmsHarvester.dbs_resolve_datatype (   self,
  dataset_name 
)
Ask DBS for the data type (data or mc) of a given
dataset.

Definition at line 2683 of file cmsHarvester.py.

References assert().

def dbs_resolve_datatype(self, dataset_name):
    """Ask DBS for the data type (data or mc) of a given dataset.

    Args:
        dataset_name: Dataset name; it is inserted verbatim into a DBS
            `find datatype.type where dataset = ...' query.

    Returns:
        The single `datatype.type' value reported for the dataset.

    Raises:
        Error: If the DBS query cannot be executed or the server
            output cannot be parsed.

    """

    # DEBUG DEBUG DEBUG
    # If we get here DBS should have been set up already.
    assert self.dbs_api is not None
    # DEBUG DEBUG DEBUG end

    dbs_query = ("find datatype.type where dataset = %s "
                 "and dataset.status = VALID" %
                 dataset_name)
    try:
        api_result = self.dbs_api.executeQuery(dbs_query)
    except DBSAPI.dbsApiException.DbsApiException:
        msg = "ERROR: Could not execute DBS query"
        self.logger.fatal(msg)
        raise Error(msg)

    # xml.sax.parseString() builds its own parser and wires up the
    # handler, so no separate parser object is needed here.
    handler = DBSXMLHandler(["datatype.type"])
    try:
        xml.sax.parseString(api_result, handler)
    except SAXParseException:
        msg = "ERROR: Could not parse DBS server output"
        self.logger.fatal(msg)
        raise Error(msg)

    # DEBUG DEBUG DEBUG
    assert handler.check_results_validity(), \
        "ERROR The DBSXMLHandler screwed something up!"
    # DEBUG DEBUG DEBUG end

    # Only one tag was requested, so take the first (and only) entry.
    # list() keeps this working where dict.values() is a view.
    datatype = list(handler.results.values())[0]

    # DEBUG DEBUG DEBUG
    # Exactly one data type is expected per dataset.
    assert len(datatype) == 1
    # DEBUG DEBUG DEBUG end

    # End of dbs_resolve_datatype.
    return datatype[0]
assert(m_qm.get())
Helper class: Error exception.
Helper class: DBSXMLHandler.
def dbs_resolve_datatype
def cmsHarvester.dbs_resolve_globaltag (   self,
  dataset_name 
)
Ask DBS for the globaltag corresponding to a given dataset.

# BUG BUG BUG
# This does not seem to work for data datasets? E.g. for
# /Cosmics/Commissioning08_CRAFT0831X_V1_311_ReReco_FromSuperPointing_v1/RAW-RECO
# Probably due to the fact that the GlobalTag changed during
# data taking...
# BUG BUG BUG end

Definition at line 2627 of file cmsHarvester.py.

References assert().

def dbs_resolve_globaltag(self, dataset_name):
    """Ask DBS for the globaltag corresponding to a given dataset.

    # BUG BUG BUG
    # This does not seem to work for data datasets? E.g. for
    # /Cosmics/Commissioning08_CRAFT0831X_V1_311_ReReco_FromSuperPointing_v1/RAW-RECO
    # Probably due to the fact that the GlobalTag changed during
    # data taking...
    # BUG BUG BUG end

    Args:
        dataset_name: Dataset name; it is inserted verbatim into a DBS
            `find dataset.tag where dataset = ...' query.

    Returns:
        The single `dataset.tag' value reported for the dataset.

    Raises:
        Error: If the DBS query cannot be executed or the server
            output cannot be parsed.

    """

    # DEBUG DEBUG DEBUG
    # If we get here DBS should have been set up already.
    assert self.dbs_api is not None
    # DEBUG DEBUG DEBUG end

    dbs_query = ("find dataset.tag where dataset = %s "
                 "and dataset.status = VALID" %
                 dataset_name)
    try:
        api_result = self.dbs_api.executeQuery(dbs_query)
    except DBSAPI.dbsApiException.DbsApiException:
        msg = "ERROR: Could not execute DBS query"
        self.logger.fatal(msg)
        raise Error(msg)

    # xml.sax.parseString() builds its own parser and wires up the
    # handler. The previous code additionally created a parser and
    # registered the parser as its own content handler; that object
    # was never used for parsing, so it has been dropped.
    handler = DBSXMLHandler(["dataset.tag"])
    try:
        xml.sax.parseString(api_result, handler)
    except SAXParseException:
        msg = "ERROR: Could not parse DBS server output"
        self.logger.fatal(msg)
        raise Error(msg)

    # DEBUG DEBUG DEBUG
    assert handler.check_results_validity(), \
        "ERROR The DBSXMLHandler screwed something up!"
    # DEBUG DEBUG DEBUG end

    # Only one tag was requested, so take the first (and only) entry.
    # list() keeps this working where dict.values() is a view.
    globaltag = list(handler.results.values())[0]

    # DEBUG DEBUG DEBUG
    # Exactly one GlobalTag is expected per dataset (see the BUG note
    # in the docstring for a case where this may not hold).
    assert len(globaltag) == 1
    # DEBUG DEBUG DEBUG end

    # End of dbs_resolve_globaltag.
    return globaltag[0]
assert(m_qm.get())
def dbs_resolve_globaltag
Helper class: Error exception.
Helper class: DBSXMLHandler.
def cmsHarvester.dbs_resolve_number_of_events (   self,
  dataset_name,
  run_number = None 
)
Determine the number of events in a given dataset (and run).

Ask DBS for the number of events in a dataset. If a run number
is specified the number of events returned is that in that run
of that dataset. If problems occur we throw an exception.

# BUG BUG BUG
# Since DBS does not return the number of events correctly,
# neither for runs nor for whole datasets, we have to work
# around that a bit...
# BUG BUG BUG end

Definition at line 2736 of file cmsHarvester.py.

References assert().

def dbs_resolve_number_of_events(self, dataset_name, run_number=None):
    """Determine the number of events in a given dataset (and run).

    Ask DBS for the number of events in a dataset. If a run number
    is specified the number of events returned is that in that run
    of that dataset. If problems occur we throw an exception.

    # BUG BUG BUG
    # Since DBS does not return the number of events correctly,
    # neither for runs nor for whole datasets, we have to work
    # around that a bit...
    # BUG BUG BUG end

    Args:
        dataset_name: Dataset name; inserted verbatim into the query.
        run_number: Optional integer run number restricting the query
            to a single run.

    Returns:
        The sum of `file.numevents' over all files DBS reports for
        the dataset (and run).

    Raises:
        Error: If the DBS query cannot be executed or the server
            output cannot be parsed.

    """

    # DEBUG DEBUG DEBUG
    # If we get here DBS should have been set up already.
    assert self.dbs_api is not None
    # DEBUG DEBUG DEBUG end

    # Workaround for the BUG above: ask for the per-file event counts
    # and add them up ourselves.
    dbs_query = ("find file.name, file.numevents where dataset = %s "
                 "and dataset.status = VALID" %
                 dataset_name)
    if run_number is not None:
        # NOTE: This used to reference the misspelled name `dbq_query',
        # which raised a NameError whenever a run number was given.
        dbs_query = dbs_query + (" and run = %d" % run_number)
    try:
        api_result = self.dbs_api.executeQuery(dbs_query)
    except DBSAPI.dbsApiException.DbsApiException:
        msg = "ERROR: Could not execute DBS query"
        self.logger.fatal(msg)
        raise Error(msg)

    # xml.sax.parseString() builds its own parser and wires up the
    # handler, so no separate parser object is needed here.
    handler = DBSXMLHandler(["file.name", "file.numevents"])
    try:
        xml.sax.parseString(api_result, handler)
    except SAXParseException:
        msg = "ERROR: Could not parse DBS server output"
        self.logger.fatal(msg)
        raise Error(msg)

    # DEBUG DEBUG DEBUG
    assert handler.check_results_validity(), \
        "ERROR The DBSXMLHandler screwed something up!"
    # DEBUG DEBUG DEBUG end

    # The handler results are converted with int() elsewhere in this
    # file (see dbs_resolve_runs), i.e. they arrive as strings, so
    # convert before summing. int() is a no-op for real integers.
    num_events = sum([int(count)
                      for count in handler.results["file.numevents"]])

    # End of dbs_resolve_number_of_events.
    return num_events
assert(m_qm.get())
Helper class: Error exception.
Helper class: DBSXMLHandler.
def dbs_resolve_number_of_events
def cmsHarvester.dbs_resolve_runs (   self,
  dataset_name 
)

def dbs_resolve_dataset_number_of_events(self, dataset_name): """Ask DBS across how many events this dataset has been spread out.

     This is especially useful to check that we do not submit a job
    supposed to run on a complete sample that is not contained at
    a single site.         """         # DEBUG DEBUG DEBUG

If we get here DBS should have been set up already.

assert not self.dbs_api is None

DEBUG DEBUG DEBUG end

api = self.dbs_api dbs_query = "find count(site) where dataset = %s " \ "and dataset.status = VALID" % \ dataset_name try: api_result = api.executeQuery(dbs_query) except DbsApiException: raise Error("ERROR: Could not execute DBS query") try: num_events = [] class Handler(xml.sax.handler.ContentHandler): def startElement(self, name, attrs): if name == "result": num_events.append(str(attrs["COUNT_STORAGEELEMENT"])) xml.sax.parseString(api_result, Handler()) except SAXParseException: raise Error("ERROR: Could not parse DBS server output") # DEBUG DEBUG DEBUG assert len(num_events) == 1

DEBUG DEBUG DEBUG end

num_events = int(num_events[0]) # End of dbs_resolve_dataset_number_of_events. return num_events

Ask DBS for the list of runs in a given dataset.

# NOTE: This does not (yet?) skip/remove empty runs. There is
# a bug in the DBS entry run.numevents (i.e. it always returns
# zero) which should be fixed in the `next DBS release'.
# See also:
#   https://savannah.cern.ch/bugs/?53452
#   https://savannah.cern.ch/bugs/?53711

Definition at line 2569 of file cmsHarvester.py.

References assert().

def dbs_resolve_runs(self, dataset_name):
    """Ask DBS for the list of runs in a given dataset.

    # NOTE: This does not (yet?) skip/remove empty runs. There is
    # a bug in the DBS entry run.numevents (i.e. it always returns
    # zero) which should be fixed in the `next DBS release'.
    # See also:
    #   https://savannah.cern.ch/bugs/?53452
    #   https://savannah.cern.ch/bugs/?53711

    Args:
        dataset_name: Dataset name; it is inserted verbatim into a DBS
            `find run where dataset = ...' query.

    Returns:
        The run numbers as a list of integers in ascending order.

    Raises:
        Error: If the DBS query cannot be executed or the server
            output cannot be parsed.

    """

    # TODO TODO TODO
    # We should remove empty runs as soon as the above mentioned
    # bug is fixed.
    # TODO TODO TODO end

    # DEBUG DEBUG DEBUG
    # If we get here DBS should have been set up already.
    assert self.dbs_api is not None
    # DEBUG DEBUG DEBUG end

    dbs_query = ("find run where dataset = %s "
                 "and dataset.status = VALID" %
                 dataset_name)
    try:
        api_result = self.dbs_api.executeQuery(dbs_query)
    except DBSAPI.dbsApiException.DbsApiException:
        msg = "ERROR: Could not execute DBS query"
        self.logger.fatal(msg)
        raise Error(msg)

    # xml.sax.parseString() builds its own parser and wires up the
    # handler, so no separate parser object is needed here.
    handler = DBSXMLHandler(["run"])
    try:
        xml.sax.parseString(api_result, handler)
    except SAXParseException:
        msg = "ERROR: Could not parse DBS server output"
        self.logger.fatal(msg)
        raise Error(msg)

    # DEBUG DEBUG DEBUG
    assert handler.check_results_validity(), \
        "ERROR The DBSXMLHandler screwed something up!"
    # DEBUG DEBUG DEBUG end

    # Only one tag was requested, so take the first (and only) entry.
    # list() keeps this working where dict.values() is a view.
    run_strings = list(handler.results.values())[0]
    # Turn strings into integers and order them.
    runs = sorted([int(i) for i in run_strings])

    # End of dbs_resolve_runs.
    return runs
assert(m_qm.get())
Helper class: Error exception.
Helper class: DBSXMLHandler.
def dbs_resolve_runs
def dbs_resolve_dataset_number_of_events(self, dataset_name): &quot;&quot;"Ask DBS across how many events this ...
def cmsHarvester.escape_dataset_name (   self,
  dataset_name 
)
Escape a DBS dataset name.

Escape a DBS dataset name such that it does not cause trouble
with the file system. This means turning each `/' into `__',
except for the first one which is just removed.

Definition at line 4047 of file cmsHarvester.py.

def escape_dataset_name(self, dataset_name):
    """Escape a DBS dataset name for use on the file system.

    Slashes at either end of the name are dropped (so the leading
    `/' of a dataset name simply disappears) and every remaining `/'
    is turned into `__'.

    Args:
        dataset_name: The DBS dataset name to escape.

    Returns:
        The escaped name.

    """

    return dataset_name.strip("/").replace("/", "__")
def escape_dataset_name
def cmsHarvester.load_ref_hist_mappings (   self)
Load the reference histogram mappings from file.

The dataset name to reference histogram name mappings are read
from a text file specified in self.ref_hist_mappings_file_name.

Definition at line 5207 of file cmsHarvester.py.

References mergeVDriftHistosByStation.file, and bookConverter.max.

def load_ref_hist_mappings(self):
    """Load the reference histogram mappings from file.

    The dataset name to reference histogram name mappings are read
    from a text file specified in self.ref_hist_mappings_file_name
    and stored in self.ref_hist_mappings.

    The format we expect is: two white-space separated pieces per
    line. The first the dataset name for which the reference should
    be used, the second one the name of the reference histogram in
    the database. Blank lines and lines starting with `#' (after
    optional leading white space) are skipped.

    Raises:
        Error: If the file cannot be read, contains a line that does
            not consist of exactly two pieces, or maps the same
            dataset more than once.

    """

    # DEBUG DEBUG DEBUG
    assert len(self.ref_hist_mappings) < 1, \
        "ERROR Should not be RE-loading " \
        "reference histogram mappings!"
    # DEBUG DEBUG DEBUG end

    self.logger.info("Loading reference histogram mappings "
                     "from file `%s'" %
                     self.ref_hist_mappings_file_name)

    try:
        mappings_file = open(self.ref_hist_mappings_file_name, "r")
        try:
            mappings_lines = mappings_file.readlines()
        finally:
            # Make sure the handle is released even if reading fails.
            mappings_file.close()
    except IOError:
        msg = "ERROR: Could not open reference histogram mapping "\
              "file `%s'" % self.ref_hist_mappings_file_name
        self.logger.fatal(msg)
        raise Error(msg)

    ##########

    for mapping in mappings_lines:
        # Strip first so that indented comment lines are recognised
        # as comments too.
        mapping = mapping.strip()
        if len(mapping) < 1 or mapping.startswith("#"):
            continue

        mapping_pieces = mapping.split()
        if len(mapping_pieces) != 2:
            msg = "ERROR: The reference histogram mapping " \
                  "file contains a line I don't " \
                  "understand:\n %s" % mapping
            self.logger.fatal(msg)
            raise Error(msg)
        (dataset_name, ref_hist_name) = mapping_pieces

        # We don't want people to accidentally specify multiple
        # mappings for the same dataset. Just don't accept those
        # cases.
        if dataset_name in self.ref_hist_mappings:
            msg = "ERROR: The reference histogram mapping " \
                  "file contains multiple mappings for " \
                  "dataset `%s'." % dataset_name
            self.logger.fatal(msg)
            raise Error(msg)

        # All is well that ends well.
        self.ref_hist_mappings[dataset_name] = ref_hist_name

    ##########

    self.logger.info(" Successfully loaded %d mapping(s)" %
                     len(self.ref_hist_mappings))
    # max() of an empty sequence raises, so only build the aligned
    # overview when there is something to show.
    if len(self.ref_hist_mappings) > 0:
        max_len = max([len(i) for i in self.ref_hist_mappings])
        for (map_from, map_to) in self.ref_hist_mappings.items():
            self.logger.info(" %-*s -> %s" %
                             (max_len, map_from, map_to))

    # End of load_ref_hist_mappings.
def load_ref_hist_mappings
Helper class: Error exception.
def cmsHarvester.option_handler_caf_access (   self,
  option,
  opt_str,
  value,
  parser 
)
Set the self.caf_access flag to try and create jobs that
run on the CAF.

Definition at line 1101 of file cmsHarvester.py.

def option_handler_caf_access(self, option, opt_str, value, parser):
    """Set the self.caf_access flag to try and create jobs that
    run on the CAF.

    The arguments are the ones of an optparse-style option callback;
    none of them is used. A warning is logged to make the mode
    visible to the user.

    """
    self.caf_access = True

    # NOTE: Mind the trailing space in every fragment: the pieces are
    # glued together verbatim (this used to read `but nofurther').
    self.logger.warning("Running in `caf_access' mode. "
                        "Will try to create jobs that run "
                        "on CAF but no "
                        "further promises...")

    # End of option_handler_caf_access.
def option_handler_caf_access
def cmsHarvester.option_handler_castor_dir (   self,
  option,
  opt_str,
  value,
  parser 
)

def option_handler_dataset_name(self, option, opt_str, value, parser): """Specify the name(s) of the dataset(s) to be processed.

     It is checked to make sure that no dataset name or listfile
    names are given yet. If all is well (i.e. we still have a
    clean slate) the dataset name is stored for later use,
    otherwise a Usage exception is raised.         """         if not self.input_method is None:
        if self.input_method == "dataset":
            raise Usage("Please only feed me one dataset specification")
        elif self.input_method == "listfile":
            raise Usage("Cannot specify both dataset and input list file")
        else:
            assert False, "Unknown input method `%s'" % self.input_method
    self.input_method = "dataset"
    self.input_name = value
    self.logger.info("Input method used: %s" % self.input_method)         # End of option_handler_dataset_name.     ##########     def option_handler_listfile_name(self, option, opt_str, value, parser):
    """Specify the input list file containing datasets to be processed.         It is checked to make sure that no dataset name or listfile
    names are given yet. If all is well (i.e. we still have a
    clean slate) the listfile name is stored for later use,
    otherwise a Usage exception is raised.         """         if not self.input_method is None:
        if self.input_method == "listfile":
            raise Usage("Please only feed me one list file")
        elif self.input_method == "dataset":
            raise Usage("Cannot specify both dataset and input list file")
        else:
            assert False, "Unknown input method `%s'" % self.input_method
    self.input_method = "listfile"
    self.input_name = value
    self.logger.info("Input method used: %s" % self.input_method)         # End of option_handler_listfile_name. @verbatim Specify where on CASTOR the output should go.

At the moment only output to CERN CASTOR is supported. Eventually the harvested results should go into the central place for DQM on CASTOR anyway.

Definition at line 1059 of file cmsHarvester.py.

def option_handler_castor_dir(self, option, opt_str, value, parser):
    """Specify where on CASTOR the output should go.

    At the moment only output to CERN CASTOR is
    supported. Eventually the harvested results should go into the
    central place for DQM on CASTOR anyway.

    Args:
        option, opt_str, parser: Unused; present because this is an
            option-parser callback.
        value: The CASTOR directory given on the command line.

    The normalised, absolute path is stored in self.castor_base_dir.

    """

    # Add a leading slash if necessary and clean up the path.
    # os.path.join() leaves an already absolute path untouched and
    # normpath() drops trailing and duplicate separators.
    castor_dir = os.path.join(os.path.sep, value)
    self.castor_base_dir = os.path.normpath(castor_dir)

    self.logger.info("CASTOR (base) area to be used: `%s'" %
                     self.castor_base_dir)

    # End of option_handler_castor_dir.
def option_handler_castor_dir
def option_handler_dataset_name(self, option, opt_str, value, parser): &quot;&quot;"Specify the name(s) of the ...
def cmsHarvester.option_handler_crab_submission (   self,
  option,
  opt_str,
  value,
  parser 
)
Crab jobs are not created and
    "submitted automatically",

Definition at line 1129 of file cmsHarvester.py.

def option_handler_crab_submission(self, option, opt_str, value, parser):
    """Set the self.crab_submission flag.

    The arguments follow the same callback signature as the other
    option handlers; none of them is used.

    NOTE(review): The previous docstring read `Crab jobs are not
    created and "submitted automatically",' which looks like a
    mangled help text. Presumably the flag switches on automatic
    creation and submission of CRAB jobs -- confirm against the code
    that reads self.crab_submission.

    """
    self.crab_submission = True

    # End of option_handler_crab_submission.
def option_handler_crab_submission
def cmsHarvester.option_handler_list_types (   self,
  option,
  opt_str,
  value,
  parser 
)
List all harvesting types and their mappings.

This lists all implemented harvesting types with their
corresponding mappings to sequence names. This had to be
separated out from the help since it depends on the CMSSW
version and was making things a bit of a mess.

NOTE: There is no way (at least not that I could come up with)
to code this in a neat generic way that can be read both by
this method and by setup_harvesting_info(). Please try hard to
keep these two methods in sync!

Definition at line 1151 of file cmsHarvester.py.

def option_handler_list_types(self, option, opt_str, value, parser):
    """List all harvesting types and their mappings.

    This lists all implemented harvesting types with their
    corresponding mappings to sequence names. This had to be
    separated out from the help since it depends on the CMSSW
    version and was making things a bit of a mess.

    NOTE: There is no way (at least not that I could come up with)
    to code this in a neat generic way that can be read both by
    this method and by setup_harvesting_info(). Please try hard to
    keep these two methods in sync!

    None of the callback arguments is used. The overview is written
    to standard output.

    Raises:
        SystemExit: Always, once the overview has been printed.

    """

    sep_line = "-" * 50
    sep_line_short = "-" * 20

    overview = (
        sep_line,
        "The following harvesting types are available:",
        sep_line,

        "`RelVal' maps to:",
        " pre-3_3_0 : HARVESTING:validationHarvesting",
        " 3_4_0_pre2 and later: HARVESTING:validationHarvesting+dqmHarvesting",
        " Exceptions:",
        " 3_3_0_pre1-4 : HARVESTING:validationHarvesting",
        " 3_3_0_pre6 : HARVESTING:validationHarvesting",
        " 3_4_0_pre1 : HARVESTING:validationHarvesting",

        sep_line_short,

        "`RelValFS' maps to:",
        " always : HARVESTING:validationHarvestingFS",

        sep_line_short,

        "`MC' maps to:",
        " always : HARVESTING:validationprodHarvesting",

        sep_line_short,

        "`DQMOffline' maps to:",
        " always : HARVESTING:dqmHarvesting",

        sep_line,
        )

    # The parenthesised single-argument form prints the same text
    # whether `print' is a statement or a function.
    for line in overview:
        print(line)

    # We're done, let's quit. (This is the same thing optparse
    # does after printing the help.)
    raise SystemExit

    # End of option_handler_list_types.
def option_handler_list_types
def cmsHarvester.option_handler_no_t1access (   self,
  option,
  opt_str,
  value,
  parser 
)
Set the self.non_t1access flag to try and create jobs that
run without special `t1access' role.

Definition at line 1084 of file cmsHarvester.py.

def option_handler_no_t1access(self, option, opt_str, value, parser):
    """Switch on the `non-t1access' mode.

    Sets self.non_t1access to True so that jobs are created that
    run without the special `t1access' role, and logs a warning
    saying so. None of the callback arguments is used.

    """
    notice = "Running in `non-t1access' mode. " \
             "Will try to create jobs that run " \
             "without special rights but no " \
             "further promises..."

    self.non_t1access = True
    self.logger.warning(notice)

    # End of option_handler_no_t1access.
def option_handler_no_t1access
def cmsHarvester.option_handler_preferred_site (   self,
  option,
  opt_str,
  value,
  parser 
)

Definition at line 1145 of file cmsHarvester.py.

def option_handler_preferred_site(self, option, opt_str, value, parser):
    """Store the given option value in self.preferred_site.

    The value is stored as received, without any validation. The
    other arguments are unused; the signature matches the other
    option handlers.

    """
    self.preferred_site = value
def option_handler_preferred_site
def cmsHarvester.option_handler_saveByLumiSection (   self,
  option,
  opt_str,
  value,
  parser 
)
Set process.dqmSaver.saveByLumiSection=1 in the harvesting cfg file.

Definition at line 1117 of file cmsHarvester.py.

def option_handler_saveByLumiSection(self, option, opt_str, value, parser):
    """Switch on saving by lumi section.

    Sets self.saveByLumiSection to True (meant to end up as
    process.dqmSaver.saveByLumiSection=1 in the harvesting cfg file)
    and logs a warning. None of the callback arguments is used.

    """
    notice = "waning concerning saveByLumiSection option"

    self.saveByLumiSection = True
    self.logger.warning(notice)

    # End of option_handler_saveByLumiSection.
def option_handler_saveByLumiSection
def cmsHarvester.option_handler_sites (   self,
  option,
  opt_str,
  value,
  parser 
)

Definition at line 1139 of file cmsHarvester.py.

def option_handler_sites(self, option, opt_str, value, parser):
    """Store the given option value in self.nr_max_sites.

    The value is stored as received, without conversion or
    validation. The other arguments are unused; the signature
    matches the other option handlers.

    """
    self.nr_max_sites = value
def option_handler_sites
def cmsHarvester.parse_cmd_line_options (   self)

Definition at line 1870 of file cmsHarvester.py.

1871  def parse_cmd_line_options(self):
1872 
1873  # Set up the command line parser. Note that we fix up the help
1874  # formatter so that we can add some text pointing people to
1875  # the Twiki etc.
1876  parser = optparse.OptionParser(version="%s %s" % \
1877  ("%prog", self.version),
1878  formatter=CMSHarvesterHelpFormatter())
1880  self.option_parser = parser
1881 
1882  # The debug switch.
1883  parser.add_option("-d", "--debug",
1884  help="Switch on debug mode",
1885  action="callback",
1886  callback=self.option_handler_debug)
1887 
1888  # The quiet switch.
1889  parser.add_option("-q", "--quiet",
1890  help="Be less verbose",
1891  action="callback",
1892  callback=self.option_handler_quiet)
1893 
1894  # The force switch. If this switch is used sanity checks are
1895  # performed but failures do not lead to aborts. Use with care.
1896  parser.add_option("", "--force",
1897  help="Force mode. Do not abort on sanity check "
1898  "failures",
1899  action="callback",
1900  callback=self.option_handler_force)
1901 
1902  # Choose between the different kinds of harvesting.
1903  parser.add_option("", "--harvesting_type",
1904  help="Harvesting type: %s" % \
1905  ", ".join(self.harvesting_types),
1906  action="callback",
1907  callback=self.option_handler_harvesting_type,
1908  type="string",
1909  metavar="HARVESTING_TYPE")
1910 
1911  # Choose between single-step and two-step mode.
1912  parser.add_option("", "--harvesting_mode",
1913  help="Harvesting mode: %s (default = %s)" % \
1914  (", ".join(self.harvesting_modes),
1915  self.harvesting_mode_default),
1916  action="callback",
1917  callback=self.option_handler_harvesting_mode,
1918  type="string",
1919  metavar="HARVESTING_MODE")
1920 
1921  # Override the GlobalTag chosen by the cmsHarvester.
1922  parser.add_option("", "--globaltag",
1923  help="GlobalTag to use. Default is the ones " \
1924  "the dataset was created with for MC, for data" \
1925  "a GlobalTag has to be specified.",
1926  action="callback",
1927  callback=self.option_handler_globaltag,
1928  type="string",
1929  metavar="GLOBALTAG")
1930 
1931  # Allow switching off of reference histograms.
1932  parser.add_option("", "--no-ref-hists",
1933  help="Don't use any reference histograms",
1934  action="callback",
1935  callback=self.option_handler_no_ref_hists)
1936 
1937  # Allow the default (i.e. the one that should be used)
1938  # Frontier connection to be overridden.
1939  parser.add_option("", "--frontier-connection",
1940  help="Use this Frontier connection to find " \
1941  "GlobalTags and LocalTags (for reference " \
1942  "histograms).\nPlease only use this for " \
1943  "testing.",
1944  action="callback",
1945  callback=self.option_handler_frontier_connection,
1946  type="string",
1947  metavar="FRONTIER")
1948 
1949  # Similar to the above but specific to the Frontier connection
1950  # to be used for the GlobalTag.
1951  parser.add_option("", "--frontier-connection-for-globaltag",
1952  help="Use this Frontier connection to find " \
1953  "GlobalTags.\nPlease only use this for " \
1954  "testing.",
1955  action="callback",
1956  callback=self.option_handler_frontier_connection,
1957  type="string",
1958  metavar="FRONTIER")
1959 
1960  # Similar to the above but specific to the Frontier connection
1961  # to be used for the reference histograms.
1962  parser.add_option("", "--frontier-connection-for-refhists",
1963  help="Use this Frontier connection to find " \
1964  "LocalTags (for reference " \
1965  "histograms).\nPlease only use this for " \
1966  "testing.",
1967  action="callback",
1968  callback=self.option_handler_frontier_connection,
1969  type="string",
1970  metavar="FRONTIER")
1971 
1972  # Option to specify the name (or a regexp) of the dataset(s)
1973  # to be used.
1974  parser.add_option("", "--dataset",
1975  help="Name (or regexp) of dataset(s) to process",
1976  action="callback",
1977  #callback=self.option_handler_dataset_name,
1978  callback=self.option_handler_input_spec,
1979  type="string",
1980  #dest="self.input_name",
1981  metavar="DATASET")
1982 
1983  # Option to specify the name (or a regexp) of the dataset(s)
1984  # to be ignored.
1985  parser.add_option("", "--dataset-ignore",
1986  help="Name (or regexp) of dataset(s) to ignore",
1987  action="callback",
1988  callback=self.option_handler_input_spec,
1989  type="string",
1990  metavar="DATASET-IGNORE")
1991 
1992  # Option to specify the name (or a regexp) of the run(s)
1993  # to be used.
1994  parser.add_option("", "--runs",
1995  help="Run number(s) to process",
1996  action="callback",
1997  callback=self.option_handler_input_spec,
1998  type="string",
1999  metavar="RUNS")
2000 
2001  # Option to specify the name (or a regexp) of the run(s)
2002  # to be ignored.
2003  parser.add_option("", "--runs-ignore",
2004  help="Run number(s) to ignore",
2005  action="callback",
2006  callback=self.option_handler_input_spec,
2007  type="string",
2008  metavar="RUNS-IGNORE")
2009 
2010  # Option to specify a file containing a list of dataset names
2011  # (or regexps) to be used.
2012  parser.add_option("", "--datasetfile",
2013  help="File containing list of dataset names " \
2014  "(or regexps) to process",
2015  action="callback",
2016  #callback=self.option_handler_listfile_name,
2017  callback=self.option_handler_input_spec,
2018  type="string",
2019  #dest="self.input_name",
2020  metavar="DATASETFILE")
2021 
2022  # Option to specify a file containing a list of dataset names
2023  # (or regexps) to be ignored.
2024  parser.add_option("", "--datasetfile-ignore",
2025  help="File containing list of dataset names " \
2026  "(or regexps) to ignore",
2027  action="callback",
2028  callback=self.option_handler_input_spec,
2029  type="string",
2030  metavar="DATASETFILE-IGNORE")
2031 
2032  # Option to specify a file containing a list of runs to be
2033  # used.
2034  parser.add_option("", "--runslistfile",
2035  help="File containing list of run numbers " \
2036  "to process",
2037  action="callback",
2038  callback=self.option_handler_input_spec,
2039  type="string",
2040  metavar="RUNSLISTFILE")
2041 
2042  # Option to specify a file containing a list of runs
2043  # to be ignored.
2044  parser.add_option("", "--runslistfile-ignore",
2045  help="File containing list of run numbers " \
2046  "to ignore",
2047  action="callback",
2048  callback=self.option_handler_input_spec,
2049  type="string",
2050  metavar="RUNSLISTFILE-IGNORE")
2051 
# Option to specify a Jsonfile contaning a list of runs
def parse_cmd_line_options
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
Helper class: CMSHarvesterHelpFormatter.
def cmsHarvester.pick_a_site (   self,
  sites,
  cmssw_version 
)

Definition at line 1706 of file cmsHarvester.py.

1707  def pick_a_site(self, sites, cmssw_version):
def cmsHarvester.process_dataset_ignore_list (   self)
Update the list of datasets taking into account the ones to
ignore.

Both lists have been generated before from DBS and both are
assumed to be unique.

NOTE: The advantage of creating the ignore list from DBS (in
case a regexp is given) and matching that instead of directly
matching the ignore criterion against the list of datasets (to
consider) built from DBS is that in the former case we're sure
that all regexps are treated exactly as DBS would have done
without the cmsHarvester.

NOTE: This only removes complete samples. Exclusion of single
runs is done by the book keeping. So the assumption is that a
user never wants to harvest just part (i.e. n out of N runs)
of a sample.

Definition at line 3567 of file cmsHarvester.py.

def process_dataset_ignore_list(self):
    """Update the list of datasets taking into account the ones to
    ignore.

    Both lists have been generated before from DBS and both are
    assumed to be unique.

    NOTE: The advantage of creating the ignore list from DBS (in
    case a regexp is given) and matching that instead of directly
    matching the ignore criterion against the list of datasets (to
    consider) built from DBS is that in the former case we're sure
    that all regexps are treated exactly as DBS would have done
    without the cmsHarvester.

    NOTE: This only removes complete samples. Exclusion of single
    runs is done by the book keeping. So the assumption is that a
    user never wants to harvest just part (i.e. n out of N runs)
    of a sample.

    self.datasets_to_use is replaced by a deep copy of itself from
    which every key that also occurs in self.datasets_to_ignore has
    been removed; the original mapping object is left untouched.

    """

    self.logger.info("Processing list of datasets to ignore...")

    self.logger.debug("Before processing ignore list there are %d "
                      "datasets in the list to be processed" %
                      len(self.datasets_to_use))

    # Work on a copy and drop every ignored name from it. Using the
    # mapping's own key lookup avoids scanning a list of keys once
    # per dataset.
    dataset_names_filtered = copy.deepcopy(self.datasets_to_use)
    for dataset_name in self.datasets_to_ignore:
        dataset_names_filtered.pop(dataset_name, None)

    self.logger.info(" --> Removed %d dataset(s)" %
                     (len(self.datasets_to_use) -
                      len(dataset_names_filtered)))

    self.datasets_to_use = dataset_names_filtered

    self.logger.debug("After processing ignore list there are %d "
                      "datasets in the list to be processed" %
                      len(self.datasets_to_use))
def process_dataset_ignore_list
def cmsHarvester.process_runs_use_and_ignore_lists (   self)

Definition at line 3614 of file cmsHarvester.py.

3616 
3617  self.logger.info("Processing list of runs to use and ignore...")
3618 
3619  # This basically adds all runs in a dataset to be processed,
3620  # except for any runs that are not specified in the `to use'
3621  # list and any runs that are specified in the `to ignore'
3622  # list.
3623 
3624  # NOTE: It is assumed that those lists make sense. The input
3625  # should be checked against e.g. overlapping `use' and
3626  # `ignore' lists.
3627 
3628  runs_to_use = self.runs_to_use
3629  runs_to_ignore = self.runs_to_ignore
3630 
3631  for dataset_name in self.datasets_to_use:
3632  runs_in_dataset = self.datasets_information[dataset_name]["runs"]
3633 
3634  # First some sanity checks.
3635  runs_to_use_tmp = []
3636  for run in runs_to_use:
3637  if not run in runs_in_dataset:
3638  self.logger.warning("Dataset `%s' does not contain " \
3639  "requested run %d " \
3640  "--> ignoring `use' of this run" % \
3641  (dataset_name, run))
3642  else:
3643  runs_to_use_tmp.append(run)
3644 
3645  if len(runs_to_use) > 0:
3646  runs = runs_to_use_tmp
3647  self.logger.info("Using %d out of %d runs " \
3648  "of dataset `%s'" % \
3649  (len(runs), len(runs_in_dataset),
3650  dataset_name))
3651  else:
3652  runs = runs_in_dataset
3653 
3654  if len(runs_to_ignore) > 0:
3655  runs_tmp = []
3656  for run in runs:
3657  if not run in runs_to_ignore:
3658  runs_tmp.append(run)
3659  self.logger.info("Ignoring %d out of %d runs " \
3660  "of dataset `%s'" % \
3661  (len(runs)- len(runs_tmp),
3662  len(runs_in_dataset),
3663  dataset_name))
3664  runs = runs_tmp
3665 
3666  if self.todofile != "YourToDofile.txt":
3667  runs_todo = []
3668  print "Reading runs from file /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/harvesting/%s" %self.todofile
3669  cmd="grep %s /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/harvesting/%s | cut -f5 -d' '" %(dataset_name,self.todofile)
3670  (status, output)=commands.getstatusoutput(cmd)
3671  for run in runs:
3672  run_str="%s" %run
3673  if run_str in output:
3674  runs_todo.append(run)
3675  self.logger.info("Using %d runs " \
3676  "of dataset `%s'" % \
3677  (len(runs_todo),
3678  dataset_name))
3679  runs=runs_todo
3680 
3681  Json_runs = []
3682  if self.Jsonfilename != "YourJSON.txt":
3683  good_runs = []
3684  self.Jsonlumi = True
3685  # We were passed a Jsonfile containing a dictionary of
3686  # run/lumisection-pairs
3687  self.logger.info("Reading runs and lumisections from file `%s'" % \
3688  self.Jsonfilename)
3689  try:
3690  Jsonfile = open(self.Jsonfilename, "r")
3691  for names in Jsonfile:
3692  dictNames= eval(str(names))
3693  for key in dictNames:
3694  intkey=int(key)
3695  Json_runs.append(intkey)
3696  Jsonfile.close()
3697  except IOError:
3698  msg = "ERROR: Could not open Jsonfile `%s'" % \
3699  input_name
3700  self.logger.fatal(msg)
3701  raise Error(msg)
3702  for run in runs:
3703  if run in Json_runs:
3704  good_runs.append(run)
3705  self.logger.info("Using %d runs " \
3706  "of dataset `%s'" % \
3707  (len(good_runs),
3708  dataset_name))
3709  runs=good_runs
3710  if (self.Jsonrunfilename != "YourJSON.txt") and (self.Jsonfilename == "YourJSON.txt"):
3711  good_runs = []
3712  # We were passed a Jsonfile containing a dictionary of
3713  # run/lumisection-pairs
3714  self.logger.info("Reading runs from file `%s'" % \
3715  self.Jsonrunfilename)
3716  try:
3717  Jsonfile = open(self.Jsonrunfilename, "r")
3718  for names in Jsonfile:
3719  dictNames= eval(str(names))
3720  for key in dictNames:
3721  intkey=int(key)
3722  Json_runs.append(intkey)
3723  Jsonfile.close()
3724  except IOError:
3725  msg = "ERROR: Could not open Jsonfile `%s'" % \
3726  input_name
3727  self.logger.fatal(msg)
3728  raise Error(msg)
3729  for run in runs:
3730  if run in Json_runs:
3731  good_runs.append(run)
3732  self.logger.info("Using %d runs " \
3733  "of dataset `%s'" % \
3734  (len(good_runs),
3735  dataset_name))
3736  runs=good_runs
3737 
3738  self.datasets_to_use[dataset_name] = runs
3739 
3740  # End of process_runs_use_and_ignore_lists().
Helper class: Error exception.
def process_runs_use_and_ignore_lists
def cmsHarvester.ref_hist_mappings_needed (   self,
  dataset_name = None 
)
Check if we need to load and check the reference mappings.

For data the reference histograms should be taken
automatically from the GlobalTag, so we don't need any
mappings. For RelVals we need to know a mapping to be used in
the es_prefer code snippet (different references for each of
the datasets.)

WARNING: This implementation is a bit convoluted.

Definition at line 5173 of file cmsHarvester.py.

5174  def ref_hist_mappings_needed(self, dataset_name=None):
5175  """Check if we need to load and check the reference mappings.
5176 
5177  For data the reference histograms should be taken
5178  automatically from the GlobalTag, so we don't need any
5179  mappings. For RelVals we need to know a mapping to be used in
5180  the es_prefer code snippet (different references for each of
5181  the datasets.)
5182 
5183  WARNING: This implementation is a bit convoluted.
5184 
5185  """
5186 
5187  # If no dataset name given, do everything, otherwise check
5188  # only this one dataset.
5189  if not dataset_name is None:
5190  data_type = self.datasets_information[dataset_name] \
5191  ["datatype"]
5192  mappings_needed = (data_type == "mc")
5193  # DEBUG DEBUG DEBUG
5194  if not mappings_needed:
5195  assert data_type == "data"
5196  # DEBUG DEBUG DEBUG end
5197  else:
5198  tmp = [self.ref_hist_mappings_needed(dataset_name) \
5199  for dataset_name in \
5200  self.datasets_information.keys()]
5201  mappings_needed = (True in tmp)
5202 
5203  # End of ref_hist_mappings_needed.
5204  return mappings_needed
def ref_hist_mappings_needed
def cmsHarvester.run (   self)

Definition at line 5525 of file cmsHarvester.py.

References update.

5526  def run(self):
5527  "Main entry point of the CMS harvester."
5528 
5529  # Start with a positive thought.
5530  exit_code = 0
5531 
5532  try:
5533 
5534  try:
5535 
5536  # Parse all command line options and arguments
5537  self.parse_cmd_line_options()
5538  # and check that they make sense.
5539  self.check_input_status()
5540 
5541  # Check if CMSSW is setup.
5542  self.check_cmssw()
5543 
5544  # Check if DBS is setup,
5545  self.check_dbs()
5546  # and if all is fine setup the Python side.
5547  self.setup_dbs()
5548 
5549  # Fill our dictionary with all the required info we
5550  # need to understand harvesting jobs. This needs to be
5551  # done after the CMSSW version is known.
5552  self.setup_harvesting_info()
5553 
5554  # Obtain list of dataset names to consider
5555  self.build_dataset_use_list()
5556  # and the list of dataset names to ignore.
5557  self.build_dataset_ignore_list()
5558 
5559  # The same for the runs lists (if specified).
5560  self.build_runs_use_list()
5561  self.build_runs_ignore_list()
5562 
5563  # Process the list of datasets to ignore and fold that
5564  # into the list of datasets to consider.
5565  # NOTE: The run-based selection is done later since
5566  # right now we don't know yet which runs a dataset
5567  # contains.
5568  self.process_dataset_ignore_list()
5569 
5570  # Obtain all required information on the datasets,
5571  # like run numbers and GlobalTags.
5572  self.build_datasets_information()
5573 
5574  if self.use_ref_hists and \
5575  self.ref_hist_mappings_needed():
5576  # Load the dataset name to reference histogram
5577  # name mappings from file.
5578  self.load_ref_hist_mappings()
5579  # Now make sure that for all datasets we want to
5580  # process there is a reference defined. Otherwise
5581  # just bomb out before wasting any more time.
5582  self.check_ref_hist_mappings()
5583  else:
5584  self.logger.info("No need to load reference " \
5585  "histogram mappings file")
5586 
# OBSOLETE OBSOLETE OBSOLETE
def cmsHarvester.setup_dbs (   self)

Now we try to do a very simple DBS search. If that works
instead of giving us the `Unsupported API call' crap, we
should be good to go.

NOTE: Not ideal, I know, but it reduces the amount of
complaints I get...

    cmd = "dbs search --query=\"find dataset where dataset = impossible\""
    (status, output) = commands.getstatusoutput(cmd)
    pdb.set_trace()
    if status != 0 or \
       output.lower().find("unsupported api call") > -1:
        self.logger.fatal("It seems DBS is not setup...")
        self.logger.fatal(" %s returns crap:" % cmd)
        for line in output.split("\n"):
            self.logger.fatal(" %s" % line)
        raise Error("ERROR: DBS needs to be setup first!")

Setup the Python side of DBS.

For more information see the DBS Python API documentation:
https://twiki.cern.ch/twiki/bin/view/CMS/DBSApiDocumentation

Definition at line 2393 of file cmsHarvester.py.

2394  def setup_dbs(self):
2395  """Setup the Python side of DBS.
2396 
2397  For more information see the DBS Python API documentation:
2398  https://twiki.cern.ch/twiki/bin/view/CMS/DBSApiDocumentation
2399 
2400  """
2401 
2402  try:
2403  args={}
2404  args["url"]= "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/" \
2405  "servlet/DBSServlet"
2406  api = DbsApi(args)
2407  self.dbs_api = api
2408 
2409  except DBSAPI.dbsApiException.DbsApiException as ex:
2410  self.logger.fatal("Caught DBS API exception %s: %s " % \
2411  (ex.getClassName(), ex.getErrorMessage()))
2412  if ex.getErrorCode() not in (None, ""):
2413  logger.debug("DBS exception error code: ", ex.getErrorCode())
2414  raise
2415 
2416  # End of setup_dbs.
def setup_dbs
Now we try to do a very simple DBS search.
def cmsHarvester.setup_harvesting_info (   self)
Fill our dictionary with all info needed to understand
harvesting.

This depends on the CMSSW version since at some point the
names and sequences were modified.

NOTE: There is no way (at least not that I could come up with)
to code this in a neat generic way that can be read both by
this method and by option_handler_list_types(). Please try
hard to keep these two methods in sync!

Definition at line 1206 of file cmsHarvester.py.

1207  def setup_harvesting_info(self):
1208  """Fill our dictionary with all info needed to understand
1209  harvesting.
1210 
1211  This depends on the CMSSW version since at some point the
1212  names and sequences were modified.
1213 
1214  NOTE: There is no way (at least not that I could come up with)
1215  to code this in a neat generic way that can be read both by
1216  this method and by option_handler_list_types(). Please try
1217  hard to keep these two methods in sync!
1218 
1219  """
1220 
1221  assert not self.cmssw_version is None, \
1222  "ERROR setup_harvesting() requires " \
1223  "self.cmssw_version to be set!!!"
1224 
1225  harvesting_info = {}
1226 
1227  # This is the version-independent part.
1228  harvesting_info["DQMOffline"] = {}
1229  harvesting_info["DQMOffline"]["beamspot"] = None
1230  harvesting_info["DQMOffline"]["eventcontent"] = None
1231  harvesting_info["DQMOffline"]["harvesting"] = "AtRunEnd"
1232 
1233  harvesting_info["RelVal"] = {}
1234  harvesting_info["RelVal"]["beamspot"] = None
1235  harvesting_info["RelVal"]["eventcontent"] = None
1236  harvesting_info["RelVal"]["harvesting"] = "AtRunEnd"
1237 
1238  harvesting_info["RelValFS"] = {}
1239  harvesting_info["RelValFS"]["beamspot"] = None
1240  harvesting_info["RelValFS"]["eventcontent"] = None
1241  harvesting_info["RelValFS"]["harvesting"] = "AtRunEnd"
1242 
1243  harvesting_info["MC"] = {}
1244  harvesting_info["MC"]["beamspot"] = None
1245  harvesting_info["MC"]["eventcontent"] = None
1246  harvesting_info["MC"]["harvesting"] = "AtRunEnd"
1247 
1248  # This is the version-dependent part. And I know, strictly
1249  # speaking it's not necessary to fill in all three types since
1250  # in a single run we'll only use one type anyway. This does
1251  # look more readable, however, and required less thought from
1252  # my side when I put this together.
1253 
1254  # DEBUG DEBUG DEBUG
1255  # Check that we understand our own version naming.
1256  assert self.cmssw_version.startswith("CMSSW_")
1257  # DEBUG DEBUG DEBUG end
1258 
1259  version = self.cmssw_version[6:]
1260 
1261  #----------
1262 
1263  # RelVal
1264  step_string = None
1265  if version < "3_3_0":
1266  step_string = "validationHarvesting"
1267  elif version in ["3_3_0_pre1", "3_3_0_pre2",
1268  "3_3_0_pre3", "3_3_0_pre4",
1269  "3_3_0_pre6", "3_4_0_pre1"]:
1270  step_string = "validationHarvesting"
1271  else:
1272  step_string = "validationHarvesting+dqmHarvesting"
1273 
1274  harvesting_info["RelVal"]["step_string"] = step_string
1275 
1276  # DEBUG DEBUG DEBUG
1277  # Let's make sure we found something.
1278  assert not step_string is None, \
1279  "ERROR Could not decide a RelVal harvesting sequence " \
1280  "for CMSSW version %s" % self.cmssw_version
1281  # DEBUG DEBUG DEBUG end
1282 
1283  #----------
1284 
1285  # RelValFS
1286  step_string = "validationHarvestingFS"
1287 
1288  harvesting_info["RelValFS"]["step_string"] = step_string
1289 
1290  #----------
1291 
1292  # MC
1293  step_string = "validationprodHarvesting"
1294 
1295  harvesting_info["MC"]["step_string"] = step_string
1296 
1297  # DEBUG DEBUG DEBUG
1298  # Let's make sure we found something.
1299  assert not step_string is None, \
1300  "ERROR Could not decide a MC harvesting " \
1301  "sequence for CMSSW version %s" % self.cmssw_version
1302  # DEBUG DEBUG DEBUG end
1303 
1304  #----------
1305 
1306  # DQMOffline
1307  step_string = "dqmHarvesting"
1308 
1309  harvesting_info["DQMOffline"]["step_string"] = step_string
1310 
1311  #----------
1313  self.harvesting_info = harvesting_info
1314 
1315  self.logger.info("Based on the CMSSW version (%s) " \
1316  "I decided to use the `HARVESTING:%s' " \
1317  "sequence for %s harvesting" % \
1318  (self.cmssw_version,
1319  self.harvesting_info[self.harvesting_type]["step_string"],
1320  self.harvesting_type))
1321 
1322  # End of setup_harvesting_info.
def setup_harvesting_info
def cmsHarvester.show_exit_message (   self)
Tell the user what to do now, after this part is done.

This should provide the user with some (preferably
copy-pasteable) instructions on what to do now with the setups
and files that have been created.

Definition at line 5472 of file cmsHarvester.py.

5473  def show_exit_message(self):
5474  """Tell the user what to do now, after this part is done.
5475 
5476  This should provide the user with some (preferably
5477  copy-pasteable) instructions on what to do now with the setups
5478  and files that have been created.
5479 
5480  """
5481 
5482  # TODO TODO TODO
5483  # This could be improved a bit.
5484  # TODO TODO TODO end
5485 
5486  sep_line = "-" * 60
5487 
5488  self.logger.info("")
5489  self.logger.info(sep_line)
5490  self.logger.info(" Configuration files have been created.")
5491  self.logger.info(" From here on please follow the usual CRAB instructions.")
5492  self.logger.info(" Quick copy-paste instructions are shown below.")
5493  self.logger.info(sep_line)
5494 
5495  self.logger.info("")
5496  self.logger.info(" Create all CRAB jobs:")
5497  self.logger.info(" multicrab -create")
5498  self.logger.info("")
5499  self.logger.info(" Submit all CRAB jobs:")
5500  self.logger.info(" multicrab -submit")
5501  self.logger.info("")
5502  self.logger.info(" Check CRAB status:")
5503  self.logger.info(" multicrab -status")
5504  self.logger.info("")
5505 
5506  self.logger.info("")
5507  self.logger.info(" For more information please see the CMS Twiki:")
5508  self.logger.info(" %s" % twiki_url)
5509  self.logger.info(sep_line)
5510 
5511  # If there were any jobs for which we could not find a
5512  # matching site show a warning message about that.
5513  if not self.all_sites_found:
5514  self.logger.warning(" For some of the jobs no matching " \
5515  "site could be found")
5516  self.logger.warning(" --> please scan your multicrab.cfg" \
5517  "for occurrences of `%s'." % \
5518  self.no_matching_site_found_str)
5519  self.logger.warning(" You will have to fix those " \
5520  "by hand, sorry.")
5521 
5522  # End of show_exit_message.
def cmsHarvester.singlify_datasets (   self)
Remove all but the largest part of all datasets.

This allows us to harvest at least part of these datasets
using single-step harvesting until the two-step approach
works.

Definition at line 3743 of file cmsHarvester.py.

References mps_monitormerge.items, bookConverter.max, and makeHLTPrescaleTable.values.

3744  def singlify_datasets(self):
3745  """Remove all but the largest part of all datasets.
3746 
3747  This allows us to harvest at least part of these datasets
3748  using single-step harvesting until the two-step approach
3749  works.
3750 
3751  """
3752 
3753  # DEBUG DEBUG DEBUG
3754  assert self.harvesting_mode == "single-step-allow-partial"
3755  # DEBUG DEBUG DEBUG end
3756 
3757  for dataset_name in self.datasets_to_use:
3758  for run_number in self.datasets_information[dataset_name]["runs"]:
3759  max_events = max(self.datasets_information[dataset_name]["sites"][run_number].values())
3760  sites_with_max_events = [i[0] for i in self.datasets_information[dataset_name]["sites"][run_number].items() if i[1] == max_events]
3761  self.logger.warning("Singlifying dataset `%s', " \
3762  "run %d" % \
3763  (dataset_name, run_number))
3764  cmssw_version = self.datasets_information[dataset_name] \
3765  ["cmssw_version"]
3766  selected_site = self.pick_a_site(sites_with_max_events,
3767  cmssw_version)
3768 
3769  # Let's tell the user that we're manhandling this dataset.
3770  nevents_old = self.datasets_information[dataset_name]["num_events"][run_number]
3771  self.logger.warning(" --> " \
3772  "only harvesting partial statistics: " \
3773  "%d out of %d events (5.1%f%%) " \
3774  "at site `%s'" % \
3775  (max_events,
3776  nevents_old,
3777  100. * max_events / nevents_old,
3778  selected_site))
3779  self.logger.warning("!!! Please note that the number of " \
3780  "events in the output path name will " \
3781  "NOT reflect the actual statistics in " \
3782  "the harvested results !!!")
3783 
3784  # We found the site with the highest statistics and
3785  # the corresponding number of events. (CRAB gets upset
3786  # if we ask for more events than there are at a given
3787  # site.) Now update this information in our main
3788  # datasets_information variable.
3789  self.datasets_information[dataset_name]["sites"][run_number] = {selected_site: max_events}
3790  self.datasets_information[dataset_name]["num_events"][run_number] = max_events
3791  #self.datasets_information[dataset_name]["sites"][run_number] = [selected_site]
3792 
3793  # End of singlify_datasets.
def cmsHarvester.write_crab_config (   self)

def create_harvesting_config(self, dataset_name):
    """Create the Python harvesting configuration for a given job.

    NOTE: The reason to have a single harvesting configuration per
    sample is to be able to specify the GlobalTag corresponding to
    each sample. Since it has been decided that (apart from the
    prompt reco) datasets cannot contain runs with different
    GlobalTags, we don't need a harvesting config per run.

    NOTE: This is the place where we distinguish between
    single-step and two-step harvesting modes (at least for the
    Python job configuration).

    """

    ###

    if self.harvesting_mode == "single-step":
        config_contents = self.create_harvesting_config_single_step(dataset_name)
    elif self.harvesting_mode == "two-step":
        config_contents = self.create_harvesting_config_two_step(dataset_name)
    else:
        # Impossible harvesting mode, we should never get here.
        assert False, "ERROR: unknown harvesting mode `%s'" % \
               self.harvesting_mode

    ###

    # End of create_harvesting_config.
    return config_contents

Write a CRAB job configuration Python file.

Definition at line 5049 of file cmsHarvester.py.

References mergeVDriftHistosByStation.file.

5050  def write_crab_config(self):
5051  """Write a CRAB job configuration Python file.
5052 
5053  """
5054 
5055  self.logger.info("Writing CRAB configuration...")
5056 
5057  file_name_base = "crab.cfg"
5058 
5059  # Create CRAB configuration.
5060  crab_contents = self.create_crab_config()
5061 
5062  # Write configuration to file.
5063  crab_file_name = file_name_base
5064  try:
5065  crab_file = file(crab_file_name, "w")
5066  crab_file.write(crab_contents)
5067  crab_file.close()
5068  except IOError:
5069  self.logger.fatal("Could not write " \
5070  "CRAB configuration to file `%s'" % \
5071  crab_file_name)
5072  raise Error("ERROR: Could not write to file `%s'!" % \
5073  crab_file_name)
5074 
5075  # End of write_crab_config.
Helper class: Error exception.
def write_crab_config
def create_harvesting_config(self, dataset_name): """Create the Python harvesting configuration for a...
def cmsHarvester.write_harvesting_config (   self,
  dataset_name 
)
Write a harvesting job configuration Python file.

NOTE: This knows nothing about single-step or two-step
harvesting. That's all taken care of by
create_harvesting_config.

Definition at line 5107 of file cmsHarvester.py.

References create_harvesting_config_file_name(), and mergeVDriftHistosByStation.file.

5108  def write_harvesting_config(self, dataset_name):
5109  """Write a harvesting job configuration Python file.
5110 
5111  NOTE: This knows nothing about single-step or two-step
5112  harvesting. That's all taken care of by
5113  create_harvesting_config.
5114 
5115  """
5116 
5117  self.logger.debug("Writing harvesting configuration for `%s'..." % \
5118  dataset_name)
5119 
5120  # Create Python configuration.
5121  config_contents = self.create_harvesting_config(dataset_name)
5122 
5123  # Write configuration to file.
5124  config_file_name = self. \
5126  try:
5127  config_file = file(config_file_name, "w")
5128  config_file.write(config_contents)
5129  config_file.close()
5130  except IOError:
5131  self.logger.fatal("Could not write " \
5132  "harvesting configuration to file `%s'" % \
5133  config_file_name)
5134  raise Error("ERROR: Could not write to file `%s'!" % \
5135  config_file_name)
5136 
5137  # End of write_harvesting_config.
Helper class: Error exception.
def write_harvesting_config
def create_harvesting_config_file_name
def cmsHarvester.write_me_extraction_config (   self,
  dataset_name 
)
Write an ME-extraction configuration Python file.

This `ME-extraction' (ME = Monitoring Element) is the first
step of the two-step harvesting.

Definition at line 5140 of file cmsHarvester.py.

References create_me_summary_config_file_name(), and mergeVDriftHistosByStation.file.

5141  def write_me_extraction_config(self, dataset_name):
5142  """Write an ME-extraction configuration Python file.
5143 
5144  This `ME-extraction' (ME = Monitoring Element) is the first
5145  step of the two-step harvesting.
5146 
5147  """
5148 
5149  self.logger.debug("Writing ME-extraction configuration for `%s'..." % \
5150  dataset_name)
5151 
5152  # Create Python configuration.
5153  config_contents = self.create_me_extraction_config(dataset_name)
5154 
5155  # Write configuration to file.
5156  config_file_name = self. \
5158  try:
5159  config_file = file(config_file_name, "w")
5160  config_file.write(config_contents)
5161  config_file.close()
5162  except IOError:
5163  self.logger.fatal("Could not write " \
5164  "ME-extraction configuration to file `%s'" % \
5165  config_file_name)
5166  raise Error("ERROR: Could not write to file `%s'!" % \
5167  config_file_name)
5168 
5169  # End of write_me_extraction_config.
def write_me_extraction_config
Helper class: Error exception.
def create_me_summary_config_file_name
def cmsHarvester.write_multicrab_config (   self)
Write a multi-CRAB job configuration Python file.

Definition at line 5078 of file cmsHarvester.py.

References mergeVDriftHistosByStation.file.

5079  def write_multicrab_config(self):
5080  """Write a multi-CRAB job configuration Python file.
5081 
5082  """
5083 
5084  self.logger.info("Writing multi-CRAB configuration...")
5085 
5086  file_name_base = "multicrab.cfg"
5087 
5088  # Create multi-CRAB configuration.
5089  multicrab_contents = self.create_multicrab_config()
5090 
5091  # Write configuration to file.
5092  multicrab_file_name = file_name_base
5093  try:
5094  multicrab_file = file(multicrab_file_name, "w")
5095  multicrab_file.write(multicrab_contents)
5096  multicrab_file.close()
5097  except IOError:
5098  self.logger.fatal("Could not write " \
5099  "multi-CRAB configuration to file `%s'" % \
5100  multicrab_file_name)
5101  raise Error("ERROR: Could not write to file `%s'!" % \
5102  multicrab_file_name)
5103 
5104  # End of write_multicrab_config.
def write_multicrab_config
Helper class: Error exception.

Variable Documentation

string cmsHarvester.__author__ = "Jeroen Hegeman (jeroen.hegeman@cern.ch),"

Definition at line 38 of file cmsHarvester.py.

string cmsHarvester.__version__ = "3.8.2p1"

File : cmsHarvest.py
Authors : Jeroen Hegeman (jeroen.hegeman@cern.ch), Niklas Pietsch (niklas.pietsch@desy.de), Francesco Costanza (francesco.costanza@desy.de)
Last change: 20100308.

Purpose : Main program to run all kinds of harvesting. For more information please refer to the CMS Twiki url mentioned just below here.

Definition at line 37 of file cmsHarvester.py.

string cmsHarvester.action = "callback"

Definition at line 2056 of file cmsHarvester.py.

tuple cmsHarvester.all_file_names = files_info[run_number]

Definition at line 3232 of file cmsHarvester.py.

list cmsHarvester.all_t1
Initial value:
1 = [
2  "srm-cms.cern.ch",
3  "ccsrm.in2p3.fr",
4  "cmssrm-fzk.gridka.de",
5  "cmssrm.fnal.gov",
6  "gridka-dCache.fzk.de",
7  "srm-cms.gridpp.rl.ac.uk",
8  "srm.grid.sinica.edu.tw",
9  "srm2.grid.sinica.edu.tw",
10  "srmcms.pic.es",
11  "storm-fe-cms.cr.cnaf.infn.it"
12  ]

Definition at line 1723 of file cmsHarvester.py.

cmsHarvester.caf_access

Definition at line 1106 of file cmsHarvester.py.

cmsHarvester.callback = self.option_handler_input_Jsonrunfile,

Definition at line 2057 of file cmsHarvester.py.

Referenced by CaloDualConeSelector< HBHERecHit >.selectCallback(), CaloConeSelector< T >.selectCallback(), and edm::ESProducer.setWhatProduced().

cmsHarvester.castor_base_dir

Definition at line 1075 of file cmsHarvester.py.

tuple cmsHarvester.castor_dir = self.datasets_information[dataset_name]

CRAB

GRID

USER

Definition at line 4357 of file cmsHarvester.py.

tuple cmsHarvester.castor_path_common = self.create_castor_path_name_common(dataset_name)

DEBUG DEBUG DEBUG

This is probably only useful to make sure we don't muck
things up, right?

Figure out across how many sites this sample has been spread.

    if num_sites == 1:
        self.logger.info(" sample is contained at a single site")
    else:
        self.logger.info(" sample is spread across %d sites" % \
                         num_sites)
    if num_sites < 1:
        # NOTE: This should not happen with any valid dataset.
        self.logger.warning(" --> skipping dataset which is not " \
                            "hosted anywhere")

DEBUG DEBUG DEBUG end

Definition at line 5456 of file cmsHarvester.py.

tuple cmsHarvester.castor_paths
Initial value:
1 = dict(list(zip(runs,
2  [self.create_castor_path_name_special(dataset_name, i, castor_path_common) \
3  for i in runs])))
How EventSelector::AcceptEvent() decides whether to accept an event for output otherwise it is excluding the probing of A single or multiple positive and the trigger will pass if any such matching triggers are PASS or EXCEPTION [A criterion that matches no triggers at all is detected and causes a throw.] A single negative with an expectation of appropriate bit checking in the decision and the trigger will pass if any such matching triggers are FAIL or EXCEPTION A wildcarded negative criterion that matches more than one trigger in the trigger list ("!*", "!HLTx*" if it matches 2 triggers or more) will accept the event if all the matching triggers are FAIL. It will reject the event if any of the triggers are PASS or EXCEPTION (this matches the behavior of "!*" before the partial wildcard feature was incorporated). Triggers which are in the READY state are completely ignored. (READY should never be returned since the trigger paths have been run

Definition at line 5460 of file cmsHarvester.py.

cmsHarvester.castor_prefix = self.castor_prefix

Definition at line 4352 of file cmsHarvester.py.

string cmsHarvester.cmd = "rfstat %s"

    self.logger.debug("Path is now `%s'" % \
                      path)

Definition at line 1632 of file cmsHarvester.py.

Referenced by WatcherStreamFileReader.closeFile(), RunManager.initG4(), and RunManagerMT.initG4().

cmsHarvester.cmssw_version = self.datasets_information[dataset_name]

Definition at line 4395 of file cmsHarvester.py.

list cmsHarvester.complete_sites
Initial value:
1 = [site for site in sites \
2  if site in sites_with_complete_copies]

    site_names_ref = set(files_info[run_number].values()[0][1])
    for site_names_tmp in files_info[run_number].values()[1:]:
        if set(site_names_tmp[1]) != site_names_ref:
            mirrored = False
            break

Definition at line 3277 of file cmsHarvester.py.

tuple cmsHarvester.config_builder = ConfigBuilder(config_options, with_input=True)

Definition at line 4795 of file cmsHarvester.py.

string cmsHarvester.config_contents = config_builder.pythonCfgCode

In case this file is the second step (the real harvesting
step) of the two-step harvesting we have to tell it to use
our local files.

    if self.harvesting_mode == "two-step":
        castor_dir = self.datasets_information[dataset_name] \
                     ["castor_path"][run]
        customisations.append("")
        customisations.append("# This is the second step (the real")
        customisations.append("# harvesting step) of a two-step")
        customisations.append("# harvesting procedure.")

        # BUG BUG BUG
        # To be removed in production version.
        customisations.append("import pdb")
        # BUG BUG BUG end

        customisations.append("import commands")
        customisations.append("import os")
        customisations.append("castor_dir = \"%s\"" % castor_dir)
        customisations.append("cmd = \"rfdir %s\" % castor_dir")
        customisations.append("(status, output) = commands.getstatusoutput(cmd)")
        customisations.append("if status != 0:")
        customisations.append("    print \"ERROR\"")
        customisations.append("    raise Exception, \"ERROR\"")
        customisations.append("file_names = [os.path.join(\"rfio:%s\" % path, i) for i in output.split() if i.startswith(\"EDM_summary\") and i.endswith(\".root\")]")
        #customisations.append("pdb.set_trace()")
        customisations.append("process.source.fileNames = cms.untracked.vstring(*file_names)")
        customisations.append("")

Definition at line 4800 of file cmsHarvester.py.

cmsHarvester.config_file_name = self.create_me_summary_config_file_name(dataset_name)

Only add the alarming piece to the file name if this is
a spread-out dataset.

    pdb.set_trace()
    if self.datasets_information[dataset_name] \
           ["mirrored"][run_number] == False:
        config_file_name = config_file_name.replace(".py", "_partial.py")

Definition at line 4087 of file cmsHarvester.py.

list cmsHarvester.connect_name = self.frontier_connection_name["globaltag"]

Definition at line 4834 of file cmsHarvester.py.

dictionary cmsHarvester.country_codes
Initial value:
1 = {
2  "CAF" : "caf.cern.ch",
3  "CH" : "srm-cms.cern.ch",
4  "FR" : "ccsrm.in2p3.fr",
5  "DE" : "cmssrm-fzk.gridka.de",
6  "GOV" : "cmssrm.fnal.gov",
7  "DE2" : "gridka-dCache.fzk.de",
8  "UK" : "srm-cms.gridpp.rl.ac.uk",
9  "TW" : "srm.grid.sinica.edu.tw",
10  "TW2" : "srm2.grid.sinica.edu.tw",
11  "ES" : "srmcms.pic.es",
12  "IT" : "storm-fe-cms.cr.cnaf.infn.it"
13  }

Definition at line 1736 of file cmsHarvester.py.

string cmsHarvester.crab_config = "\n"

CRAB

GRID

USER

CMSSW

CAF

Definition at line 4307 of file cmsHarvester.py.

cmsHarvester.crab_submission

Definition at line 1133 of file cmsHarvester.py.

list cmsHarvester.customisations = [""]

Definition at line 4830 of file cmsHarvester.py.

tuple cmsHarvester.dataset_name_escaped = self.escape_dataset_name(dataset_name)

Definition at line 4351 of file cmsHarvester.py.

tuple cmsHarvester.dataset_names = self.datasets_to_use.keys()

Definition at line 4346 of file cmsHarvester.py.

cmsHarvester.dataset_names_after_checks = dataset_names_after_checks_tmp

Definition at line 4032 of file cmsHarvester.py.

tuple cmsHarvester.dataset_names_after_checks_tmp = copy.deepcopy(dataset_names_after_checks)

Definition at line 4025 of file cmsHarvester.py.

cmsHarvester.datasets_information

Definition at line 5343 of file cmsHarvester.py.

cmsHarvester.datasets_to_ignore

Definition at line 3459 of file cmsHarvester.py.

cmsHarvester.datasets_to_use

Definition at line 3433 of file cmsHarvester.py.

list cmsHarvester.datatype = self.datasets_information[dataset_name]

Definition at line 4784 of file cmsHarvester.py.

cmsHarvester.dbs_api

Definition at line 2406 of file cmsHarvester.py.

tuple cmsHarvester.empty_runs = dict(tmp)

Definition at line 4009 of file cmsHarvester.py.

tuple cmsHarvester.es_prefer_snippet = self.create_es_prefer_snippet(dataset_name)

Definition at line 4883 of file cmsHarvester.py.

int cmsHarvester.exit_code = 1

Definition at line 5693 of file cmsHarvester.py.

list cmsHarvester.file_name = handler.results["file.name"]

Definition at line 3176 of file cmsHarvester.py.

Referenced by HcalLutManager.create_lut_loader(), SiStripHistoPlotter.createStaticPlot(), DTTPGLutFile.open(), L1TriggerLutFile.open(), TEcnaRead.ReadAverageHighFrequencyNoise(), TEcnaRead.ReadAverageLowFrequencyNoise(), TEcnaRead.ReadAverageMeanCorrelationsBetweenSamples(), TEcnaRead.ReadAveragePedestals(), TEcnaRead.ReadAverageSigmaOfCorrelationsBetweenSamples(), TEcnaRead.ReadAverageTotalNoise(), TEcnaRead.ReadCorrelationsBetweenSamples(), TEcnaRead.ReadCovariancesBetweenSamples(), TEcnaRead.ReadHighFrequencyCorrelationsBetweenChannels(), TEcnaRead.ReadHighFrequencyCovariancesBetweenChannels(), TEcnaRead.ReadHighFrequencyMeanCorrelationsBetweenStins(), TEcnaRead.ReadHighFrequencyNoise(), TEcnaRead.ReadLowFrequencyCorrelationsBetweenChannels(), TEcnaRead.ReadLowFrequencyCovariancesBetweenChannels(), TEcnaRead.ReadLowFrequencyMeanCorrelationsBetweenStins(), TEcnaRead.ReadLowFrequencyNoise(), TEcnaRead.ReadMeanCorrelationsBetweenSamples(), TEcnaRead.ReadNumberOfEventsForSamples(), TEcnaRead.ReadPedestals(), TEcnaRead.ReadRelevantCorrelationsBetweenSamples(), TEcnaRead.ReadRootFileHeader(), TEcnaRead.ReadSampleAdcValues(), TEcnaRead.ReadSampleAdcValuesSameFile(), TEcnaRead.ReadSampleMeans(), TEcnaRead.ReadSampleSigmas(), TEcnaRead.ReadSigmaOfCorrelationsBetweenSamples(), TEcnaRead.ReadStinNumbers(), TEcnaRead.ReadTotalNoise(), and TEcnaRun.WriteRootFile().

tuple cmsHarvester.files_at_site
Initial value:
1 = [i for (i, (j, k)) \
2  in files_info[run_number].items() \
3  if site_name in k]

Definition at line 3236 of file cmsHarvester.py.

dictionary cmsHarvester.files_info = {}

Definition at line 3162 of file cmsHarvester.py.

list cmsHarvester.files_without_sites
Initial value:
1 = [i for (i, j) in \
2  files_info[run_number].items() \
3  if len(j[1]) < 1]

Definition at line 3202 of file cmsHarvester.py.

cmsHarvester.globaltag = self.datasets_information[dataset_name]

Definition at line 4787 of file cmsHarvester.py.

cmsHarvester.harvesting_info

Definition at line 1312 of file cmsHarvester.py.

cmsHarvester.harvesting_mode

Definition at line 2216 of file cmsHarvester.py.

cmsHarvester.harvesting_type

Definition at line 3860 of file cmsHarvester.py.

string cmsHarvester.help = "Jsonfile containing dictionary of run/lumisections pairs. "

Definition at line 2054 of file cmsHarvester.py.

string cmsHarvester.index = "site_%02d"

Definition at line 4379 of file cmsHarvester.py.

Referenced by TrackingTruthAccumulator.accumulateEvent(), edm::UnscheduledCallProducer::WorkerLookup.add(), edm::Principal.addAliasedProduct(), ora::InputRelationalData.addBlobData(), helper::ClusterStorer.addCluster(), ora::InputRelationalData.addData(), edm::IndexIntoFile.addEntry(), ora::InputRelationalData.addId(), pat::PackedTriggerPrescales.addPrescaledTrigger(), edm::Principal.addProduct_(), DetGroupMerger.addSameLevel(), edm::DataMixingTrackingParticleWorker.addTrackingParticlePileups(), edm::DataMixingTrackingParticleWorker.addTrackingParticleSignals(), PhysicsTools::TreeReader.addTypeMulti(), PhysicsTools::TreeReader.addTypeSingle(), HcalCovarianceMatrices.addValues(), HcalCholeskyMatrices.addValues(), CastorCondObjectContainer< Item >.addValues(), HcalCondObjectContainer< Item >.addValues(), ora::InputRelationalData.addWhereId(), ora::SelectOperation.addWhereId(), edm::Principal.adjustIndexesAfterProductRegistryAddition(), AlignmentMonitorMuonResiduals.afterAlignment(), reco::Conversion.algoByName(), HcalGeometry.alignmentBarEndForIndexLocal(), HcalGeometry.alignmentOuterIndexLocal(), HcalGeometry.alignmentTransformIndexLocal(), EcalEndcapGeometry.alignmentTransformIndexLocal(), EcalBarrelGeometry.alignmentTransformIndexLocal(), JetPlotsExample< Jet >.analyze(), JetAnaPythia< Jet >.analyze(), evf::ExceptionGenerator.analyze(), CaloTowerAnalyzer.analyze(), TrackCategoriesAnalyzer.analyze(), TrackingParticleCategoriesAnalyzer.analyze(), EwkMuDQM.analyze(), EwkElecDQM.analyze(), L1uGTTreeProducer.analyze(), PixelVTXMonitor.analyze(), HcalDetDiagNoiseMonitor.analyze(), SimplePhotonAnalyzer.analyze(), VertexHistoryAnalyzer.analyze(), EwkDQM.analyze(), SVTagInfoValidationAnalyzer.analyze(), TrackHistoryAnalyzer.analyze(), recoBSVTagInfoValidationAnalyzer.analyze(), MuonAlignmentAnalyzer.analyze(), BTVHLTOfflineSource.analyze(), GeneralHLTOffline.analyze(), HLTrigReport.analyze(), FourVectorHLT.analyze(), HcalRecHitsAnalyzer.analyze(), 
HcalRecHitsValidation.analyze(), HeavyFlavorValidation.analyze(), EcalLaserAnalyzerYousi.analyze(), HcalRaddamMuon.analyze(), DetIdSelectorTest.analyze(), OverlapProblemTPAnalyzer.analyze(), PhotonValidator.analyze(), HcalHBHEMuonAnalyzer.analyze(), HLTriggerJSONMonitoring.analyze(), GenPurposeSkimmerData.analyze(), DiJetAnalyzer.analyze(), TriggerJSONMonitoring.analyze(), GammaJetAnalysis.analyze(), TrackerDpgAnalysis.analyze(), GenHFHadronMatcher.analyzeMothers(), CSCConditions.anodeBXoffset(), TKinFitter.applyDeltaA(), TKinFitter.applyDeltaY(), gen::PhotosInterface.applyToVertex(), RPCConeBuilder.areConnected(), HGCalDDDConstants.assignCell(), TShapeAnalysis.assignChannel(), reco::PFDisplacedVertexCandidate.associatedElements(), reco::PFBlock.associatedElements(), reco::btag::TrackData.associatedToVertex(), TrackerHitAssociator.associateFastRecHit(), VEcalCalibBlock.at(), attrEscape(), BackgroundHandler.BackgroundHandler(), TkStripMeasurementDet.badStripBlocks(), TkStripMeasurementDet.badStripCuts(), HcalTBTiming.BeamCoincidenceHits(), RPCSeedPattern.BestRefRecHit(), HcalTBTiming.BH1Hits(), HcalTBTiming.BH2Hits(), HcalTBTiming.BH3Hits(), HcalTBTiming.BH4Hits(), RPCRecHitValid.bookHistograms(), Phase2OTBarrelLayerBuilder.build(), PixelBarrelLayerBuilder.build(), TOBLayerBuilder.build(), FWCaloRecHitDigitSetProxyBuilder.build(), FWPRCaloTowerProxyBuilder.build(), FWPCaloHitProxyBuilder.build(), FWPFEcalRecHitRPProxyBuilder.build(), FWSimpleProxyBuilder.build(), FWPFEcalRecHitLegoProxyBuilder.build(), MuonSeedBuilder.build(), gen::AMPTHadronizer.build_ampt(), gen::HijingHadronizer.build_hijing(), gen::HydjetHadronizer.build_hyjet(), gen::Hydjet2Hadronizer.build_hyjet2(), SiStripFedCabling.buildFedCabling(), XMLConfigReader.buildGP(), pos::PixelNameTranslation.buildROCsFromFEDChannel(), FWSimpleProxyBuilder.buildViewType(), evf::EvFDaqDirector.bumpFile(), LocalCacheFile.cache(), Averages.calc(), CSCTFPtLUT.calcPt(), MedianCommonModeCalculator.calculateCommonMode(), 
dqmTnP::AbstractFitter.calculateEfficiency(), PileUpSubtractor.calculateOrphanInput(), npstat::ArrayND< Numeric, StackLen, StackDim >.cdfValue(), HcalDDDGeometry.cellGeomPtr(), HcalGeometry.cellGeomPtr(), FWCollectionSummaryTableManager.cellRenderer(), CSCConditions.chamberTimingCorrection(), PrimaryVertexAssignment.chargedHadronVertex(), PFPileUpAlgo.chargedHadronVertex(), PFIsolationEstimator.chargedHadronVertex(), PFPhotonIsolationCalculator.chargedHadronVertex(), RPCSeedPattern.checkSegmentAlgorithmSpecial(), RPCSeedPattern.checkSimplePattern(), TiXmlHandle.Child(), TiXmlHandle.ChildElement(), CSCConditions.chipCorrection(), FWModelContextMenuHandler.chosenItem(), TrajectorySegmentBuilder.cleanCandidates(), SeedClusterRemover.cleanup(), HLTTrackClusterRemoverNew.cleanup(), HITrackClusterRemover.cleanup(), FWFromSliceSelector.clear(), MuonMillepedeAlgorithm.collect(), ora::MappingRules.columnNameForOID(), GeometricDet.component(), HFShower.compute(), HDShower.compute(), TShapeAnalysis.computeShape(), EcalUncalibRecHitRatioMethodAlgo< C >.computeTime(), EcalTPGParamBuilder.computeWeights(), L1GtVmeWriterCore.condIndex2reg(), PrimitiveConverter.convert(), VirtualJetProducer.copyConstituents(), MuonAlignmentFromReference.correctBField(), L1RCTParameters.correctedTPGSum(), reco::GsfComponent5D.covariance(), edm::WaitingTaskList.createNode(), CSCConditions.crossTalk(), CSCConditions.crosstalkIntercept(), CSCConditions.crosstalkSlope(), L1MuGMTPSB.CSCMuon(), CSCTFSectorProcessor.CSCTFSectorProcessor(), customizeTrackingMonitorSeedNumber.customise_trackMon_IterativeTracking_PHASE1(), customizeTrackingMonitorSeedNumber.customise_trackMon_IterativeTracking_PHASE1PU140(), customizeTrackingMonitorSeedNumber.customise_trackMon_IterativeTracking_PHASE1PU70(), pat::MuonSelector.customSelection_(), pat::ElectronSelector.customSelection_(), HLTConfigData.datasetIndex(), reco::PFDisplacedVertexCandidate.dcaPoint(), evf::FastMonitoringService::Encoding.decode(), 
EcalShapeBase.derivative(), SurveyDet.derivatives(), SiStripLorentzAngleCalibration.derivatives(), SiPixelLorentzAngleCalibration.derivatives(), SiStripBackplaneCalibration.derivatives(), FWEventItem.destroy(), cscdqm::Detector.Detector(), HcalTopology.detId2denseIdCALIB(), HcalTopology.detId2denseIdHT(), TkStripMeasurementDet.detSet(), GenericMVAJetTagComputer.discriminator(), CandidateBoostedDoubleSecondaryVertexComputer.discriminator(), reco::PFBlock.dist(), reco::PFDisplacedVertexCandidate.dist(), CachedTrajectory.distance(), FWHistSliceSelector.doSelect(), FWHFTowerSliceSelector.doSelect(), FWHistSliceSelector.doUnselect(), FWHFTowerSliceSelector.doUnselect(), RPCEfficiencySecond.dqmEndJob(), L1MuGMTPSB.DTBXMuon(), DTROSWordType.DTROSWordType(), JME::JetResolutionObject.dump(), TKStatus.dumpTkDcsStatus(), BeamFitter.dumpTxtFile(), EgammaTowerIsolationNew< NC >.EgammaTowerIsolationNew(), FWModelChangeManager.endChanges(), HcalDetDiagNoiseMonitor.endLuminosityBlock(), EcalEleCalibLooper.endOfLoop(), EcalSimHitsValidProducer.energyInEBMatrix(), EcalSimHitsValidProducer.energyInEEMatrix(), EcalBarrelSimHitsValidation.energyInMatrixEB(), EcalEndcapSimHitsValidation.energyInMatrixEE(), edm::RootTree.entryNumberForIndex(), edm::RefVector< C, T, F >.erase(), edm::RefVectorBase< key_type >.eraseAtIndex(), MuonAlignmentFromReference.eraseNotSelectedResiduals(), HLTBitVariable.eval(), TrackClassifierByProxy< Collection >.evaluate(), VertexClassifierByProxy< reco::SecondaryVertexTagInfoCollection >.evaluate(), L1GtMuonCondition.evaluateCondition(), l1t::MuCondition.evaluateCondition(), L1GtCaloCondition.evaluateCondition(), l1t::CaloCondition.evaluateCondition(), JME::JetResolutionObject.evaluateFormula(), PFPhotonAlgo.EvaluateGCorrMVA(), PFPhotonAlgo.EvaluateResMVA(), ora.existAttribute(), CastorQIEShape.expand(), HcalQIEShape.expand(), ClusterShapeAlgo.fast_AbsZernikeMoment(), EcalClusterToolsT< noZS >.fast_AbsZernikeMoment(), MeasurementDet.fastMeasurements(), 
pos::PixelCalibConfiguration.fedCardsAndChannels(), SiStripFedCabling.fedConnections(), MuonAlignmentFromReference.fiducialCuts(), CmsShowSearchFiles.fileEntryChanged(), VpspScanTask.fill(), PedestalsTask.fill(), ChannelPattern.Fill(), PedsFullNoiseTask.fill(), PhysicsTools::TreeReader::Value.fill(), HMassResolutionVSPart.Fill(), PFClusterShapeAlgo.fill5x5Map(), TrackingMaterialPlotter.fill_gradient(), FWCandidateTowerProxyBuilder.fillCaloData(), FWPFCandidateTowerProxyBuilder.fillCaloData(), FWCaloTowerProxyBuilderBase.fillCaloData(), FWHFTowerProxyBuilderBase.fillCaloData(), MixCollectionValidation.fillCaloHitTime(), reco.fillCovariance(), GenParticleProducer.fillDaughters(), EcalSimHitsValidProducer.fillEBMatrix(), EcalBarrelSimHitsValidation.fillEBMatrix(), EcalSimHitsValidProducer.fillEEMatrix(), EcalEndcapSimHitsValidation.fillEEMatrix(), MixCollectionValidation.fillGenParticleMulti(), ListGroups.fillGradient(), EwkElecTauHistManager.fillHistograms(), EwkMuTauHistManager.fillHistograms(), Py8toJetInputHEPEVT.fillJetAlgoInput(), EcalTrigPrimFunctionalAlgo.fillMap(), JetMETHLTOfflineSource.fillMEforTriggerNTfired(), CmsShowModelPopup.fillModelPopup(), MixCollectionValidation.fillMultiplicity(), reco::Mustache.FillMustacheVar(), HcalDeadCellMonitor.fillNevents_recentdigis(), HcalDeadCellMonitor.fillNevents_recentrechits(), MuonAlignmentFromReference.fillNtuple(), fillPathSummary(), FWTGeoRecoGeometry::Info.fillPoints(), FWRecoGeometryESProducer.fillPoints(), edm::IndexIntoFile.fillRunOrLumiIndexes(), MixCollectionValidation.fillSimHitTime(), HLTRHemisphere.filter(), DYGenFilter.filter(), FWFileEntry.filterEventsWithCustomParser(), ecaldqm::LedTask.filterRunType(), find(), EcalRecHitsValidation.findBarrelMatrix(), CalibratableTest.findCandidatesInDeltaR(), TagInfoMVACategorySelector.findCategory(), HGCalDDDConstants.findCell(), FWColorRow.FindColorIndex(), EcalRecHitsValidation.findEndcapMatrix(), L1GctHardwareJetFinder.findFinalClusters(), impl.findIndex(), 
SiStripProcessedRawDigiProducer.findInput(), TrackingMaterialAnalyser.findLayer(), CompositeTECPetal.findPar(), PFClusterShapeAlgo.findPFRHIndexFromDetId(), CalibratableTest.findPrimarySimParticles(), edm::Principal.findProductByLabel(), FFTJetPFPileupCleaner.findSomeVertexWFakes(), sistrip::MeanAndStdDev.fit(), ora::MappingRules.fkNameForIdentity(), reco::TemplatedSecondaryVertexTagInfo< reco::CandIPTagInfo, reco::VertexCompositePtrCandidate >.flightDirection(), reco::TemplatedSecondaryVertexTagInfo< reco::CandIPTagInfo, reco::VertexCompositePtrCandidate >.flightDistance(), MuonSeedBuilder.foundMatchingSegment(), tauImpactParameter::TrackHelixVertexFitter.freeParName(), npstat::ArrayND< Numeric, StackLen, StackDim >.functorFill(), CSCDBGains.gain(), CSCConditions.gain(), GammaSeries(), CSCConditions.gasGainCorrection(), gen::Hydjet2Hadronizer.generatePartonsAndHadronize(), JetTagComputer::TagInfoHelper.get(), l1t::PhysicsToBitConverter.Get32bitWordLinkEven(), l1t::PhysicsToBitConverter.Get32bitWordLinkOdd(), pat::EventHypothesis.getAs(), TkStripMeasurementDet.getBadStripBlocks(), JetTagComputer::TagInfoHelper.getBase(), SiStripDelay.getBaseDelay(), DTMuonMillepede.getbcsMatrix(), HcalRechitIsoCalculator.getBkgSubHcalRechitIso(), edm::PrincipalGetAdapter.getBranchDescription(), edm::Principal.getByToken(), edm::PrincipalGetAdapter.getByToken_(), Gflash.getCalorimeterNumber(), DTMuonMillepede.getCcsMatrix(), HBHELinearMap.getChannelTriple(), CaloSubdetectorGeometry.getClosestCell(), LMFDefFabric.getColor(), GenericMVAComputerCache.getComputer(), VirtualJetProducer.getConstituents(), npstat::LinInterpolatedTableND< Numeric, Axis >.getCoords(), ECFAdder.getECF(), UCTCTP7RawData.getET(), edm::Principal.getExistingProduct(), UCTCTP7RawData.getFB(), UCTCTP7RawData.getFeatureIndex(), HcalRechitIsoCalculator.getHcalRechitIso(), UCTCTP7RawData.getHFFeatureBits(), SiStripCorrelateBadStripAndNoise.getHisto(), SiStripPlotGain.getHisto(), SiStripCorrelateNoise.getHisto(), 
Fp420AnalysisHistManager.GetHisto(), BscAnalysisHistManager.GetHisto(), Fp420AnalysisHistManager.GetHisto2(), BscAnalysisHistManager.GetHisto2(), SiStripCorrelateBadStripAndNoise.getHistos(), SiStripPlotGain.getHistos(), SiStripCorrelateNoise.getHistos(), EcalBarrelSimHitsValidation.getIdsAroundMax(), EcalEndcapSimHitsValidation.getIdsAroundMax(), UCTCTP7RawData.getIndex(), ecaldqm::MESetMulti.getIndex(), SiStripDelay.getLabelName(), SiStripGain.getLabelName(), UCTCTP7RawData.getLinkStatus(), edm::Principal.getManyByType(), MuonAssociatorByHitsHelper.getMatchedIds(), TKinFitter.getMeasParticle(), ClusterSummary.getModule(), SoftElectronMVAEstimator.GetMVABin(), edm::ProducerSourceBase.getNextItemType(), HcalDbOnline.getObject(), SiStripLorentzAngleCalibration.getParameter(), SiPixelLorentzAngleCalibration.getParameter(), SiStripBackplaneCalibration.getParameter(), SiStripLorentzAngleCalibration.getParameterError(), SiPixelLorentzAngleCalibration.getParameterError(), SiStripBackplaneCalibration.getParameterError(), SiStripBackplaneCalibration.getParameterForDetId(), SiPixelLorentzAngleCalibration.getParameterForDetId(), SiStripLorentzAngleCalibration.getParameterForDetId(), TkModuleGroupSelector.getParameterIndexFromDetId(), DatabasePDG.GetPDGParticleByIndex(), DatabasePDG.GetPDGParticleStatusByIndex(), pat::PackedTriggerPrescales.getPrescaleForIndex(), pat::PackedTriggerPrescales.getPrescaleForName(), edm::Principal.getProductHolder(), edm::Principal.getProductHolderByIndex(), SiStripGain.getRange(), SiStripDelay.getRcdName(), SiStripGain.getRcdName(), UCTCTP7RawData.getRegionSummary(), GBRTree2D.GetResponse(), CaloGeometry.getSubdetectorGeometry(), UCTCTP7RawData.getSummaryIndex(), SiStripGain.getTagNorm(), SiStripDelay.getTagSign(), HcalTestNumberingScheme.getUnitID(), HcalNumberingScheme.getUnitID(), ZdcNumberingScheme.getUnitID(), HGCNumberingScheme.getUnitID(), CastorNumberingScheme.getUnitID(), TKinFitter.getUnmeasParticle(), CaloGeometry.getValidDetIds(), 
HcalCovarianceMatrices.getValues(), CastorCondObjectContainer< Item >.getValues(), HcalCholeskyMatrices.getValues(), HcalCondObjectContainer< Item >.getValues(), CachedTrajectory.getWideTrajectory(), gen::Herwig6Instance.give(), CmsShowNavigator.goToRunEvent(), TIDLayer.groupedCompatibleDetsV(), Phase2OTEndcapLayer.groupedCompatibleDetsV(), reco::GsfComponent5D.GsfComponent5D(), TkStripMeasurementDet.hasAny128StripBad(), CastorCondObjectContainer< Item >.hashed_id(), HcalRecHitsClient.HcalRecHitsEndjob(), HcalRecHitsDQMClient.HcalRecHitsEndjob(), HFShowerFibreBundle.HFShowerFibreBundle(), HFShowerPMT.HFShowerPMT(), TEcnaHistos.HistoPlot(), HLTSummaryFilter.hltFilter(), CmsShowSearchFiles.hyperlinkClicked(), MuonSeedBuilder.IdentifyShowering(), HcalCondObjectContainerBase.indexFor(), edm::InputTag.indexFor(), HcalHPDRBXMap.indexHPD(), triggerExpression::PathReader.init(), TShapeAnalysis.init(), module.init(), edm::EventProcessor.init(), pftools::LinearCalibrator.initEijMatrix(), Thrust.initialAxis(), CSCTFSectorProcessor.initialize(), FWColorManager.initialize(), MuonAlignmentFromReference.initialize(), edm::StreamSchedule.initializeEarlyDelete(), edm::ProductRegistry.initializeLookupTables(), MultiGaussianStateTransform.innerMultiState1D(), SimpleVFATFrameCollection.Insert(), MuonDigiCollection< CSCDetId, CSCCLCTDigi >.insertDigi(), SimpleVFATFrameCollection.InsertEmptyFrame(), edm::RootTree.insertEntryForIndex(), MagneticFieldGrid.interpolateAtPoint(), TkPixelMeasurementDet.isActive(), TkStripMeasurementDet.isActive(), TkPixelMeasurementDet.isEmpty(), TkStripMeasurementDet.isEmpty(), HLTInclusiveVBFSource.isHLTPathAccepted(), JetMETHLTOfflineSource.isHLTPathAccepted(), CSCConditions.isInBadChamber(), TkStripMeasurementDet.isMasked(), btag::Matching< Delta >.isMatched1st(), btag::Matching< Delta >.isMatched2nd(), RPCCosmicSeedrecHitFinder.isouterLayer(), HLTInclusiveVBFSource.isTriggerObjectFound(), JetMETHLTOfflineSource.isTriggerObjectFound(), 
QuadrupletSeedMerger.isValidQuadruplet(), CSCDBGasGainCorrection.item(), CSCDBChipSpeedCorrection.item(), CSCDBGains.item(), CSCDBNoiseMatrix.item(), CSCDBPedestals.item(), CSCDBCrosstalk.item(), CSCChamberTimeCorrections.item(), gen::JetMatchingMadgraph.JetMatchingMadgraph(), gen::JetMatchingMGFastJet.JetMatchingMGFastJet(), L1AcceptBunchCrossing.L1AcceptBunchCrossing(), edm::EDConsumerBase.labelsForToken(), TrackQuality.layer(), LayerTriplets.layers(), npstat::ArrayND< Numeric, StackLen, StackDim >.linearValue(), npstat::ArrayND< Numeric, StackLen, StackDim >.linearValueAt(), CSCDBCrosstalk.linter(), python.diff_provenance.difference.list_diff(), python.diffProv.difference.list_diff(), dbUtil.dbUtil.listIndex(), CaloTPGTranscoderULUT.loadHCALCompress(), fftjetcms::FFTJetInterface.loadInputCollection(), HGCalDDDConstants.locateCell(), QualityCutsAnalyzer.LoopOverJetTracksAssociation(), CSCDBCrosstalk.lslope(), HcalTBTiming.M1Hits(), HcalTBTiming.M2Hits(), HcalTBTiming.M3Hits(), main(), L1GctHardwareJetFinder.makeProtoJet(), edm.makeRefTo(), edmNew.makeRefTo(), edm.makeRefToDetSetRefVector(), TemplatedSecondaryVertexProducer< IPTI, VTX >.markUsedTracks(), edm::ContainerMask< T >.mask(), pf2pat::TopProjectorAlgo< Top, Bottom >.maskAncestors(), HGCalDDDConstants.maxCells(), HGCalDDDConstants.maxRows(), ecaldqm::MESetMulti.MESetMulti(), FWProxyBuilderBase.modelChanges(), HLTConfigData.moduleIndex(), edm::EDConsumerBase.modulesDependentUpon(), HGCalDDDConstants.modulesInit(), MuonScenarioBuilder.moveCSCSectors(), MuonScenarioBuilder.moveDTSectors(), MultiGaussianStateTransform.multiState1D(), MuonGeometrySanityCheckPoint.MuonGeometrySanityCheckPoint(), ElectronMVAEstimator.mva(), HcalDDDGeometry.newCell(), HGCalDDDConstants.newCell(), HcalGeometry.newCell(), FWGUIEventDataAdder.newIndexSelected(), ora::MappingRules.newNameForSchemaObject(), cscdqm::Detector.NextAddressBoxByPartition(), CSCConditions.noiseMatrix(), GoldenPattern.normalise(), 
HGCalDDDConstants.numberCells(), HcalQIECoder.offset(), CastorQIECoder.offset(), VoronoiSubtractor.offsetCorrectJets(), FWGUIManager.open3DRegion(), FWDetailViewManager.openDetailViewFor(), EcalShapeBase.operator()(), Grid3D.operator()(), reco::tau::Combinatoric< T >::ValueAccessor.operator()(), operator<<(), PhiMemoryImage.operator[](), SeedingLayerSetsHits.operator[](), MultiGaussianStateTransform.outerMultiState1D(), Phase2OTEndcapLayer.overlapInR(), TIDLayer.overlapInR(), l1t::stage1::RCTEmRegionPacker.pack(), AlignmentSurfaceDeformations.parameters(), MuonAlignmentFromReference.parseReference(), CaloTowerConstituentsMapBuilder.parseTextMap(), ParticleDecayProducer.ParticleDecayProducer(), CSCDBPedestals.pedestal(), CSCConditions.pedestal(), CSCDBPedestals.pedestal_rms(), CSCConditions.pedestalSigma(), PFTauMVAInputDiscriminantTranslator.PFTauMVAInputDiscriminantTranslator(), HcalTopology.phiBin(), pos::PixelCalibConfiguration.PixelCalibConfiguration(), pos::PixelNameTranslation.PixelNameTranslation(), pftools::LinearCalibrator.populateDetElIndex(), SiStripRegionCabling.position(), edm::service::Timing.postEvent(), edm::service::Timing.preEvent(), gbl::GblTrajectory.prepare(), MatrixInjector.MatrixInjector.prepare(), print_trigger_candidates(), print_trigger_collection(), pf2pat::TopProjectorAlgo< Top, Bottom >.printAncestors(), SiStripFedCabling.printDebug(), TShapeAnalysis.printshapeData(), SiStripFedCabling.printSummary(), ErsatzMEt.probeFinder(), PFAlgoTestBenchElectrons.processBlock(), PFAlgo.processBlock(), l1t::Stage1Layer2EGammaAlgorithmImpPP.processEvent(), HcalBeamMonitor.processEvent(), l1t::Stage1Layer2EGammaAlgorithmImpHI.processEvent(), l1t::Stage1Layer2EGammaAlgorithmImpHW.processEvent(), QualityFilter.produce(), PseudoTopProducer.produce(), TrackListCombiner.produce(), JetTracksAssociationDRCalo.produce(), MuonTrackProducer.produce(), GenParticlePruner.produce(), SecondaryVertexTagInfoProxy.produce(), reco::CorrectedJetProducer< T >.produce(), 
cms::JetCorrectionProducer< T >.produce(), HLTTauMCProducer.produce(), edm::LogErrorHarvester.produce(), JetSubstructurePacker.produce(), pat::PATConversionProducer.produce(), ScalersRawToDigi.produce(), TrackCandidateProducer.produce(), pat::PATMuonProducer.produce(), cms::DigitizerFP420.produce(), HLTScoutingMuonProducer.produce(), HLTScoutingEgammaProducer.produce(), pat::PATTriggerMatchEmbedder< PATObjectType >.produce(), JetDeltaRTagInfoValueMapProducer< T, I >.produce(), pat::PATElectronProducer.produce(), reco::PhysObjectMatcher< C1, C2, S, D, Q >.produce(), JetDeltaRValueMapProducer< T, C >.produce(), ecaldqm::OccupancyClient.producePlots(), MuonDigiCollection< CSCDetId, CSCCLCTDigi >.put(), TShapeAnalysis.putAllVals(), MagneticFieldGrid.putCoordGetInd(), MagneticFieldGrid.putIndGetCoord(), SiPixelGenError.qbin(), SiStripTemplate.qbin(), SiPixelTemplate.qbin(), SiPixelTemplate.qbin_dist(), HGCDigiProducer.randomEngine(), HcalDigiProducer.randomEngine(), HcalTBDigiProducer.randomEngine(), SiStripDigitizer.randomEngine(), CastorDigiProducer.randomEngine(), cms::SiPixelDigitizer.randomEngine(), EcalDigiProducer.randomEngine(), EcalMixingModuleValidation.randomEngine(), TkStripMeasurementDet.rawId(), L1CaloRegionDetId.rctCard(), DQMRootSource.readElements(), CSCTFSectorProcessor.readParameters(), XMLConfigReader.readPatterns(), MuonAlignmentFromReference.readTmpFiles(), heppy::ReclusterJets.ReclusterJets(), edm::Principal.recombine(), fwlite::Record.Record(), edm::EDConsumerBase.recordConsumes(), GenParticlePruner.recursiveFlagDaughters(), GenParticlePruner.recursiveFlagMothers(), SiStripRegionCabling.region(), lhef::LHEEvent.removeParticle(), edmplugin::CacheParser.replaceSpaces(), BackgroundHandler.rescale(), FWEventItem.resetColor(), resetColors(), FWColorPopup.ResetColors(), edm::StreamSchedule.resetEarlyDelete(), edmplugin::CacheParser.restoreSpaces(), PhotonOfflineClient.retrieveHisto(), ZeeCalibration.ringNumberCorrector(), CSCDBCrosstalk.rinter(), 
FWCollectionSummaryTableManager.rowHeader(), L1MuGMTPSB.RPCMuon(), CSCDBCrosstalk.rslope(), EcalTrigPrimFunctionalAlgo.run_part2(), FWEventItem.runFilter(), DTOccupancyTest.runOccupancyTest(), ecaldqm::LedTask.runOnDigis(), ecaldqm::LedTask.runOnPnDigis(), ecaldqm::LedTask.runOnRawData(), ecaldqm::LedTask.runOnUncalibRecHits(), PFPhotonAlgo.RunPFPhoton(), HcalTBTiming.S1Hits(), HcalTBTiming.S2Hits(), HcalTBTiming.S3Hits(), HcalTBTiming.S4Hits(), HcalDetDiagNoiseMonitor.SaveRates(), FWCaloRecHitDigitSetProxyBuilder.scaleProduct(), FWPFEcalRecHitRPProxyBuilder.scaleProduct(), reco::TemplatedSecondaryVertexTagInfo< reco::CandIPTagInfo, reco::VertexCompositePtrCandidate >.secondaryVertex(), HLTL1TSeed.seedsL1TriggerObjectMaps(), RPCSeedPattern.SegmentAlgorithmSpecial(), FWViewContextMenuHandlerGL.select(), HLTEventSelector.select(), FWModelExpressionSelector.select(), MuonAlignmentFromReference.selectResidualsPeaks(), CmsShowSearchFiles.sendToWebBrowser(), TkStripMeasurementDet.set128StripStatus(), l1t::PhysicsToBitConverter.Set32bitWordLinkEven(), l1t::PhysicsToBitConverter.Set32bitWordLinkOdd(), TkPixelMeasurementDet.setActive(), TkPixelMeasurementDet.setActiveThisEvent(), TkStripMeasurementDet.setActiveThisEvent(), TkStripMeasurementDet.setActiveThisPeriod(), ElectronDqmAnalyzerBase.setBookIndex(), ElectronDqmHarvesterBase.setBookIndex(), CalibrationInterface< CategoryT, CalibDataT >.setCalibData(), Vispa.Main.Application.Application.setCurrentTabController(), FWEventItem.setDefaultDisplayProperties(), FWEventItem.setDisplayProperties(), edm::Path.setEarlyDeleteHelpers(), TkPixelMeasurementDet.setEmpty(), TkStripMeasurementDet.setEmpty(), SimpleL1MuGMTCand.setEta(), FWPSetTableManager.setExpanded(), Particle.SetFirstDaughterIndex(), HBHEStatusBitSetter.SetFlagsFromDigi(), HBHEStatusBitSetter.SetFlagsFromRecHits(), RPCLogCone.setIdx(), edm::RootInputFileSequence.setIndexIntoFile(), Particle.SetLastDaughterIndex(), reco::PFBlock.setLink(), 
reco::PFDisplacedVertexCandidate.setLink(), PFElectronAlgo.SetLinks(), HcalQIECoder.setOffset(), CastorQIECoder.setOffset(), SiStripLorentzAngleCalibration.setParameter(), SiPixelLorentzAngleCalibration.setParameter(), SiStripBackplaneCalibration.setParameter(), SiStripLorentzAngleCalibration.setParameterError(), SiPixelLorentzAngleCalibration.setParameterError(), SiStripBackplaneCalibration.setParameterError(), fit::RootMinuit< Function >.setParameters(), DreamSD.setPbWO2MaterialProperties_(), SimpleL1MuGMTCand.setPhi(), reco::PFTrack.setPoint(), SimpleL1MuGMTCand.setPt(), edm::OwnArray< T, MAX_SIZE, P >.setPtr(), edm::OwnVector< T, P >.setPtr(), reco::SoftLeptonProperties.setQuality(), gen::Pythia6Service.setSLHAFromHeader(), HcalQIECoder.setSlope(), CastorQIECoder.setSlope(), CaloGeometry.setSubdetGeometry(), CaloTopology.setSubdetTopology(), FWCaloTowerDetailView.setTextInfo(), FWMuonDetailView.setTextInfo(), FWPhotonDetailView.setTextInfo(), FWPFCandidateDetailView.setTextInfo(), FWElectronDetailView.setTextInfo(), FWTrackHitsDetailView.setTextInfo(), FWConvTrackHitsDetailView.setTextInfo(), l1t::Muon.setTfMuonIndex(), edm::IndexIntoFile::IndexIntoFileItrSorted.setToLastEventInRange(), DQMRootSource.setupFile(), SiPixelPerformanceSummary.setValue(), MonitorElement.ShiftFillLast(), FWGUIValidatingTextEntry.showOptions(), CmsShowSearchFiles.showPrefixes(), FWModelContextMenuHandler.showSelectedModelContext(), L1DummyProducer.SimpleDigi(), HGCalDDDConstants.simToReco(), SiPixelDetSummary.SiPixelDetSummary(), HcalQIECoder.slope(), CastorQIECoder.slope(), l1t.SortEGammas(), sortNtupleByEvent(), CSCChamberSpecs.specsValue(), TrackingMaterialAnalyser.split(), DQMRootOutputModule.startEndFile(), HLTConfigData.streamIndex(), edm.stripNamespace(), TkStripMeasurementDet.subId(), edm::SubProcess.SubProcess(), VoronoiSubtractor.subtractPedestal(), ReflectedIterator.subtractPedestal(), MultipleAlgoIterator.subtractPedestal(), ParametrizedSubtractor.subtractPedestal(), 
PileUpSubtractor.subtractPedestal(), edm::SystemTimeKeeper.SystemTimeKeeper(), reco::GsfTrackExtra.tangentDeltaP(), reco::GsfTrackExtra.tangentMomentum(), reco::GsfTrackExtra.tangentPosition(), TauDQMHistPlotter.TauDQMHistPlotter(), SiPixelTemplate.temperrors(), GBRTree2D.TerminalIndex(), GBRTreeD.TerminalIndex(), GBRTree.TerminalIndex(), SiStripFedCabling.terse(), TEveElementIter.TEveElementIter(), TkStripMeasurementDet.theSet(), EcalShapeBase.timeIndex(), HFTimingTrust.timerr_hf(), CSCCFEBTimeSlice.timeSample(), timeshift_ns_hbheho(), timeshift_ns_hf(), TkStripMeasurementDet.totalStrips(), reco::TemplatedSecondaryVertexTagInfo< IPTI, VTX >.track(), reco::TemplatedSecondaryVertexTagInfo< IPTI, VTX >.trackData(), reco::TemplatedSecondaryVertexTagInfo< IPTI, VTX >.trackIPData(), reco::PFTrack.trajectoryPoint(), HLTConfigData.triggerIndex(), edm::TriggerNames.TriggerNames(), l1t::Stage2Layer2EGammaAlgorithmFirmwareImp1.trimmingLutIndex(), CSCOfflineMonitor.typeIndex(), CSCValidation.typeIndex(), uniqueElectronFinder(), TrackCategories.unknownTrack(), VertexCategories.unknownVertex(), CSCTFEvent.unpack(), DCCMemBlock.unpackMemTowerData(), TkPixelMeasurementDet.update(), HcaluLUTTPGCoder.update(), FWPSetTableManager.update(), FP420Test.update(), BscTest.update(), G4StepStatistics.update(), SeedingLayerSetsBuilder.updateEventSetup(), HcalDetDiagNoiseMonitor.UpdateHistos(), FWTableView.updateItems(), edm::EDConsumerBase.updateLookup(), HcalTBTiming.V775(), HLTMuon.validChambers(), CSCDBGasGainCorrection.value(), CSCDBChipSpeedCorrection.value(), PrimaryVertexMonitor.vertexPlots(), HGCalDDDConstants.waferZ(), sistrip::EnsembleCalibrationLA.write_ensembles_text(), GctFormatTranslateMCLegacy.writeRctEmCandBlocks(), MuonAlignmentFromReference.writeTmpFiles(), and L1GtVmeWriterCore.writeVME().

cmsHarvester.Jsonfilename

Definition at line 3709 of file cmsHarvester.py.

cmsHarvester.Jsonlumi

Definition at line 3683 of file cmsHarvester.py.

int cmsHarvester.loop = 0

CMSSW

CAF

Definition at line 4392 of file cmsHarvester.py.

Referenced by optutl::CommandLineParser._getSectionFiles(), RawDataConverter.ClearData(), CmsShowMainFrame.CmsShowMainFrame(), DDCheckMaterial(), SiStripTFile.dirContent(), MillePedeAlignmentAlgorithm.doIO(), LaserAlignment.DumpPosFileSet(), LaserAlignment.DumpStripFileSet(), SETSeedFinder.estimateMomentum(), PhysicsTools::MVAComputer.evalInternal(), LMFDefFabric.getColor(), LMFDefFabric.getColorFromID(), RawDataConverter.GetDigis(), LMFDefFabric.getRunTag(), LMFDefFabric.getRunTagFromID(), LMFDefFabric.getTrigType(), LMFDefFabric.getTrigTypeFromID(), pat::EventHypothesis.loop(), output(), optutl::CommandLineParser.parseArguments(), EcalDigiSelector.produce(), SiStripTFile.readDQMFormat(), TrajectoryManager.reconstruct(), stdcomb.recursive_combination(), LMFColoredTable.setColor(), LMFColoredTable.setSystem(), HcalTestAnalysis.update(), DDI::Polyhedra.volume(), and DDI::Polycone.volume().

string cmsHarvester.marker = "\n"

Definition at line 4815 of file cmsHarvester.py.

Referenced by fireworks.addDashedArrow(), fireworks.addDashedLine(), FWPhotonLegoProxyBuilder.build(), FWMuonGlimpseProxyBuilder.build(), FWElectronLegoProxyBuilder.build(), FWElectronGlimpseProxyBuilder.build(), FWTauProxyBuilderBase.buildBaseTau(), and FWMETProxyBuilder.buildViewType().

list cmsHarvester.marker_lines = []

Definition at line 4807 of file cmsHarvester.py.

string cmsHarvester.metavar = "JSONRUNFILE"

Definition at line 2059 of file cmsHarvester.py.

cmsHarvester.mirrored = None

Definition at line 3223 of file cmsHarvester.py.

string cmsHarvester.msg = "Could not create directory `%s'"

class Handler(xml.sax.handler.ContentHandler): def startElement(self, name, attrs): if name == "result": site_name = str(attrs["STORAGEELEMENT_SENAME"])

TODO TODO TODO

Ugly hack to get around cases like this:

$ dbs search --query="find dataset, site, file.count where dataset=/RelValQCD_Pt_3000_3500/CMSSW_3_3_0_pre1-STARTUP31X_V4-v1/GEN-SIM-RECO"

Using DBS instance at: http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet

Processing ...

\

PATH STORAGEELEMENT_SENAME COUNT_FILES

_________________________________________________________________________________

/RelValQCD_Pt_3000_3500/CMSSW_3_3_0_pre1-STARTUP31X_V4-v1/GEN-SIM-RECO 1

/RelValQCD_Pt_3000_3500/CMSSW_3_3_0_pre1-STARTUP31X_V4-v1/GEN-SIM-RECO cmssrm.fnal.gov 12

/RelValQCD_Pt_3000_3500/CMSSW_3_3_0_pre1-STARTUP31X_V4-v1/GEN-SIM-RECO srm-cms.cern.ch 12

if len(site_name) < 1: return

TODO TODO TODO end

run_number = int(attrs["RUNS_RUNNUMBER"]) file_name = str(attrs["FILES_LOGICALFILENAME"]) nevents = int(attrs["FILES_NUMBEROFEVENTS"])

I know, this is a bit of a kludge.

if not files_info.has_key(run_number):

New run.

files_info[run_number] = {} files_info[run_number][file_name] = (nevents, [site_name]) elif not files_info[run_number].has_key(file_name):

New file for a known run.

files_info[run_number][file_name] = (nevents, [site_name]) else:

New entry for a known file for a known run.

DEBUG DEBUG DEBUG

Each file should have the same number of

events independent of the site it's at.

assert nevents == files_info[run_number][file_name][0]

DEBUG DEBUG DEBUG end

files_info[run_number][file_name][1].append(site_name) OBSOLETE OBSOLETE OBSOLETE end

Definition at line 1640 of file cmsHarvester.py.

tuple cmsHarvester.multicrab_block_name
Initial value:
1 = self.create_multicrab_block_name( \
2  dataset_name, run, index)

Definition at line 4415 of file cmsHarvester.py.

string cmsHarvester.multicrab_config = "\n"

Definition at line 4496 of file cmsHarvester.py.

list cmsHarvester.multicrab_config_lines = []

Definition at line 4339 of file cmsHarvester.py.

list cmsHarvester.nevents = int(handler.results["file.numevents"][index])

Definition at line 3177 of file cmsHarvester.py.

Referenced by SiStripHitEffFromCalibTree.algoAnalyze(), DTT0Calibration.analyze(), DTT0CalibrationNew.analyze(), DTDigiTask.analyze(), DTResolutionAnalysisTest.beginRun(), DTLocalTriggerBaseTest.beginRun(), DTEfficiencyTest.beginRun(), DTDigiTask.dqmBeginRun(), DTNoiseAnalysisTest.dqmEndLuminosityBlock(), DTChamberEfficiencyTest.DTChamberEfficiencyTest(), DTNoiseAnalysisTest.DTNoiseAnalysisTest(), DTOccupancyTest.DTOccupancyTest(), DTResolutionTest.DTResolutionTest(), DTRunConditionVarClient.DTRunConditionVarClient(), DTSegmentAnalysisTest.DTSegmentAnalysisTest(), DTT0Calibration.DTT0Calibration(), DTT0CalibrationNew.DTT0CalibrationNew(), ZDCDigiStudy.endRun(), gen::BaseHadronizer.generateLHE(), DTLocalTriggerBaseTest.setConfig(), edm::IndexIntoFile.setNumberOfEvents(), StatisticsPlots(), SummaryHisto(), DTChamberEfficiencyTest.~DTChamberEfficiencyTest(), DTDigiTask.~DTDigiTask(), DTEfficiencyTest.~DTEfficiencyTest(), DTLocalTriggerBaseTest.~DTLocalTriggerBaseTest(), DTNoiseAnalysisTest.~DTNoiseAnalysisTest(), DTResolutionAnalysisTest.~DTResolutionAnalysisTest(), DTResolutionTest.~DTResolutionTest(), and DTSegmentAnalysisTest.~DTSegmentAnalysisTest().

cmsHarvester.non_t1access

Definition at line 1090 of file cmsHarvester.py.

cmsHarvester.nr_max_sites

Definition at line 1141 of file cmsHarvester.py.

dictionary cmsHarvester.num_events_catalog = {}

Definition at line 3216 of file cmsHarvester.py.

tuple cmsHarvester.num_events_dataset = sum(tmp)

Definition at line 3987 of file cmsHarvester.py.

tuple cmsHarvester.num_sites
Initial value:
1 = len(self.datasets_information[dataset_name] \
2  ["sites"][run_number])

               if self.datasets_information[dataset_name]["num_events"][run_number] != 0:
                   pdb.set_trace()

DEBUG DEBUG DEBUG end

Definition at line 3957 of file cmsHarvester.py.

int cmsHarvester.number_max_sites = self.nr_max_sites+1

Definition at line 4337 of file cmsHarvester.py.

cmsHarvester.option_parser

Definition at line 1879 of file cmsHarvester.py.

cmsHarvester.output_file_name = self.\

Definition at line 4383 of file cmsHarvester.py.

Referenced by HcalLutManager.writeLutXmlFiles().

tuple cmsHarvester.path = os.path.join(path, piece)

else:

Piece not in the list, fine.

                   self.logger.debug("  accepting")

Add piece to the path we're building. self.logger.debug("!!! Skip path piece `%s'? %s" % \ (piece, str(skip_this_path_piece))) self.logger.debug("Adding piece to path...")

Definition at line 1592 of file cmsHarvester.py.

tuple cmsHarvester.permissions = extract_permissions(output)

Definition at line 1649 of file cmsHarvester.py.

string cmsHarvester.permissions_new = []

Definition at line 1679 of file cmsHarvester.py.

string cmsHarvester.permissions_target = "775"

Definition at line 1673 of file cmsHarvester.py.

cmsHarvester.preferred_site

Definition at line 1147 of file cmsHarvester.py.

cmsHarvester.ref_hist_mappings_file_name

Definition at line 2258 of file cmsHarvester.py.

tuple cmsHarvester.run_number = int(handler.results["run.number"][index])

Definition at line 3175 of file cmsHarvester.py.

Referenced by BlockFormatter.DigiToRaw(), TEcnaRead.FileParameters(), HcalChannelQualityXml.HcalChannelQualityXml(), HcalL1TriggerObjectsXml.HcalL1TriggerObjectsXml(), TEcnaHeader.HeaderParameters(), HcalChannelDataXml.init_data(), HcalChannelDataXml.set_header_run_number(), SiStripCommissioningOfflineClient.setInputFiles(), and XMLHTRZeroSuppressionLoader.XMLHTRZeroSuppressionLoader().

list cmsHarvester.runs = self.datasets_to_use[dataset_name]

Definition at line 4350 of file cmsHarvester.py.

cmsHarvester.runs_to_ignore

Definition at line 3556 of file cmsHarvester.py.

cmsHarvester.runs_to_use

Definition at line 3532 of file cmsHarvester.py.

cmsHarvester.saveByLumiSection

Definition at line 1120 of file cmsHarvester.py.

tuple cmsHarvester.se_name = choice(t1_sites)

Definition at line 1792 of file cmsHarvester.py.

string cmsHarvester.sep = "#"

Definition at line 4808 of file cmsHarvester.py.

Referenced by edm::Entry.Entry(), ExpressionVariable< Object, label >.ExpressionVariable(), GEDPhotonProducer.fillPhotonCollection(), LumiProducer.fillRunCache(), ElectronDqmHarvesterBase.find(), fwlite::DataGetterHelper.getBranchDataFor(), HLTPixlMBForAlignmentFilter.hltFilter(), fit::RootMinuitCommands< Function >.init(), Tokenizer.join(), L1TGlobalProducer.L1TGlobalProducer(), l1t::L1TGlobalUtil.loadPrescalesAndMasks(), std.operator<<(), HcalTopologyRestrictionParser.parse(), lumi::NormDML.parseAfterglows(), ParticleReplacerZtautau.ParticleReplacerZtautau(), ClhepEvaluator.prepare(), ElectronIDValueMapProducer.produce(), PhotonIDValueMapProducer.produce(), ElectronEnergyRegressionEvaluate.regressionUncertaintyNoTrkVar(), ElectronEnergyRegressionEvaluate.regressionUncertaintyNoTrkVarV1(), ElectronEnergyRegressionEvaluate.regressionUncertaintyWithSubClusters(), ElectronEnergyRegressionEvaluate.regressionUncertaintyWithTrkVar(), ElectronEnergyRegressionEvaluate.regressionUncertaintyWithTrkVarV1(), ElectronEnergyRegressionEvaluate.regressionUncertaintyWithTrkVarV2(), ElectronEnergyRegressionEvaluate.regressionValueNoTrkVar(), ElectronEnergyRegressionEvaluate.regressionValueNoTrkVarV1(), ElectronEnergyRegressionEvaluate.regressionValueWithSubClusters(), ElectronEnergyRegressionEvaluate.regressionValueWithTrkVar(), ElectronEnergyRegressionEvaluate.regressionValueWithTrkVarV1(), ElectronEnergyRegressionEvaluate.regressionValueWithTrkVarV2(), StringBasedNTupler.StringBasedNTupler(), and edm.tokenize().

list cmsHarvester.site_name = None

Definition at line 1773 of file cmsHarvester.py.

list cmsHarvester.site_names = list(set([j for i in files_info[run_number].values() for j in i[1]]))

Definition at line 3218 of file cmsHarvester.py.

list cmsHarvester.sites = [self.preferred_site]

Definition at line 1762 of file cmsHarvester.py.

Referenced by l1t::Stage2Layer2TauAlgorithmFirmwareImp1.merging(), and edm::service::SiteLocalConfigService.parse().

list cmsHarvester.sites_forbidden = []

Definition at line 1709 of file cmsHarvester.py.

list cmsHarvester.sites_with_complete_copies = []

Definition at line 3234 of file cmsHarvester.py.

cmsHarvester.skip_this_path_piece = True

self.logger.debug("Checking CASTOR path piece `%s'" % \ piece)

self.logger.debug("Checking `%s' against `%s'" % \ (castor_path_pieces[piece_index + check_size], castor_paths_dont_touch[check_size])) self.logger.debug(" skipping")

Definition at line 1584 of file cmsHarvester.py.

list cmsHarvester.t1_sites = []

Definition at line 1779 of file cmsHarvester.py.

dictionary cmsHarvester.tmp
Initial value:
1 = [j for (i, j) in self.datasets_information \
2  [dataset_name]["num_events"].items() \
3  if i in self.datasets_to_use[dataset_name]]

TODO TODO TODO

Need to think about where this should go, but

somewhere we have to move over the fact that we want

to process all runs for each dataset that we're

considering.

This basically means copying over the

information from self.datasets_information[]["runs"]

to self.datasets_to_use[].

for dataset_name in self.datasets_to_use.keys(): self.datasets_to_use[dataset_name] = self.datasets_information[dataset_name]["runs"]

TODO TODO TODO end

OBSOLETE OBSOLETE OBSOLETE end tmp = self.datasets_information[dataset_name] \ ["num_events"]

Definition at line 3984 of file cmsHarvester.py.

tuple cmsHarvester.traceback_string = traceback.format_exc()

Definition at line 5718 of file cmsHarvester.py.

string cmsHarvester.twiki_url = "https://twiki.cern.ch/twiki/bin/view/CMS/CmsHarvester"

Definition at line 41 of file cmsHarvester.py.

string cmsHarvester.type = "string"

Definition at line 2058 of file cmsHarvester.py.

tuple cmsHarvester.use_es_prefer = (self.harvesting_type == "RelVal")

Definition at line 4859 of file cmsHarvester.py.

cmsHarvester.use_refs = use_es_prefer or \

Definition at line 4860 of file cmsHarvester.py.

cmsHarvester.UserName = output

Definition at line 4332 of file cmsHarvester.py.

cmsHarvester.workflow_name = dataset_name

Definition at line 4888 of file cmsHarvester.py.