Classes
class	CMSHarvester
	CMSHarvester class. More...

class	CMSHarvesterHelpFormatter
	Helper class: CMSHarvesterHelpFormatter. More...

class	DBSXMLHandler
	Helper class: DBSXMLHandler. More...

class	Error
	Helper class: Error exception. More...

class	Usage
	Helper class: Usage exception. More...

Functions
def	build_dataset_ignore_list

def	build_dataset_list
	def dbs_check_dataset_num_events(self, dataset_name): """Figure out the number of events in each run of this dataset. More...

def	build_dataset_use_list

def	build_datasets_information

def	build_runs_ignore_list

def	build_runs_list

def	build_runs_use_list

def	check_cmssw

def	check_dataset_list

def	check_dbs

def	check_globaltag

def	check_globaltag_contains_ref_hist_key

def	check_globaltag_exists

def	check_input_status

def	check_ref_hist_mappings

def	check_ref_hist_tag

def	create_and_check_castor_dir

def	create_and_check_castor_dirs

def	create_castor_path_name_common

def	create_castor_path_name_special

def	create_config_file_name

def	create_crab_config

def	create_es_prefer_snippet

def	create_harvesting_config

def	create_harvesting_config_file_name

def	create_harvesting_output_file_name

def	create_me_extraction_config
	More...

def	create_me_summary_config_file_name

def	create_me_summary_output_file_name

def	create_multicrab_block_name

def	create_multicrab_config

def	create_output_file_name

def	dbs_check_dataset_spread
	def dbs_resolve_dataset_number_of_sites(self, dataset_name): """Ask DBS across how many sites this dataset has been spread out. More...

def	dbs_resolve_cmssw_version

def	dbs_resolve_dataset_name

def	dbs_resolve_datatype

def	dbs_resolve_globaltag

def	dbs_resolve_number_of_events

def	dbs_resolve_runs
	def dbs_resolve_dataset_number_of_events(self, dataset_name): """Ask DBS across how many events this dataset has been spread out. More...

def	escape_dataset_name

def	load_ref_hist_mappings

def	option_handler_caf_access

def	option_handler_castor_dir
	def option_handler_dataset_name(self, option, opt_str, value, parser): """Specify the name(s) of the dataset(s) to be processed. More...

def	option_handler_crab_submission

def	option_handler_list_types

def	option_handler_no_t1access

def	option_handler_preferred_site

def	option_handler_saveByLumiSection

def	option_handler_sites

def	parse_cmd_line_options

def	pick_a_site

def	process_dataset_ignore_list

def	process_runs_use_and_ignore_lists

def	ref_hist_mappings_needed

def	run

def	setup_dbs
	Now we try to do a very simple DBS search. More...

def	setup_harvesting_info

def	show_exit_message

def	singlify_datasets

def	write_crab_config
	def create_harvesting_config(self, dataset_name): """Create the Python harvesting configuration for a given job. More...

def	write_harvesting_config

def	write_me_extraction_config

def	write_multicrab_config

Variables
string	__author__ = "Jeroen Hegeman (jeroen.hegeman@cern.ch),"

string	__version__ = "3.8.2p1"
	File : cmsHarvest.py Authors : Jeroen Hegeman (jeroen.hegeman@cern.ch) Niklas Pietsch (niklas.pietsch@desy.de) Francesco Costanza (francesco.costanza@desy.de) Last change: 20100308. More...

string	action = "callback"

list	all_file_names = files_info[run_number]

list	all_t1

	caf_access

	callback = self.option_handler_input_Jsonrunfile,

	castor_base_dir

list	castor_dir = self.datasets_information[dataset_name]
	CRAB More...

tuple	castor_path_common = self.create_castor_path_name_common(dataset_name)
	DEBUG DEBUG DEBUG This is probably only useful to make sure we don't muck things up, right? Figure out across how many sites this sample has been spread. More...

tuple	castor_paths

	castor_prefix = self.castor_prefix

string	cmd = "rfstat %s"
	self.logger.debug("Path is now `%s'" % \ path) More...

list	cmssw_version = self.datasets_information[dataset_name]

list	complete_sites
	site_names_ref = set(files_info[run_number].values()[0][1]) for site_names_tmp in files_info[run_number].values()[1:]: if set(site_names_tmp[1]) != site_names_ref: mirrored = False break More...

tuple	config_builder = ConfigBuilder(config_options, with_input=True)

	config_contents = config_builder.pythonCfgCode
	In case this file is the second step (the real harvesting step) of the two-step harvesting we have to tell it to use our local files. More...

tuple	config_file_name = self.create_me_summary_config_file_name(dataset_name)
	Only add the alarming piece to the file name if this is a spread-out dataset. More...

list	connect_name = self.frontier_connection_name["globaltag"]

dictionary	country_codes

string	crab_config = "\n"
	CRAB More...

	crab_submission

list	customisations = [""]

tuple	dataset_name_escaped = self.escape_dataset_name(dataset_name)

tuple	dataset_names = self.datasets_to_use.keys()

	dataset_names_after_checks = dataset_names_after_checks_tmp

tuple	dataset_names_after_checks_tmp = copy.deepcopy(dataset_names_after_checks)

	datasets_information

	datasets_to_ignore

	datasets_to_use

list	datatype = self.datasets_information[dataset_name]

	dbs_api

tuple	empty_runs = dict(tmp)

tuple	es_prefer_snippet = self.create_es_prefer_snippet(dataset_name)

int	exit_code = 1

list	file_name = handler.results["file.name"]

list	files_at_site

dictionary	files_info = {}

list	files_without_sites

list	globaltag = self.datasets_information[dataset_name]

	harvesting_info

	harvesting_mode

	harvesting_type

string	help = "Jsonfile containing dictionary of run/lumisections pairs. "

string	index = "site_%02d"

	Jsonfilename

	Jsonlumi

int	loop = 0
	CMSSW More...

string	marker = "\n"

list	marker_lines = []

string	metavar = "JSONRUNFILE"

	mirrored = None

string	msg = "Could not create directory `%s'"
	class Handler(xml.sax.handler.ContentHandler): def startElement(self, name, attrs): if name == "result": site_name = str(attrs["STORAGEELEMENT_SENAME"]) TODO TODO TODO Ugly hack to get around cases like this: $ dbs search --query="find dataset, site, file.count where dataset=/RelValQCD_Pt_3000_3500/CMSSW_3_3_0_pre1-STARTUP31X_V4-v1/GEN-SIM-RECO" Using DBS instance at: http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet Processing ... More...

tuple	multicrab_block_name

string	multicrab_config = "\n"

list	multicrab_config_lines = []

tuple	nevents = int(handler.results["file.numevents"][index])

	non_t1access

	nr_max_sites

dictionary	num_events_catalog = {}

tuple	num_events_dataset = sum(tmp)

tuple	num_sites
	if self.datasets_information[dataset_name]["num_events"][run_number] != 0: pdb.set_trace() DEBUG DEBUG DEBUG end More...

int	number_max_sites = self.nr_max_sites+1

	option_parser

	output_file_name = self.\

tuple	path = os.path.join(path, piece)
	else: Piece not in the list, fine. More...

tuple	permissions = extract_permissions(output)

list	permissions_new = []

string	permissions_target = "775"

	preferred_site

	ref_hist_mappings_file_name

tuple	run_number = int(handler.results["run.number"][index])

list	runs = self.datasets_to_use[dataset_name]

	runs_to_ignore

	runs_to_use

	saveByLumiSection

tuple	se_name = choice(t1_sites)

string	sep = "#"

	site_name = None

tuple	site_names = list(set([j for i in files_info[run_number].values() for j in i[1]]))

list	sites = [self.preferred_site]

list	sites_forbidden = []

list	sites_with_complete_copies = []

	skip_this_path_piece = True
	self.logger.debug("Checking CASTOR path piece `%s'" % \ piece) More...

list	t1_sites = []

list	tmp
	TODO TODO TODO Need to think about where this should go, but somewhere we have to move over the fact that we want to process all runs for each dataset that we're considering. More...

tuple	traceback_string = traceback.format_exc()

string	twiki_url = "https://twiki.cern.ch/twiki/bin/view/CMS/CmsHarvester"

string	type = "string"

tuple	use_es_prefer = (self.harvesting_type == "RelVal")

	use_refs = use_es_prefer or \

	UserName = output

	workflow_name = dataset_name

Function Documentation

def cmsHarvester.build_dataset_ignore_list ( self )

Build a list of datasets to ignore.

NOTE: We should always have a list of datasets to process, but
it may be that we don't have a list of datasets to ignore.

Definition at line 3444 of file cmsHarvester.py.

 
     def build_dataset_ignore_list(self):
         """Build a list of datasets to ignore.
 
         NOTE: We should always have a list of datasets to process, but
         it may be that we don't have a list of datasets to ignore.
 
         """
 
         self.logger.info("Building list of datasets to ignore...")
 
         input_method = self.input_method["datasets"]["ignore"]
         input_name = self.input_name["datasets"]["ignore"]
         dataset_names = self.build_dataset_list(input_method,
                                                 input_name)
         self.datasets_to_ignore = dict(zip(dataset_names,
                                            [None] * len(dataset_names)))
 
         self.logger.info("  found %d dataset(s) to ignore:" % \
                          len(dataset_names))
         for dataset in dataset_names:
             self.logger.info("  `%s'" % dataset)
 
         # End of build_dataset_ignore_list.

python.multivaluedict.dict

dict

Definition: multivaluedict.py:25

archive.zip

tuple zip

Definition: archive.py:476

cmsHarvester.build_dataset_ignore_list

def build_dataset_ignore_list

Definition: cmsHarvester.py:3444

def cmsHarvester.build_dataset_list	(	self,
		input_method,
		input_name
	)

def cmsHarvester.build_runs_list	(	self,
		input_method,
		input_name
	)

def cmsHarvester.check_globaltag_contains_ref_hist_key	(	self,
		globaltag,
		connect_name
	)

def cmsHarvester.check_globaltag_exists	(	self,
		globaltag,
		connect_name
	)

def cmsHarvester.create_castor_path_name_special	(	self,
		dataset_name,
		run_number,
		castor_path_common
	)

def cmsHarvester.check_globaltag	(	self,
		globaltag = `None`
	)

def cmsHarvester.create_and_check_castor_dir	(	self,
		castor_dir
	)

def cmsHarvester.create_castor_path_name_common	(	self,
		dataset_name
	)

def cmsHarvester.create_config_file_name	(	self,
		dataset_name,
		run_number
	)

def cmsHarvester.create_es_prefer_snippet	(	self,
		dataset_name
	)

def cmsHarvester.create_harvesting_config	(	self,
		dataset_name
	)

def cmsHarvester.create_harvesting_output_file_name	(	self,
		dataset_name,
		run_number
	)

def cmsHarvester.create_me_extraction_config	(	self,
		dataset_name
	)

def cmsHarvester.create_me_summary_config_file_name	(	self,
		dataset_name
	)

def cmsHarvester.create_me_summary_output_file_name	(	self,
		dataset_name
	)

def cmsHarvester.check_ref_hist_tag	(	self,
		tag_name
	)

def cmsHarvester.create_multicrab_block_name	(	self,
		dataset_name,
		run_number,
		index
	)

def cmsHarvester.create_output_file_name	(	self,
		dataset_name,
		run_number = `None`
	)

def cmsHarvester.dbs_check_dataset_spread	(	self,
		dataset_name
	)

def cmsHarvester.dbs_resolve_cmssw_version	(	self,
		dataset_name
	)

def cmsHarvester.dbs_resolve_dataset_name	(	self,
		dataset_name
	)

def cmsHarvester.dbs_resolve_datatype	(	self,
		dataset_name
	)

def cmsHarvester.dbs_resolve_globaltag	(	self,
		dataset_name
	)

def cmsHarvester.dbs_resolve_number_of_events	(	self,
		dataset_name,
		run_number = `None`
	)

def cmsHarvester.option_handler_caf_access	(	self,
		option,
		opt_str,
		value,
		parser
	)

def cmsHarvester.option_handler_castor_dir	(	self,
		option,
		opt_str,
		value,
		parser
	)

Classes

Functions

Now we try to do a very simple DBS search.

Variables

CRAB

DEBUG DEBUG DEBUG

This is probably only useful to make sure we don't muck

things up, right?

Figure out across how many sites this sample has been spread.

In case this file is the second step (the real harvesting

step) of the two-step harvesting we have to tell it to use

our local files.

Only add the alarming piece to the file name if this is

a spread-out dataset.

CRAB

CMSSW

TODO TODO TODO

Ugly hack to get around cases like this:

$ dbs search --query="find dataset, site, file.count where dataset=/RelValQCD_Pt_3000_3500/CMSSW_3_3_0_pre1-STARTUP31X_V4-v1/GEN-SIM-RECO"

Using DBS instance at: http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet

Processing ...

Piece not in the list, fine.

TODO TODO TODO

Need to think about where this should go, but

somewhere we have to move over the fact that we want

to process all runs for each dataset that we're

considering.

Function Documentation

This might very well not work at all for spread-out samples. (?)

BUG BUG BUG end

If we get here DBS should have been set up already.

DEBUG DEBUG DEBUG end

BUG BUG BUG end

End of create_harvesting_config_two_step.

If we get here DBS should have been set up already.

DEBUG DEBUG DEBUG end

DEBUG DEBUG DEBUG end

If we get here DBS should have been set up already.

DEBUG DEBUG DEBUG end

sure we get results for each and every run in the

dataset. I'm not sure this would make sense since we'd be

cross-checking DBS info with DBS info anyway. Note that we

use the file count per site to see if we're dealing with an

incomplete vs. a mirrored dataset.

BUG BUG BUG

Doh! For some reason DBS never returns any other

event count than zero.

BUG BUG BUG end

Quick-n-dirty trick to see if all file counts are the

same.

Okay, so this must be a mirrored dataset.

We have to pick one but we have to be careful. We

cannot submit to things like a T0, a T1, or CAF.

Looks like this is a spread-out sample.

Just kept for debugging now.

If we get here DBS should have been set up already.

DEBUG DEBUG DEBUG end

sure we get results for each and every run in the

dataset. I'm not sure this would make sense since we'd be

cross-checking DBS info with DBS info anyway. Note that we

use the file count per site to see if we're dealing with an

incomplete vs. a mirrored dataset.

BUG BUG BUG

Doh! For some reason DBS never returns any other

event count than zero.

BUG BUG BUG end

Quick-n-dirty trick to see if all file counts are the

same.

Okay, so this must be a mirrored dataset.

We have to pick one but we have to be careful. We

cannot submit to things like a T0, a T1, or CAF.

Looks like this is a spread-out sample.

DEBUG DEBUG DEBUG end

If we get here DBS should have been set up already.

DEBUG DEBUG DEBUG end

DEBUG DEBUG DEBUG end

Now we try to do a very simple DBS search.

instead of giving us the `Unsupported API call' crap, we

should be good to go.

NOTE: Not ideal, I know, but it reduces the amount of

def cmsHarvester.option_handler_crab_submission	(	self,
		option,
		opt_str,
		value,
		parser
	)

def cmsHarvester.option_handler_list_types	(	self,
		option,
		opt_str,
		value,
		parser
	)

def cmsHarvester.option_handler_no_t1access	(	self,
		option,
		opt_str,
		value,
		parser
	)

def cmsHarvester.option_handler_preferred_site	(	self,
		option,
		opt_str,
		value,
		parser
	)

def cmsHarvester.option_handler_saveByLumiSection	(	self,
		option,
		opt_str,
		value,
		parser
	)

def cmsHarvester.option_handler_sites	(	self,
		option,
		opt_str,
		value,
		parser
	)

def cmsHarvester.pick_a_site	(	self,
		sites,
		cmssw_version
	)

def cmsHarvester.ref_hist_mappings_needed	(	self,
		dataset_name = `None`
	)

def cmsHarvester.write_harvesting_config	(	self,
		dataset_name
	)

def cmsHarvester.write_me_extraction_config	(	self,
		dataset_name
	)