CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
Classes | Namespaces | Functions | Variables
cmsHarvester.py File Reference

Go to the source code of this file.

Classes

class  cmsHarvester.CMSHarvester
 CMSHarvester class. More...
 
class  cmsHarvester.CMSHarvesterHelpFormatter
 Helper class: CMSHarvesterHelpFormatter. More...
 
class  cmsHarvester.DBSXMLHandler
 Helper class: DBSXMLHandler. More...
 
class  cmsHarvester.Error
 Helper class: Error exception. More...
 
class  cmsHarvester.Usage
 Helper class: Usage exception. More...
 

Namespaces

 cmsHarvester
 

Functions

def cmsHarvester.build_dataset_ignore_list
 
def cmsHarvester.build_dataset_list
 def dbs_check_dataset_num_events(self, dataset_name): """Figure out the number of events in each run of this dataset. More...
 
def cmsHarvester.build_dataset_use_list
 
def cmsHarvester.build_datasets_information
 
def cmsHarvester.build_runs_ignore_list
 
def cmsHarvester.build_runs_list
 
def cmsHarvester.build_runs_use_list
 
def cmsHarvester.check_cmssw
 
def cmsHarvester.check_dataset_list
 
def cmsHarvester.check_dbs
 
def cmsHarvester.check_globaltag
 
def cmsHarvester.check_globaltag_contains_ref_hist_key
 
def cmsHarvester.check_globaltag_exists
 
def cmsHarvester.check_input_status
 
def cmsHarvester.check_ref_hist_mappings
 
def cmsHarvester.check_ref_hist_tag
 
def cmsHarvester.create_and_check_castor_dir
 
def cmsHarvester.create_and_check_castor_dirs
 
def cmsHarvester.create_castor_path_name_common
 
def cmsHarvester.create_castor_path_name_special
 
def cmsHarvester.create_config_file_name
 
def cmsHarvester.create_crab_config
 
def cmsHarvester.create_es_prefer_snippet
 
def cmsHarvester.create_harvesting_config
 
def cmsHarvester.create_harvesting_config_file_name
 
def cmsHarvester.create_harvesting_output_file_name
 
def cmsHarvester.create_me_extraction_config
 
More...
 
def cmsHarvester.create_me_summary_config_file_name
 
def cmsHarvester.create_me_summary_output_file_name
 
def cmsHarvester.create_multicrab_block_name
 
def cmsHarvester.create_multicrab_config
 
def cmsHarvester.create_output_file_name
 
def cmsHarvester.dbs_check_dataset_spread
 def dbs_resolve_dataset_number_of_sites(self, dataset_name): """Ask DBS across how many sites this dataset has been spread out. More...
 
def cmsHarvester.dbs_resolve_cmssw_version
 
def cmsHarvester.dbs_resolve_dataset_name
 
def cmsHarvester.dbs_resolve_datatype
 
def cmsHarvester.dbs_resolve_globaltag
 
def cmsHarvester.dbs_resolve_number_of_events
 
def cmsHarvester.dbs_resolve_runs
 def dbs_resolve_dataset_number_of_events(self, dataset_name): """Ask DBS across how many events this dataset has been spread out. More...
 
def cmsHarvester.escape_dataset_name
 
def cmsHarvester.load_ref_hist_mappings
 
def cmsHarvester.option_handler_caf_access
 
def cmsHarvester.option_handler_castor_dir
 def option_handler_dataset_name(self, option, opt_str, value, parser): """Specify the name(s) of the dataset(s) to be processed. More...
 
def cmsHarvester.option_handler_crab_submission
 
def cmsHarvester.option_handler_list_types
 
def cmsHarvester.option_handler_no_t1access
 
def cmsHarvester.option_handler_preferred_site
 
def cmsHarvester.option_handler_saveByLumiSection
 
def cmsHarvester.option_handler_sites
 
def cmsHarvester.parse_cmd_line_options
 
def cmsHarvester.pick_a_site
 
def cmsHarvester.process_dataset_ignore_list
 
def cmsHarvester.process_runs_use_and_ignore_lists
 
def cmsHarvester.ref_hist_mappings_needed
 
def cmsHarvester.run
 
def cmsHarvester.setup_dbs
 

Now we try to do a very simple DBS search.

More...
 
def cmsHarvester.setup_harvesting_info
 
def cmsHarvester.show_exit_message
 
def cmsHarvester.singlify_datasets
 
def cmsHarvester.write_crab_config
 def create_harvesting_config(self, dataset_name): """Create the Python harvesting configuration for a given job. More...
 
def cmsHarvester.write_harvesting_config
 
def cmsHarvester.write_me_extraction_config
 
def cmsHarvester.write_multicrab_config
 

Variables

string cmsHarvester.__author__ = "Jeroen Hegeman (jeroen.hegeman@cern.ch),"
 
string cmsHarvester.__version__ = "3.8.2p1"
 File : cmsHarvest.py Authors : Jeroen Hegeman (jeroen.hegeman@cern.ch) Niklas Pietsch (niklas.pietsch@desy.de) Francesco Costanza (francesco.costanza@desy.de) Last change: 20100308. More...
 
string cmsHarvester.action = "callback"
 
list cmsHarvester.all_file_names = files_info[run_number]
 
list cmsHarvester.all_t1
 
 cmsHarvester.caf_access
 
 cmsHarvester.callback = self.option_handler_input_Jsonrunfile,
 
 cmsHarvester.castor_base_dir
 
list cmsHarvester.castor_dir = self.datasets_information[dataset_name]
 

CRAB

More...
 
tuple cmsHarvester.castor_path_common = self.create_castor_path_name_common(dataset_name)
 

DEBUG DEBUG DEBUG

This is probably only useful to make sure we don't muck

things up, right?

Figure out across how many sites this sample has been spread.

More...
 
tuple cmsHarvester.castor_paths
 
 cmsHarvester.castor_prefix = self.castor_prefix
 
string cmsHarvester.cmd = "rfstat %s"
 self.logger.debug("Path is now `%s'" % \ path) More...
 
list cmsHarvester.cmssw_version = self.datasets_information[dataset_name]
 
list cmsHarvester.complete_sites
 site_names_ref = set(files_info[run_number].values()[0][1]) for site_names_tmp in files_info[run_number].values()[1:]: if set(site_names_tmp[1]) != site_names_ref: mirrored = False break More...
 
tuple cmsHarvester.config_builder = ConfigBuilder(config_options, with_input=True)
 
 cmsHarvester.config_contents = config_builder.pythonCfgCode
 

In case this file is the second step (the real harvesting

step) of the two-step harvesting we have to tell it to use

our local files.

More...
 
tuple cmsHarvester.config_file_name = self.create_me_summary_config_file_name(dataset_name)
 

Only add the alarming piece to the file name if this is

a spread-out dataset.

More...
 
list cmsHarvester.connect_name = self.frontier_connection_name["globaltag"]
 
dictionary cmsHarvester.country_codes
 
string cmsHarvester.crab_config = "\n"
 

CRAB

More...
 
 cmsHarvester.crab_submission
 
list cmsHarvester.customisations = [""]
 
tuple cmsHarvester.dataset_name_escaped = self.escape_dataset_name(dataset_name)
 
tuple cmsHarvester.dataset_names = self.datasets_to_use.keys()
 
 cmsHarvester.dataset_names_after_checks = dataset_names_after_checks_tmp
 
tuple cmsHarvester.dataset_names_after_checks_tmp = copy.deepcopy(dataset_names_after_checks)
 
 cmsHarvester.datasets_information
 
 cmsHarvester.datasets_to_ignore
 
 cmsHarvester.datasets_to_use
 
list cmsHarvester.datatype = self.datasets_information[dataset_name]
 
 cmsHarvester.dbs_api
 
tuple cmsHarvester.empty_runs = dict(tmp)
 
tuple cmsHarvester.es_prefer_snippet = self.create_es_prefer_snippet(dataset_name)
 
int cmsHarvester.exit_code = 1
 
list cmsHarvester.file_name = handler.results["file.name"]
 
list cmsHarvester.files_at_site
 
dictionary cmsHarvester.files_info = {}
 
list cmsHarvester.files_without_sites
 
list cmsHarvester.globaltag = self.datasets_information[dataset_name]
 
 cmsHarvester.harvesting_info
 
 cmsHarvester.harvesting_mode
 
 cmsHarvester.harvesting_type
 
string cmsHarvester.help = "Jsonfile containing dictionary of run/lumisections pairs. "
 
string cmsHarvester.index = "site_%02d"
 
 cmsHarvester.Jsonfilename
 
 cmsHarvester.Jsonlumi
 
int cmsHarvester.loop = 0
 

CMSSW

More...
 
string cmsHarvester.marker = "\n"
 
list cmsHarvester.marker_lines = []
 
string cmsHarvester.metavar = "JSONRUNFILE"
 
 cmsHarvester.mirrored = None
 
string cmsHarvester.msg = "Could not create directory `%s'"
 class Handler(xml.sax.handler.ContentHandler): def startElement(self, name, attrs): if name == "result": site_name = str(attrs["STORAGEELEMENT_SENAME"])

TODO TODO TODO

Ugly hack to get around cases like this:

$ dbs search --query="find dataset, site, file.count where dataset=/RelValQCD_Pt_3000_3500/CMSSW_3_3_0_pre1-STARTUP31X_V4-v1/GEN-SIM-RECO"

Using DBS instance at: http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet

Processing ...

More...
 
tuple cmsHarvester.multicrab_block_name
 
string cmsHarvester.multicrab_config = "\n"
 
list cmsHarvester.multicrab_config_lines = []
 
tuple cmsHarvester.nevents = int(handler.results["file.numevents"][index])
 
 cmsHarvester.non_t1access
 
 cmsHarvester.nr_max_sites
 
dictionary cmsHarvester.num_events_catalog = {}
 
tuple cmsHarvester.num_events_dataset = sum(tmp)
 
tuple cmsHarvester.num_sites
 
               if self.datasets_information[dataset_name]["num_events"][run_number] != 0:
                   pdb.set_trace()

DEBUG DEBUG DEBUG end More...

 
int cmsHarvester.number_max_sites = self.nr_max_sites+1
 
 cmsHarvester.option_parser
 
 cmsHarvester.output_file_name = self.\
 
tuple cmsHarvester.path = os.path.join(path, piece)
 else:

Piece not in the list, fine.

More...
 
tuple cmsHarvester.permissions = extract_permissions(output)
 
list cmsHarvester.permissions_new = []
 
string cmsHarvester.permissions_target = "775"
 
 cmsHarvester.preferred_site
 
 cmsHarvester.ref_hist_mappings_file_name
 
tuple cmsHarvester.run_number = int(handler.results["run.number"][index])
 
list cmsHarvester.runs = self.datasets_to_use[dataset_name]
 
 cmsHarvester.runs_to_ignore
 
 cmsHarvester.runs_to_use
 
 cmsHarvester.saveByLumiSection
 
tuple cmsHarvester.se_name = choice(t1_sites)
 
string cmsHarvester.sep = "#"
 
 cmsHarvester.site_name = None
 
tuple cmsHarvester.site_names = list(set([j for i in files_info[run_number].values() for j in i[1]]))
 
list cmsHarvester.sites = [self.preferred_site]
 
list cmsHarvester.sites_forbidden = []
 
list cmsHarvester.sites_with_complete_copies = []
 
 cmsHarvester.skip_this_path_piece = True
 self.logger.debug("Checking CASTOR path piece `%s'" % \ piece) More...
 
list cmsHarvester.t1_sites = []
 
list cmsHarvester.tmp
 

TODO TODO TODO

Need to think about where this should go, but

somewhere we have to move over the fact that we want

to process all runs for each dataset that we're

considering.

More...
 
tuple cmsHarvester.traceback_string = traceback.format_exc()
 
string cmsHarvester.twiki_url = "https://twiki.cern.ch/twiki/bin/view/CMS/CmsHarvester"
 
string cmsHarvester.type = "string"
 
tuple cmsHarvester.use_es_prefer = (self.harvesting_type == "RelVal")
 
 cmsHarvester.use_refs = use_es_prefer or \
 
 cmsHarvester.UserName = output
 
 cmsHarvester.workflow_name = dataset_name