CMS 3D CMS Logo

Classes | Functions | Variables
tkal_create_file_lists Namespace Reference

Classes

class  _DasCache
 
class  FileListCreator
 

Functions

def _get_events (entity, name)
 
def _get_properties (name, entity, properties, filters=None, sub_entity=None, aggregators=None)
 
def _make_file_info (dataset_name_nevents)
 
def das_client (query, check_key=None)
 
def find_key (collection, key_chain)
 
def get_chunks (long_list, chunk_size)
 
def get_datasets (dataset_pattern)
 
def get_events_per_dataset (dataset_name)
 
def get_events_per_file (file_name)
 
def get_file_info (dataset)
 
def get_files (dataset_name)
 
def get_max_run (dataset_name)
 
def get_runs (file_name)
 
def main (argv=None)
 
def merge_strings (strings)
 
def print_msg (text, line_break=True, log_file=None)
 

Variables

 FileInfo = collections.namedtuple("FileInfo", "dataset name nevents runs")
 

Function Documentation

def tkal_create_file_lists._get_events (   entity,
  name 
)
private
Retrieve the number of events from `entity` called `name`.

Arguments:
- `entity`: type of entity
- `name`: name of entity

Definition at line 1066 of file tkal_create_file_lists.py.

References _get_properties(), das_client(), find_key(), and createfilelist.int.

Referenced by get_events_per_dataset(), and get_events_per_file().

def _get_events(entity, name):
    """Look up the event count that DAS reports for one entity.

    Arguments:
    - `entity`: type of entity
    - `name`: name of entity

    Returns the `nevents` value found in the DAS answer, converted to `int`.
    """

    query_template = "{0:s}={1:s} system=dbs3 detail=True | grep {0:s}.nevents"
    query = query_template.format(entity, name)
    das_answer = das_client(query, entity)
    nevents = find_key(das_answer, [entity, "nevents"])
    return int(nevents)
1077 
1078 
def find_key(collection, key_chain)
def das_client(query, check_key=None)
def tkal_create_file_lists._get_properties (   name,
  entity,
  properties,
  filters = None,
  sub_entity = None,
  aggregators = None 
)
private
Retrieve `properties` from `entity` called `name`.

Arguments:
- `name`: name of entity
- `entity`: type of entity
- `properties`: list of property names
- `filters`: list of filters on properties
- `sub_entity`: type of entity from which to extract the properties;
                defaults to `entity`
- `aggregators`: additional aggregators/filters to amend to query

Definition at line 1080 of file tkal_create_file_lists.py.

References das_client(), find_key(), and join().

Referenced by _get_events(), and get_file_info().

1080  aggregators = None):
1081  """Retrieve `properties` from `entity` called `name`.
1082 
1083  Arguments:
1084  - `name`: name of entity
1085  - `entity`: type of entity
1086  - `properties`: list of property names
1087  - `filters`: list of filters on properties
1088  - `sub_entity`: type of entity from which to extract the properties;
1089  defaults to `entity`
1090  - `aggregators`: additional aggregators/filters to amend to query
1091  """
1092 
1093  if sub_entity is None: sub_entity = entity
1094  if filters is None: filters = []
1095  props = ["{0:s}.{1:s}".format(sub_entity,prop.split()[0])
1096  for prop in properties]
1097  conditions = ["{0:s}.{1:s}".format(sub_entity, filt)
1098  for filt in filters]
1099  add_ons = "" if aggregators is None else " | "+" | ".join(aggregators)
1100 
1101  data = das_client("{0:s} {1:s}={2:s} system=dbs3 detail=True | grep {3:s}{4:s}"
1102  .format(sub_entity, entity, name,
1103  ", ".join(props+conditions), add_ons), sub_entity)
1104  return [[find_key(f[sub_entity], [prop]) for prop in properties] for f in data]
1105 
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def find_key(collection, key_chain)
def das_client(query, check_key=None)
def tkal_create_file_lists._make_file_info (   dataset_name_nevents)
private

Definition at line 1118 of file tkal_create_file_lists.py.

References FileInfo, and get_runs().

def _make_file_info(dataset_name_nevents):
    """Build a `FileInfo` from a (dataset, name, nevents) sequence.

    Arguments:
    - `dataset_name_nevents`: sequence whose items fill the leading
                              `FileInfo` fields; item 1 is the file name

    The `runs` field is filled with the result of `get_runs` for the file
    name.
    """

    file_name = dataset_name_nevents[1]
    runs = get_runs(file_name)
    return FileInfo(*dataset_name_nevents, runs=runs)
1120 
def _make_file_info(dataset_name_nevents)
def tkal_create_file_lists.das_client (   query,
  check_key = None 
)
Submit `query` to DAS client and handle possible errors.
Further treatment of the output might be necessary.

Arguments:
- `query`: DAS query
- `check_key`: optional key to be checked for; retriggers query if needed

Definition at line 905 of file tkal_create_file_lists.py.

References find_key(), cmssw_das_client.get_data(), edm.print(), print_msg(), and str.

Referenced by _get_events(), _get_properties(), get_datasets(), get_files(), get_max_run(), and get_runs().

def das_client(query, check_key = None):
    """
    Submit `query` to DAS client and handle possible errors.
    Further treatment of the output might be necessary.

    The query is attempted at most five times. An attempt is repeated if the
    client raises one of the two known transient errors, if the returned
    status is not "ok", or if `check_key` is given and the answer holds no
    entries for it. Any other `IOError`/`ValueError` is re-raised unchanged.

    Arguments:
    - `query`: DAS query
    - `check_key`: optional key to be checked for; retriggers query if needed

    Returns the "data" entry of the last DAS answer. If the last attempt
    ended in status "error", the reason is printed and the program exits
    via `sys.exit(1)`.
    """

    das_data = None
    for _ in range(5):         # maximum of 5 tries
        try:
            das_data = cmssw_das_client.get_data(query, limit = 0)
        except IOError as e:
            if e.errno != 14: #https://stackoverflow.com/q/36397853/5228524
                raise
            # Record the failure so the final report has a reason even if
            # no attempt ever returned an answer.
            das_data = {"status": "error", "reason": str(e)}
            continue
        except ValueError as e:
            # NOTE(review): this exact message looks specific to one JSON
            # decoder version -- confirm whether other decode errors should
            # be retried as well.
            if str(e) != "No JSON object could be decoded":
                raise
            das_data = {"status": "error", "reason": str(e)}
            continue

        if das_data["status"] != "ok":
            continue           # try again

        if das_data["nresults"] == 0 or check_key is None:
            break

        result_count = sum(len(d)
                           for d in find_key(das_data["data"], [check_key]))
        if result_count > 0:
            break

        # Status was "ok" but the requested key carries no entries: flag it
        # as an error so it is reported if this was the last attempt.
        das_data["status"] = "error"
        das_data["reason"] = ("DAS did not return required data.")

    if das_data["status"] == "error":
        print_msg("DAS query '{}' failed 5 times. "
                  "The last time for the following reason:".format(query))
        print(das_data["reason"])
        sys.exit(1)
    return das_data["data"]
948 
949 
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def print_msg(text, line_break=True, log_file=None)
def find_key(collection, key_chain)
def get_data(query, limit=None, threshold=None, idx=None, host=None, cmd=None)
#define str(s)
def das_client(query, check_key=None)
def tkal_create_file_lists.find_key (   collection,
  key_chain 
)
Searches `collection` for the keys in `key_chain` and returns the corresponding value (the last match, if several items match).

Arguments:
- `collection`: list of dictionaries
- `key_chain`: chain of keys to be searched for

Definition at line 950 of file tkal_create_file_lists.py.

Referenced by _get_events(), _get_properties(), das_client(), get_datasets(), get_files(), and get_runs().

def find_key(collection, key_chain):
    """Look up the value addressed by `key_chain` inside `collection`.

    Every key of the chain is tried against every item of `collection`.
    A hit on the final key of the chain takes the item's value directly; a
    hit on an earlier key descends into that value with the rest of the
    chain. Later hits overwrite earlier ones, so the last match is returned.

    Arguments:
    - `collection`: list of dictionaries
    - `key_chain`: chain of keys to be searched for

    Raises `LookupError` if nothing (or only `None`) was found.
    """

    final_position = len(key_chain) - 1
    found = None
    for position, key in enumerate(key_chain):
        for entry in collection:
            if key not in entry:
                continue        # nothing to see in this entry
            if position == final_position:
                found = entry[key]
                continue
            try:
                found = find_key(entry[key], key_chain[position+1:])
            except LookupError:
                continue        # rest of the chain not below this entry

    if found is None:
        raise LookupError(key_chain, collection)
    return found
974 
975 
def find_key(collection, key_chain)
def tkal_create_file_lists.get_chunks (   long_list,
  chunk_size 
)
Generates list of sub-lists of `long_list` with a maximum size of
`chunk_size`.

Arguments:
- `long_list`: original list
- `chunk_size`: maximum size of created sub-lists

Definition at line 1121 of file tkal_create_file_lists.py.

Referenced by tkal_create_file_lists.FileListCreator._create_dataset_cff().

def get_chunks(long_list, chunk_size):
    """
    Yield consecutive slices of `long_list`, each holding at most
    `chunk_size` items.

    Arguments:
    - `long_list`: original list
    - `chunk_size`: maximum size of created sub-lists
    """

    chunk_starts = range(0, len(long_list), chunk_size)
    for begin in chunk_starts:
        end = begin + chunk_size
        yield long_list[begin:end]
1133 
1134 
def get_chunks(long_list, chunk_size)
def tkal_create_file_lists.get_datasets (   dataset_pattern)
Retrieve the list of datasets matching `dataset_pattern`.

Arguments:
- `dataset_pattern`: pattern of dataset names

Definition at line 1034 of file tkal_create_file_lists.py.

References das_client(), and find_key().

def get_datasets(dataset_pattern):
    """Ask DAS for the names of all datasets matching `dataset_pattern`.

    Arguments:
    - `dataset_pattern`: pattern of dataset names

    Returns the distinct dataset names as a sorted list.
    """

    query = ("dataset dataset={0:s} system=dbs3 detail=True"
             "| grep dataset.name").format(dataset_pattern)
    entries = das_client(query, "dataset")
    unique_names = {find_key(entry["dataset"], ["name"]) for entry in entries}
    return sorted(unique_names)
1044 
1045 
def find_key(collection, key_chain)
def get_datasets(dataset_pattern)
def das_client(query, check_key=None)
def tkal_create_file_lists.get_events_per_dataset (   dataset_name)
Retrieve the number of events in `dataset_name`.

Arguments:
- `dataset_name`: name of a dataset

Definition at line 1046 of file tkal_create_file_lists.py.

References _get_events().

def get_events_per_dataset(dataset_name):
    """Return the number of events in the dataset `dataset_name`.

    Arguments:
    - `dataset_name`: name of a dataset
    """

    return _get_events(entity="dataset", name=dataset_name)
1054 
1055 
def get_events_per_dataset(dataset_name)
def tkal_create_file_lists.get_events_per_file (   file_name)
Retrieve the number of events in `file_name`.

Arguments:
- `file_name`: name of a dataset file

Definition at line 1056 of file tkal_create_file_lists.py.

References _get_events().

def get_events_per_file(file_name):
    """Return the number of events in the dataset file `file_name`.

    Arguments:
    - `file_name`: name of a dataset file
    """

    return _get_events(entity="file", name=file_name)
1064 
1065 
def tkal_create_file_lists.get_file_info (   dataset)

Definition at line 1106 of file tkal_create_file_lists.py.

References _get_properties().

def get_file_info(dataset):
    """Collect name and event count of the non-empty files of `dataset`.

    Arguments:
    - `dataset`: name of the dataset

    Returns a list of `(dataset, name, nevents)` tuples, one per file
    returned by the `nevents > 0` filtered query.
    """

    name_and_nevents = _get_properties(name=dataset,
                                       entity="dataset",
                                       sub_entity="file",
                                       properties=["name", "nevents"],
                                       filters=["nevents > 0"])
    file_info = []
    for file_name, nevents in name_and_nevents:
        file_info.append((dataset, file_name, nevents))
    return file_info
1113 
1114 
1115 
def _get_properties(name, entity, properties, filters=None, sub_entity=None, aggregators=None)
def tkal_create_file_lists.get_files (   dataset_name)
Retrieve list of files in `dataset_name`.

Arguments:
- `dataset_name`: name of the dataset

Definition at line 1021 of file tkal_create_file_lists.py.

References das_client(), and find_key().

def get_files(dataset_name):
    """Ask DAS for the names of the non-empty files in `dataset_name`.

    Arguments:
    - `dataset_name`: name of the dataset

    Returns the file names in the order of the DAS answer.
    """

    query_template = ("file dataset={0:s} system=dbs3 detail=True | "
                      "grep file.name, file.nevents > 0")
    entries = das_client(query_template.format(dataset_name), "file")
    file_names = []
    for entry in entries:
        file_names.append(find_key(entry["file"], ["name"]))
    return file_names
1032 
1033 
def find_key(collection, key_chain)
def das_client(query, check_key=None)
def tkal_create_file_lists.get_max_run (   dataset_name)
Retrieve the maximum run number in `dataset_name`.

Arguments:
- `dataset_name`: name of the dataset

Definition at line 1009 of file tkal_create_file_lists.py.

References das_client(), and SiStripPI.max.

def get_max_run(dataset_name):
    """Return the largest run number DAS lists for `dataset_name`.

    Arguments:
    - `dataset_name`: name of the dataset
    """

    query = "run dataset={0:s} system=dbs3".format(dataset_name)
    entries = das_client(query)
    return max(entry["run"][0]["run_number"] for entry in entries)
1019 
1020 
def das_client(query, check_key=None)
def tkal_create_file_lists.get_runs (   file_name)
Try to guess the run number from `file_name`. If run could not be
determined, gets the run numbers from DAS (slow!)

Arguments:
- `file_name`: name of the considered file

Definition at line 994 of file tkal_create_file_lists.py.

References das_client(), find_key(), createfilelist.int, and join().

Referenced by _make_file_info().

def get_runs(file_name):
    """
    Determine the run number(s) of `file_name`.

    First the fourth- and third-to-last path components of `file_name` are
    concatenated and read as one integer. Only if that is not a valid
    integer are the run numbers requested from DAS (slow!).

    Arguments:
    - `file_name`: name of the considered file
    """

    path_parts = file_name.split("/")
    run_digits = "".join(path_parts[-4:-2])
    try:
        return [int(run_digits)]
    except ValueError:
        query = "run file="+file_name+" system=dbs3"
        run_numbers = find_key(das_client(query), ["run", "run_number"])
        return [int(run) for run in run_numbers]
1007 
1008 
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def find_key(collection, key_chain)
def das_client(query, check_key=None)
def tkal_create_file_lists.main (   argv = None)
Main routine. Not called if this module is loaded via `import`.

Arguments:
- `argv`: Command line arguments passed to the script.

Definition at line 29 of file tkal_create_file_lists.py.

def main(argv = None):
    """
    Main routine. Not called if this module is loaded via `import`.

    Creates a `FileListCreator` from the command line arguments and runs
    its `create()` method.

    Arguments:
    - `argv`: Command line arguments passed to the script; defaults to
              `sys.argv[1:]` when `None`.
    """

    # Identity test: only a missing argument should fall back to sys.argv,
    # an explicitly passed (possibly empty) list is used as is.
    if argv is None:
        argv = sys.argv[1:]

    file_list_creator = FileListCreator(argv)
    file_list_creator.create()
42 
43 
def tkal_create_file_lists.merge_strings (   strings)
Merge strings in `strings` into a common string.

Arguments:
- `strings`: list of strings

Definition at line 1135 of file tkal_create_file_lists.py.

References str.

def _merge_two_strings(first, second):
    """Merge `first` and `second`, keeping their common blocks only once."""

    pieces = []
    blocks = difflib.SequenceMatcher(None, first, second).get_matching_blocks()

    last_i, last_j, last_n = 0, 0, 0
    for i, j, n in blocks:
        # Unmatched text of both strings since the previous common block,
        # followed by the common block itself.
        pieces.append(first[last_i+last_n:i])
        pieces.append(second[last_j+last_n:j])
        pieces.append(first[i:i+n])
        last_i, last_j, last_n = i, j, n

    return "".join(pieces)


def merge_strings(strings):
    """Merge strings in `strings` into a common string.

    The strings are folded from left to right: the first two are merged,
    the result is merged with the third, and so on. In each merge the parts
    common to both strings appear once and the differing parts of both are
    kept.

    Arguments:
    - `strings`: list of strings; a single string is returned unchanged

    Returns "" for an empty list and the sole element for a one-element
    list.
    """

    if isinstance(strings, str):
        return strings
    if len(strings) == 0:
        return ""
    if len(strings) == 1:
        return strings[0]

    # Iterative fold instead of recursion, so long lists cannot run into
    # the interpreter's recursion limit.
    merged_string = strings[0]
    for next_string in strings[1:]:
        merged_string = _merge_two_strings(merged_string, next_string)
    return merged_string
1166 
1167 
#define str(s)
def tkal_create_file_lists.print_msg (   text,
  line_break = True,
  log_file = None 
)
Formatted printing of `text`.

Arguments:
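- `text`: string to be printed
- `line_break`: if true, end the output with a line break; otherwise end it with a space and flush stdout
- `log_file`: optional path of a file to which the message is appended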

Definition at line 976 of file tkal_create_file_lists.py.

References edm.print(), and str.

Referenced by tkal_create_file_lists.FileListCreator._create_dataset_cff(), tkal_create_file_lists.FileListCreator._create_dataset_ini_section(), tkal_create_file_lists.FileListCreator._create_dataset_txt(), tkal_create_file_lists.FileListCreator._create_hippy_txt(), tkal_create_file_lists.FileListCreator._create_json_file(), tkal_create_file_lists.FileListCreator._get_track_collection(), tkal_create_file_lists.FileListCreator._print_eventcounts(), tkal_create_file_lists.FileListCreator._request_dataset_information(), tkal_create_file_lists.FileListCreator._validate_input(), tkal_create_file_lists.FileListCreator._write_file_lists(), das_client(), tkal_create_file_lists._DasCache.dump(), and tkal_create_file_lists._DasCache.load().

def print_msg(text, line_break = True, log_file = None):
    """Print `text` with the common message prefix and return the message.

    Arguments:
    - `text`: string to be printed
    - `line_break`: if true, the output ends with a line break; otherwise
                    it ends with a space and stdout is flushed
    - `log_file`: optional path of a file to which the message (plus a line
                  break) is appended
    """

    msg = " >>> " + str(text)
    print(msg, end="\n" if line_break else ' ')
    if not line_break:
        sys.stdout.flush()
    if log_file:
        with open(log_file, "a") as log:
            log.write(msg+"\n")
    return msg
992 
993 
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:66
def print_msg(text, line_break=True, log_file=None)
#define str(s)

Variable Documentation

tkal_create_file_lists.FileInfo = collections.namedtuple("FileInfo", "dataset name nevents runs")