CMS 3D CMS Logo

hdf5Writer.py
Go to the documentation of this file.
1 import h5py
2 import zlib
3 import lzma
4 import numpy as np
5 
6 #The file structure
7 #
8 # "file_format" - Attribute says which version of the file format was used
9 # "default_payload_compressor" - Attribute name of compressor used for the payloads
10 #
11 # "Records"- Group
12 # <Record> - Group name is the EventSetup record name
13 # "DataProducts" - Group
14 # <data product> - Group name is the '<type>@<label>' combination
15 # "type" - Attribute, the C++ canonical type name
16 # "Payloads" - Group
17 # <payload> - DataSet name is hash used in DB
18 # "memsize" = Attribute bytes needed after decompression
19 # "type" = Attribute the actual type stored (for polymorphism)
20 # "Tags" - Group
21 # <tag> - Group name is
22 # same as DB if only one data product is in the tag
23 # a hybrid name formed from the different DB tags it merged
24 # "products" - Attribute, list of the data products used in the order they appear in "payload"
25 # "time_type" - Attribute, either 'run_lumi' or 'time'
26 # "db_tags" - Attribute the list of DB tags that were combined
27 # "record" - Attribute name of the record to which the tag is associated (optimizes readback)
28 # "first" - DataSet holds the beginning IOVSyncValue for the IOVs
29 # "last" - DataSet holds the end IOVSyncValue for the IOVs
30 # "payload" - DataSet references to the payloads for this IOV for each data product
31 #
32 # "GlobalTags" - Group
33 # <global tag> - Group name is the global tag name
34 # "Tags" - DataSet holds references to the tags
35 
36 
def writeTagImpl(tagsGroup, name, recName, time_type, IOV_payloads, payloadToRefs, productNames, originalTagNames):
    """Create the group for one tag inside ``tagsGroup`` and fill in its IOV data.

    Parameters:
      tagsGroup: HDF5 group in which the new tag group is created.
      name: name given to the new tag group.
      recName: record name, stored in the "record" attribute.
      time_type: stored in the "time_type" attribute.
      IOV_payloads: sequence of entries; entry[0] and entry[1] are the first
        and last sync values of an IOV (objects exposing ``high`` and ``low``)
        and entry[2] is an iterable of keys looked up in ``payloadToRefs``.
      payloadToRefs: mapping from payload key to the reference to store.
      productNames: names stored in the "products" attribute.
      originalTagNames: names stored in the "db_tags" attribute.

    Returns the HDF5 reference of the newly created tag group.
    """
    tagGroup = tagsGroup.create_group(name)

    # All string attributes are stored ASCII encoded.
    tagGroup.attrs["time_type"] = time_type.encode("ascii")
    tagGroup.attrs["db_tags"] = [tagName.encode("ascii") for tagName in originalTagNames]
    tagGroup.attrs["record"] = recName.encode("ascii")
    tagGroup.attrs['products'] = [product.encode("ascii") for product in productNames]

    # A sync value is stored as a pair of unsigned 32 bit words.
    syncValueType = np.dtype([("high", np.uint32), ("low", np.uint32)])

    def packSyncValues(syncValues):
        # One structured-array row per sync value, filled field by field.
        packed = np.empty(shape=(len(syncValues),), dtype=syncValueType)
        for field in ("high", "low"):
            packed[field] = [getattr(value, field) for value in syncValues]
        return packed

    first_np = packSyncValues([iov[0] for iov in IOV_payloads])
    last_np = packSyncValues([iov[1] for iov in IOV_payloads])

    # Resolve every payload key before any dataset is created.
    payloadRefs = [[payloadToRefs[key] for key in iov[2]] for iov in IOV_payloads]

    # Only compress the datasets once the tag holds more than 100 IOVs.
    compressor = 'gzip' if len(first_np) > 100 else None

    tagGroup.create_dataset("first", data=first_np, compression=compressor)
    tagGroup.create_dataset("last", data=last_np, compression=compressor)
    tagGroup.create_dataset("payload", data=payloadRefs, dtype=h5py.ref_dtype, compression=compressor)
    return tagGroup.ref
62 
63 
def writeTag(tagsGroup, time_type, IOV_payloads, payloadToRefs, originalTagNames, recName, productNames):
    """Choose the group name for a tag and delegate the writing to writeTagImpl.

    The group is named after the first entry of ``originalTagNames``; when the
    list does not hold exactly one name, "@joined" is appended to that name.
    All other arguments are forwarded unchanged. Returns whatever
    writeTagImpl returns.
    """
    firstName = originalTagNames[0]
    groupName = firstName if len(originalTagNames) == 1 else firstName + "@joined"
    return writeTagImpl(
        tagsGroup,
        groupName,
        recName,
        time_type,
        IOV_payloads,
        payloadToRefs,
        productNames,
        originalTagNames,
    )
69 
def writeH5File(fileName, globalTags, excludeRecords, includeRecords, tagReader, compressorName):
    """Write the tags and payloads of the requested global tags to a new HDF5 file.

    Parameters:
      fileName: path of the file; it is opened with mode 'w'.
      globalTags: iterable of global tag names; each one is passed to ``tagReader``.
      excludeRecords: container of record names that are skipped.
      includeRecords: when non-empty, only records named in it are written.
      tagReader: callable taking a global tag name and returning an object
        whose ``tags()`` yields the tags to write.
      compressorName: "zlib" or "lzma" selects the payload compressor; any
        other value stores the payloads uncompressed. The name itself is
        recorded in the "default_payload_compressor" file attribute.

    Each global tag gets a group under "GlobalTags" whose "Tags" dataset holds
    references to the tag groups written for that global tag only.
    """
    # Key-list records appear to hold objects of type 'cond::persistency::KeyList'
    # and have their own proxy type; they are skipped below.
    keyListRecords = {"ExDwarfListRcd", "DTKeyedConfigListRcd", "DTKeyedConfigContainerRcd"}

    default_compressor_name = compressorName
    print(default_compressor_name)
    default_compressor = {"zlib": zlib, "lzma": lzma}.get(default_compressor_name)

    with h5py.File(fileName, 'w') as h5file:
        h5file.attrs["file_format"] = 1
        h5file.attrs["default_payload_compressor"] = default_compressor_name.encode("ascii")
        recordsGroup = h5file.create_group("Records")
        globalTagsGroup = h5file.create_group("GlobalTags")
        # Empty dataset referenced wherever an IOV has no payload (key None).
        null_dataset = h5file.create_dataset("null_payload", data=np.array([], dtype='b'))

        for name in globalTags:
            gt = tagReader(name)
            # Start from an empty list for every global tag so that its "Tags"
            # dataset does not also pick up the tags of earlier global tags.
            tagGroupRefs = []
            for tag in gt.tags():
                rcd = tag.record()
                if rcd in keyListRecords:
                    continue
                if rcd in excludeRecords:
                    continue
                if includeRecords and rcd not in includeRecords:
                    continue
                recordDataSize = 0

                payloadToRefs = {None: null_dataset.ref}

                # NOTE(review): create_group is expected to fail if the same
                # record name is written twice (e.g. shared by two global tags).
                recordGroup = recordsGroup.create_group(rcd)
                tagsGroup = recordGroup.create_group("Tags")
                dataProductsGroup = recordGroup.create_group("DataProducts")
                print("record: %s"%rcd)
                productNames = []
                for dataProduct in tag.dataProducts():
                    productName = dataProduct.name()
                    productNames.append(productName)
                    dataProductGroup = dataProductsGroup.create_group(productName)
                    dataProductGroup.attrs["type"] = dataProduct.objtype().encode("ascii")
                    payloadsGroup = dataProductGroup.create_group("Payloads")
                    print(" product: %s"%productName)
                    for p_index, payload in enumerate(dataProduct.payloads()):
                        # Fetch the payload name and bytes once and reuse them.
                        payloadName = payload.name()
                        rawData = payload.data()
                        rawSize = len(rawData)
                        print(" %i payload: %s size: %i"%(p_index, payloadName, rawSize))
                        recordDataSize += rawSize
                        stored = rawData
                        if default_compressor:
                            compressed = default_compressor.compress(rawData)
                            # Keep the raw bytes when compressing isn't helping.
                            if len(compressed) < rawSize:
                                stored = compressed
                        pl = payloadsGroup.create_dataset(payloadName, data=np.frombuffer(stored, dtype='b'))
                        # "memsize" is always the uncompressed size.
                        pl.attrs["memsize"] = rawSize
                        pl.attrs["type"] = payload.actualType()
                        payloadToRefs[payloadName] = pl.ref

                tagGroupRefs.append(writeTag(tagsGroup, tag.time_type(), tag.iovsNPayloadNames(), payloadToRefs, tag.originalTagNames(), rcd, productNames))
                print(" total size:", recordDataSize)

            globalTagGroup = globalTagsGroup.create_group(name)
            globalTagGroup.create_dataset("Tags", data=tagGroupRefs, dtype=h5py.ref_dtype)
def writeH5File(fileName, globalTags, excludeRecords, includeRecords, tagReader, compressorName)
Definition: hdf5Writer.py:70
def writeTag(tagsGroup, time_type, IOV_payloads, payloadToRefs, originalTagNames, recName, productNames)
Definition: hdf5Writer.py:64
def writeTagImpl(tagsGroup, name, recName, time_type, IOV_payloads, payloadToRefs, productNames, originalTagNames)
Definition: hdf5Writer.py:37
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:47
def encode(args, files)