CMS 3D CMS Logo

data_sources.py
Go to the documentation of this file.
1 """
2 
3 This file contains the base data_source class and all subclasses that implement their own methods for parsing data.
4 
5 """
6 from __future__ import print_function
7 from __future__ import absolute_import
8 
9 import json
10 
11 # data_source will extend this
class node(object):
    """Minimal tree node: one payload value plus an ordered list of child nodes."""

    # class-level placeholders; __init__ gives every instance its own values
    _data = None
    _child_nodes = None

    def __init__(self, data=None):
        """Store `data` as the payload and start with an empty child list."""
        self._data = data
        self._child_nodes = []

    def data(self):
        """Return the payload stored in this node."""
        return self._data

    def add_child(self, node_data):
        """Wrap `node_data` in a new node and append it as the last child."""
        self._child_nodes.append(node(node_data))

    def children(self):
        """Return the list of child nodes (the live list, not a copy)."""
        return self._child_nodes

    def child(self, index):
        """Return the child at position `index`; raises IndexError if it does not exist."""
        return self.children()[index]

    def left_child(self):
        """Return the first child; raises IndexError if there are no children."""
        return self.children()[0]

    def right_child(self):
        """Return the second child; raises IndexError if there are fewer than two children."""
        return self.children()[1]

    def is_leaf(self):
        """Return True when the node has no children."""
        return len(self.children()) == 0

    def __str__(self):
        """Return a readable rendering of the payload and, recursively, of the children."""
        return "<node data='%s' children=%s>" % (self.data(), str(self.children()))

    # str(list) formats its elements with repr(); without this alias the children
    # inside __str__ would show up as "<... node object at 0x...>"
    __repr__ = __str__
44 
46 
47  def __init__(self):
48  pass
49 
50  def get_data(self):
51  return []
52 
53  def __repr__(self):
54  return "<data_source>"
55 
56 # a json file data source first reads json from the file given, and then provides methods to navigate it and select fields
58 
59  # sub_data is the current subtree of the json data
60  # sub_data is used for chaining navigation methods
61  # Note: _data is defined since data_source extends node, but defining it here for convenience
62  _data, _sub_data, _file_name = None, None, None
63  def __init__(self, json_file_name):
64  # read the file, then parse into JSON object
65  self._file_name = json_file_name
66  with open(self._file_name, "r") as handle:
67  contents = "".join(handle.readlines())
68  data = json.loads(contents)
69  self._data = data
70  self._sub_data = data
71 
72  def data(self):
73  return json_data_node.make(self._data)
74 
75  def raw(self):
76  return self._data
77 
78  def __str__(self):
79  return self.__repr__()
80 
82  _data, _sub_data, _file_name = None, None, None
83  def __init__(self, sqlite_file_name):
84  self._file_name = sqlite_file_name
85  # import sqlite3 and connect to the database file
86  import sqlite3
87  connection = sqlite3.connect(self._file_name)
88  cursor = connection.cursor()
89  if query_object == None:
90  # try to query the file to get table and column data
91  tables = cursor.execute("select name from sqlite_master where type = 'table'")
92 
93  # now build a mapping of tables to columns - with a dictionary
94  table_to_columns = {}
95  for table in tables.fetchall():
96  table_to_columns[table[0]] = []
97  # now query columns for this table
98  columns = cursor.execute("pragma table_info(%s)" % table[0])
99  for column in columns.fetchall():
100  table_to_columns[table[0]].append(str(column[1]))
101 
102  # now query with the mapping
103  table_to_data = {}
104  for table in table_to_columns:
105  # query with all columns
106  column_string = ",".join(table_to_columns[table])
107  sql_query = "select %s from %s" % (column_string, table)
108  results = cursor.execute(sql_query).fetchall()
109  for n in range(0, len(results)):
110  results[n] = dict(zip(table_to_columns[table], map(str, results[n])))
111  table_to_data[str(table)] = results
112  self._data = json_data_node.make(table_to_data)
113  else:
114  sql_query = query_object.to_sql()
115 
116  def data(self):
117  return self._data
118 
119 # used for chaining json-navigation methods
120 # when a method is called initially on the data, an object of this class is returned,
121 # then the methods on that object return an object of this class again.
123 
124  _data = None
125  def __init__(self, data=None):
126  self._data = data
127 
128  # use this instead of having to decide on which kind of json node should
129  # be created in code that shouldn't be doing it.
130  @staticmethod
131  def make(data):
132  if isinstance(data, list):
133  return json_list(data)
134  elif isinstance(data, dict):
135  return json_dict(data)
136  else:
137  return json_basic(data)
138 
139  def data(self):
140  return self._data
141 
142  def raw(self):
143  return self._data
144 
145  def get(self, *args):
146  current_json_node = self
147  if len(args) == 1:
148  data_to_use = current_json_node.data()[args[0]]
149  return json_data_node.make(data_to_use)
150  for key in args:
151  current_json_node = current_json_node.get(key)
152  return current_json_node
153 
154  def set(self, data):
155  self._data = data
156  return self
157 
158  def find(self, type_name):
159  # traverse json_data_node structure, and find all lists
160  # if this node in the structure is a list, return all sub lists
161  lists = []
162  if isinstance(self._data, type_name):
163  lists.append(self._data)
164  if isinstance(self._data, list):
165  for item in self._data:
166  lists += json_data_node.make(item).find(type_name)
167  elif isinstance(self._data, dict):
168  for key in self._data:
169  lists += json_data_node.make(self._data[key]).find(type_name)
170  return lists
171 
172  def __str__(self):
173  return "<json_data_node data='%s'>" % str(self._data)
174 
176 
177  iterator_index = None
178 
179  def __init__(self, data=None):
180  self._data = data if data != None else []
181  self.iterator_index = 0
182 
183  def first(self):
184  data = self.get(0)
185  return data
186 
187  def last(self):
188  data = self.get(len(self.data())-1)
189  return data
190 
191  def add_child(self, data):
192  if data.__class__.__name__ in ["json_list", "json_dict", "json_basic"]:
193  data = data.data()
194  self._data.append(data)
195 
196  # iterator methods
197 
198  def __iter__(self):
199  return self
200 
201  def next(self):
202  if self.iterator_index > len(self._data)-1:
203  self.reset()
204  raise StopIteration
205  else:
206  self.iterator_index += 1
207  return self._data[self.iterator_index-1]
208 
209  def reset(self):
210  self.iterator_index = 0
211 
212  # misc methods
213 
214  def indices(self, *indices):
215  final_list = []
216  for index in indices:
217  try:
218  index = int(index)
219  try:
220  final_list.append(self.get(index).data())
221  except Exception:
222  # index didn't exist
223  pass
224  except Exception:
225  return
226  return json_data_node.make(final_list)
227 
228  def get_members(self, member_name):
229  # assume self.data() is a list
230  if not(type(member_name) in [str, unicode]):
231  raise TypeError("Value given for member name must be a string.")
232  type_of_first_item = self.data()[0].__class__
233  for item in self.data():
234  if item.__class__ != type_of_first_item:
235  return None
236  return json_data_node.make(map(lambda item : getattr(item, member_name), self.data()))
237 
238  # format methods
239 
240  def as_dicts(self, convert_timestamps=False):
241 
242  if len(self.data()) == 0:
243  print("\nNo data to convert to dictionaries.\n")
244  return
245 
246  if self.get(0).data().__class__.__name__ in ["GlobalTag", "GlobalTagMap", "Tag", "IOV", "Payload"]:
247  # copy data
248  new_data = map(lambda item : item.as_dicts(convert_timestamps=convert_timestamps), [item for item in self.data()])
249  return new_data
250  else:
251  print("Data in json_list was not the correct type.")
252 
253 
254  # return ascii version of data
255  # expects array of dicts
256  # fit is a list of columns that should be kept at their full size
257  # col_width is the column width to be used as a guide
258  def as_table(self, fit=["all"], columns=None, hide=None, col_width=None, row_nums=False):
259 
260  if len(self.data()) == 0:
261  print("\nNo data to draw table with.\n")
262  return
263 
264  from . import models
265  models_dict = models.generate()
266 
267  # if the list contains ORM objects, then convert them all to dictionaries,
268  # otherwise, leave the list as it is - assume it is already a list of dictionaries
269  if self.get(0).data().__class__.__name__ in ["GlobalTag", "GlobalTagMap", "GlobalTagMapRequest", "Tag", "IOV", "Payload"]:
270 
271  from .data_formats import _objects_to_dicts
272  data = _objects_to_dicts(self.data()).data()
273 
274  from .querying import connection
275  table_name = models.class_name_to_column(self.get(0).data().__class__).upper()
276  # set headers to those found in ORM models
277  # do it like this so we copy the headers
278  # for example, if headers are hidden by the user, then this will change the orm class if we don't do it like this
279  headers = [header for header in models_dict[self.get(0).data().__class__.__name__.lower()].headers]
280  else:
281  table_name = None
282  data = self.data()
283  # gets headers stored in first dictionary
284  headers = data[0].keys()
285 
286  if columns != None:
287  headers = columns
288 
289  if row_nums:
290  headers = ["row"] + headers
291 
292  # append an extra column to all rows of data, as well
293  for i, item in enumerate(data):
294  data[i]["row"] = str(i)
295 
296  if fit == ["all"]:
297  fit = headers
298 
299  if col_width == None:
300  import subprocess
301  table_width = int(0.95*int(subprocess.check_output(["stty", "size"]).split(" ")[1]))
302  col_width = int(table_width/len(headers))
303 
304  if hide != None:
305  for n in range(0, len(hide)):
306  del headers[headers.index(hide[n])]
307 
308  def max_width_of_column(column, data):
309  max_width_found = len(str(data[0][column]))
310  for item in data:
311  current_width = len(str(item[column]))
312  if current_width > max_width_found:
313  max_width_found = current_width
314  if max_width_found > len(column):
315  return max_width_found
316  else:
317  return len(column)
318 
319  def cell(content, header, col_width, fit):
320  if fit:
321  col_width_with_padding = col_width+2
322  col_width_substring = len(str(content))
323  else:
324  col_width_with_padding = col_width-2 if col_width-2 > 0 else 1
325  col_width_substring = col_width-5 if col_width-7 > 0 else 1
326  return ("| {:<%s} " % (col_width_with_padding)).format(str(content)[0:col_width_substring].replace("\n", "")\
327  + ("..." if not(fit) and col_width_substring < len(str(content)) else ""))
328 
329  column_to_width = {}
330 
331  if fit != headers:
332 
333  # get the column widths of fited columns
334  surplus_width = 0
335  for column in fit:
336 
337  if not(column in headers):
338  print("'%s' is not a valid column." % column)
339  return
340 
341  column_to_width[column] = max_width_of_column(column, data)
342  surplus_width += column_to_width[column]-col_width
343 
344  if len(set(headers)-set(fit)) != 0:
345  non_fited_width_surplus = surplus_width/len(set(headers)-set(fit))
346  else:
347  non_fited_width_surplus = 0
348 
349  for column in headers:
350  if not(column in fit):
351  column_to_width[column] = col_width - non_fited_width_surplus
352  else:
353  for column in headers:
354  column_to_width[column] = max_width_of_column(column, data)
355 
356  ascii_string = "\n%s\n\n" % table_name if table_name != None else "\n"
357  for header in headers:
358  ascii_string += cell(header, header, column_to_width[header], header in fit)
359  ascii_string += "\n"
360  horizontal_border = "\n"
361  ascii_string += horizontal_border
362  for item in data:
363  for n in range(0, len(headers)):
364  entry = item[headers[n]]
365  ascii_string += cell(entry, headers[n], column_to_width[headers[n]], headers[n] in fit)
366  ascii_string += "\n"
367  #ascii_string += "\n"
368  ascii_string += horizontal_border
369  ascii_string += "Showing %d rows\n\n" % len(data)
370  print(ascii_string)
371 
373 
374  def __init__(self, data=None):
375  self._data = data if data != None else {}
376 
377  def add_key(self, data, key):
378  if data.__class__.__name__ in ["json_list", "json_dict", "json_basic"]:
379  data = data.data()
380  self._data[key] = data
381 
382 # for strings, integers, etc
384 
385  def __init__(self, data=None):
386  self._data = data if data != None else ""
data_sources.json_data_node.__str__
def __str__(self)
Definition: data_sources.py:172
data_sources.json_file.data
def data(self)
Definition: data_sources.py:72
FastTimerService_cff.range
range
Definition: FastTimerService_cff.py:34
resolutioncreator_cfi.object
object
Definition: resolutioncreator_cfi.py:4
data_sources.node._data
_data
Definition: data_sources.py:17
data_sources.json_data_node._data
_data
Definition: data_sources.py:126
data_sources.json_data_node.data
def data(self)
Definition: data_sources.py:139
data_sources.json_list.__iter__
def __iter__(self)
Definition: data_sources.py:198
data_sources.json_file.raw
def raw(self)
Definition: data_sources.py:75
data_sources.sqlite_schema
Definition: data_sources.py:81
data_sources.json_data_node.make
def make(data)
Definition: data_sources.py:131
join
static std::string join(char **cmd)
Definition: RemoteFile.cc:17
data_sources.node.children
def children(self)
Definition: data_sources.py:27
data_sources.json_dict.__init__
def __init__(self, data=None)
Definition: data_sources.py:374
models.generate
def generate(map_blobs=False, class_name=None)
Definition: models.py:189
relativeConstraints.keys
keys
Definition: relativeConstraints.py:89
data_sources.sqlite_schema.__init__
def __init__(self, sqlite_file_name)
Definition: data_sources.py:83
data_sources.sqlite_schema._file_name
_file_name
Definition: data_sources.py:84
data_sources.json_list.as_dicts
def as_dicts(self, convert_timestamps=False)
Definition: data_sources.py:240
data_sources.node.data
def data(self)
Definition: data_sources.py:20
data_sources.json_file.__str__
def __str__(self)
Definition: data_sources.py:78
data_sources.json_data_node.find
def find(self, type_name)
Definition: data_sources.py:158
data_sources.sqlite_schema.data
def data(self)
Definition: data_sources.py:116
submitPVValidationJobs.split
def split(sequence, size)
Definition: submitPVValidationJobs.py:352
data_sources.json_file._sub_data
_sub_data
Definition: data_sources.py:70
str
#define str(s)
Definition: TestProcessor.cc:51
data_sources.json_list.reset
def reset(self)
Definition: data_sources.py:209
data_sources.json_file._file_name
_file_name
Definition: data_sources.py:65
data_sources.json_list.first
def first(self)
Definition: data_sources.py:183
data_sources.json_data_node.get
def get(self, *args)
Definition: data_sources.py:145
data_sources.node._child_nodes
_child_nodes
Definition: data_sources.py:18
data_sources.json_list.indices
def indices(self, *indices)
Definition: data_sources.py:214
data_sources.node.add_child
def add_child(self, node_data)
Definition: data_sources.py:23
data_sources.data_source
Definition: data_sources.py:45
data_sources.json_data_node.set
def set(self, data)
Definition: data_sources.py:154
print
void print(TMatrixD &m, const char *label=nullptr, bool mathematicaFormat=false)
Definition: Utilities.cc:46
data_sources.json_list.last
def last(self)
Definition: data_sources.py:187
mps_setup.append
append
Definition: mps_setup.py:85
data_sources.json_list.__init__
def __init__(self, data=None)
Definition: data_sources.py:179
data_sources.node.right_child
def right_child(self)
Definition: data_sources.py:36
createfilelist.int
int
Definition: createfilelist.py:10
data_sources.json_data_node
Definition: data_sources.py:122
data_sources.json_basic.__init__
def __init__(self, data=None)
Definition: data_sources.py:385
data_sources.data_source.get_data
def get_data(self)
Definition: data_sources.py:50
ComparisonHelper::zip
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
Definition: L1TStage2CaloLayer1.h:41
data_sources.json_data_node.raw
def raw(self)
Definition: data_sources.py:142
data_sources.node.child
def child(self, index)
Definition: data_sources.py:30
data_sources.json_dict.add_key
def add_key(self, data, key)
Definition: data_sources.py:377
data_sources.data_source.__init__
def __init__(self)
Definition: data_sources.py:47
data_sources.node.__init__
def __init__(self, data=None)
Definition: data_sources.py:16
models.class_name_to_column
def class_name_to_column(cls)
Definition: models.py:59
data_sources.node.is_leaf
def is_leaf(self)
Definition: data_sources.py:39
data_sources.json_list.as_table
def as_table(self, fit=["all"], columns=None, hide=None, col_width=None, row_nums=False)
Definition: data_sources.py:258
data_sources.json_list.get_members
def get_members(self, member_name)
Definition: data_sources.py:228
data_formats._objects_to_dicts
def _objects_to_dicts(data)
Definition: data_formats.py:97
data_sources.json_list
Definition: data_sources.py:175
data_sources.json_list.iterator_index
iterator_index
Definition: data_sources.py:181
format
pileupCalc.upper
upper
Definition: pileupCalc.py:214
data_sources.data_source.__repr__
def __repr__(self)
Definition: data_sources.py:53
data_sources.json_file.__init__
def __init__(self, json_file_name)
Definition: data_sources.py:63
genParticles_cff.map
map
Definition: genParticles_cff.py:11
data_sources.json_data_node.__init__
def __init__(self, data=None)
Definition: data_sources.py:125
data_sources.json_file
Definition: data_sources.py:57
data_sources.node.left_child
def left_child(self)
Definition: data_sources.py:33
data_sources.json_basic
Definition: data_sources.py:383
data_sources.json_list.add_child
def add_child(self, data)
Definition: data_sources.py:191
data_sources.json_dict
Definition: data_sources.py:372
data_sources.node
Definition: data_sources.py:12
data_sources.json_list.next
def next(self)
Definition: data_sources.py:201
data_sources.node.__str__
def __str__(self)
Definition: data_sources.py:42
python.rootplot.root2matplotlib.replace
def replace(string, replacements)
Definition: root2matplotlib.py:444