CMS 3D CMS Logo

data_sources.py
Go to the documentation of this file.
1 """
2 
3 This file contains the base DataSource class, and all sub classes that implement their own methods for parsing data.
4 
5 """
6 
7 import json
8 
9 # data_source will extend this
class node(object):
    """Minimal tree node: one payload plus an ordered list of child nodes."""

    # class-level placeholders; __init__ rebinds both on every instance
    _data = None
    _child_nodes = None

    def __init__(self, data=None):
        self._child_nodes = []
        self._data = data

    def data(self):
        """Return the payload held by this node."""
        return self._data

    def add_child(self, node_data):
        """Wrap node_data in a new node and append it as the last child."""
        self._child_nodes.append(node(node_data))

    def children(self):
        """Return the list of child nodes (the live list, not a copy)."""
        return self._child_nodes

    def child(self, index):
        """Return the child at position index."""
        siblings = self.children()
        return siblings[index]

    def left_child(self):
        """Return the first child."""
        siblings = self.children()
        return siblings[0]

    def right_child(self):
        """Return the second child."""
        siblings = self.children()
        return siblings[1]

    def is_leaf(self):
        """Return True when the node has no children."""
        child_count = len(self.children())
        return child_count == 0

    def __str__(self):
        payload = self.data()
        rendered_children = str(self.children())
        return "<node data='%s' children=%s>" % (payload, rendered_children)
42 
44 
def __init__(self):
    """Create a data source; the base implementation stores nothing."""
    # NOTE(review): if this class extends node (as the file's comments say),
    # node.__init__ is not invoked here, so _child_nodes stays None - confirm
    # that this is intended.
    return None
47 
def get_data(self):
    """Return the source's data; the base implementation has none."""
    nothing = []
    return nothing
50 
def __repr__(self):
    """Return the fixed tag used to represent a data source."""
    tag = "<data_source>"
    return tag
53 
54 # a json file data source first reads json from the file given, and then provides methods to navigate it and select fields
56 
57  # sub_data is the current subtree of the json data
58  # sub_data is used for chaining navigation methods
59  # Note: _data is defined since data_source extends node, but defining it here for convenience
60  _data, _sub_data, _file_name = None, None, None
def __init__(self, json_file_name):
    """Read json_file_name and keep its parsed contents.

    The parsed object is stored both as _data and as _sub_data (the subtree
    used by chained navigation, which starts out as the whole document).
    """
    self._file_name = json_file_name
    with open(self._file_name, "r") as handle:
        text = handle.read()
    parsed = json.loads(text)
    self._data = parsed
    self._sub_data = parsed
69 
def data(self):
    """Return the parsed JSON wrapped in the matching json_data_node type."""
    wrapped = json_data_node.make(self._data)
    return wrapped
72 
def raw(self):
    """Return the parsed JSON object itself, without any wrapper."""
    unwrapped = self._data
    return unwrapped
75 
def __str__(self):
    """Use the same text as __repr__ for the string form."""
    text = self.__repr__()
    return text
78 
80  _data, _sub_data, _file_name = None, None, None
def __init__(self, sqlite_file_name, query_object=None):
    """Load every table of an SQLite file into a json_data_node structure.

    sqlite_file_name -- path of the SQLite database file to read.
    query_object     -- optional object exposing to_sql(); previously this
                        name was referenced without ever being defined, so
                        the constructor always raised NameError.  It is now
                        an optional argument defaulting to None.

    With query_object left as None, _data becomes
    {table name: [ {column name: str(value), ...}, ... ]} wrapped by
    json_data_node.make.  The connection is closed before returning.
    """
    import sqlite3
    from contextlib import closing

    def quoted(identifier):
        # double-quote an SQL identifier so unusual table/column names
        # cannot break (or alter) the generated statement
        return '"%s"' % str(identifier).replace('"', '""')

    self._file_name = sqlite_file_name
    # closing() guarantees connection.close(); a bare sqlite3 connection used
    # as a context manager only commits or rolls back
    with closing(sqlite3.connect(self._file_name)) as connection:
        cursor = connection.cursor()
        if query_object is None:
            # discover the schema: which tables exist, and their columns
            table_rows = cursor.execute("select name from sqlite_master where type = 'table'").fetchall()
            table_to_columns = {}
            for table_row in table_rows:
                table = table_row[0]
                column_rows = cursor.execute("pragma table_info(%s)" % quoted(table)).fetchall()
                # element 1 of each table_info row is the column name
                table_to_columns[table] = [str(column[1]) for column in column_rows]

            # read every row of every table, stringifying all values
            table_to_data = {}
            for table, column_names in table_to_columns.items():
                column_string = ",".join(quoted(name) for name in column_names)
                sql_query = "select %s from %s" % (column_string, quoted(table))
                rows = cursor.execute(sql_query).fetchall()
                table_to_data[str(table)] = [dict(zip(column_names, map(str, row))) for row in rows]
            self._data = json_data_node.make(table_to_data)
        else:
            # NOTE(review): the statement is built but never executed, so
            # _data stays None on this path - behaviour kept from the original.
            sql_query = query_object.to_sql()
113 
def data(self):
    """Return whatever the constructor stored in _data."""
    stored = self._data
    return stored
116 
117 # used for chaining json-navigation methods
118 # when a method is called initially on the data, an object of this class is returned,
119 # then the methods on that object return an object of this class again.
121 
122  _data = None
def __init__(self, data=None):
    """Wrap data (None when omitted) without copying or converting it."""
    self._data = data
125 
126  # use this instead of having to decide on which kind of json node should
127  # be created in code that shouldn't be doing it.
@staticmethod
def make(data):
    """Wrap data in the json node class that matches its shape.

    Lists become json_list, dictionaries become json_dict and anything else
    becomes json_basic.  isinstance is used so that list/dict subclasses
    (for example collections.OrderedDict) are treated as containers too,
    instead of falling through to json_basic.
    """
    if isinstance(data, list):
        return json_list(data)
    if isinstance(data, dict):
        return json_dict(data)
    return json_basic(data)
136 
def data(self):
    """Return the wrapped value."""
    wrapped_value = self._data
    return wrapped_value
139 
def raw(self):
    """Return the wrapped value; same result as data()."""
    wrapped_value = self._data
    return wrapped_value
142 
def get(self, *args):
    """Navigate into the wrapped data by one or more keys/indices.

    One argument indexes the wrapped data directly and wraps the result;
    several arguments are applied left to right, one get() per key.  With no
    arguments the node itself is returned.
    """
    if len(args) == 1:
        (only_key,) = args
        selected = self.data()[only_key]
        return json_data_node.make(selected)

    cursor = self
    for step in args:
        cursor = cursor.get(step)
    return cursor
151 
def set(self, data):
    """Replace the wrapped value with data and return this node (chainable)."""
    setattr(self, "_data", data)
    return self
155 
def find(self, type_name):
    """Collect every value in the wrapped structure whose type is type_name.

    type_name is a type object (e.g. list, dict, str), matched exactly
    against type(value).  The wrapped value itself is tested first, then the
    search descends into list items and dictionary values.  Descent uses
    isinstance so list/dict subclasses (e.g. OrderedDict) are searched too.

    Returns a plain list of the raw matching values.
    """
    matches = []
    if type(self._data) == type_name:
        matches.append(self._data)

    if isinstance(self._data, list):
        for item in self._data:
            matches += json_data_node.make(item).find(type_name)
    elif isinstance(self._data, dict):
        for key in self._data:
            matches += json_data_node.make(self._data[key]).find(type_name)
    return matches
169 
def __str__(self):
    """Show the wrapped value inside a <json_data_node ...> tag."""
    rendered = str(self._data)
    return "<json_data_node data='%s'>" % rendered
172 
174 
175  iterator_index = None
176 
def __init__(self, data=None):
    """Wrap data as a list node; a missing value becomes a new empty list.

    None is detected by identity, so values with custom comparison
    operators are never asked to compare themselves with None.
    """
    self._data = data if data is not None else []
    # cursor used by next()/reset()
    self.iterator_index = 0
180 
def first(self):
    """Return the element at index 0, as produced by get()."""
    return self.get(0)
184 
def last(self):
    """Return the element at the final index, as produced by get()."""
    final_index = len(self.data()) - 1
    return self.get(final_index)
188 
def add_child(self, data):
    """Append data to the list, unwrapping json node wrappers first.

    A value whose class is named json_list, json_dict or json_basic is
    replaced by its data() before being appended.
    """
    wrapper_names = ("json_list", "json_dict", "json_basic")
    is_wrapper = data.__class__.__name__ in wrapper_names
    self._data.append(data.data() if is_wrapper else data)
193 
194  # iterator methods
195 
def __iter__(self):
    """Return a fresh, independent iterator over the raw elements.

    Returning self here made every loop share the single iterator_index
    cursor: breaking out of a loop left the cursor mid-list (so the next
    loop resumed from there) and nested loops over the same list interfered
    with each other.  A new iterator per call yields the same elements
    without that shared state; next()/reset() remain for manual stepping.
    """
    return iter(self._data)
198 
def next(self):
    """Return the next raw element, advancing the list's own cursor.

    When the cursor is past the last element it is reset to the start and
    StopIteration is raised, so a following call starts over.
    """
    if self.iterator_index >= len(self._data):
        self.reset()
        raise StopIteration
    element = self._data[self.iterator_index]
    self.iterator_index += 1
    return element

# Python 3 looks up __next__ instead of next; expose the same function under
# both names so manual stepping works on either interpreter.
__next__ = next
206 
def reset(self):
    """Move the cursor used by next() back to the first element."""
    setattr(self, "iterator_index", 0)
209 
210  # misc methods
211 
def indices(self, *indices):
    """Return a new json node holding the elements at the given positions.

    Each argument is converted with int(); if any conversion fails the
    method returns None.  Positions that cannot be fetched are skipped
    silently.
    """
    picked = []
    for requested in indices:
        try:
            position = int(requested)
        except Exception:
            # not convertible to an integer: give up on the whole request
            return
        try:
            picked.append(self.get(position).data())
        except Exception:
            # position didn't exist - best effort, skip it
            pass
    return json_data_node.make(picked)
225 
def get_members(self, member_name):
    """Return the attribute member_name of every element, as a json node.

    member_name -- attribute name; must be a string, else TypeError.

    Returns None when the elements are not all of the same class.  An empty
    list yields an empty json node (it used to raise IndexError).  The
    result is built as a real list so json_data_node.make wraps it as a
    list on Python 3 as well, where map() returns an iterator.
    """
    try:
        string_types = (str, unicode)  # Python 2
    except NameError:
        string_types = (str,)  # Python 3 has no separate unicode type
    if not isinstance(member_name, string_types):
        raise TypeError("Value given for member name must be a string.")

    items = self.data()
    if len(items) == 0:
        return json_data_node.make([])

    # assumes self.data() is a list; all elements must share one class
    type_of_first_item = items[0].__class__
    if any(item.__class__ != type_of_first_item for item in items):
        return None
    return json_data_node.make([getattr(item, member_name) for item in items])
235 
236  # format methods
237 
def as_dicts(self, convert_timestamps=False):
    """Convert a list of ORM objects into a list of dictionaries.

    convert_timestamps -- forwarded unchanged to each element's as_dicts().

    Returns a real list (also on Python 3, where map() would have produced
    an iterator).  Prints a message and returns None when the list is empty
    or the first element is not one of the recognised ORM classes.
    """
    items = self.data()
    if len(items) == 0:
        print("\nNo data to convert to dictionaries.\n")
        return

    # NOTE(review): as_table() in this file also recognises
    # "GlobalTagMapRequest"; confirm whether it should be accepted here too.
    orm_class_names = ["GlobalTag", "GlobalTagMap", "Tag", "IOV", "Payload"]
    if self.get(0).data().__class__.__name__ in orm_class_names:
        return [item.as_dicts(convert_timestamps=convert_timestamps) for item in items]

    print("Data in json_list was not the correct type.")
251 
252  # return ascii version of data
253  # expects array of dicts
254  # fit is a list of columns that should be kept at their full size
255  # col_width is the column width to be used as a guide
def as_table(self, fit=["all"], columns=None, hide=None, col_width=None, row_nums=False):
    """Print the list as an ASCII table.

    Expects a list of dictionaries, or a list of ORM objects (which are
    converted to dictionaries first).

    fit       -- columns to keep at their full width; ["all"] (the default,
                 never mutated here) means every column.
    columns   -- explicit column order; defaults to the ORM headers or to
                 the keys of the first dictionary.
    hide      -- columns to drop; ValueError if one is not a current column.
    col_width -- guide width for non-fitted columns; derived from the
                 terminal size ("stty size", falling back to 80 columns)
                 when omitted.
    row_nums  -- when True, prepend a "row" column with the row index.

    Returns None; the table is written with print.  Changes from the
    previous version: works on Python 3 (print call, list copies of key
    views, integer division, decoded stty output), no longer mutates the
    caller's columns list or row dictionaries, and only imports the
    project ORM modules when the rows actually are ORM objects.
    """
    if len(self.data()) == 0:
        print("\nNo data to draw table with.\n")
        return

    orm_class_names = ["GlobalTag", "GlobalTagMap", "GlobalTagMapRequest", "Tag", "IOV", "Payload"]
    first_item = self.get(0).data()

    # if the list contains ORM objects, convert them all to dictionaries,
    # otherwise assume it is already a list of dictionaries
    if first_item.__class__.__name__ in orm_class_names:
        import models
        from data_formats import _objects_to_dicts
        from querying import connection  # not referenced below; import kept from the original

        models_dict = models.generate()
        data = _objects_to_dicts(self.data()).data()
        table_name = models.class_name_to_column(first_item.__class__).upper()
        # copy the headers so hiding columns never alters the ORM class
        headers = list(models_dict[first_item.__class__.__name__.lower()].headers)
    else:
        table_name = None
        data = self.data()
        # headers stored in the first dictionary (copied: keys() is a view on Python 3)
        headers = list(data[0].keys())

    if columns is not None:
        # copy so that hiding columns below cannot modify the caller's list
        headers = list(columns)

    if row_nums:
        headers = ["row"] + headers
        # add the extra column to copies of the rows, leaving the caller's data untouched
        data = [dict(item, row=str(number)) for number, item in enumerate(data)]

    if fit == ["all"]:
        # deliberately the same list object as headers, so hidden columns
        # disappear from both and the "fit everything" branch is taken below
        fit = headers

    if col_width is None:
        import subprocess
        try:
            terminal_columns = int(subprocess.check_output(["stty", "size"]).decode().split()[1])
        except (subprocess.CalledProcessError, OSError, IndexError, ValueError):
            # no usable terminal (e.g. output is piped): assume a conventional width
            terminal_columns = 80
        table_width = int(0.95 * terminal_columns)
        col_width = table_width // len(headers)

    if hide is not None:
        for hidden in hide:
            headers.remove(hidden)

    def max_width_of_column(column, rows):
        # widest of the column title and every value rendered with str()
        widest_value = max(len(str(row[column])) for row in rows)
        return max(widest_value, len(column))

    def cell(content, width, fitted):
        # render one "| value " cell; non-fitted cells are cut and marked with "..."
        text = str(content)
        if fitted:
            padded_width = width + 2
            visible_chars = len(text)
        else:
            padded_width = width - 2 if width - 2 > 0 else 1
            visible_chars = width - 5 if width - 7 > 0 else 1
        truncated = (not fitted) and visible_chars < len(text)
        shown = text[0:visible_chars].replace("\n", "") + ("..." if truncated else "")
        return ("| {:<%s} " % padded_width).format(shown)

    column_to_width = {}

    if fit != headers:
        # fitted columns get their natural width; the width they take (or
        # give back) relative to col_width is shared among the other columns
        surplus_width = 0
        for column in fit:
            if column not in headers:
                print("'%s' is not a valid column." % column)
                return
            column_to_width[column] = max_width_of_column(column, data)
            surplus_width += column_to_width[column] - col_width

        non_fitted_columns = set(headers) - set(fit)
        if len(non_fitted_columns) != 0:
            # floor division keeps widths integral (they end up in a format spec)
            non_fitted_width_surplus = surplus_width // len(non_fitted_columns)
        else:
            non_fitted_width_surplus = 0

        for column in headers:
            if column not in fit:
                column_to_width[column] = col_width - non_fitted_width_surplus
    else:
        for column in headers:
            column_to_width[column] = max_width_of_column(column, data)

    horizontal_border = "\n"
    pieces = ["\n%s\n\n" % table_name if table_name is not None else "\n"]
    for header in headers:
        pieces.append(cell(header, column_to_width[header], header in fit))
    pieces.append("\n")
    pieces.append(horizontal_border)
    for item in data:
        for header in headers:
            pieces.append(cell(item[header], column_to_width[header], header in fit))
        pieces.append("\n")
    pieces.append(horizontal_border)
    pieces.append("Showing %d rows\n\n" % len(data))
    print("".join(pieces))
369 
371 
def __init__(self, data=None):
    """Wrap data as a dictionary node; a missing value becomes a new empty dict.

    None is detected by identity, so values with custom comparison
    operators are never asked to compare themselves with None.
    """
    self._data = data if data is not None else {}
374 
def add_key(self, data, key):
    """Store data under key, unwrapping json node wrappers first.

    A value whose class is named json_list, json_dict or json_basic is
    replaced by its data() before being stored.
    """
    wrapper_names = ("json_list", "json_dict", "json_basic")
    is_wrapper = data.__class__.__name__ in wrapper_names
    self._data[key] = data.data() if is_wrapper else data
379 
380 # for strings, integers, etc
382 
def __init__(self, data=None):
    """Wrap a scalar (string, number, ...); a missing value becomes "".

    None is detected by identity, so values with custom comparison
    operators are never asked to compare themselves with None.
    """
    self._data = data if data is not None else ""
def add_child(self, node_data)
Definition: data_sources.py:21
def __str__(self)
Definition: data_sources.py:40
def child(self, index)
Definition: data_sources.py:28
def right_child(self)
Definition: data_sources.py:34
def __init__(self, data=None)
def left_child(self)
Definition: data_sources.py:31
def get_members(self, member_name)
def replace(string, replacements)
def add_key(self, data, key)
def children(self)
Definition: data_sources.py:25
def as_dicts(self, convert_timestamps=False)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:65
def as_table(self, fit=["all"], columns=None, hide=None, col_width=None, row_nums=False)
def generate(map_blobs=False, class_name=None)
Definition: models.py:187
def class_name_to_column(cls)
Definition: models.py:57
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def __init__(self, data=None)
def find(self, type_name)
def __init__(self, json_file_name)
Definition: data_sources.py:61
def __init__(self, data=None)
Definition: data_sources.py:14
def __init__(self, data=None)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def __init__(self, data=None)
def is_leaf(self)
Definition: data_sources.py:37
def _objects_to_dicts(data)
Definition: data_formats.py:96
def __init__(self, sqlite_file_name)
Definition: data_sources.py:81
def indices(self, indices)
double split
Definition: MVATrainer.cc:139
def add_child(self, data)