CMS 3D CMS Logo

data_sources.py
Go to the documentation of this file.
1 """
2 
3 This file contains the base DataSource class, and all sub classes that implement their own methods for parsing data.
4 
5 """
6 from __future__ import print_function
7 
8 import json
9 
10 # data_source will extend this
class node(object):
    """Minimal tree node: one payload plus an ordered list of child nodes."""

    # Class-level defaults; __init__ replaces both on every instance.
    _data = None
    _child_nodes = None

    def __init__(self, data=None):
        """Store `data` as the payload and start with no children."""
        self._data = data
        self._child_nodes = []

    def data(self):
        """Return the payload stored in this node."""
        return self._data

    def add_child(self, node_data):
        """Wrap `node_data` in a new node and append it to the children."""
        self._child_nodes.append(node(node_data))

    def children(self):
        """Return the list of child nodes (the live list, not a copy)."""
        return self._child_nodes

    def child(self, index):
        """Return the child at `index`; raises IndexError if it does not exist."""
        return self.children()[index]

    def left_child(self):
        """Return the first child; raises IndexError if there is none."""
        return self.children()[0]

    def right_child(self):
        """Return the second child; raises IndexError if there are fewer than two."""
        return self.children()[1]

    def is_leaf(self):
        """Return True when this node has no children."""
        return len(self.children()) == 0

    def __repr__(self):
        # Lists render their items with repr(), so defining __repr__ is what
        # makes nested children readable instead of "<... object at 0x...>".
        return "<node data='%s' children=%s>" % (self.data(), str(self.children()))

    def __str__(self):
        """Return the same text as repr(): payload plus the rendered children."""
        return "<node data='%s' children=%s>" % (self.data(), str(self.children()))
43 
45 
46  def __init__(self):
47  pass
48 
49  def get_data(self):
50  return []
51 
52  def __repr__(self):
53  return "<data_source>"
54 
55 # a json file data source first reads json from the file given, and then provides methods to navigate it and select fields
57 
58  # sub_data is the current subtree of the json data
59  # sub_data is used for chaining navigation methods
60  # Note: _data is defined since data_source extends node, but defining it here for convenience
61  _data, _sub_data, _file_name = None, None, None
62  def __init__(self, json_file_name):
63  # read the file, then parse into JSON object
64  self._file_name = json_file_name
65  with open(self._file_name, "r") as handle:
66  contents = "".join(handle.readlines())
67  data = json.loads(contents)
68  self._data = data
69  self._sub_data = data
70 
71  def data(self):
72  return json_data_node.make(self._data)
73 
74  def raw(self):
75  return self._data
76 
77  def __str__(self):
78  return self.__repr__()
79 
81  _data, _sub_data, _file_name = None, None, None
82  def __init__(self, sqlite_file_name):
83  self._file_name = sqlite_file_name
84  # import sqlite3 and connect to the database file
85  import sqlite3
86  connection = sqlite3.connect(self._file_name)
87  cursor = connection.cursor()
88  if query_object == None:
89  # try to query the file to get table and column data
90  tables = cursor.execute("select name from sqlite_master where type = 'table'")
91 
92  # now build a mapping of tables to columns - with a dictionary
93  table_to_columns = {}
94  for table in tables.fetchall():
95  table_to_columns[table[0]] = []
96  # now query columns for this table
97  columns = cursor.execute("pragma table_info(%s)" % table[0])
98  for column in columns.fetchall():
99  table_to_columns[table[0]].append(str(column[1]))
100 
101  # now query with the mapping
102  table_to_data = {}
103  for table in table_to_columns:
104  # query with all columns
105  column_string = ",".join(table_to_columns[table])
106  sql_query = "select %s from %s" % (column_string, table)
107  results = cursor.execute(sql_query).fetchall()
108  for n in range(0, len(results)):
109  results[n] = dict(zip(table_to_columns[table], map(str, results[n])))
110  table_to_data[str(table)] = results
111  self._data = json_data_node.make(table_to_data)
112  else:
113  sql_query = query_object.to_sql()
114 
115  def data(self):
116  return self._data
117 
118 # used for chaining json-navigation methods
119 # when a method is called initially on the data, an object of this class is returned,
120 # then the methods on that object return an object of this class again.
122 
123  _data = None
124  def __init__(self, data=None):
125  self._data = data
126 
127  # use this instead of having to decide on which kind of json node should
128  # be created in code that shouldn't be doing it.
129  @staticmethod
130  def make(data):
131  if isinstance(data, list):
132  return json_list(data)
133  elif isinstance(data, dict):
134  return json_dict(data)
135  else:
136  return json_basic(data)
137 
138  def data(self):
139  return self._data
140 
141  def raw(self):
142  return self._data
143 
144  def get(self, *args):
145  current_json_node = self
146  if len(args) == 1:
147  data_to_use = current_json_node.data()[args[0]]
148  return json_data_node.make(data_to_use)
149  for key in args:
150  current_json_node = current_json_node.get(key)
151  return current_json_node
152 
153  def set(self, data):
154  self._data = data
155  return self
156 
157  def find(self, type_name):
158  # traverse json_data_node structure, and find all lists
159  # if this node in the structure is a list, return all sub lists
160  lists = []
161  if isinstance(self._data, type_name):
162  lists.append(self._data)
163  if isinstance(self._data, list):
164  for item in self._data:
165  lists += json_data_node.make(item).find(type_name)
166  elif isinstance(self._data, dict):
167  for key in self._data:
168  lists += json_data_node.make(self._data[key]).find(type_name)
169  return lists
170 
171  def __str__(self):
172  return "<json_data_node data='%s'>" % str(self._data)
173 
175 
176  iterator_index = None
177 
178  def __init__(self, data=None):
179  self._data = data if data != None else []
180  self.iterator_index = 0
181 
182  def first(self):
183  data = self.get(0)
184  return data
185 
186  def last(self):
187  data = self.get(len(self.data())-1)
188  return data
189 
190  def add_child(self, data):
191  if data.__class__.__name__ in ["json_list", "json_dict", "json_basic"]:
192  data = data.data()
193  self._data.append(data)
194 
195  # iterator methods
196 
197  def __iter__(self):
198  return self
199 
200  def next(self):
201  if self.iterator_index > len(self._data)-1:
202  self.reset()
203  raise StopIteration
204  else:
205  self.iterator_index += 1
206  return self._data[self.iterator_index-1]
207 
208  def reset(self):
209  self.iterator_index = 0
210 
211  # misc methods
212 
213  def indices(self, *indices):
214  final_list = []
215  for index in indices:
216  try:
217  index = int(index)
218  try:
219  final_list.append(self.get(index).data())
220  except Exception:
221  # index didn't exist
222  pass
223  except Exception:
224  return
225  return json_data_node.make(final_list)
226 
227  def get_members(self, member_name):
228  # assume self.data() is a list
229  if not(type(member_name) in [str, unicode]):
230  raise TypeError("Value given for member name must be a string.")
231  type_of_first_item = self.data()[0].__class__
232  for item in self.data():
233  if item.__class__ != type_of_first_item:
234  return None
235  return json_data_node.make(map(lambda item : getattr(item, member_name), self.data()))
236 
237  # format methods
238 
239  def as_dicts(self, convert_timestamps=False):
240 
241  if len(self.data()) == 0:
242  print("\nNo data to convert to dictionaries.\n")
243  return
244 
245  if self.get(0).data().__class__.__name__ in ["GlobalTag", "GlobalTagMap", "Tag", "IOV", "Payload"]:
246  # copy data
247  new_data = map(lambda item : item.as_dicts(convert_timestamps=convert_timestamps), [item for item in self.data()])
248  return new_data
249  else:
250  print("Data in json_list was not the correct type.")
251 
252 
253  # return ascii version of data
254  # expects array of dicts
255  # fit is a list of columns that should be kept at their full size
256  # col_width is the column width to be used as a guide
257  def as_table(self, fit=["all"], columns=None, hide=None, col_width=None, row_nums=False):
258 
259  if len(self.data()) == 0:
260  print("\nNo data to draw table with.\n")
261  return
262 
263  import models
264  models_dict = models.generate()
265 
266  # if the list contains ORM objects, then convert them all to dictionaries,
267  # otherwise, leave the list as it is - assume it is already a list of dictionaries
268  if self.get(0).data().__class__.__name__ in ["GlobalTag", "GlobalTagMap", "GlobalTagMapRequest", "Tag", "IOV", "Payload"]:
269 
270  from data_formats import _objects_to_dicts
271  data = _objects_to_dicts(self.data()).data()
272 
273  from querying import connection
274  table_name = models.class_name_to_column(self.get(0).data().__class__).upper()
275  # set headers to those found in ORM models
276  # do it like this so we copy the headers
277  # for example, if headers are hidden by the user, then this will change the orm class if we don't do it like this
278  headers = [header for header in models_dict[self.get(0).data().__class__.__name__.lower()].headers]
279  else:
280  table_name = None
281  data = self.data()
282  # gets headers stored in first dictionary
283  headers = data[0].keys()
284 
285  if columns != None:
286  headers = columns
287 
288  if row_nums:
289  headers = ["row"] + headers
290 
291  # append an extra column to all rows of data, as well
292  for i, item in enumerate(data):
293  data[i]["row"] = str(i)
294 
295  if fit == ["all"]:
296  fit = headers
297 
298  if col_width == None:
299  import subprocess
300  table_width = int(0.95*int(subprocess.check_output(["stty", "size"]).split(" ")[1]))
301  col_width = int(table_width/len(headers))
302 
303  if hide != None:
304  for n in range(0, len(hide)):
305  del headers[headers.index(hide[n])]
306 
307  def max_width_of_column(column, data):
308  max_width_found = len(str(data[0][column]))
309  for item in data:
310  current_width = len(str(item[column]))
311  if current_width > max_width_found:
312  max_width_found = current_width
313  if max_width_found > len(column):
314  return max_width_found
315  else:
316  return len(column)
317 
318  def cell(content, header, col_width, fit):
319  if fit:
320  col_width_with_padding = col_width+2
321  col_width_substring = len(str(content))
322  else:
323  col_width_with_padding = col_width-2 if col_width-2 > 0 else 1
324  col_width_substring = col_width-5 if col_width-7 > 0 else 1
325  return ("| {:<%s} " % (col_width_with_padding)).format(str(content)[0:col_width_substring].replace("\n", "")\
326  + ("..." if not(fit) and col_width_substring < len(str(content)) else ""))
327 
328  column_to_width = {}
329 
330  if fit != headers:
331 
332  # get the column widths of fited columns
333  surplus_width = 0
334  for column in fit:
335 
336  if not(column in headers):
337  print("'%s' is not a valid column." % column)
338  return
339 
340  column_to_width[column] = max_width_of_column(column, data)
341  surplus_width += column_to_width[column]-col_width
342 
343  if len(set(headers)-set(fit)) != 0:
344  non_fited_width_surplus = surplus_width/len(set(headers)-set(fit))
345  else:
346  non_fited_width_surplus = 0
347 
348  for column in headers:
349  if not(column in fit):
350  column_to_width[column] = col_width - non_fited_width_surplus
351  else:
352  for column in headers:
353  column_to_width[column] = max_width_of_column(column, data)
354 
355  ascii_string = "\n%s\n\n" % table_name if table_name != None else "\n"
356  for header in headers:
357  ascii_string += cell(header, header, column_to_width[header], header in fit)
358  ascii_string += "\n"
359  horizontal_border = "\n"
360  ascii_string += horizontal_border
361  for item in data:
362  for n in range(0, len(headers)):
363  entry = item[headers[n]]
364  ascii_string += cell(entry, headers[n], column_to_width[headers[n]], headers[n] in fit)
365  ascii_string += "\n"
366  #ascii_string += "\n"
367  ascii_string += horizontal_border
368  ascii_string += "Showing %d rows\n\n" % len(data)
369  print(ascii_string)
370 
372 
373  def __init__(self, data=None):
374  self._data = data if data != None else {}
375 
376  def add_key(self, data, key):
377  if data.__class__.__name__ in ["json_list", "json_dict", "json_basic"]:
378  data = data.data()
379  self._data[key] = data
380 
381 # for strings, integers, etc
383 
384  def __init__(self, data=None):
385  self._data = data if data != None else ""
def add_child(self, node_data)
Definition: data_sources.py:22
def __str__(self)
Definition: data_sources.py:41
def child(self, index)
Definition: data_sources.py:29
def right_child(self)
Definition: data_sources.py:35
def __init__(self, data=None)
def left_child(self)
Definition: data_sources.py:32
def get_members(self, member_name)
def replace(string, replacements)
def add_key(self, data, key)
def children(self)
Definition: data_sources.py:26
def as_dicts(self, convert_timestamps=False)
S & print(S &os, JobReport::InputFile const &f)
Definition: JobReport.cc:65
def as_table(self, fit=["all"], columns=None, hide=None, col_width=None, row_nums=False)
def generate(map_blobs=False, class_name=None)
Definition: models.py:188
def class_name_to_column(cls)
Definition: models.py:58
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def __init__(self, data=None)
def find(self, type_name)
def __init__(self, json_file_name)
Definition: data_sources.py:62
def __init__(self, data=None)
Definition: data_sources.py:15
def __init__(self, data=None)
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def __init__(self, data=None)
def is_leaf(self)
Definition: data_sources.py:38
def _objects_to_dicts(data)
Definition: data_formats.py:96
def __init__(self, sqlite_file_name)
Definition: data_sources.py:82
def indices(self, indices)
#define str(s)
double split
Definition: MVATrainer.cc:139
def add_child(self, data)