CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
data_sources.py
Go to the documentation of this file.
1 """
2 
3 This file contains the base DataSource class, and all sub classes that implement their own methods for parsing data.
4 
5 """
6 
7 import json
8 
# data_source will extend this, and formula trees will use this
class node():
    """Generic tree node: a payload plus an ordered list of child nodes."""

    # class-level defaults; instance values are assigned in __init__
    _data = None
    _child_nodes = None

    def __init__(self, data=None):
        self._data = data
        self._child_nodes = []

    def data(self):
        """Return the payload stored at this node."""
        return self._data

    def add_child(self, node_data):
        """Wrap *node_data* in a new node and append it to the children."""
        self._child_nodes.append(node(node_data))

    def children(self):
        """Return the ordered list of child nodes."""
        return self._child_nodes

    def child(self, index):
        """Return the child at position *index*."""
        return self.children()[index]

    def left_child(self):
        """Return the first child (binary-tree view of the node)."""
        return self.child(0)

    def right_child(self):
        """Return the second child (binary-tree view of the node)."""
        return self.child(1)

    def is_leaf(self):
        """True when this node has no children."""
        return not self.children()

    def __str__(self):
        return "<node data='%s' children=%s>" % (self.data(), str(self.children()))
42 
# tree is used to hold query structures
# and propositional formula structures for where clauses in queries
class tree():
    """Thin wrapper around a root node."""

    _root = None

    def __init__(self, root_data):
        self._root = node(root_data)

    def set_root(self, node):
        # Bug fix: the original assigned to self.root, which shadowed the
        # root() accessor below with a plain attribute and left _root stale.
        self._root = node

    def root(self):
        """Return the root node of the tree."""
        return self._root
57 
58 # will need methods to add comparison terms to specific nodes
59 # comparison terms - a=b, a>=b, a<=b, etc.
61 
62  # model is the class that we can prepend each column name with
63  def to_sql(self, model_name=None, root=None):
64  if root == None:
65  root = self.root()
66  # do in-order traversal to get SQL
67  if root.is_leaf():
68  if model != None:
69  return model + "." + root.data().strip()
70  else:
71  return root.data().strip()
72  else:
73  child_nodes = [self.to_sql(model, child_node) for child_node in root.children()]
74  return "(%s)" % ((" " + root.data().upper() + " ").join(child_nodes))
75 
76 # will have a query data_source object as its root
77 # will have methods for turning query held at root into sql query string
79 
    def to_sql(self):
        """Build a SQL query string from the query data held at the root node.

        Not implemented yet - currently a stub that returns None.
        """
        # take elements from root node to construct sql query
        #sql_query = "%s %s FROM %s %s" % (self._)
        pass
84 
class data_source(node):
    """Base class for all data sources; concrete sources override get_data()."""

    def __init__(self):
        # Initialise the inherited node state (payload + empty child list).
        # The original body was `pass`, which left _child_nodes as the
        # class-level None, so the inherited add_child() would raise
        # AttributeError on first use.
        node.__init__(self)

    def get_data(self):
        """Return the data held by this source (none at the base level)."""
        return []

    def __repr__(self):
        return "<data_source>"
95 
96 # a json file data source first reads json from the file given, and then provides methods to navigate it and select fields
98 
99  # sub_data is the current subtree of the json data
100  # sub_data is used for chaining navigation methods
101  # Note: _data is defined since data_source extends node, but defining it here for convenience
102  _data, _sub_data, _file_name = None, None, None
103  def __init__(self, json_file_name):
104  # read the file, then parse into JSON object
105  self._file_name = json_file_name
106  try:
107  with open(self._file_name, "r") as handle:
108  contents = "".join(handle.readlines())
109  data = json.loads(contents)
110  self._data = data
111  self._sub_data = data
112  except IOError as io:
113  exit("Couldn't open the file '%s'" % self._file_name)
114 
115  def data(self):
116  return json_data_node.make(self._data)
117 
118  def raw(self):
119  return self._data
120 
121  def __str__(self):
122  return self.__repr__()
123 
125  _data, _sub_data, _file_name = None, None, None
126  def __init__(self, sqlite_file_name):
127  self._file_name = sqlite_file_name
128  try:
129  # import sqlite3 and connect to the database file
130  import sqlite3
131  connection = sqlite3.connect(self._file_name)
132  cursor = connection.cursor()
133  if query_object == None:
134  # try to query the file to get table and column data
135  tables = cursor.execute("select name from sqlite_master where type = 'table'")
136 
137  # now build a mapping of tables to columns - with a dictionary
138  table_to_columns = {}
139  for table in tables.fetchall():
140  table_to_columns[table[0]] = []
141  # now query columns for this table
142  columns = cursor.execute("pragma table_info(%s)" % table[0])
143  for column in columns.fetchall():
144  table_to_columns[table[0]].append(str(column[1]))
145 
146  # now query with the mapping
147  table_to_data = {}
148  for table in table_to_columns:
149  # query with all columns
150  column_string = ",".join(table_to_columns[table])
151  sql_query = "select %s from %s" % (column_string, table)
152  results = cursor.execute(sql_query).fetchall()
153  for n in range(0, len(results)):
154  results[n] = dict(zip(table_to_columns[table], map(str, results[n])))
155  table_to_data[str(table)] = results
156  self._data = json_data_node.make(table_to_data)
157  else:
158  sql_query = query_object.to_sql()
159 
160  except IOError as io:
161  exit("Couldn't open the file '%s'" % self._file_name)
162 
163  def data(self):
164  return self._data
165 
166 # used for chaining json-navigation methods
167 # when a method is called initially on the data, an object of this class is returned,
168 # then the methods on that object return an object of this class again.
170 
171  _data = None
172  def __init__(self, data=None):
173  self._data = data
174 
175  # use this instead of having to decide on which kind of json node should
176  # be created in code that shouldn't be doing it.
177  @staticmethod
178  def make(data):
179  if type(data) == list:
180  return json_list(data)
181  elif type(data) == dict:
182  return json_dict(data)
183  else:
184  return json_basic(data)
185 
186  def data(self):
187  return self._data
188 
189  def raw(self):
190  return self._data
191 
192  def get(self, *args):
193  current_json_node = self
194  if len(args) == 1:
195  data_to_use = current_json_node.data()[args[0]]
196  return json_data_node.make(data_to_use)
197  for key in args:
198  current_json_node = current_json_node.get(key)
199  return current_json_node
200 
201  def set(self, data):
202  self._data = data
203  return self
204 
205  def find(self, type_name):
206  # traverse json_data_node structure, and find all lists
207  # if this node in the structure is a list, return all sub lists
208  lists = []
209  if type(self._data) == type_name:
210  lists.append(self._data)
211  if type(self._data) == list:
212  for item in self._data:
213  lists += json_data_node.make(item).find(type_name)
214  elif type(self._data) == dict:
215  for key in self._data:
216  lists += json_data_node.make(self._data[key]).find(type_name)
217  return lists
218 
219  def __str__(self):
220  return "<json_data_node data='%s'>" % str(self._data)
221 
223 
224  iterator_index = None
225 
226  def __init__(self, data=None):
227  self._data = data if data != None else []
228  self.iterator_index = 0
229 
230  def first(self):
231  data = self.get(0)
232  return data
233 
234  def last(self):
235  data = self.get(len(self.data())-1)
236  return data
237 
238  def add_child(self, data):
239  if data.__class__.__name__ in ["json_list", "json_dict", "json_basic"]:
240  data = data.data()
241  self._data.append(data)
242 
243  # iterator methods
244 
245  def __iter__(self):
246  return self
247 
248  def next(self):
249  if self.iterator_index > len(self._data)-1:
250  raise StopIteration
251  else:
252  self.iterator_index += 1
253  return self._data[self.iterator_index-1]
254 
255  def reset(self):
256  self.iterator_index = 0
257 
258  # misc methods
259 
260  def indices(self, *indices):
261  final_list = []
262  for index in indices:
263  try:
264  index = int(index)
265  final_list.append(self.get(index).data())
266  except Exception:
267  return
268  return json_data_node.make(final_list)
269 
270  # format methods
271 
272  def as_dicts(self):
273 
274  if len(self.data()) == 0:
275  print("\nNo data to convert to dictionaries.\n")
276  return
277 
278  if self.get(0).data().__class__.__name__ in ["GlobalTag", "GlobalTagMap", "GlobalTagMapRequest", "Tag", "IOV", "Payload"]:
279  # copy data
280  new_data = map(lambda item : item.as_dicts(), [item for item in self.data()])
281  return new_data
282  else:
283  print("Data in json_list was not the correct type.")
284 
285 
    # return ascii version of data
    # expects array of dicts
    # columns_to_fit is a list of columns that should be kept at their full size
    # col_width is the column width to be used as a guide
    def as_table(self, fit=[], columns=None, hide=None, col_width=None, row_nums=False):
        """Print the held list of dicts (or recognised ORM objects) as an ascii table.

        fit      -- list of column names to render at full width (or ["all"])
        columns  -- explicit list of columns to show instead of all headers
        hide     -- list of column names to drop from the output
        col_width-- target width per column; derived from the terminal when None
        row_nums -- when True, prepend a "row" column with the row index
        """
        # NOTE(review): mutable default `fit=[]` is shared across calls; it is
        # only rebound (never mutated) below, but confirm before relying on it.

        if len(self.data()) == 0:
            print("\nNo data to draw table with.\n")
            return

        import models
        models_dict = models.generate()

        # if the list contains ORM objects, then convert them all to dictionaries,
        # otherwise, leave the list as it is - assume it is already a list of dictionaries
        if self.get(0).data().__class__.__name__ in ["GlobalTag", "GlobalTagMap", "GlobalTagMapRequest", "Tag", "IOV", "Payload"]:

            from data_formats import _objects_to_dicts
            data = _objects_to_dicts(self.data()).data()

            from querying import connection
            table_name = connection.class_name_to_column(self.get(0).data().__class__).upper()
            # set headers to those found in ORM models
            # do it like this so we copy the headers
            # for example, if headers are hidden by the user, then this will change the orm class if we don't do it like this
            headers = [header for header in models_dict[self.get(0).data().__class__.__name__.lower()].headers]
        else:
            table_name = None
            data = self.data()
            # gets headers stored in first dictionary
            headers = data[0].keys()

        if columns != None:
            headers = columns

        if row_nums:
            headers = ["row"] + headers

            # append an extra column to all rows of data, as well
            for i, item in enumerate(data):
                data[i]["row"] = str(i)

        if fit == ["all"]:
            fit = headers

        if col_width == None:
            # derive a target column width from the current terminal width
            import subprocess
            table_width = int(0.95*int(subprocess.check_output(["stty", "size"]).split(" ")[1]))
            col_width = int(table_width/len(headers))

        if hide != None:
            for n in range(0, len(hide)):
                del headers[headers.index(hide[n])]

        # widest cell in the column (at least as wide as the header itself)
        def max_width_of_column(column, data):
            max_width_found = len(str(data[0][column]))
            for item in data:
                current_width = len(str(item[column]))
                if current_width > max_width_found:
                    max_width_found = current_width
            if max_width_found > len(column):
                return max_width_found
            else:
                return len(column)

        # render one table cell; truncates with "..." when the column is not fitted
        def cell(content, header, col_width, fit):
            if fit:
                col_width_with_padding = col_width+2
                col_width_substring = len(str(content))
            else:
                col_width_with_padding = col_width-2 if col_width-2 > 0 else 1
                col_width_substring = col_width-5 if col_width-7 > 0 else 1
            return ("| {:<%s} " % (col_width_with_padding)).format(str(content)[0:col_width_substring].replace("\n", "")\
                    + ("..." if not(fit) and col_width_substring < len(str(content)) else ""))

        column_to_width = {}

        if fit != headers:

            # get the column widths of fited columns
            surplus_width = 0
            for column in fit:

                if not(column in headers):
                    print("'%s' is not a valid column." % column)
                    return

                column_to_width[column] = max_width_of_column(column, data)
                surplus_width += column_to_width[column]-col_width

            # spread the extra width used by fitted columns across the others
            if len(set(headers)-set(fit)) != 0:
                non_fited_width_surplus = surplus_width/len(set(headers)-set(fit))
            else:
                non_fited_width_surplus = 0

            for column in headers:
                if not(column in fit):
                    column_to_width[column] = col_width - non_fited_width_surplus
        else:
            for column in headers:
                column_to_width[column] = max_width_of_column(column, data)

        # header row, then a border, then one line per data row
        ascii_string = "\n%s\n\n" % table_name if table_name != None else "\n"
        for header in headers:
            ascii_string += cell(header.upper(), header, column_to_width[header], header in fit)
        ascii_string += "\n"
        horizontal_border = "\n"
        ascii_string += horizontal_border
        for item in data:
            for n in range(0, len(headers)):
                entry = item[headers[n]]
                ascii_string += cell(entry, headers[n], column_to_width[headers[n]], headers[n] in fit)
            ascii_string += "\n"
            #ascii_string += "\n"
        ascii_string += horizontal_border
        print ascii_string
402 
404 
405  def __init__(self, data=None):
406  self._data = data if data != None else {}
407 
408  def add_key(self, data, key):
409  if data.__class__.__name__ in ["json_list", "json_dict", "json_basic"]:
410  data = data.data()
411  self._data[key] = data
412 
413 # for strings, integers, etc
415 
416  def __init__(self, data=None):
417  self._data = data if data != None else ""
std::string print(const Track &, edm::Verbosity=edm::Concise)
Track print utility.
Definition: print.cc:10
boost::dynamic_bitset append(const boost::dynamic_bitset<> &bs1, const boost::dynamic_bitset<> &bs2)
this method takes two bitsets bs1 and bs2 and returns result of bs2 appended to the end of bs1 ...
def _objects_to_dicts
Definition: data_formats.py:96
def generate
Definition: models.py:38
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
double split
Definition: MVATrainer.cc:139