test
CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
data_sources.py
Go to the documentation of this file.
1 """
2 
3 This file contains the base DataSource class, and all sub classes that implement their own methods for parsing data.
4 
5 """
6 
7 import json
8 
9 # data_source will extend this, and formula trees will use this
class node():
    """A tree node carrying a data payload and an ordered list of children."""

    _data = None
    _child_nodes = None

    def __init__(self, data=None):
        self._data = data
        self._child_nodes = []

    def data(self):
        """Return the payload stored at this node."""
        return self._data

    def add_child(self, node_data):
        """Wrap node_data in a fresh node and append it to the children."""
        self._child_nodes.append(node(node_data))

    def children(self):
        """Return the ordered list of child nodes."""
        return self._child_nodes

    def child(self, index):
        """Return the child at the given position."""
        return self.children()[index]

    def left_child(self):
        """Return the first child (binary-tree convention)."""
        return self.children()[0]

    def right_child(self):
        """Return the second child (binary-tree convention)."""
        return self.children()[1]

    def is_leaf(self):
        """True when this node has no children."""
        return not self.children()

    def __str__(self):
        return "<node data='%s' children=%s>" % (self.data(), str(self.children()))
42 
# tree is used to hold query structures
# and propositional formula structures for where clauses in queries
class tree():
    """Wrapper around a root node; used for queries and where-clause formulas."""

    _root = None

    def __init__(self, root_data):
        # the root payload is wrapped in a node automatically
        self._root = node(root_data)

    def set_root(self, node):
        # bug fix: previously this assigned to self.root, which shadowed the
        # root() accessor method and left _root unchanged
        self._root = node

    def root(self):
        """Return the root node of the tree."""
        return self._root
57 
58 # will need methods to add comparison terms to specific nodes
59 # comparison terms - a=b, a>=b, a<=b, etc.
61 
# model_name is the class/table name we can prepend each column name with
def to_sql(self, model_name=None, root=None):
    """Render the (sub)tree rooted at `root` as an SQL boolean expression.

    Leaves hold column names; internal nodes hold boolean operators.
    model_name, when given, prefixes each column as "model.column".
    root defaults to the tree's own root.
    """
    if root == None:
        root = self.root()
    # do in-order traversal to get SQL
    if root.is_leaf():
        # bug fix: the original referenced an undefined name `model`
        # instead of the `model_name` parameter (NameError at runtime)
        if model_name != None:
            return model_name + "." + root.data().strip()
        else:
            return root.data().strip()
    else:
        # recurse over children, joining with the upper-cased operator
        child_terms = [self.to_sql(model_name, child) for child in root.children()]
        return "(%s)" % ((" " + root.data().upper() + " ").join(child_terms))
75 
76 # will have a query data_source object as its root
77 # will have methods for turning query held at root into sql query string
79 
def to_sql(self):
    """Turn the query held at the root node into an SQL string.

    Placeholder: not implemented yet; will eventually assemble
    SELECT <columns> FROM <table> <where-clause>.
    """
    pass
84 
class data_source(node):
    """Base class for concrete data sources (JSON files, SQLite files, ...)."""

    def __init__(self):
        # bug fix: initialise the inherited node state; the previous empty
        # constructor left _child_nodes as the class-level None, so the
        # inherited add_child() would raise AttributeError
        node.__init__(self)

    def get_data(self):
        """Return the data held by this source; base implementation is empty."""
        return []

    def __repr__(self):
        return "<data_source>"
95 
96 # a json file data source first reads json from the file given, and then provides methods to navigate it and select fields
98 
# sub_data is the current subtree of the json data
# sub_data is used for chaining navigation methods
# Note: _data is defined since data_source extends node, but defining it here for convenience
_data, _sub_data, _file_name = None, None, None

def __init__(self, json_file_name):
    """Read and parse the given JSON file.

    _data holds the full parsed document; _sub_data starts as the same
    object and is the subtree used by chained navigation methods.
    Exits the process if the file cannot be opened.
    """
    self._file_name = json_file_name
    try:
        with open(self._file_name, "r") as handle:
            # idiom fix: parse straight from the file handle instead of
            # joining readlines() into one string first
            data = json.load(handle)
        self._data = data
        self._sub_data = data
    except IOError:
        exit("Couldn't open the file '%s'" % self._file_name)
114 
def data(self):
    """Return the parsed document wrapped as a navigable json_data_node."""
    return json_data_node.make(self._data)
117 
def raw(self):
    """Return the parsed document as plain Python objects (no wrapping)."""
    return self._data
120 
def __str__(self):
    """Printable form mirrors the repr."""
    return self.__repr__()
123 
_data, _sub_data, _file_name = None, None, None

def __init__(self, sqlite_file_name, query_object=None):
    """Open an SQLite file and load its contents.

    bug fix: the body referenced `query_object`, which was missing from
    the signature (NameError on every call); it is now an optional
    parameter, keeping existing single-argument callers working.
    When query_object is None the whole database is read into a
    table-name -> list-of-row-dicts mapping.
    """
    self._file_name = sqlite_file_name
    try:
        # import sqlite3 and connect to the database file
        import sqlite3
        connection = sqlite3.connect(self._file_name)
        cursor = connection.cursor()
        if query_object == None:
            # discover all user tables in the file
            tables = cursor.execute("select name from sqlite_master where type = 'table'")

            # now build a mapping of tables to columns - with a dictionary
            table_to_columns = {}
            for table in tables.fetchall():
                table_to_columns[table[0]] = []
                # now query columns for this table
                columns = cursor.execute("pragma table_info(%s)" % table[0])
                for column in columns.fetchall():
                    table_to_columns[table[0]].append(str(column[1]))

            # fetch every row of every table, as dicts keyed by column name
            table_to_data = {}
            for table in table_to_columns:
                # query with all columns
                column_string = ",".join(table_to_columns[table])
                sql_query = "select %s from %s" % (column_string, table)
                results = cursor.execute(sql_query).fetchall()
                for n in range(0, len(results)):
                    results[n] = dict(zip(table_to_columns[table], map(str, results[n])))
                table_to_data[str(table)] = results
            self._data = json_data_node.make(table_to_data)
        else:
            # NOTE(review): the query string is built but never executed or
            # stored yet - looks unfinished; confirm intended behaviour
            sql_query = query_object.to_sql()

    except IOError:
        exit("Couldn't open the file '%s'" % self._file_name)
162 
def data(self):
    """Return the json_data_node built from the database contents."""
    return self._data
165 
166 # used for chaining json-navigation methods
167 # when a method is called initially on the data, an object of this class is returned,
168 # then the methods on that object return an object of this class again.
170 
_data = None

def __init__(self, data=None):
    # payload may be any parsed-JSON value (dict, list, or scalar)
    self._data = data
174 
# use this instead of having to decide on which kind of json node should
# be created in code that shouldn't be doing it.
@staticmethod
def make(data):
    """Factory: wrap `data` in the json_* node class matching its type.

    idiom fix: isinstance() replaces exact type comparison, so subclasses
    of list/dict (e.g. collections.OrderedDict) are wrapped correctly
    instead of falling through to json_basic.
    """
    if isinstance(data, list):
        return json_list(data)
    elif isinstance(data, dict):
        return json_dict(data)
    else:
        return json_basic(data)
185 
def data(self):
    """Return the wrapped payload."""
    return self._data
188 
def raw(self):
    """Alias for data(): the unwrapped payload."""
    return self._data
191 
def get(self, *args):
    """Navigate into the JSON structure by one or more keys/indices.

    Each key is applied in turn; every step is wrapped via make() so
    navigation calls can be chained. With no keys, returns self.
    """
    current = self
    for key in args:
        current = json_data_node.make(current.data()[key])
    return current
200 
def set(self, data):
    """Replace the wrapped payload; returns self to allow chaining."""
    self._data = data
    return self
204 
def find(self, type_name):
    """Collect every value of exactly type `type_name` in the wrapped subtree.

    Walks lists and dicts recursively; matches are returned in
    traversal order (the current node first, then its children).
    """
    matches = []
    if type(self._data) == type_name:
        matches.append(self._data)
    # recurse into containers, wrapping each child so find() chains
    if type(self._data) == list:
        for element in self._data:
            matches += json_data_node.make(element).find(type_name)
    elif type(self._data) == dict:
        for key in self._data:
            matches += json_data_node.make(self._data[key]).find(type_name)
    return matches
218 
def __str__(self):
    """Printable form showing the wrapped data."""
    return "<json_data_node data='%s'>" % str(self._data)
221 
223 
# index used by the next() iterator protocol below
iterator_index = None

def __init__(self, data=None):
    # fresh empty list per instance (avoids the shared-mutable-default trap)
    self._data = [] if data == None else data
    self.iterator_index = 0
229 
def first(self):
    """Return the wrapped first element of the list."""
    return self.get(0)
233 
def last(self):
    """Return the wrapped last element of the list."""
    return self.get(len(self.data()) - 1)
237 
def add_child(self, data):
    """Append an element, unwrapping any json_* node to its raw payload first."""
    if data.__class__.__name__ in ("json_list", "json_dict", "json_basic"):
        data = data.data()
    self._data.append(data)
242 
# iterator methods

def __iter__(self):
    """The list is its own iterator (state lives in iterator_index)."""
    return self
247 
def next(self):
    """Return the next element and advance; raise StopIteration at the end."""
    idx = self.iterator_index
    if idx >= len(self._data):
        raise StopIteration
    self.iterator_index = idx + 1
    return self._data[idx]
254 
def reset(self):
    """Rewind the iterator to the start of the list."""
    self.iterator_index = 0
257 
# misc methods

def indices(self, *indices):
    """Select the elements at the given positions, wrapped as a new node.

    Returns None if any index is not usable (best-effort behaviour
    preserved from the original).
    """
    selected = []
    for raw_index in indices:
        try:
            selected.append(self.get(int(raw_index)).data())
        except Exception:
            return
    return json_data_node.make(selected)
269 
# format methods

def as_dicts(self):
    """Convert a list of ORM objects to a list of dictionaries.

    Only works when the elements are the known ORM types; otherwise a
    message is printed and None is returned.
    """
    if len(self.data()) == 0:
        print("\nNo data to convert to dictionaries.\n")
        return

    orm_classes = ["GlobalTag", "GlobalTagMap", "GlobalTagMapRequest", "Tag", "IOV", "Payload"]
    if self.get(0).data().__class__.__name__ in orm_classes:
        # copy data, converting each ORM object as we go
        # (map is kept so the return type matches the original exactly)
        new_data = map(lambda item : item.as_dicts(), [item for item in self.data()])
        return new_data
    else:
        print("Data in json_list was not the correct type.")
284 
285 
# return ascii version of data
# expects array of dicts
# fit is a list of columns that should be kept at their full size
# col_width is the column width to be used as a guide
def as_table(self, fit=None, columns=None, hide=None, col_width=None, row_nums=False):
    """Print the list of row-dictionaries as an ASCII table.

    fit       -- columns rendered at their full width (or ["all"])
    columns   -- explicit header list overriding the detected one
    hide      -- columns to drop from the output
    col_width -- guide width per column; derived from the terminal if None
    row_nums  -- when True, prepend a "row" index column
    Returns None; output goes to stdout.
    """
    # bug fix: avoid the shared mutable default argument fit=[]
    if fit is None:
        fit = []

    if len(self.data()) == 0:
        print("\nNo data to draw table with.\n")
        return

    # if the list contains ORM objects, then convert them all to dictionaries,
    # otherwise, leave the list as it is - assume it is already a list of dictionaries
    if self.get(0).data().__class__.__name__ in ["GlobalTag", "GlobalTagMap", "GlobalTagMapRequest", "Tag", "IOV", "Payload"]:

        # fix: project-local imports are only needed for the ORM branch,
        # so they are deferred to here instead of running unconditionally
        import models
        models_dict = models.generate()

        from data_formats import _objects_to_dicts
        data = _objects_to_dicts(self.data()).data()

        from querying import connection
        table_name = connection.class_name_to_column(self.get(0).data().__class__).upper()
        # set headers to those found in ORM models
        # copy the list so hiding headers later cannot mutate the ORM class
        headers = [header for header in models_dict[self.get(0).data().__class__.__name__.lower()].headers]
    else:
        table_name = None
        data = self.data()
        # bug fix: materialise the keys so the header list supports
        # concatenation and deletion under Python 3 (dict views don't)
        headers = list(data[0].keys())

    if columns != None:
        headers = columns

    if row_nums:
        headers = ["row"] + headers

        # append an extra column to all rows of data, as well
        for i, item in enumerate(data):
            data[i]["row"] = str(i)

    if fit == ["all"]:
        fit = headers

    if col_width == None:
        import subprocess
        # bug fix: check_output returns bytes on Python 3 and the stty
        # fields are whitespace-separated, so decode and split()
        size_output = subprocess.check_output(["stty", "size"]).decode()
        table_width = int(0.95 * int(size_output.split()[1]))
        # floor division keeps the width an int for the format spec below
        col_width = table_width // len(headers)

    if hide != None:
        for n in range(0, len(hide)):
            del headers[headers.index(hide[n])]

    def max_width_of_column(column, data):
        # widest cell in the column, but never narrower than the header itself
        max_width_found = len(str(data[0][column]))
        for item in data:
            current_width = len(str(item[column]))
            if current_width > max_width_found:
                max_width_found = current_width
        if max_width_found > len(column):
            return max_width_found
        else:
            return len(column)

    def cell(content, header, col_width, fit):
        # render one padded cell; non-fitted cells are truncated with "..."
        if fit:
            col_width_with_padding = col_width + 2
            col_width_substring = len(str(content))
        else:
            col_width_with_padding = col_width - 2 if col_width - 2 > 0 else 1
            col_width_substring = col_width - 5 if col_width - 7 > 0 else 1
        return ("| {:<%s} " % (col_width_with_padding)).format(str(content)[0:col_width_substring].replace("\n", "")
            + ("..." if not(fit) and col_width_substring < len(str(content)) else ""))

    column_to_width = {}

    if fit != headers:

        # get the column widths of fitted columns
        surplus_width = 0
        for column in fit:

            if not(column in headers):
                print("'%s' is not a valid column." % column)
                return

            column_to_width[column] = max_width_of_column(column, data)
            surplus_width += column_to_width[column] - col_width

        if len(set(headers) - set(fit)) != 0:
            # bug fix: floor division; Python 3 true division would make
            # the surplus a float and break the "{:<N}" format spec
            non_fited_width_surplus = surplus_width // len(set(headers) - set(fit))
        else:
            non_fited_width_surplus = 0

        for column in headers:
            if not(column in fit):
                column_to_width[column] = col_width - non_fited_width_surplus
    else:
        for column in headers:
            column_to_width[column] = max_width_of_column(column, data)

    ascii_string = "\n%s\n\n" % table_name if table_name != None else "\n"
    for header in headers:
        ascii_string += cell(header.upper(), header, column_to_width[header], header in fit)
    ascii_string += "\n"
    horizontal_border = "\n"
    ascii_string += horizontal_border
    for item in data:
        for n in range(0, len(headers)):
            entry = item[headers[n]]
            ascii_string += cell(entry, headers[n], column_to_width[headers[n]], headers[n] in fit)
        ascii_string += "\n"
    ascii_string += horizontal_border
    # bug fix: print statement -> print() function for Python 3
    print(ascii_string)
402 
404 
def __init__(self, data=None):
    # fresh empty dict per instance (avoids the shared-mutable-default trap)
    self._data = {} if data == None else data
407 
def add_key(self, data, key):
    """Store data under key, unwrapping any json_* node to its raw payload."""
    if data.__class__.__name__ in ("json_list", "json_dict", "json_basic"):
        data = data.data()
    self._data[key] = data
412 
413 # for strings, integers, etc
415 
def __init__(self, data=None):
    # scalar payloads default to the empty string
    self._data = "" if data == None else data
std::string print(const Track &, edm::Verbosity=edm::Concise)
Track print utility.
Definition: print.cc:10
boost::dynamic_bitset append(const boost::dynamic_bitset<> &bs1, const boost::dynamic_bitset<> &bs2)
this method takes two bitsets bs1 and bs2 and returns result of bs2 appended to the end of bs1 ...
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def _objects_to_dicts
Definition: data_formats.py:96
def generate
Definition: models.py:38
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
double split
Definition: MVATrainer.cc:139