test
CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
data_sources.py
Go to the documentation of this file.
1 """
2 
3 This file contains the base data_source class and all subclasses that implement their own methods for parsing data.
4 
5 """
6 
7 import json
8 
9 # data_source will extend this
class node(object):
    """
    Minimal tree node: one payload plus an ordered list of child nodes.
    """

    # class-level defaults; __init__ gives every instance its own values
    _data = None
    _child_nodes = None

    def __init__(self, data=None):
        # a fresh list per instance, so children are never shared between nodes
        self._child_nodes = []
        self._data = data

    def data(self):
        """Return the payload stored in this node."""
        return self._data

    def add_child(self, node_data):
        """Wrap node_data in a new node and append it to this node's children."""
        self._child_nodes.append(node(node_data))

    def children(self):
        """Return the list of child nodes (the live list, not a copy)."""
        return self._child_nodes

    def child(self, index):
        """Return the child at the given position."""
        siblings = self.children()
        return siblings[index]

    def left_child(self):
        """Return the first child."""
        siblings = self.children()
        return siblings[0]

    def right_child(self):
        """Return the second child."""
        siblings = self.children()
        return siblings[1]

    def is_leaf(self):
        """Return True when this node has no children."""
        return not self.children()

    def __str__(self):
        parts = (self.data(), str(self.children()))
        return "<node data='%s' children=%s>" % parts
42 
44 
def __init__(self):
    """No-op constructor: the body does nothing."""
    return None
47 
def get_data(self):
    """Return a new empty list on every call."""
    return list()
50 
def __repr__(self):
    """Return the fixed tag used to represent a data source."""
    tag = "<data_source>"
    return tag
53 
54 # a json file data source first reads json from the file given, and then provides methods to navigate it and select fields
56 
57  # sub_data is the current subtree of the json data
58  # sub_data is used for chaining navigation methods
59  # Note: _data is defined since data_source extends node, but defining it here for convenience
# class-level defaults; __init__ overwrites all three on every instance
_data, _sub_data, _file_name = None, None, None

def __init__(self, json_file_name):
    """
    Read the given file and parse its contents as JSON.

    json_file_name -- path passed to open(); kept in self._file_name.

    The parsed object is stored in both self._data and self._sub_data
    (the same object, not two copies).
    Raises whatever open() or json.load() raise for a missing or malformed file.
    """
    self._file_name = json_file_name
    # json.load reads and parses the handle in one step
    with open(self._file_name, "r") as handle:
        parsed = json.load(handle)
    self._data = parsed
    self._sub_data = parsed
69 
def data(self):
    """Return the stored data wrapped via json_data_node.make."""
    stored = self._data
    return json_data_node.make(stored)
72 
def raw(self):
    """Return the stored data as-is, without any wrapper."""
    unwrapped = self._data
    return unwrapped
75 
def __str__(self):
    """Use the same text for str() as __repr__ produces."""
    text = self.__repr__()
    return text
78 
# class-level defaults; __init__ overwrites _file_name and (when no query is given) _data
_data, _sub_data, _file_name = None, None, None

def __init__(self, sqlite_file_name, query_object=None):
    """
    Open an SQLite file and, when no query object is given, load every table into self._data.

    sqlite_file_name -- path passed to sqlite3.connect; kept in self._file_name.
    query_object -- optional object exposing to_sql(). The original body referenced
                    this name without defining it (a NameError on every call); it is
                    now an optional parameter so existing one-argument calls work.

    With query_object left as None, self._data becomes
    json_data_node.make({table_name: [{column_name: str(value), ...}, ...]}).
    """
    self._file_name = sqlite_file_name
    # import sqlite3 and connect to the database file
    import sqlite3

    def quote(identifier):
        # double-quote an identifier so names with spaces or keywords stay valid SQL
        return '"%s"' % str(identifier).replace('"', '""')

    connection = sqlite3.connect(self._file_name)
    try:
        cursor = connection.cursor()
        if query_object is None:
            # query the file to get table and column data
            tables = cursor.execute("select name from sqlite_master where type = 'table'").fetchall()

            # build a mapping of tables to columns
            table_to_columns = {}
            for table in tables:
                column_rows = cursor.execute("pragma table_info(%s)" % quote(table[0])).fetchall()
                # index 1 of each pragma row is used as the column name
                table_to_columns[table[0]] = [str(column[1]) for column in column_rows]

            # now query every table with all of its columns
            table_to_data = {}
            for table, column_names in table_to_columns.items():
                column_string = ",".join(quote(name) for name in column_names)
                sql_query = "select %s from %s" % (column_string, quote(table))
                rows = cursor.execute(sql_query).fetchall()
                # every value is converted to a string, keyed by its column name
                table_to_data[str(table)] = [dict(zip(column_names, map(str, row))) for row in rows]
            self._data = json_data_node.make(table_to_data)
        else:
            # NOTE(review): the SQL is generated but never executed, so self._data
            # stays None on this path - confirm the intended behaviour.
            sql_query = query_object.to_sql()
    finally:
        # the original never closed the connection
        connection.close()
113 
def data(self):
    """Return whatever __init__ stored in self._data."""
    loaded = self._data
    return loaded
116 
117 # used for chaining json-navigation methods
118 # when a method is called initially on the data, an object of this class is returned,
119 # then the methods on that object return an object of this class again.
class json_data_node(object):
    """
    Wrapper around one piece of JSON-like data, used to chain navigation calls.
    """

    _data = None

    def __init__(self, data=None):
        self._data = data

    # use this instead of having to decide on which kind of json node should
    # be created in code that shouldn't be doing it.
    @staticmethod
    def make(data):
        """Wrap data in json_list, json_dict or json_basic according to its exact type."""
        kind = type(data)
        if kind == list:
            return json_list(data)
        if kind == dict:
            return json_dict(data)
        return json_basic(data)

    def data(self):
        """Return the wrapped value."""
        return self._data

    def raw(self):
        """Return the wrapped value (same result as data())."""
        return self._data

    def get(self, *args):
        """
        Follow one or more keys/indices into the wrapped data.

        With exactly one argument the selected item is wrapped via make();
        with several, each key is applied in turn; with none, self is returned.
        """
        if len(args) == 1:
            selected = self.data()[args[0]]
            return json_data_node.make(selected)
        cursor = self
        for key in args:
            cursor = cursor.get(key)
        return cursor

    def set(self, data):
        """Replace the wrapped value and return self so calls can be chained."""
        self._data = data
        return self

    def find(self, type_name):
        """
        Collect every value in the structure whose exact type equals type_name.

        The wrapped value itself is checked first, then list items and dict
        values are searched recursively.
        """
        matches = []
        own_type = type(self._data)
        if own_type == type_name:
            matches.append(self._data)
        if own_type == list:
            for item in self._data:
                matches.extend(json_data_node.make(item).find(type_name))
        elif own_type == dict:
            for value in self._data.values():
                matches.extend(json_data_node.make(value).find(type_name))
        return matches

    def __str__(self):
        rendered = str(self._data)
        return "<json_data_node data='%s'>" % rendered
172 
174 
# class-level default; __init__ sets the per-instance position to 0
iterator_index = None

def __init__(self, data=None):
    """
    Store the given list, or a new empty list when data is None.

    Also sets iterator_index, the position used by next(), to 0.
    """
    # identity test against None, so the argument's own __ne__ is never consulted
    self._data = data if data is not None else []
    self.iterator_index = 0
180 
def first(self):
    """Return the item at index 0, as produced by self.get."""
    return self.get(0)
184 
def last(self):
    """Return the item at index len(data) - 1, as produced by self.get."""
    final_index = len(self.data()) - 1
    return self.get(final_index)
188 
def add_child(self, data):
    """
    Append data to the wrapped list.

    Objects whose class is named json_list, json_dict or json_basic are
    unwrapped with .data() first, so only the underlying value is stored.
    """
    wrapper_names = ["json_list", "json_dict", "json_basic"]
    is_wrapper = data.__class__.__name__ in wrapper_names
    payload = data.data() if is_wrapper else data
    self._data.append(payload)
193 
194  # iterator methods
195 
def __iter__(self):
    """Return self: the object acts as its own iterator."""
    iterator = self
    return iterator
198 
def next(self):
    """
    Return the item at iterator_index and advance the position by one.

    When the position is past the last item, the position is reset to 0
    and StopIteration is raised, so a later loop starts from the beginning.
    """
    if self.iterator_index >= len(self._data):
        self.reset()
        raise StopIteration
    item = self._data[self.iterator_index]
    self.iterator_index += 1
    return item

# Python 3 looks up __next__ instead of next; the alias keeps both working
__next__ = next
206 
def reset(self):
    """Move the iteration position back to the first item."""
    start = 0
    self.iterator_index = start
209 
210  # misc methods
211 
def indices(self, *indices):
    """
    Build a new wrapped list from the items at the given positions.

    Each argument is converted with int(); if any conversion fails the method
    returns None immediately. Positions whose lookup fails are skipped.
    """
    picked = []
    for raw_index in indices:
        try:
            position = int(raw_index)
        except Exception:
            # not convertible to an integer: give up on the whole request
            return
        try:
            picked.append(self.get(position).data())
        except Exception:
            # index didn't exist
            pass
    return json_data_node.make(picked)
225 
def get_members(self, member_name):
    """
    Read the attribute member_name from every item and return the values as a wrapped list.

    member_name -- attribute name; must be a string, otherwise TypeError is raised.

    Returns None when the items are not all of the same class. An empty list
    yields an empty wrapped list (the original raised IndexError here).
    """
    # `unicode` only exists on Python 2; fall back to plain str elsewhere
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)
    if not isinstance(member_name, string_types):
        raise TypeError("Value given for member name must be a string.")

    items = self.data()
    if not items:
        return json_data_node.make([])

    type_of_first_item = items[0].__class__
    if any(item.__class__ != type_of_first_item for item in items):
        return None
    # a list comprehension produces a real list on both Python 2 and 3
    return json_data_node.make([getattr(item, member_name) for item in items])
235 
236  # format methods
237 
def as_dicts(self, convert_timestamps=False):
    """
    Convert a list of ORM objects to a list of their as_dicts() results.

    convert_timestamps -- forwarded unchanged to each item's as_dicts().

    Returns a plain list. Returns None (after printing a message) when the
    list is empty or the first item's class is not one of the names below.
    """
    if len(self.data()) == 0:
        print("\nNo data to convert to dictionaries.\n")
        return

    # NOTE(review): as_table also accepts "GlobalTagMapRequest"; confirm whether it belongs here too.
    convertible_names = ["GlobalTag", "GlobalTagMap", "Tag", "IOV", "Payload"]
    if self.get(0).data().__class__.__name__ in convertible_names:
        # a comprehension returns a real list on Python 3 as well, where map() is lazy
        return [item.as_dicts(convert_timestamps=convert_timestamps) for item in self.data()]
    else:
        print("Data in json_list was not the correct type.")
250 
251 
252  # return ascii version of data
253  # expects array of dicts
254  # fit is a list of columns that should be kept at their full size
255  # col_width is the column width to be used as a guide
def as_table(self, fit=["all"], columns=None, hide=None, col_width=None, row_nums=False):
    """
    Print the list as an ASCII table; returns None.

    fit -- columns kept at their full content width; the default ["all"] fits every column.
    columns -- optional list of headers to show instead of the detected ones.
    hide -- optional list of headers to remove (ValueError if one is not a header).
    col_width -- guide width per column; when None it is derived from the terminal
                 width reported by `stty size` (80 columns if that cannot be read).
    row_nums -- when True, a leading "row" column with the row index is added.

    The list must hold either ORM objects (recognised by class name) or dictionaries.
    Neither the caller's `columns` list nor the row dictionaries are modified.
    """
    if len(self.data()) == 0:
        print("\nNo data to draw table with.\n")
        return

    orm_class_names = ["GlobalTag", "GlobalTagMap", "GlobalTagMapRequest", "Tag", "IOV", "Payload"]
    first_item = self.get(0).data()

    # if the list contains ORM objects, then convert them all to dictionaries,
    # otherwise, leave the list as it is - assume it is already a list of dictionaries
    if first_item.__class__.__name__ in orm_class_names:
        # project modules are only needed on this path, so plain dict rows never import them
        import models
        from data_formats import _objects_to_dicts
        # NOTE(review): `connection` is unused here; import kept in case it has side effects - confirm.
        from querying import connection

        models_dict = models.generate()
        data = _objects_to_dicts(self.data()).data()
        table_name = models.class_name_to_column(first_item.__class__).upper()
        # copy the headers so that hiding columns never changes the ORM class itself
        headers = list(models_dict[first_item.__class__.__name__.lower()].headers)
    else:
        table_name = None
        data = self.data()
        # headers are the keys of the first dictionary; list() so they can be edited
        headers = list(data[0].keys())

    if columns is not None:
        # copy, so removing hidden columns below does not alter the caller's list
        headers = list(columns)

    if row_nums:
        headers = ["row"] + headers
        # add the row number to copies of the rows rather than to the stored dictionaries
        data = [dict(item, row=str(i)) for i, item in enumerate(data)]

    fit_all = (fit == ["all"])

    if col_width is None:
        import subprocess
        try:
            terminal_columns = int(subprocess.check_output(["stty", "size"]).split()[1])
        except (OSError, subprocess.CalledProcessError, IndexError, ValueError):
            # no terminal available (e.g. output is piped): fall back to a common width
            terminal_columns = 80
        table_width = int(0.95*terminal_columns)
        col_width = table_width // len(headers)

    if hide is not None:
        for hidden in hide:
            headers.remove(hidden)

    if fit_all:
        # fit every column that is still visible
        fit = headers

    def max_width_of_column(column, data):
        # widest of the header text and every cell in that column
        return max([len(column)] + [len(str(item[column])) for item in data])

    def cell(content, header, col_width, fit):
        text = str(content)
        if fit:
            col_width_with_padding = col_width+2
            col_width_substring = len(text)
        else:
            col_width_with_padding = col_width-2 if col_width-2 > 0 else 1
            col_width_substring = col_width-5 if col_width-7 > 0 else 1
        # non-fitted cells are cut short and marked with "..." when text was dropped
        ellipsis = "..." if not(fit) and col_width_substring < len(text) else ""
        shown = text[0:col_width_substring].replace("\n", "") + ellipsis
        return ("| {:<%s} " % (col_width_with_padding)).format(shown)

    column_to_width = {}

    if fit != headers:
        # get the column widths of fitted columns
        surplus_width = 0
        for column in fit:
            if column not in headers:
                print("'%s' is not a valid column." % column)
                return
            column_to_width[column] = max_width_of_column(column, data)
            surplus_width += column_to_width[column]-col_width

        non_fitted_columns = set(headers)-set(fit)
        if len(non_fitted_columns) != 0:
            # integer division keeps the widths whole numbers on Python 3 as well
            non_fited_width_surplus = surplus_width // len(non_fitted_columns)
        else:
            non_fited_width_surplus = 0

        for column in headers:
            if column not in fit:
                column_to_width[column] = col_width - non_fited_width_surplus
    else:
        for column in headers:
            column_to_width[column] = max_width_of_column(column, data)

    horizontal_border = "\n"
    pieces = ["\n%s\n\n" % table_name if table_name is not None else "\n"]
    for header in headers:
        pieces.append(cell(header, header, column_to_width[header], header in fit))
    pieces.append("\n")
    pieces.append(horizontal_border)
    for item in data:
        for header in headers:
            pieces.append(cell(item[header], header, column_to_width[header], header in fit))
        pieces.append("\n")
    pieces.append(horizontal_border)
    pieces.append("Showing %d rows\n\n" % len(data))
    print("".join(pieces))
369 
371 
def __init__(self, data=None):
    """Store the given dictionary, or a new empty dictionary when data is None."""
    # identity test against None, so the argument's own __ne__ is never consulted
    self._data = data if data is not None else {}
374 
def add_key(self, data, key):
    """
    Store data under key in the wrapped dictionary.

    Objects whose class is named json_list, json_dict or json_basic are
    unwrapped with .data() first, so only the underlying value is stored.
    """
    wrapper_names = ["json_list", "json_dict", "json_basic"]
    is_wrapper = data.__class__.__name__ in wrapper_names
    value = data.data() if is_wrapper else data
    self._data[key] = value
379 
380 # for strings, integers, etc
382 
def __init__(self, data=None):
    """Store the given value, or an empty string when data is None."""
    # identity test against None, so the argument's own __ne__ is never consulted
    self._data = data if data is not None else ""
std::string print(const Track &, edm::Verbosity=edm::Concise)
Track print utility.
Definition: print.cc:10
boost::dynamic_bitset append(const boost::dynamic_bitset<> &bs1, const boost::dynamic_bitset<> &bs2)
this method takes two bitsets bs1 and bs2 and returns result of bs2 appended to the end of bs1 ...
OutputIterator zip(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, Compare comp)
def _objects_to_dicts
Definition: data_formats.py:96
def generate
Definition: models.py:187
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
def class_name_to_column
Definition: models.py:57
double split
Definition: MVATrainer.cc:139