CMS 3D CMS Logo

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
condformats_serialization_generate.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 '''CMS Conditions DB Serialization generator.
3 
4 Generates the non-intrusive serialization code required for the classes
5 marked with the COND_SERIALIZABLE macro.
6 
7 The code was taken from the prototype that did many other things as well
8 (finding transients, marking serializable classes, etc.). After removing
9 everything but what is required to build the serialization, the code was
10 made more robust and cleaned up a bit to be integrated on the BoostIO IB.
11 However, the code still needs to be restructured a bit more to improve
12 readability (e.g. name some constants, use a template engine, ask for
13 clang's bindings to be installed along clang itself, etc.).
14 '''
15 
16 __author__ = 'Miguel Ojeda'
17 __copyright__ = 'Copyright 2014, CERN'
18 __credits__ = ['Giacomo Govi', 'Miguel Ojeda', 'Andreas Pfeiffer']
19 __license__ = 'Unknown'
20 __maintainer__ = 'Miguel Ojeda'
21 __email__ = 'mojedasa@cern.ch'
22 
23 
24 import argparse
25 import logging
26 import os
27 import re
28 import subprocess
29 
30 import clang.cindex
31 
32 
33 headers_template = '''
34 #include "{headers}"
35 
36 #include <boost/serialization/base_object.hpp>
37 #include <boost/serialization/nvp.hpp>
38 #include <boost/serialization/export.hpp>
39 
40 #include "CondFormats/Serialization/interface/Equal.h"
41 #include "CondFormats/Serialization/interface/Instantiate.h"
42 
43 '''
44 
45 serialize_method_begin_template = '''template <class Archive>
46 void {klass}::serialize(Archive & ar, const unsigned int)
47 {{'''
48 
49 serialize_method_base_object_template = ' ar & boost::serialization::make_nvp("{base_object_name_sanitised}", boost::serialization::base_object<{base_object_name}>(*this));'
50 
51 serialize_method_member_template = ''' ar & boost::serialization::make_nvp("{member_name_sanitised}", {member_name});'''
52 
53 serialize_method_end = '''}
54 '''
55 
56 instantiation_template = '''COND_SERIALIZATION_INSTANTIATE({klass});
57 '''
58 
59 
60 skip_namespaces = frozenset([
61  # Do not go inside anonymous namespaces (static)
62  '',
63 
64  # Do not go inside some standard namespaces
65  'std', 'boost', 'mpl_', 'boost_swap_impl',
66 
67  # Do not go inside some big namespaces coming from externals
68  'ROOT', 'edm', 'ora', 'coral', 'CLHEP', 'Geom', 'HepGeom',
69 ])
70 
72  if node.get_definition() is None:
73  return False
74  if node.location is None or node.get_definition().location is None:
75  return False
76  return node.location == node.get_definition().location
77 
79  for child in node.get_children():
80  if child.spelling != 'serialize' or child.kind != clang.cindex.CursorKind.FUNCTION_TEMPLATE or is_definition_by_loc(child):
81  continue
82 
83  if [(x.spelling, x.kind, is_definition_by_loc(x), x.type.kind) for x in child.get_children()] != [
84  ('Archive', clang.cindex.CursorKind.TEMPLATE_TYPE_PARAMETER, True, clang.cindex.TypeKind.UNEXPOSED),
85  ('ar', clang.cindex.CursorKind.PARM_DECL, True, clang.cindex.TypeKind.LVALUEREFERENCE),
86  ('version', clang.cindex.CursorKind.PARM_DECL, True, clang.cindex.TypeKind.UINT),
87  ]:
88  continue
89 
90  return True
91 
92  return False
93 
94 
96  for child in node.get_children():
97  if child.spelling == 'cond_serialization_manual' and child.kind == clang.cindex.CursorKind.CXX_METHOD and not is_definition_by_loc(child):
98  return True
99 
100  return False
101 
102 
def get_statement(node):
    """Return the source text of the statement that contains `node`.

    The text starts at the beginning of the node's extent and runs up to,
    but not including, the first ';' found at or after the end of the
    extent.  If no ';' follows, the text runs to the end of the file.

    Returns None when the extent has no associated file.  Otherwise the
    bytes read from the file are decoded as UTF-8 (undecodable bytes are
    replaced) and returned as text.
    """
    # For some cursor kinds, their location is empty (e.g. translation units
    # and attributes); either because of a bug or because they do not have
    # a meaningful 'start' -- however, the extent is always available
    extent = node.extent
    if extent.start.file is None:
        return None

    # Read raw bytes: the slice below is taken with the extent's offsets
    # before any decoding happens.
    with open(extent.start.file.name, 'rb') as fd:
        source = fd.read()

    terminator = source.find(b';', extent.end.offset)
    if terminator == -1:
        # No terminating ';' after the extent: slicing with -1 would silently
        # drop the last character, so take everything up to the end instead.
        terminator = len(source)

    return source[extent.start.offset:terminator].decode('utf-8', 'replace')
118 
119 
121  typekinds = {
122  clang.cindex.TypeKind.BOOL: 'bool',
123  clang.cindex.TypeKind.INT: 'int',
124  clang.cindex.TypeKind.LONG: 'long',
125  clang.cindex.TypeKind.UINT: 'unsigned int',
126  clang.cindex.TypeKind.ULONG: 'unsigned long',
127  clang.cindex.TypeKind.FLOAT: 'float',
128  clang.cindex.TypeKind.DOUBLE: 'double',
129  }
130 
131  if node.type.kind not in typekinds:
132  raise Exception('Not a known basic type.')
133 
134  return typekinds[node.type.kind]
135 
136 
def get_type_string(node):
    """Return a printable type name for `node`.

    Uses the spelling of the declaration of the node's type when there is
    one; otherwise falls back to get_basic_type_string(node), which raises
    for kinds it does not know.
    """
    spelling = node.type.get_declaration().spelling

    # An empty spelling means there is nothing usable to return.  Testing
    # only for None let '' through, which is why the caller in this file has
    # to repeat the fallback itself with `if not type_string`.
    # NOTE(review): presumably libclang yields '' rather than None for types
    # without a declaration (e.g. builtins) -- confirm against the bindings.
    if spelling:
        return spelling

    return get_basic_type_string(node)
143 
144 
145 def get_serializable_classes_members(node, all_template_types=None, namespace='', only_from_path=None):
146  if all_template_types is None:
147  all_template_types = []
148 
149  logging.debug('%s', (node.spelling, all_template_types, namespace))
150  results = {}
151  for child in node.get_children():
152  if child.kind == clang.cindex.CursorKind.NAMESPACE:
153  # If we are in the root namespace, let's skip some common, big
154  # namespaces to improve speed and avoid serializing those.
155  if namespace == '':
156  if child.spelling in skip_namespaces:
157  continue
158 
159  # This skips compiler-specific stuff as well (e.g. __gnucxx...)
160  if child.spelling.startswith('_'):
161  continue
162 
163  logging.debug('Going into namespace %s', child.spelling)
164 
165  results.update(get_serializable_classes_members(child, all_template_types, namespace + child.spelling + '::', only_from_path))
166  continue
167 
168  if child.kind in [clang.cindex.CursorKind.CLASS_DECL, clang.cindex.CursorKind.STRUCT_DECL, clang.cindex.CursorKind.CLASS_TEMPLATE] and is_definition_by_loc(child):
169  logging.debug('Found struct/class/template definition: %s', child.spelling if child.spelling else '<anonymous>')
170 
171  if only_from_path is not None \
172  and child.location.file is not None \
173  and not child.location.file.name.startswith(only_from_path):
174  logging.debug('Skipping since it is an external of this package: %s', child.spelling)
175  continue
176 
177  serializable = is_serializable_class(child)
178  if serializable:
179  if child.spelling == '':
180  raise Exception('It is not possible to serialize anonymous/unnamed structs/classes.')
181 
183  logging.info('Found manual serializable struct/class/template: %s', child.spelling)
184  continue
185 
186  logging.info('Found serializable struct/class/template: %s', child.spelling)
187 
188  template_types = []
189  base_objects = []
190  members = []
191  transients = []
192  after_serialize = False
193  after_serialize_count = 0
194  for member in child.get_children():
195  if after_serialize:
196  if after_serialize_count == 2:
197  after_serialize = False
198  else:
199  after_serialize_count = after_serialize_count + 1
200 
201  if member.kind != clang.cindex.CursorKind.UNEXPOSED_DECL:
202  raise Exception('Expected unexposed declaration (friend) after serialize() but found something else: looks like the COND_SERIALIZABLE macro has been changed without updating the script.')
203 
204  if 'COND_SERIALIZABLE' not in get_statement(member):
205  raise Exception('Could not find COND_SERIALIZABLE in the statement of the expected unexposed declarations (friends) after serialize(). Please fix the script/macro.')
206 
207  logging.debug('Skipping expected unexposed declaration (friend) after serialize().')
208  continue
209 
210  # Template type parameters (e.g. <typename T>)
211  if member.kind == clang.cindex.CursorKind.TEMPLATE_TYPE_PARAMETER:
212  logging.info(' Found template type parameter: %s', member.spelling)
213  template_types.append(('typename', member.spelling))
214 
215  # Template non-type parameters (e.g. <int N>)
216  elif member.kind == clang.cindex.CursorKind.TEMPLATE_NON_TYPE_PARAMETER:
217  type_string = get_type_string(member)
218  if not type_string:
219  type_string = get_basic_type_string(member)
220  logging.info(' Found template non-type parameter: %s %s', type_string, member.spelling)
221  template_types.append((type_string, member.spelling))
222 
223  # Base objects
224  elif member.kind == clang.cindex.CursorKind.CXX_BASE_SPECIFIER:
225  # FIXME: .displayname gives sometimes things like "class mybase"
226  base_object = member.displayname
227  prefix = 'class '
228  if base_object.startswith(prefix):
229  base_object = base_object[len(prefix):]
230  logging.info(' Found base object: %s', base_object)
231  base_objects.append(base_object)
232 
233  # Member variables
234  elif member.kind == clang.cindex.CursorKind.FIELD_DECL and is_definition_by_loc(member):
235  # While clang 3.3 does not ignore unrecognized attributes
236  # (see http://llvm.org/viewvc/llvm-project?revision=165082&view=revision )
237  # for some reason they do not appear in the bindings yet
238  # so we just do it ourselves.
239 
240  # FIXME: To simplify and avoid parsing C++ ourselves, our transient
241  # attribute applies to *all* the variables declared in the same statement.
242  if 'COND_TRANSIENT' not in get_statement(member):
243  logging.info(' Found member variable: %s', member.spelling)
244  members.append(member.spelling)
245  else:
246  if serializable:
247  logging.info(' Found transient member variable: %s', member.spelling)
248  transients.append(member.spelling)
249  else:
250  raise Exception('Transient %s found for non-serializable class %s', member.spelling, child.spelling)
251 
252  elif member.kind == clang.cindex.CursorKind.FUNCTION_TEMPLATE and member.spelling == 'serialize':
253  after_serialize = True
254  logging.debug('Found serialize() method, skipping next two children which must be unexposed declarations.')
255 
256  elif member.kind in frozenset([
257  # For safety, we list all known kinds that we need to skip
258  # and raise in unknown cases (this helps catching problems
259  # with undefined classes)
260  clang.cindex.CursorKind.CONSTRUCTOR,
261  clang.cindex.CursorKind.DESTRUCTOR,
262  clang.cindex.CursorKind.CXX_METHOD,
263  clang.cindex.CursorKind.CXX_ACCESS_SPEC_DECL,
264  clang.cindex.CursorKind.FUNCTION_TEMPLATE,
265  clang.cindex.CursorKind.TYPEDEF_DECL,
266  clang.cindex.CursorKind.CLASS_DECL,
267  clang.cindex.CursorKind.ENUM_DECL,
268  clang.cindex.CursorKind.VAR_DECL,
269  clang.cindex.CursorKind.STRUCT_DECL,
270  clang.cindex.CursorKind.UNION_DECL,
271  clang.cindex.CursorKind.CONVERSION_FUNCTION,
272  clang.cindex.CursorKind.TYPE_REF,
273  clang.cindex.CursorKind.DECL_REF_EXPR,
274  ]):
275  logging.debug('Skipping member: %s %s %s %s', member.displayname, member.spelling, member.kind, member.type.kind)
276 
277  elif member.kind == clang.cindex.CursorKind.UNEXPOSED_DECL:
278  statement = get_statement(member)
279 
280  # Friends are unexposed but they are not data to serialize
281  if 'friend' in statement:
282  # If we know about them, skip the warning
283  if \
284  'friend class ' in statement or \
285  'friend struct ' in statement or \
286  'friend std::ostream& operator<<(' in statement or \
287  'friend std::istream& operator>>(' in statement:
288  logging.debug('Skipping known friend: %s', statement.splitlines()[0])
289  continue
290 
291  # Otherwise warn
292  logging.warning('Unexposed declaration that looks like a friend declaration -- please check: %s %s %s %s %s', member.displayname, member.spelling, member.kind, member.type.kind, statement)
293  continue
294 
295  raise Exception('Unexposed declaration. This probably means (at the time of writing) that an unknown class was found (may happen, for instance, when the compiler does not find the headers for std::vector, i.e. missing -I option): %s %s %s %s %s' % (member.displayname, member.spelling, member.kind, member.type.kind, statement))
296 
297  else:
298  raise Exception('Unknown kind. Please fix the script: %s %s %s %s %s' % (member.displayname, member.spelling, member.kind, member.type.kind, statement))
299 
300  if template_types:
301  template_use = '%s<%s>' % (child.spelling, ', '.join([template_type_name for (_, template_type_name) in template_types]))
302  else:
303  template_use = child.spelling
304 
305  new_namespace = namespace + template_use
306 
307  new_all_template_types = all_template_types + [template_types]
308 
309  results[new_namespace] = (child, serializable, new_all_template_types, base_objects, members, transients)
310 
311  results.update(get_serializable_classes_members(child, new_all_template_types, new_namespace + '::', only_from_path))
312 
313  for (klass, (node, serializable, all_template_types, base_objects, members, transients)) in results.items():
314  if serializable and len(members) == 0:
315  logging.info('No non-transient members found for serializable class %s', klass)
316 
317  return results
318 
319 
def split_path(path):
    """Break `path` into a list of its components, outermost first.

    The path is peeled from the right with os.path.split() until the tail
    comes back empty; whatever head is left at that point (for instance the
    root '/') is kept as the first component if it is not empty.
    """
    components = []

    head, tail = os.path.split(path)
    while tail != '':
        components.append(tail)
        head, tail = os.path.split(head)

    # Nothing more can be split off; keep the remainder unless it is empty.
    if head != '':
        components.append(head)

    # Components were collected innermost-first.
    return components[::-1]
336 
337 
def get_flags(product_name, flags):
    """Return the `flags` (e.g. 'CPPFLAGS') of `product_name` as a list.

    Runs `scram b echo_<product_name>_<flags>` through the shell, keeps the
    last output line, takes what follows the first '=' and lets `xargs -n1`
    print one argument per line.  Each line becomes one list entry.

    Raises subprocess.CalledProcessError if the shell pipeline exits with a
    non-zero status.
    """
    command = "scram b echo_%s_%s | tail -1 | cut -d '=' -f '2-' | xargs -n1" % (product_name, flags)
    logging.debug('Running: %s', command)

    # universal_newlines=True makes check_output return text instead of bytes
    # on Python 3, so the flags can be concatenated with the other (str)
    # flags; on Python 2 the result is a str either way.
    output = subprocess.check_output(command, shell=True, universal_newlines=True)
    return output.splitlines()
342 
def log_flags(name, flags):
    """Write `flags` to the debug log as a bracketed list labelled `name`.

    One record opens the list, one record is emitted per flag, and a final
    record closes it.
    """
    emit = logging.debug

    emit('%s = [', name)
    for entry in flags:
        emit(' %s', entry)
    emit(']')
348 
349 
def get_diagnostics(translation_unit):
    """Return the diagnostics of `translation_unit` as a list of dicts.

    Each dict copies the 'severity', 'location', 'spelling', 'ranges' and
    'fixits' attributes of one entry of translation_unit.diagnostics.

    A real list is built (instead of using map()) so that the result can be
    iterated more than once and has a length on Python 3 as well, where
    map() only yields a one-shot iterator.
    """
    return [
        {
            'severity' : diag.severity,
            'location' : diag.location,
            'spelling' : diag.spelling,
            'ranges' : diag.ranges,
            'fixits' : diag.fixits,
        }
        for diag in translation_unit.diagnostics
    ]
358 
359 
def get_default_gcc_search_paths(gcc = 'g++', language = 'c++'):
    """Return the compiler's default include search paths as '-I...' flags.

    Runs `gcc` in verbose preprocessing mode on empty input and collects the
    lines printed between '#include <...> search starts here:' and
    'End of search list.'.  Each path is normalised; paths containing
    '/lib/gcc/' are left out.

    Raises Exception if the start marker never appears in the output, and
    subprocess.CalledProcessError if the command exits with a non-zero
    status.
    """
    command = 'echo "" | %s -x%s -v -E - 2>&1' % (gcc, language)
    logging.debug('Running: %s', command)

    # Ask for text output: on Python 3 check_output otherwise returns bytes,
    # whose lines never compare equal to the str markers below, so the list
    # would never be found.
    output = subprocess.check_output(command, shell=True, universal_newlines=True)

    paths = []
    in_list = False
    for line in output.splitlines():
        if not in_list:
            if line == '#include <...> search starts here:':
                in_list = True
            continue

        if line == 'End of search list.':
            break

        path = os.path.normpath(line.strip())

        # Intrinsics not handled by clang
        # Note that /lib/gcc is found in other paths if not normalized,
        # so has to go after normpath()
        if '/lib/gcc/' in path:
            continue

        paths.append('-I%s' % path)

    if not in_list:
        raise Exception('Default GCC search paths not found.')

    return paths
389 
def sanitise(var):
    """Return `var` with every character outside [a-zA-Z0-9.,:-] replaced by '-'.

    The hyphen is placed last in the character class so that it is a literal
    '-'.  Written as ',-:' it formed a range from ',' to ':' which, besides
    the intended characters, also let '/' through unreplaced.
    """
    return re.sub(r'[^a-zA-Z0-9.,:-]', '-', var)
392 
393 
395 
396  def __init__(self, scramFlags=None):
397 
398  self.cmssw_base = os.getenv('CMSSW_BASE')
399  if self.cmssw_base is None:
400  raise Exception('CMSSW_BASE is not set.')
401  logging.debug('cmssw_base = %s', self.cmssw_base)
402 
403  cwd = os.getcwd()
404  logging.debug('cwd = %s', cwd)
405 
406  if not cwd.startswith(self.cmssw_base):
407  raise Exception('The filepath does not start with CMSSW_BASE.')
408 
409  relative_path = cwd[len(self.cmssw_base)+1:]
410  logging.debug('relative_path = %s', relative_path)
411 
412  self.split_path = split_path(relative_path)
413  logging.debug('splitpath = %s', self.split_path)
414 
415  if len(self.split_path) < 3:
416  raise Exception('This script requires to be run inside a CMSSW package (usually within CondFormats), e.g. CondFormats/Alignment. The current path is: %s' % self.split_path)
417 
418  if self.split_path[0] != 'src':
419  raise Exception('The first folder should be src.')
420 
421  if self.split_path[1] != 'CondFormats':
422  raise Exception('The second folder should be CondFormats.')
423 
424  product_name = '%s%s' % (self.split_path[1], self.split_path[2])
425  logging.debug('product_name = %s', product_name)
426 
427  if not scramFlags:
428  cpp_flags = get_flags(product_name, 'CPPFLAGS')
429  cxx_flags = get_flags(product_name, 'CXXFLAGS')
430  else:
431  cpp_flags = self.cleanFlags( scramFlags )
432  cxx_flags = []
433 
434  # We are using libClang, thus we have to follow Clang include paths
435  std_flags = get_default_gcc_search_paths(gcc='clang++')
436  log_flags('cpp_flags', cpp_flags)
437  log_flags('cxx_flags', cxx_flags)
438  log_flags('std_flags', std_flags)
439 
440  flags = ['-xc++'] + cpp_flags + cxx_flags + std_flags
441 
442  headers_h = self._join_package_path('src', 'headers.h')
443  logging.debug('headers_h = %s', headers_h)
444  if not os.path.exists(headers_h):
445  raise Exception('File %s does not exist. Impossible to serialize package.' % headers_h)
446 
447  logging.info('Searching serializable classes in %s/%s ...', self.split_path[1], self.split_path[2])
448 
449  logging.debug('Parsing C++ classes in file %s ...', headers_h)
450  index = clang.cindex.Index.create()
451  translation_unit = index.parse(headers_h, flags)
452  if not translation_unit:
453  raise Exception('Unable to load input.')
454 
455  severity_names = ('Ignored', 'Note', 'Warning', 'Error', 'Fatal')
456  get_severity_name = lambda severity_num: severity_names[severity_num] if severity_num < len(severity_names) else 'Unknown'
457  max_severity_level = 0 # Ignored
458  diagnostics = get_diagnostics(translation_unit)
459  for diagnostic in diagnostics:
460  logf = logging.error
461 
462  # Ignore some known warnings
463  if diagnostic['spelling'].startswith('argument unused during compilation') \
464  or diagnostic['spelling'].startswith('unknown warning option'):
465  logf = logging.debug
466 
467  logf('Diagnostic: [%s] %s', get_severity_name(diagnostic['severity']), diagnostic['spelling'])
468  logf(' at line %s in %s', diagnostic['location'].line, diagnostic['location'].file)
469 
470  max_severity_level = max(max_severity_level, diagnostic['severity'])
471 
472  if max_severity_level >= 3: # Error
473  raise Exception('Please, resolve all errors before proceeding.')
474 
475  self.classes = get_serializable_classes_members(translation_unit.cursor, only_from_path=self._join_package_path())
476 
477  def _join_package_path(self, *path):
478  return os.path.join(self.cmssw_base, self.split_path[0], self.split_path[1], self.split_path[2], *path)
479 
480  def cleanFlags(self, flagsIn):
481  flags = [ flag for flag in flagsIn if not flag.startswith(('-march', '-mtune', '-fdebug-prefix-map')) ]
482  blackList = ['--', '-fipa-pta']
483  return [x for x in flags if x not in blackList]
484 
485  def generate(self, outFileName):
486 
487  filename = outFileName
488  if not filename: # in case we're not using scram, this may not be set, use the default then, assuming we're in the package dir ...
489  filename = self._join_package_path('src', 'Serialization.cc')
490 
491  n_serializable_classes = 0
492 
493  source = headers_template.format(headers=os.path.join(self.split_path[1], self.split_path[2], 'src', 'headers.h'))
494 
495  for klass in sorted(self.classes):
496  (node, serializable, all_template_types, base_objects, members, transients) = self.classes[klass]
497 
498  if not serializable:
499  continue
500 
501  n_serializable_classes += 1
502 
503  skip_instantiation = False
504  for template_types in all_template_types:
505  if template_types:
506  skip_instantiation = True
507  source += ('template <%s>' % ', '.join(['%s %s' % template_type for template_type in template_types])) + '\n'
508 
509  source += serialize_method_begin_template.format(klass=klass) + '\n'
510 
511  for base_object_name in base_objects:
512  base_object_name_sanitised = sanitise(base_object_name)
513  source += serialize_method_base_object_template.format(base_object_name=base_object_name, base_object_name_sanitised=base_object_name_sanitised) + '\n'
514 
515  for member_name in members:
516  member_name_sanitised = sanitise(member_name)
517  source += serialize_method_member_template.format(member_name=member_name, member_name_sanitised=member_name_sanitised) + '\n'
518 
519  source += serialize_method_end
520 
521  if skip_instantiation:
522  source += '\n'
523  else:
524  source += instantiation_template.format(klass=klass) + '\n'
525 
526  if n_serializable_classes == 0:
527  raise Exception('No serializable classes found, while this package has a headers.h file.')
528 
529  # check if we have a file for template instantiations and other "special" code:
530  if os.path.exists( './src/SerializationManual.h' ) :
531  source += '#include "%s/%s/src/SerializationManual.h"\n' % (self.split_path[1], self.split_path[2])
532 
533  logging.info('Writing serialization code for %s classes in %s ...', n_serializable_classes, filename)
534  with open(filename, 'wb') as fd:
535  fd.write(source)
536 
537 
def main():
    """Command-line entry point: parse the options and generate the code.

    Options:
      --verbose/-v  raise the log level to DEBUG
      --output/-o   path of the file to write
      --package/-p  package directory to process (a trailing '/src' is
                    removed); the process changes into that directory

    When both --output and --package are given without --verbose, the log
    level is lowered to WARNING.  Arguments that the parser does not know
    are forwarded, except for the first one, as compiler flags.
    """
    parser = argparse.ArgumentParser(description='CMS Condition DB Serialization generator.')
    # default=0: with action='count' the value is otherwise None when -v is
    # absent, and comparing None with an int raises TypeError on Python 3.
    parser.add_argument('--verbose', '-v', action='count', default=0, help='Verbosity level. -v reports debugging information.')
    parser.add_argument('--output' , '-o', action='store', help='Specifies the path to the output file written. Default: src/Serialization.cc')
    parser.add_argument('--package', '-p', action='store', help='Specifies the path to the package to be processed. Default: the actual package')

    opts, args = parser.parse_known_args()

    if opts.verbose >= 1:
        logLevel = logging.DEBUG
    elif opts.output and opts.package: # assume we're called by scram and reduce logging - but only if no verbose is requested
        logLevel = logging.WARNING
    else:
        logLevel = logging.INFO

    logging.basicConfig(
        format = '[%(asctime)s] %(levelname)s: %(message)s',
        level = logLevel,
    )

    if opts.package: # we got a directory name to process, assume it's from scram and remove the last ('/src') dir from the path
        pkgDir = opts.package
        if pkgDir.endswith('/src'):
            pkgDir = os.path.dirname(pkgDir)
        os.chdir(pkgDir)
        logging.info("Processing package in %s ", pkgDir)

    if opts.output:
        logging.info("Writing serialization code to %s ", opts.output)

    # NOTE(review): the first unrecognised argument is dropped before the
    # rest are used as flags -- presumably it is the compiler name passed by
    # scram; confirm against the caller.
    SerializationCodeGenerator( scramFlags=args[1:] ).generate( opts.output )
569 
570 if __name__ == '__main__':
571  main()
572 
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
list object
Definition: dbtoconf.py:77
Definition: main.py:1