CMS 3D CMS Logo

condformats_serialization_generate.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 '''CMS Conditions DB Serialization generator.
3 
4 Generates the non-intrusive serialization code required for the classes
5 marked with the COND_SERIALIZABLE macro.
6 
7 The code was taken from the prototype that did many other things as well
8 (finding transients, marking serializable classes, etc.). After removing
9 everything but what is required to build the serialization, the code was
10 made more robust and cleaned up a bit to be integrated on the BoostIO IB.
11 However, the code still needs to be restructured a bit more to improve
12 readability (e.g. name some constants, use a template engine, ask for
13 clang's bindings to be installed along clang itself, etc.).
14 '''
15 
16 __author__ = 'Miguel Ojeda'
17 __copyright__ = 'Copyright 2014, CERN'
18 __credits__ = ['Giacomo Govi', 'Miguel Ojeda', 'Andreas Pfeiffer']
19 __license__ = 'Unknown'
20 __maintainer__ = 'Miguel Ojeda'
21 __email__ = 'mojedasa@cern.ch'
22 
23 
24 import argparse
25 import logging
26 import os
27 import re
28 import subprocess
29 
30 import clang.cindex
31 
# Cache for get_clang_version(); stays None until the version is detected.
clang_version = None

# Preamble of the generated source; {headers} is the path of the package's
# headers.h relative to the CMSSW src directory.
headers_template = '''
#include "{headers}"

#include <boost/serialization/base_object.hpp>
#include <boost/serialization/nvp.hpp>
#include <boost/serialization/export.hpp>

#include "CondFormats/Serialization/interface/Equal.h"
#include "CondFormats/Serialization/interface/Instantiate.h"

'''

# Opening of a generated serialize() definition; the doubled brace is a
# literal '{' once str.format() has been applied.
serialize_method_begin_template = '''template <class Archive>
void {klass}::serialize(Archive & ar, const unsigned int)
{{'''

# One line per base class of the serialized class.
serialize_method_base_object_template = ' ar & boost::serialization::make_nvp("{base_object_name_sanitised}", boost::serialization::base_object<{base_object_name}>(*this));'

# One line per non-transient data member.
serialize_method_member_template = ''' ar & boost::serialization::make_nvp("{member_name_sanitised}", {member_name});'''

# Closing of a generated serialize() definition (not a format string).
serialize_method_end = '''}
'''

# Explicit instantiation emitted for non-template classes.
instantiation_template = '''COND_SERIALIZATION_INSTANTIATE({klass});
'''


# Top-level namespaces that the class search never descends into.
skip_namespaces = frozenset((
    # Do not go inside anonymous namespaces (static)
    '',

    # Do not go inside some standard namespaces
    'std', 'boost', 'mpl_', 'boost_swap_impl',

    # Do not go inside some big namespaces coming from externals
    'ROOT', 'edm', 'ora', 'coral', 'CLHEP', 'Geom', 'HepGeom',
))
71 
def is_definition_by_loc(node):
    """Tell whether a cursor is located at its own definition.

    The cursor counts as a definition when ``node.get_definition()`` yields
    a cursor whose ``location`` compares equal to ``node.location``.

    Returns False when there is no definition, or when either location is
    None.
    """
    # Look the definition up once instead of once per check.
    definition = node.get_definition()
    if definition is None:
        return False
    if node.location is None or definition.location is None:
        return False
    return node.location == definition.location
78 
def is_serializable_class(node):
    """Tell whether a class cursor declares the expected serialize() template.

    A class is considered serializable when one of its children is a function
    template named ``serialize`` that is a declaration only (not located at
    its definition) and whose own children are exactly the template type
    parameter ``Archive``, an lvalue-reference parameter ``ar`` and an
    unsigned int parameter ``version``.
    """
    cursor_kind = clang.cindex.CursorKind
    type_kind = clang.cindex.TypeKind

    # (spelling, cursor kind, is definition, type kind) of each expected child.
    expected_signature = [
        ('Archive', cursor_kind.TEMPLATE_TYPE_PARAMETER, True, type_kind.UNEXPOSED),
        ('ar', cursor_kind.PARM_DECL, True, type_kind.LVALUEREFERENCE),
        ('version', cursor_kind.PARM_DECL, True, type_kind.UINT),
    ]

    for child in node.get_children():
        if child.spelling != 'serialize' or child.kind != cursor_kind.FUNCTION_TEMPLATE or is_definition_by_loc(child):
            continue

        signature = [(x.spelling, x.kind, is_definition_by_loc(x), x.type.kind) for x in child.get_children()]
        if signature == expected_signature:
            return True

    return False
94 
95 
def is_serializable_class_manual(node):
    """Tell whether a class declares the ``cond_serialization_manual`` marker.

    Returns True when one of the children of ``node`` is a C++ method named
    ``cond_serialization_manual`` that is a declaration only (not located at
    its definition).

    NOTE(review): the ``def`` line of this function was not visible; the
    name is inferred from the marker it searches for -- confirm against the
    call site.
    """
    return any(
        child.spelling == 'cond_serialization_manual'
        and child.kind == clang.cindex.CursorKind.CXX_METHOD
        and not is_definition_by_loc(child)
        for child in node.get_children()
    )
102 
103 
def get_statement(node):
    """Return the source text of the statement a cursor belongs to.

    The text runs from the start of the cursor's extent up to (not
    including) the first ';' found at or after the end of the extent. If no
    ';' follows, the text runs to the end of the file.

    Returns None when the extent has no file attached; otherwise the text
    decoded as UTF-8 (undecodable bytes are replaced), so that callers can
    use plain string operations on it on both Python 2 and Python 3.
    """
    # For some cursor kinds, their location is empty (e.g. translation units
    # and attributes); either because of a bug or because they do not have
    # a meaningful 'start' -- however, the extent is always available
    if node.extent.start.file is None:
        return None

    filename = node.extent.start.file.name
    start = node.extent.start.offset
    end = node.extent.end.offset

    # Read as bytes: the offsets are used to index the raw file contents.
    with open(filename, 'rb') as fd:
        source = fd.read()

    terminator = source.find(b';', end)
    if terminator == -1:
        # A -1 slice bound would silently drop the last byte of the file.
        terminator = len(source)

    return source[start:terminator].decode('utf-8', 'replace')
119 
120 
def get_basic_type_string(node):
    """Return the C++ spelling of the basic type of a cursor.

    Only bool, int, long, unsigned int, unsigned long, float and double are
    known.

    Raises:
        Exception: if the kind of ``node.type`` is not one of the above.
    """
    type_kind = clang.cindex.TypeKind
    typekinds = {
        type_kind.BOOL: 'bool',
        type_kind.INT: 'int',
        type_kind.LONG: 'long',
        type_kind.UINT: 'unsigned int',
        type_kind.ULONG: 'unsigned long',
        type_kind.FLOAT: 'float',
        type_kind.DOUBLE: 'double',
    }

    if node.type.kind not in typekinds:
        raise Exception('Not a known basic type.')

    return typekinds[node.type.kind]
136 
137 
def get_type_string(node):
    """Return the name of the type of a cursor.

    The spelling of the declaration of the type is preferred. When that
    spelling is missing or empty, the name of the basic type is returned
    instead (see get_basic_type_string(), which raises for unknown kinds).
    """
    spelling = node.type.get_declaration().spelling
    # Treat an empty spelling like a missing one so the fallback is reached.
    if spelling:
        return spelling

    return get_basic_type_string(node)
144 
145 
def get_serializable_classes_members(node, all_template_types=None, namespace='', only_from_path=None):
    """Recursively collect the classes found below a cursor.

    Args:
        node: cursor whose children are inspected.
        all_template_types: list with one entry per enclosing class, each
            entry being that class' list of (type, name) template
            parameters. Defaults to an empty list.
        namespace: qualified-name prefix of ``node`` (ends in '::' unless
            empty).
        only_from_path: if not None, class definitions located in a file
            whose name does not start with this path are skipped.

    Returns:
        A dict mapping each qualified class name to the tuple
        (cursor, serializable, all_template_types, base_objects, members,
        transients). Classes that declare manual serialization are left out.

    Raises:
        Exception: for an unnamed serializable class, a transient member in
            a non-serializable class, an unexpected declaration after
            serialize(), or a member of a kind the script does not know.
    """
    if all_template_types is None:
        all_template_types = []

    cursor_kind = clang.cindex.CursorKind

    # Kinds of cursors that define a class-like scope.
    class_kinds = [cursor_kind.CLASS_DECL, cursor_kind.STRUCT_DECL, cursor_kind.CLASS_TEMPLATE]

    # For safety, we list all known kinds that we need to skip
    # and raise in unknown cases (this helps catching problems
    # with undefined classes)
    skipped_member_kinds = frozenset([
        cursor_kind.CONSTRUCTOR,
        cursor_kind.DESTRUCTOR,
        cursor_kind.CXX_METHOD,
        cursor_kind.CXX_ACCESS_SPEC_DECL,
        cursor_kind.FUNCTION_TEMPLATE,
        cursor_kind.TYPEDEF_DECL,
        cursor_kind.CLASS_DECL,
        cursor_kind.ENUM_DECL,
        cursor_kind.VAR_DECL,
        cursor_kind.STRUCT_DECL,
        cursor_kind.UNION_DECL,
        cursor_kind.CONVERSION_FUNCTION,
        cursor_kind.TYPE_REF,
        cursor_kind.DECL_REF_EXPR,
        cursor_kind.CLASS_TEMPLATE,
        cursor_kind.TYPE_ALIAS_DECL,
    ])

    logging.debug('%s', (node.spelling, all_template_types, namespace))
    results = {}
    for child in node.get_children():
        if child.kind == cursor_kind.NAMESPACE:
            # If we are in the root namespace, let's skip some common, big
            # namespaces to improve speed and avoid serializing those.
            if namespace == '':
                if child.spelling in skip_namespaces:
                    continue

                # This skips compiler-specific stuff as well (e.g. __gnucxx...)
                if child.spelling.startswith('_'):
                    continue

            logging.debug('Going into namespace %s', child.spelling)

            results.update(get_serializable_classes_members(child, all_template_types, namespace + child.spelling + '::', only_from_path))
            continue

        if child.kind in class_kinds and is_definition_by_loc(child):
            logging.debug('Found struct/class/template definition: %s', child.spelling if child.spelling else '<anonymous>')

            if only_from_path is not None \
                    and child.location.file is not None \
                    and not child.location.file.name.startswith(only_from_path):
                logging.debug('Skipping since it is an external of this package: %s', child.spelling)
                continue

            serializable = is_serializable_class(child)
            if serializable:
                if child.spelling == '':
                    raise Exception('It is not possible to serialize anonymous/unnamed structs/classes.')

                # Classes serialized by hand are reported and left out.
                if is_serializable_class_manual(child):
                    logging.info('Found manual serializable struct/class/template: %s', child.spelling)
                    continue

                logging.info('Found serializable struct/class/template: %s', child.spelling)

            template_types = []
            base_objects = []
            members = []
            transients = []
            # State used to skip the two declarations that must follow
            # serialize() (they come from the COND_SERIALIZABLE macro).
            after_serialize = False
            after_serialize_count = 0
            for member in child.get_children():
                if after_serialize:
                    if after_serialize_count == 2:
                        after_serialize = False
                    else:
                        after_serialize_count = after_serialize_count + 1

                        if not is_friend_decl(member.kind):
                            raise Exception('Expected unexposed declaration (friend) after serialize() but found something else: looks like the COND_SERIALIZABLE macro has been changed without updating the script.')

                        if 'COND_SERIALIZABLE' not in get_statement(member):
                            raise Exception('Could not find COND_SERIALIZABLE in the statement of the expected unexposed declarations (friends) after serialize(). Please fix the script/macro.')

                        logging.debug('Skipping expected unexposed declaration (friend) after serialize().')
                        continue

                # Template type parameters (e.g. <typename T>)
                if member.kind == cursor_kind.TEMPLATE_TYPE_PARAMETER:
                    logging.info(' Found template type parameter: %s', member.spelling)
                    template_types.append(('typename', member.spelling))

                # Template non-type parameters (e.g. <int N>)
                elif member.kind == cursor_kind.TEMPLATE_NON_TYPE_PARAMETER:
                    type_string = get_type_string(member)
                    if not type_string:
                        type_string = get_basic_type_string(member)
                    logging.info(' Found template non-type parameter: %s %s', type_string, member.spelling)
                    template_types.append((type_string, member.spelling))

                # Base objects
                elif member.kind == cursor_kind.CXX_BASE_SPECIFIER:
                    # FIXME: .displayname gives sometimes things like "class mybase"
                    base_object = member.displayname
                    prefix = 'class '
                    if base_object.startswith(prefix):
                        base_object = base_object[len(prefix):]
                    logging.info(' Found base object: %s', base_object)
                    base_objects.append(base_object)

                # Member variables
                elif member.kind == cursor_kind.FIELD_DECL and is_definition_by_loc(member):
                    # While clang 3.3 does not ignore unrecognized attributes
                    # (see http://llvm.org/viewvc/llvm-project?revision=165082&view=revision )
                    # for some reason they do not appear in the bindings yet
                    # so we just do it ourselves.

                    # FIXME: To simplify and avoid parsing C++ ourselves, our transient
                    # attribute applies to *all* the variables declared in the same statement.
                    if 'COND_TRANSIENT' not in get_statement(member):
                        logging.info(' Found member variable: %s', member.spelling)
                        members.append(member.spelling)
                    else:
                        if serializable:
                            logging.info(' Found transient member variable: %s', member.spelling)
                            transients.append(member.spelling)
                        else:
                            # The message has to be formatted here: Exception
                            # does not interpolate extra arguments.
                            raise Exception('Transient %s found for non-serializable class %s' % (member.spelling, child.spelling))

                elif member.kind == cursor_kind.FUNCTION_TEMPLATE and member.spelling == 'serialize':
                    after_serialize = True
                    logging.debug('Found serialize() method, skipping next two children which must be unexposed declarations.')

                elif member.kind in skipped_member_kinds:
                    logging.debug('Skipping member: %s %s %s %s', member.displayname, member.spelling, member.kind, member.type.kind)

                elif is_friend_decl(member.kind):
                    statement = get_statement(member)

                    # Friends are unexposed but they are not data to serialize
                    if 'friend' in statement:
                        # If we know about them, skip the warning
                        if \
                                'friend class ' in statement or \
                                'friend struct ' in statement or \
                                'friend std::ostream& operator<<(' in statement or \
                                'friend std::istream& operator>>(' in statement:
                            logging.debug('Skipping known friend: %s', statement.splitlines()[0])
                            continue

                        # Otherwise warn
                        logging.warning('Unexposed declaration that looks like a friend declaration -- please check: %s %s %s %s %s', member.displayname, member.spelling, member.kind, member.type.kind, statement)
                        continue

                    raise Exception('Unexposed declaration. This probably means (at the time of writing) that an unknown class was found (may happen, for instance, when the compiler does not find the headers for std::vector, i.e. missing -I option): %s %s %s %s %s' % (member.displayname, member.spelling, member.kind, member.type.kind, statement))

                else:
                    statement = get_statement(member)
                    raise Exception('Unknown kind. Please fix the script: %s %s %s %s %s' % (member.displayname, member.spelling, member.kind, member.type.kind, statement))

            if template_types:
                template_use = '%s<%s>' % (child.spelling, ', '.join([template_type_name for (_, template_type_name) in template_types]))
            else:
                template_use = child.spelling

            new_namespace = namespace + template_use

            new_all_template_types = all_template_types + [template_types]

            results[new_namespace] = (child, serializable, new_all_template_types, base_objects, members, transients)

            # Nested classes are collected under this class' qualified name.
            results.update(get_serializable_classes_members(child, new_all_template_types, new_namespace + '::', only_from_path))

    for klass, (_, serializable, _, _, members, _) in results.items():
        if serializable and len(members) == 0:
            logging.info('No non-transient members found for serializable class %s', klass)

    return results
322 
323 
def split_path(path):
    """Break a path into the list of its components, outermost first.

    The path is peeled from the right with os.path.split() until the tail
    comes back empty; whatever head remains at that point (e.g. the root
    '/') is kept as the first component.
    """
    parts = []

    head, tail = os.path.split(path)
    while tail != '':
        parts.append(tail)
        head, tail = os.path.split(head)

    # The leftover head is non-empty for absolute paths and for paths
    # ending in a separator.
    if head != '':
        parts.append(head)

    return parts[::-1]
340 
341 
def get_flags(product_name, flags):
    """Ask scram for the build flags of a product.

    Runs ``scram b echo_<product_name>_<flags>`` through the shell, keeps
    what follows the first '=' on the last output line and returns it as a
    list with one flag per entry.

    The output is read in text mode so the flags are ``str`` on both
    Python 2 and Python 3 and can be combined with other string flags.

    Raises:
        subprocess.CalledProcessError: if the command exits with an error.
    """
    command = "scram b echo_%s_%s | tail -1 | cut -d '=' -f '2-' | xargs -n1" % (product_name, flags)
    logging.debug('Running: %s', command)
    return subprocess.check_output(command, shell=True, universal_newlines=True).splitlines()
346 
def get_clang_version():
    """Extract clang version and set global clang_version and also return the same value.

    The version is a tuple (major, minor, patchlevel) of ints taken from the
    first dotted number in the output of ``clang --version``. A missing
    patchlevel counts as 0, and anything around the number (vendor prefix,
    distribution suffix, build information) is ignored. The result is
    cached in the module-level ``clang_version``.

    Raises:
        Exception: if no version number can be found in the output.
        subprocess.CalledProcessError: if the command exits with an error.
    """
    global clang_version
    if clang_version is not None:
        return clang_version
    command = "clang --version | grep 'clang version' | sed 's/clang version//'"
    logging.debug("Running: {0}".format(command))
    # Text mode keeps the output a str on both Python 2 and Python 3.
    output = subprocess.check_output(command, shell=True, universal_newlines=True)
    match = re.search(r'(\d+)\.(\d+)(?:\.(\d+))?', output)
    if match is None:
        raise Exception('Unable to detect the Clang version from: %r' % output)
    clang_version = tuple(int(part or 0) for part in match.groups())
    logging.debug("Detected Clang version: {0}".format(clang_version))
    return clang_version
358 
def is_friend_decl(memkind):
    """Check if declaration is a friend.

    With Clang 4.0.0 or newer a friend is reported as FRIEND_DECL; with
    older versions the check is against UNEXPOSED_DECL instead.
    """
    if get_clang_version() >= (4, 0, 0):
        return memkind == clang.cindex.CursorKind.FRIEND_DECL
    return memkind == clang.cindex.CursorKind.UNEXPOSED_DECL
367 
def log_flags(name, flags):
    """Log a list of flags at debug level, one flag per record.

    The records are ``<name> = [``, then each flag, then ``]``.
    """
    records = [('%s = [', (name,))]
    records.extend((' %s', (flag,)) for flag in flags)
    records.append((']', ()))

    for message, params in records:
        logging.debug(message, *params)
373 
374 
def get_diagnostics(translation_unit):
    """Return the diagnostics of a translation unit as a list of dicts.

    Each dict holds the 'severity', 'location', 'spelling', 'ranges' and
    'fixits' attributes of one diagnostic. A real list is returned on both
    Python 2 and Python 3, so it can be iterated more than once.
    """
    return [
        {
            'severity': diag.severity,
            'location': diag.location,
            'spelling': diag.spelling,
            'ranges': diag.ranges,
            'fixits': diag.fixits,
        }
        for diag in translation_unit.diagnostics
    ]
383 
384 
def get_default_gcc_search_paths(gcc = 'g++', language = 'c++'):
    """Return the default include search paths of a compiler as -I flags.

    The compiler is run verbosely on an empty input and the paths listed
    between '#include <...> search starts here:' and 'End of search list.'
    are collected. Paths containing '/lib/gcc/' are left out.

    Args:
        gcc: compiler executable to query.
        language: value given to the compiler's -x option.

    Raises:
        Exception: if the start of the search list is not found.
        subprocess.CalledProcessError: if the command exits with an error.
    """
    command = 'echo "" | %s -x%s -v -E - 2>&1' % (gcc, language)
    logging.debug('Running: %s', command)

    # Text mode: the markers below are str, so the output must be str as
    # well (on Python 3 bytes lines would never compare equal to them).
    output = subprocess.check_output(command, shell=True, universal_newlines=True)

    paths = []
    in_list = False
    for line in output.splitlines():
        if in_list:
            if line == 'End of search list.':
                break

            path = os.path.normpath(line.strip())

            # Intrinsics not handled by clang
            # Note that /lib/gcc is found in other paths if not normalized,
            # so has to go after normpath()
            if '/lib/gcc/' in path:
                continue

            paths.append('-I%s' % path)

        else:
            if line == '#include <...> search starts here:':
                in_list = True

    if not in_list:
        raise Exception('Default GCC search paths not found.')

    return paths
414 
def sanitise(var):
    """Replace every character outside the allowed set by '-'.

    Allowed are ASCII letters, digits, '.' and the characters from ',' to
    ':' -- inside the brackets ``,-:`` is a range, so '-', '/' and ':' are
    allowed as well. Note that '_' is not allowed and is replaced too.
    """
    disallowed = re.compile('[^a-zA-Z0-9.,-:]')
    return disallowed.sub('-', var)
417 
418 
420 
421  def __init__(self, scramFlags=None):
422 
423  self.cmssw_base = os.getenv('CMSSW_BASE')
424  if self.cmssw_base is None:
425  raise Exception('CMSSW_BASE is not set.')
426  logging.debug('cmssw_base = %s', self.cmssw_base)
427 
428  cwd = os.getcwd()
429  logging.debug('cwd = %s', cwd)
430 
431  if not cwd.startswith(self.cmssw_base):
432  raise Exception('The filepath does not start with CMSSW_BASE.')
433 
434  relative_path = cwd[len(self.cmssw_base)+1:]
435  logging.debug('relative_path = %s', relative_path)
436 
437  self.split_path = split_path(relative_path)
438  logging.debug('splitpath = %s', self.split_path)
439 
440  if len(self.split_path) < 3:
441  raise Exception('This script requires to be run inside a CMSSW package (usually within CondFormats), e.g. CondFormats/Alignment. The current path is: %s' % self.split_path)
442 
443  if self.split_path[0] != 'src':
444  raise Exception('The first folder should be src.')
445 
446  if self.split_path[1] != 'CondFormats':
447  raise Exception('The second folder should be CondFormats.')
448 
449  product_name = '%s%s' % (self.split_path[1], self.split_path[2])
450  logging.debug('product_name = %s', product_name)
451 
452  if not scramFlags:
453  cpp_flags = get_flags(product_name, 'CPPFLAGS')
454  cxx_flags = get_flags(product_name, 'CXXFLAGS')
455  else:
456  cpp_flags = self.cleanFlags( scramFlags )
457  cxx_flags = []
458 
459  # We are using libClang, thus we have to follow Clang include paths
460  std_flags = get_default_gcc_search_paths(gcc='clang++')
461  log_flags('cpp_flags', cpp_flags)
462  log_flags('cxx_flags', cxx_flags)
463  log_flags('std_flags', std_flags)
464 
465  flags = ['-xc++'] + cpp_flags + cxx_flags + std_flags
466 
467  headers_h = self._join_package_path('src', 'headers.h')
468  logging.debug('headers_h = %s', headers_h)
469  if not os.path.exists(headers_h):
470  raise Exception('File %s does not exist. Impossible to serialize package.' % headers_h)
471 
472  logging.info('Searching serializable classes in %s/%s ...', self.split_path[1], self.split_path[2])
473 
474  logging.debug('Parsing C++ classes in file %s ...', headers_h)
475  # On macOS we need to costruct library search path
476  if "SCRAM_ARCH" in os.environ and re.match('osx10*',os.environ['SCRAM_ARCH']):
477  cindex=clang.cindex
478  libpath=os.path.dirname(os.path.realpath(clang.cindex.__file__))+"/../../lib"
479  cindex.Config.set_library_path(libpath)
480  index = cindex.Index.create()
481  else :
482  index = clang.cindex.Index.create()
483  translation_unit = index.parse(headers_h, flags)
484  if not translation_unit:
485  raise Exception('Unable to load input.')
486 
487  severity_names = ('Ignored', 'Note', 'Warning', 'Error', 'Fatal')
488  get_severity_name = lambda severity_num: severity_names[severity_num] if severity_num < len(severity_names) else 'Unknown'
489  max_severity_level = 0 # Ignored
490  diagnostics = get_diagnostics(translation_unit)
491  for diagnostic in diagnostics:
492  logf = logging.error
493 
494  # Ignore some known warnings
495  if diagnostic['spelling'].startswith('argument unused during compilation') \
496  or diagnostic['spelling'].startswith('unknown warning option'):
497  logf = logging.debug
498 
499  logf('Diagnostic: [%s] %s', get_severity_name(diagnostic['severity']), diagnostic['spelling'])
500  logf(' at line %s in %s', diagnostic['location'].line, diagnostic['location'].file)
501 
502  max_severity_level = max(max_severity_level, diagnostic['severity'])
503 
504  if max_severity_level >= 3: # Error
505  raise Exception('Please, resolve all errors before proceeding.')
506 
507  self.classes = get_serializable_classes_members(translation_unit.cursor, only_from_path=self._join_package_path())
508 
509  def _join_package_path(self, *path):
510  return os.path.join(self.cmssw_base, self.split_path[0], self.split_path[1], self.split_path[2], *path)
511 
512  def cleanFlags(self, flagsIn):
513  flags = [ flag for flag in flagsIn if not flag.startswith(('-march', '-mtune', '-fdebug-prefix-map', '-ax', '-wd')) ]
514  blackList = ['--', '-fipa-pta', '-xSSE3', '-fno-crossjumping', '-fno-aggressive-loop-optimizations']
515  return [x for x in flags if x not in blackList]
516 
517  def generate(self, outFileName):
518 
519  filename = outFileName
520  if not filename: # in case we're not using scram, this may not be set, use the default then, assuming we're in the package dir ...
521  filename = self._join_package_path('src', 'Serialization.cc')
522 
523  n_serializable_classes = 0
524 
525  source = headers_template.format(headers=os.path.join(self.split_path[1], self.split_path[2], 'src', 'headers.h'))
526 
527  for klass in sorted(self.classes):
528  (node, serializable, all_template_types, base_objects, members, transients) = self.classes[klass]
529 
530  if not serializable:
531  continue
532 
533  n_serializable_classes += 1
534 
535  skip_instantiation = False
536  for template_types in all_template_types:
537  if template_types:
538  skip_instantiation = True
539  source += ('template <%s>' % ', '.join(['%s %s' % template_type for template_type in template_types])) + '\n'
540 
541  source += serialize_method_begin_template.format(klass=klass) + '\n'
542 
543  for base_object_name in base_objects:
544  base_object_name_sanitised = sanitise(base_object_name)
545  source += serialize_method_base_object_template.format(base_object_name=base_object_name, base_object_name_sanitised=base_object_name_sanitised) + '\n'
546 
547  for member_name in members:
548  member_name_sanitised = sanitise(member_name)
549  source += serialize_method_member_template.format(member_name=member_name, member_name_sanitised=member_name_sanitised) + '\n'
550 
551  source += serialize_method_end
552 
553  if skip_instantiation:
554  source += '\n'
555  else:
556  source += instantiation_template.format(klass=klass) + '\n'
557 
558  if n_serializable_classes == 0:
559  raise Exception('No serializable classes found, while this package has a headers.h file.')
560 
561  # check if we have a file for template instantiations and other "special" code:
562  if os.path.exists( './src/SerializationManual.h' ) :
563  source += '#include "%s/%s/src/SerializationManual.h"\n' % (self.split_path[1], self.split_path[2])
564 
565  logging.info('Writing serialization code for %s classes in %s ...', n_serializable_classes, filename)
566  with open(filename, 'wb') as fd:
567  fd.write(source)
568 
569 
def main():
    """Command-line entry point: parse the options and generate the code.

    Options not known to the parser are collected and, except for the first
    one, handed to the generator as compiler flags.
    """
    parser = argparse.ArgumentParser(description='CMS Condition DB Serialization generator.')
    # default=0: without it the count is None when -v is absent, and None
    # cannot be compared with an int on Python 3.
    parser.add_argument('--verbose', '-v', action='count', default=0, help='Verbosity level. -v reports debugging information.')
    parser.add_argument('--output' , '-o', action='store', help='Specifies the path to the output file written. Default: src/Serialization.cc')
    parser.add_argument('--package', '-p', action='store', help='Specifies the path to the package to be processed. Default: the actual package')

    opts, args = parser.parse_known_args()

    if opts.verbose >= 1:
        logLevel = logging.DEBUG
    elif opts.output and opts.package: # assume we're called by scram and reduce logging - but only if no verbose is requested
        logLevel = logging.WARNING
    else:
        logLevel = logging.INFO

    logging.basicConfig(
        format = '[%(asctime)s] %(levelname)s: %(message)s',
        level = logLevel,
    )

    if opts.package: # we got a directory name to process, assume it's from scram and remove the last ('/src') dir from the path
        pkgDir = opts.package
        if pkgDir.endswith('/src') :
            pkgDir, srcDir = os.path.split( opts.package )
        os.chdir( pkgDir )
        logging.info("Processing package in %s " % pkgDir)

    if opts.output:
        logging.info("Writing serialization code to %s " % opts.output)

    SerializationCodeGenerator( scramFlags=args[1:] ).generate( opts.output )
601 
602 if __name__ == '__main__':
603  main()
604 
def get_default_gcc_search_paths(gcc='g++', language='c++')
def generate(map_blobs=False, class_name=None)
Definition: models.py:189
static std::string join(char **cmd)
Definition: RemoteFile.cc:18
Definition: main.py:1
def get_serializable_classes_members(node, all_template_types=None, namespace='', only_from_path=None)
double split
Definition: MVATrainer.cc:139