From ab3b9b3d4d1207ba436aaf6a10a5bf6f6d0c1a0f Mon Sep 17 00:00:00 2001
From: Olivier Parcollet
Date: Sun, 6 Jul 2014 23:08:33 +0200
Subject: [PATCH] Draft libclang based python wrapper desc generator

- Given a C++ file, e.g. a class, it calls libclang to parse the C++,
  and retrieves from its AST the necessary info to write a xxx_desc.py file.
- THIS IS WORK IN PROGRESS. There are several corner cases for which we may
  want (or not) the script to do better.
- It is not designed to be used automatically, but to do 90 % of the boring
  typesetting work...
- The preamble still needs manual choices
- The properties, methods, functions are automatically declared in the _desc
  file, in the simplest possible way.
- An option --properties, -p : to transform some simple methods
  or get_x, set_x into python properties, not methods. Cf doc.
- requires clang (tested on 3.4).
- the script is configured by cmake and installed in
  INSTALLATION_DIRECTORY/bin, with some other files.
  It can only be used for applications, after the lib has been installed.
  It is cmake configured, to include automatically the various include paths
  configured in the triqs installation, including the triqs install dir
  in order to simplify invocation.
- TODO : improve, and test more in real cases.
---
 CMakeLists.txt                                |  10 +
 cmake/TRIQSConfig.cmake.in                    |   9 +-
 pytriqs/wrap_generator/CMakeLists.txt         |   8 +-
 pytriqs/wrap_generator/clang_parser.py        | 231 ++++++++++++++++++
 pytriqs/wrap_generator/wrap_desc.mako.py      |  86 +++++++
 .../wrapper_desc_generator.py.in              |  91 +++++++
 6 files changed, 429 insertions(+), 6 deletions(-)
 create mode 100644 pytriqs/wrap_generator/clang_parser.py
 create mode 100644 pytriqs/wrap_generator/wrap_desc.mako.py
 create mode 100644 pytriqs/wrap_generator/wrapper_desc_generator.py.in

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8b6fd122..8730104b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -372,6 +372,16 @@ include_directories(${CMAKE_BINARY_DIR}/include/)
 # (Case of an installed lib) or the triqs_config.h just generated in the built dir (when compiling the lib itself).
 add_definitions ( ${TRIQS_CXX_DEFINITIONS} -DTRIQS_BUILDING_LIBRARY -I ${CMAKE_BINARY_DIR}/Config)
 
+#------------------------
+# Not used in the main code, only in TRIQSConfig and wrapper_desc_generator configuration
+#------------------------
+# for people who want to quickly add everything TRIQS has detected...
+set(TRIQS_LIBRARY_ALL ${TRIQS_LIBRARY} ${TRIQS_LIBRARY_BOOST} ${TRIQS_LIBRARY_PYTHON} ${TRIQS_LIBRARY_MPI} ${TRIQS_LIBRARY_HDF5} ${TRIQS_LIBRARY_LAPACK} ${TRIQS_LIBRARY_FFTW} ${TRIQS_LIBRARY_GMP} ${TRIQS_LIBRARY_GSL} )
+set(TRIQS_INCLUDE_ALL ${TRIQS_INCLUDE} ${TRIQS_INCLUDE_BOOST} ${TRIQS_INCLUDE_PYTHON} ${TRIQS_INCLUDE_MPI} ${TRIQS_INCLUDE_HDF5} ${TRIQS_INCLUDE_LAPACK} ${TRIQS_INCLUDE_FFTW} ${TRIQS_INCLUDE_GMP} ${TRIQS_INCLUDE_GSL} )
+list (REMOVE_DUPLICATES TRIQS_INCLUDE_ALL)
+
+set(TRIQS_LIBCLANG_LOCATION "/usr/lib/libclang.dylib" CACHE FILEPATH "Location of the libclang library")
+
 #---------------------------------------------------------------------
 # pytriqs modules : MUST be before TRIQS, to have the py_converters
 #---------------------------------------------------------------------
diff --git a/cmake/TRIQSConfig.cmake.in b/cmake/TRIQSConfig.cmake.in
index 8a657ee4..7b386c0c 100644
--- a/cmake/TRIQSConfig.cmake.in
+++ b/cmake/TRIQSConfig.cmake.in
@@ -34,6 +34,10 @@ set(TRIQS_WITH_PYTHON_SUPPORT @TRIQS_WITH_PYTHON_SUPPORT@)
 set(TRIQS_HDF5_DIFF_EXECUTABLE @TRIQS_HDF5_DIFF_EXECUTABLE@)
 set(TRIQS_PYTHON_LIB_DEST @TRIQS_PYTHON_LIB_DEST@)
 
+# for people who want to quickly add everything TRIQS has detected...
+set(TRIQS_LIBRARY_ALL ${TRIQS_LIBRARY} @TRIQS_LIBRARY_ALL@)
+set(TRIQS_INCLUDE_ALL ${TRIQS_INCLUDE} @TRIQS_INCLUDE_ALL@)
+
 # Documentation
 set(TRIQS_WITH_DOCUMENTATION @Build_Documentation@)
 set(TRIQS_SPHINXBUILD_EXECUTABLE @SPHINXBUILD_EXECUTABLE@)
@@ -51,11 +55,6 @@ macro(triqs_get_git_hash DIR PREFIX)
   OUTPUT_STRIP_TRAILING_WHITESPACE)
 endmacro(triqs_get_git_hash)
 
-# for people who want to quickly add everything TRIQS has detected...
-set(TRIQS_LIBRARY_ALL ${TRIQS_LIBRARY} ${TRIQS_LIBRARY_BOOST} ${TRIQS_LIBRARY_PYTHON} ${TRIQS_LIBRARY_MPI} ${TRIQS_LIBRARY_HDF5} ${TRIQS_LIBRARY_LAPACK} ${TRIQS_LIBRARY_FFTW} ${TRIQS_LIBRARY_GMP} ${TRIQS_LIBRARY_GSL} )
-set(TRIQS_INCLUDE_ALL ${TRIQS_INCLUDE} ${TRIQS_INCLUDE_BOOST} ${TRIQS_INCLUDE_PYTHON} ${TRIQS_INCLUDE_MPI} ${TRIQS_INCLUDE_HDF5} ${TRIQS_INCLUDE_LAPACK} ${TRIQS_INCLUDE_FFTW} ${TRIQS_INCLUDE_GMP} ${TRIQS_INCLUDE_GSL} )
-list (REMOVE_DUPLICATES TRIQS_INCLUDE_ALL)
-
 # Python specific stuff
 set (TRIQS_PYTHON_INTERPRETER @PYTHON_INTERPRETER@)
 
diff --git a/pytriqs/wrap_generator/CMakeLists.txt b/pytriqs/wrap_generator/CMakeLists.txt
index 8868b4ac..e9550369 100644
--- a/pytriqs/wrap_generator/CMakeLists.txt
+++ b/pytriqs/wrap_generator/CMakeLists.txt
@@ -4,5 +4,11 @@ SET(PYTHON_SOURCES
   ${CMAKE_CURRENT_SOURCE_DIR}/wrapper.mako.cpp
 )
 
-install (FILES ${PYTHON_SOURCES} DESTINATION ${CMAKE_INSTALL_PREFIX}/share/triqs/wrap_generator)
+install(FILES ${PYTHON_SOURCES} DESTINATION ${CMAKE_INSTALL_PREFIX}/share/triqs/wrap_generator)
+
+# The desc_file generator from libclang ...
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/wrapper_desc_generator.py.in ${CMAKE_CURRENT_BINARY_DIR}/wrapper_desc_generator.py @ONLY)
+install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/wrapper_desc_generator.py DESTINATION bin)
+install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/clang_parser.py DESTINATION bin)
+install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/wrap_desc.mako.py DESTINATION share/triqs/wrap_generator/)
 
diff --git a/pytriqs/wrap_generator/clang_parser.py b/pytriqs/wrap_generator/clang_parser.py
new file mode 100644
index 00000000..7dfbaa2c
--- /dev/null
+++ b/pytriqs/wrap_generator/clang_parser.py
@@ -0,0 +1,231 @@
+# This module defines the function parse, which
+# calls libclang to parse a C++ file and retrieves
+# from the clang AST the classes, functions, methods, members (including
+# templates).
+# This module is used e.g. by the wrapper desc generator.
+import sys,re,os
+import clang.cindex
+import itertools
+from mako.template import Template
+import textwrap
+
+def get_annotations(node):
+    """Return the display names of the annotate attributes found among the children of node."""
+    return [c.displayname for c in node.get_children()
+            if c.kind == clang.cindex.CursorKind.ANNOTATE_ATTR]
+
+def _cursor_filename(cursor):
+    """Return the name of the file containing cursor, or None when clang reports no file."""
+    f = cursor.location.file
+    return f.name if f else None
+
+def process_doc (doc) :
+    """Remove the C/C++ comment markers and the brief tag from a raw comment."""
+    if not doc : return ""
+    for p in [r"/\*",r"\*/",r"^\s*\*", "///", "//", r"\\brief"] : doc = re.sub(p,"",doc,flags = re.MULTILINE)
+    return doc.strip()
+
+# Names of the files in which the wrapped entities were found. Filled by the constructors below.
+file_locations = set(())
+
+class member_(object):
+    """A data member of a C++ class."""
+    def __init__(self, cursor,ns=()):
+        loc = _cursor_filename(cursor)
+        if loc : file_locations.add(loc)
+        self.doc = process_doc(cursor.raw_comment)
+        self.ns = ns
+        self.name = cursor.spelling
+        self.access = cursor.access_specifier
+        self.ctype = cursor.type.spelling
+
+    def namespace(self) :
+        return "::".join(self.ns)
+
+class Function(object):
+    """A C++ function, method or constructor.
+
+    params holds the (type, name) pairs of the arguments; params_decay holds the same
+    pairs with const and the references removed from the types.
+    rtype is the spelling of the return type, or None for a constructor.
+    """
+    def __init__(self, cursor, is_constructor = False, ns=() ): #, template_list =()):
+        loc = _cursor_filename(cursor)
+        if loc : file_locations.add(loc)
+        self.doc = process_doc(cursor.raw_comment)
+        self.brief_doc = self.doc.split('\n')[0].strip() # improve ...
+        self.ns = ns
+        self.name = cursor.spelling
+        self.annotations = get_annotations(cursor)
+        self.access = cursor.access_specifier
+        self.params = []
+        self.params_decay = []
+        self.template_list = [] #template_list
+        self.is_constructor = is_constructor
+        self.is_static = cursor.is_static_method()
+
+        def decay(s) :
+            # \b : remove the const keyword only, not a "const" inside an identifier
+            s = re.sub(r'\bconst\b','',s)
+            s = re.sub('&&','',s)
+            s = re.sub('&','',s)
+            return s.strip()
+        for c in cursor.get_children():
+            if c.kind == clang.cindex.CursorKind.TEMPLATE_TYPE_PARAMETER :
+                self.template_list.append(c.spelling)
+            elif (c.kind == clang.cindex.CursorKind.PARM_DECL) :
+                self.params.append ( (c.type.spelling, c.spelling))
+                self.params_decay.append ( (decay(c.type.spelling), c.spelling))
+            #else :
+            #    print " node in fun ", c.kind
+        # self.rtype = cursor.result_type.get_canonical().spelling
+        self.rtype = cursor.result_type.spelling if not is_constructor else None
+        #print 'params for ', self.name, self.params_decay
+
+    def namespace(self) :
+        return "::".join(self.ns)
+
+    def signature_cpp(self) :
+        """Return the C++ declaration of the function."""
+        # A constructor has no return type (its rtype is None) : only the others print one
+        s = "{name} ({args})" if self.is_constructor else "{rtype} {name} ({args})"
+        s = s.format(args = ', '.join( ["%s %s"%t_n for t_n in self.params]), **self.__dict__)
+        if self.template_list :
+            s = "template<" + ', '.join(['typename ' + x for x in self.template_list]) + "> " + s
+        if self.is_static : s = "static " + s
+        return s.strip()
+
+    @property
+    def is_template(self) : return len(self.template_list)>0
+
+    def __str__(self) :
+        return "%s\n%s\n"%(self.signature_cpp(),self.doc)
+
+class Class(object):
+    """A C++ class or struct, with its public members, methods, constructors and functions."""
+    def __init__(self, cursor,ns):
+        loc = _cursor_filename(cursor)
+        if loc : file_locations.add(loc)
+        self.doc = process_doc(cursor.raw_comment)
+        self.brief_doc = self.doc.split('\n')[0].strip() # improve ...
+        self.ns = ns
+        self.name = cursor.spelling
+        self.functions = []
+        self.constructors = []
+        self.methods = []
+        self.members = []
+        self.proplist = []
+        self.annotations = get_annotations(cursor)
+        self.file = loc
+
+        # MISSING : constructors template not recognized
+        for c in cursor.get_children():
+            # Only public nodes
+            if c.access_specifier != clang.cindex.AccessSpecifier.PUBLIC : continue
+
+            if (c.kind == clang.cindex.CursorKind.FIELD_DECL):
+                m = member_(c)
+                self.members.append(m)
+
+            elif (c.kind == clang.cindex.CursorKind.CXX_METHOD):
+                f = Function(c)
+                self.methods.append(f)
+
+            elif (c.kind == clang.cindex.CursorKind.CONSTRUCTOR):
+                f = Function(c, is_constructor = True)
+                self.constructors.append(f)
+
+            elif (c.kind == clang.cindex.CursorKind.FUNCTION_DECL):
+                f = Function(c)
+                self.functions.append(f)
+
+            elif (c.kind == clang.cindex.CursorKind.FUNCTION_TEMPLATE):
+                f = Function(c)
+                self.methods.append(f)
+
+    def namespace(self) :
+        return "::".join(self.ns)
+
+    def __str__(self) :
+        s,s2 = "class {name}:\n {doc}\n\n".format(**self.__dict__),[]
+        for m in self.members :
+            s2 += ["%s %s"%(m.ctype,m.name)]
+        for m in self.methods :
+            s2 += str(m).split('\n')
+        for m in self.functions :
+            s2 += ("friend " + str(m)).split('\n')
+        s2 = '\n'.join( [ " " + l.strip() + '\n' for l in s2 if l.strip()])
+        return s + s2
+
+def build_functions_and_classes(cursor, namespaces=None, filename=None):
+    """Collect recursively the functions and classes declared in the file filename.
+
+    namespaces is the list of the names of the enclosing namespaces (default : none).
+    filename defaults to sys.argv[1].
+    Returns the couple (functions, classes).
+    """
+    if namespaces is None : namespaces = []
+    if filename is None : filename = sys.argv[1]
+    classes,functions = [],[]
+    for c in cursor.get_children():
+        if (c.kind == clang.cindex.CursorKind.FUNCTION_DECL
+            and _cursor_filename(c) == filename):
+            # ns is passed by keyword : the second positional argument is is_constructor
+            functions.append( Function(c, ns = namespaces))
+        elif (c.kind in [clang.cindex.CursorKind.CLASS_DECL, clang.cindex.CursorKind.STRUCT_DECL]
+            and _cursor_filename(c) == filename):
+            classes.append( Class(c,namespaces))
+        elif c.kind == clang.cindex.CursorKind.NAMESPACE:
+            child_fnt, child_classes = build_functions_and_classes(c, namespaces +[c.spelling], filename)
+            functions.extend(child_fnt)
+            classes.extend(child_classes)
+
+    return functions,classes
+
+def parse(filename, debug, compiler_options, where_is_libclang):
+    """Parse the C++ file filename with libclang and return the couple (functions, classes).
+
+    compiler_options is a list of additional options passed to clang.
+    where_is_libclang is the path of the libclang shared library.
+    If debug is true, the functions and classes found are printed.
+    Raises RuntimeError if clang reports errors, or if the entities found
+    are not located in exactly one file.
+    """
+
+    compiler_options = [ '-std=c++11', '-stdlib=libc++', '-D__CODE_GENERATOR__'] + compiler_options
+
+    clang.cindex.Config.set_library_file(where_is_libclang)
+    index = clang.cindex.Index.create()
+    translation_unit = index.parse(filename, ['-x', 'c++'] + compiler_options)
+    print "Parsing done. \nExtracting ..."
+
+    # If clang encounters errors, we report and stop
+    errors = [d for d in translation_unit.diagnostics if d.severity >= 3]
+    if errors :
+        s = "Clang reports the following errors in parsing\n"
+        for err in errors :
+            loc = err.location
+            s += '\n'.join(["file %s line %s col %s"%(loc.file, loc.line, loc.column), err.spelling]) + '\n'
+        raise RuntimeError(s + "\n... Your code must compile before making the wrapper !")
+
+    # Analyze the AST to extract classes and functions
+    functions, classes = build_functions_and_classes(translation_unit.cursor, filename = filename)
+    print "... done"
+
+    global file_locations
+    if len(file_locations) != 1 :
+        print file_locations
+        raise RuntimeError("Multiple file location not implemented")
+    file_locations = list(file_locations)
+
+    if debug :
+        print "functions"
+        for f in functions :
+            print f
+
+        print "classes"
+        for c in classes :
+            print c
+
+    return functions, classes
+
diff --git a/pytriqs/wrap_generator/wrap_desc.mako.py b/pytriqs/wrap_generator/wrap_desc.mako.py
new file mode 100644
index 00000000..27e85832
--- /dev/null
+++ b/pytriqs/wrap_generator/wrap_desc.mako.py
@@ -0,0 +1,86 @@
+from wrap_generator import *
+
+# The module
+module = module_(full_name = "${modulename}",
+                 doc = " ")
+
+#
+# Need to add here the necessary include for compilation
+#module.add_include("")
+#module.add_include("")
+
+# Some include, using, etc... Cf doc.
+module.add_preamble("""
+""")
+
+<%
+  def deduce_normalized_python_class_name(s) :
+    """my_class_name -> MyClassName"""
+    return ''.join([x.capitalize() for x in s.split('_')])
+
+  def make_signature(m) :
+    """Signature string of m built from its decayed argument types; template functions are refused"""
+    assert not m.template_list, "template functions can not be wrapped to Python"
+    s = "{rtype} {name} ({args})" if not m.is_constructor else "({args})"
+    s = s.format(args = ', '.join( ["%s %s"%t_n for t_n in m.params_decay]), **m.__dict__)
+    return s.strip()
+
+%>
+
+%for c in classes :
+g = class_(
+        py_type = "${deduce_normalized_python_class_name(c.name)}", # name of the python class
+        c_type = "${c.name}", # name of the C++ class
+        #
+        # Hereafter several options to be selected by hand. Cf doc
+        #has_iterator = True,
+        #boost_serializable= True,
+        #is_printable= True,
+        #arithmetic = ("algebra","double")
+        )
+
+%for m in c.members :
+g.add_member(c_name = "${m.name}",
+             c_type = "${m.ctype}",
+             read_only= False,
+             doc = """${m.doc} """)
+
+%endfor
+##
+%for m in [m for m in c.constructors if not m.is_template]:
+g.add_constructor("${make_signature(m)}",
+                  doc = """${m.doc} """)
+
+%endfor
+##
+## Template methods can not be wrapped (make_signature asserts on them) : skip them, like the template constructors
+%for m in [m for m in c.methods if not m.is_template]:
+g.add_method("${make_signature(m)}",
+             %if m.is_static :
+             is_static = True,
+             %endif
+             doc = """${m.doc} """)
+
+%endfor
+##
+%for p in [p for p in c.proplist]:
+g.add_property(name = "${p.name}",
+               getter = cfunction("${make_signature(p.getter)}"),
+               %if p.setter :
+               setter = cfunction("${make_signature(p.setter)}"),
+               %endif
+               doc = """${p.doc} """)
+
+%endfor
+##
+module.add_class(g)
+
+%endfor
+##
+%for f in [f for f in functions if not f.is_template]:
+module.add_function ("${make_signature(f)}", doc = """${f.doc} """)
+
+%endfor
+##
+module.generate_code()
+
diff --git a/pytriqs/wrap_generator/wrapper_desc_generator.py.in b/pytriqs/wrap_generator/wrapper_desc_generator.py.in
new file mode 100644
index 00000000..d6a20972
--- /dev/null
+++ b/pytriqs/wrap_generator/wrapper_desc_generator.py.in
@@ -0,0 +1,91 @@
+#!@PYTHON_INTERPRETER@
+
+from clang_parser import parse
+import sys, os
+from mako.template import Template
+
+# --- Parsing the arguments of the script and options
+import argparse
+
+parser = argparse.ArgumentParser(description='C++/Python wrapper desc file generator from C++ header code')
+
+parser.add_argument('filename', help = "Name of the file")
+parser.add_argument('--modulename', help='Name of the Python module', default = '')
+parser.add_argument('--libclang_location', help='Location of the libclang', default = '@TRIQS_LIBCLANG_LOCATION@')
+parser.add_argument('--compiler_options', nargs ='*', help='Options to pass to clang')
+parser.add_argument('--includes', '-I', action='append', help='Includes to pass to clang')
+parser.add_argument('--properties', '-p', action='store_true',
+        help="""Transforms i) every method with no arguments into read-only property
+                ii) every method get_X into read-only property
+                iii) every couple of methods get_X, set_X into rw property
+              """)
+
+args = parser.parse_args()
+# An empty TRIQS_INCLUDE_ALL must not produce an empty include path (a bare -I)
+args.includes = (args.includes or []) + [x for x in '@TRIQS_INCLUDE_ALL@'.split(';') if x]
+
+triqs_install_location = '@CMAKE_INSTALL_PREFIX@'
+args.includes.insert(0, triqs_install_location + '/include')
+
+#------------
+
+modulename = args.modulename or os.path.split(args.filename)[1].split('.',1)[0]
+
+class property_ :
+    """Plain record describing a Python property : name, doc, getter, setter (None if read-only)."""
+    def __init__ (self, **kw) :
+        self.__dict__.update(kw)
+
+if __name__ == '__main__' :
+
+    compiler_options = args.compiler_options or []
+
+
+    compiler_options += ['-I%s'%x for x in args.includes]
+
+
+    functions, classes = parse(args.filename, debug = False, compiler_options = compiler_options, where_is_libclang = args.libclang_location)
+
+    print "Generating the wrapper ..."
+
+    if args.properties :
+        print "making properties"
+
+        for cls in classes :
+            cls.proplist, exclude =[], []
+            m_by_names =dict( (m.name,m) for m in cls.methods)
+            # Find all the couples get_X, set_X
+            for m in cls.methods :
+                # Only a non static method without argument can be the getter of a property
+                is_getter = len(m.params) == 0 and not m.is_static
+                if m.is_template or m.name.startswith('operator') or m.name in ['begin','end'] :
+                    exclude.append(m)
+                elif m.name.startswith('get_') :
+                    X = m.name[4:]
+                    set_m = m_by_names.get('set_' + X, None)
+                    if is_getter and set_m and set_m.rtype == "void" and len(set_m.params_decay) ==1 :
+                        if set_m.params_decay[0][0] == m.rtype :
+                            cls.proplist.append(property_(name= X, doc = m.doc, getter = m, setter = set_m))
+                            exclude += [m,set_m]
+                        else :
+                            print "Warning :"
+                            print " in get_%s/set_%s" %(X,X)
+                            print " The type taken from set_%s is not the return type of get_%s"%(X,X)
+                            print " I am not transforming to property"
+                    elif is_getter : # get_X without a usable set_X : read-only property X
+                        cls.proplist.append(property_(name= X, doc = m.doc, getter = m, setter = None))
+                        exclude.append(m)
+
+                elif is_getter : # it is a property not starting with get_, pure getter
+                    cls.proplist.append(property_(name= m.name, doc = m.doc, getter = m, setter = None))
+                    exclude.append(m)
+            cls.methods = [m for m in cls.methods if m not in exclude]
+
+    tpl = Template(filename=triqs_install_location + '/share/triqs/wrap_generator/wrap_desc.mako.py')
+    rendered = tpl.render(classes = classes, functions = functions, modulename=modulename, args = args )
+
+    with open("{modulename}_desc.py".format(modulename=modulename), "w") as f:
+        f.write(rendered)
+
+    print "... done"
+