Merge branch 'good-dev-tc' of https://github.com/QuantumPackage/qp2 into good-dev-tc

2024-06-24 14:12:24 +02:00 · 2022-11-21 15:33:29 +01:00 · 2022-11-21 15:33:29 +01:00 · a810457eee
commit a810457eee
parent 6ea05c8adc dddd409008
92 changed files with 19790 additions and 2738 deletions
--- a/2
+++ b/2
@ -369,7 +369,7 @@ else
      echo ""
      echo "${QP_ROOT}/build.ninja does not exist,"
      echo "you need to specify the COMPILATION configuration file."
-      echo "See  ./configure --help  for more details."
+      echo "See  ./configure -h  for more details."
      echo ""
 fi

--- a/external/Python/.gitignore
+++ b/external/Python/.gitignore
@ -0,0 +1 @@
+docopt.py
--- a/external/Python/docopt.py
+++ b/external/Python/docopt.py
@ -1,579 +0,0 @@
-"""Pythonic command-line interface parser that will make you smile.
-
- * http://docopt.org
- * Repository and issue-tracker: https://github.com/docopt/docopt
- * Licensed under terms of MIT license (see LICENSE-MIT)
- * Copyright (c) 2013 Vladimir Keleshev, vladimir@keleshev.com
-
-"""
-import sys
-import re
-
-
-__all__ = ['docopt']
-__version__ = '0.6.2'
-
-
-class DocoptLanguageError(Exception):
-
-    """Error in construction of usage-message by developer."""
-
-
-class DocoptExit(SystemExit):
-
-    """Exit in case user invoked program with incorrect arguments."""
-
-    usage = ''
-
-    def __init__(self, message=''):
-        SystemExit.__init__(self, (message + '\n' + self.usage).strip())
-
-
-class Pattern(object):
-
-    def __eq__(self, other):
-        return repr(self) == repr(other)
-
-    def __hash__(self):
-        return hash(repr(self))
-
-    def fix(self):
-        self.fix_identities()
-        self.fix_repeating_arguments()
-        return self
-
-    def fix_identities(self, uniq=None):
-        """Make pattern-tree tips point to same object if they are equal."""
-        if not hasattr(self, 'children'):
-            return self
-        uniq = list(set(self.flat())) if uniq is None else uniq
-        for i, c in enumerate(self.children):
-            if not hasattr(c, 'children'):
-                assert c in uniq
-                self.children[i] = uniq[uniq.index(c)]
-            else:
-                c.fix_identities(uniq)
-
-    def fix_repeating_arguments(self):
-        """Fix elements that should accumulate/increment values."""
-        either = [list(c.children) for c in self.either.children]
-        for case in either:
-            for e in [c for c in case if case.count(c) > 1]:
-                if type(e) is Argument or type(e) is Option and e.argcount:
-                    if e.value is None:
-                        e.value = []
-                    elif type(e.value) is not list:
-                        e.value = e.value.split()
-                if type(e) is Command or type(e) is Option and e.argcount == 0:
-                    e.value = 0
-        return self
-
-    @property
-    def either(self):
-        """Transform pattern into an equivalent, with only top-level Either."""
-        # Currently the pattern will not be equivalent, but more "narrow",
-        # although good enough to reason about list arguments.
-        ret = []
-        groups = [[self]]
-        while groups:
-            children = groups.pop(0)
-            types = [type(c) for c in children]
-            if Either in types:
-                either = [c for c in children if type(c) is Either][0]
-                children.pop(children.index(either))
-                for c in either.children:
-                    groups.append([c] + children)
-            elif Required in types:
-                required = [c for c in children if type(c) is Required][0]
-                children.pop(children.index(required))
-                groups.append(list(required.children) + children)
-            elif Optional in types:
-                optional = [c for c in children if type(c) is Optional][0]
-                children.pop(children.index(optional))
-                groups.append(list(optional.children) + children)
-            elif AnyOptions in types:
-                optional = [c for c in children if type(c) is AnyOptions][0]
-                children.pop(children.index(optional))
-                groups.append(list(optional.children) + children)
-            elif OneOrMore in types:
-                oneormore = [c for c in children if type(c) is OneOrMore][0]
-                children.pop(children.index(oneormore))
-                groups.append(list(oneormore.children) * 2 + children)
-            else:
-                ret.append(children)
-        return Either(*[Required(*e) for e in ret])
-
-
-class ChildPattern(Pattern):
-
-    def __init__(self, name, value=None):
-        self.name = name
-        self.value = value
-
-    def __repr__(self):
-        return '%s(%r, %r)' % (self.__class__.__name__, self.name, self.value)
-
-    def flat(self, *types):
-        return [self] if not types or type(self) in types else []
-
-    def match(self, left, collected=None):
-        collected = [] if collected is None else collected
-        pos, match = self.single_match(left)
-        if match is None:
-            return False, left, collected
-        left_ = left[:pos] + left[pos + 1:]
-        same_name = [a for a in collected if a.name == self.name]
-        if type(self.value) in (int, list):
-            if type(self.value) is int:
-                increment = 1
-            else:
-                increment = ([match.value] if type(match.value) is str
-                             else match.value)
-            if not same_name:
-                match.value = increment
-                return True, left_, collected + [match]
-            same_name[0].value += increment
-            return True, left_, collected
-        return True, left_, collected + [match]
-
-
-class ParentPattern(Pattern):
-
-    def __init__(self, *children):
-        self.children = list(children)
-
-    def __repr__(self):
-        return '%s(%s)' % (self.__class__.__name__,
-                           ', '.join(repr(a) for a in self.children))
-
-    def flat(self, *types):
-        if type(self) in types:
-            return [self]
-        return sum([c.flat(*types) for c in self.children], [])
-
-
-class Argument(ChildPattern):
-
-    def single_match(self, left):
-        for n, p in enumerate(left):
-            if type(p) is Argument:
-                return n, Argument(self.name, p.value)
-        return None, None
-
-    @classmethod
-    def parse(class_, source):
-        name = re.findall('(<\S*?>)', source)[0]
-        value = re.findall('\[default: (.*)\]', source, flags=re.I)
-        return class_(name, value[0] if value else None)
-
-
-class Command(Argument):
-
-    def __init__(self, name, value=False):
-        self.name = name
-        self.value = value
-
-    def single_match(self, left):
-        for n, p in enumerate(left):
-            if type(p) is Argument:
-                if p.value == self.name:
-                    return n, Command(self.name, True)
-                else:
-                    break
-        return None, None
-
-
-class Option(ChildPattern):
-
-    def __init__(self, short=None, long=None, argcount=0, value=False):
-        assert argcount in (0, 1)
-        self.short, self.long = short, long
-        self.argcount, self.value = argcount, value
-        self.value = None if value is False and argcount else value
-
-    @classmethod
-    def parse(class_, option_description):
-        short, long, argcount, value = None, None, 0, False
-        options, _, description = option_description.strip().partition('  ')
-        options = options.replace(',', ' ').replace('=', ' ')
-        for s in options.split():
-            if s.startswith('--'):
-                long = s
-            elif s.startswith('-'):
-                short = s
-            else:
-                argcount = 1
-        if argcount:
-            matched = re.findall('\[default: (.*)\]', description, flags=re.I)
-            value = matched[0] if matched else None
-        return class_(short, long, argcount, value)
-
-    def single_match(self, left):
-        for n, p in enumerate(left):
-            if self.name == p.name:
-                return n, p
-        return None, None
-
-    @property
-    def name(self):
-        return self.long or self.short
-
-    def __repr__(self):
-        return 'Option(%r, %r, %r, %r)' % (self.short, self.long,
-                                           self.argcount, self.value)
-
-
-class Required(ParentPattern):
-
-    def match(self, left, collected=None):
-        collected = [] if collected is None else collected
-        l = left
-        c = collected
-        for p in self.children:
-            matched, l, c = p.match(l, c)
-            if not matched:
-                return False, left, collected
-        return True, l, c
-
-
-class Optional(ParentPattern):
-
-    def match(self, left, collected=None):
-        collected = [] if collected is None else collected
-        for p in self.children:
-            m, left, collected = p.match(left, collected)
-        return True, left, collected
-
-
-class AnyOptions(Optional):
-
-    """Marker/placeholder for [options] shortcut."""
-
-
-class OneOrMore(ParentPattern):
-
-    def match(self, left, collected=None):
-        assert len(self.children) == 1
-        collected = [] if collected is None else collected
-        l = left
-        c = collected
-        l_ = None
-        matched = True
-        times = 0
-        while matched:
-            # could it be that something didn't match but changed l or c?
-            matched, l, c = self.children[0].match(l, c)
-            times += 1 if matched else 0
-            if l_ == l:
-                break
-            l_ = l
-        if times >= 1:
-            return True, l, c
-        return False, left, collected
-
-
-class Either(ParentPattern):
-
-    def match(self, left, collected=None):
-        collected = [] if collected is None else collected
-        outcomes = []
-        for p in self.children:
-            matched, _, _ = outcome = p.match(left, collected)
-            if matched:
-                outcomes.append(outcome)
-        if outcomes:
-            return min(outcomes, key=lambda outcome: len(outcome[1]))
-        return False, left, collected
-
-
-class TokenStream(list):
-
-    def __init__(self, source, error):
-        self += source.split() if hasattr(source, 'split') else source
-        self.error = error
-
-    def move(self):
-        return self.pop(0) if len(self) else None
-
-    def current(self):
-        return self[0] if len(self) else None
-
-
-def parse_long(tokens, options):
-    """long ::= '--' chars [ ( ' ' | '=' ) chars ] ;"""
-    long, eq, value = tokens.move().partition('=')
-    assert long.startswith('--')
-    value = None if eq == value == '' else value
-    similar = [o for o in options if o.long == long]
-    if tokens.error is DocoptExit and similar == []:  # if no exact match
-        similar = [o for o in options if o.long and o.long.startswith(long)]
-    if len(similar) > 1:  # might be simply specified ambiguously 2+ times?
-        raise tokens.error('%s is not a unique prefix: %s?' %
-                           (long, ', '.join(o.long for o in similar)))
-    elif len(similar) < 1:
-        argcount = 1 if eq == '=' else 0
-        o = Option(None, long, argcount)
-        options.append(o)
-        if tokens.error is DocoptExit:
-            o = Option(None, long, argcount, value if argcount else True)
-    else:
-        o = Option(similar[0].short, similar[0].long,
-                   similar[0].argcount, similar[0].value)
-        if o.argcount == 0:
-            if value is not None:
-                raise tokens.error('%s must not have an argument' % o.long)
-        else:
-            if value is None:
-                if tokens.current() is None:
-                    raise tokens.error('%s requires argument' % o.long)
-                value = tokens.move()
-        if tokens.error is DocoptExit:
-            o.value = value if value is not None else True
-    return [o]
-
-
-def parse_shorts(tokens, options):
-    """shorts ::= '-' ( chars )* [ [ ' ' ] chars ] ;"""
-    token = tokens.move()
-    assert token.startswith('-') and not token.startswith('--')
-    left = token.lstrip('-')
-    parsed = []
-    while left != '':
-        short, left = '-' + left[0], left[1:]
-        similar = [o for o in options if o.short == short]
-        if len(similar) > 1:
-            raise tokens.error('%s is specified ambiguously %d times' %
-                               (short, len(similar)))
-        elif len(similar) < 1:
-            o = Option(short, None, 0)
-            options.append(o)
-            if tokens.error is DocoptExit:
-                o = Option(short, None, 0, True)
-        else:  # why copying is necessary here?
-            o = Option(short, similar[0].long,
-                       similar[0].argcount, similar[0].value)
-            value = None
-            if o.argcount != 0:
-                if left == '':
-                    if tokens.current() is None:
-                        raise tokens.error('%s requires argument' % short)
-                    value = tokens.move()
-                else:
-                    value = left
-                    left = ''
-            if tokens.error is DocoptExit:
-                o.value = value if value is not None else True
-        parsed.append(o)
-    return parsed
-
-
-def parse_pattern(source, options):
-    tokens = TokenStream(re.sub(r'([\[\]\(\)\|]|\.\.\.)', r' \1 ', source),
-                         DocoptLanguageError)
-    result = parse_expr(tokens, options)
-    if tokens.current() is not None:
-        raise tokens.error('unexpected ending: %r' % ' '.join(tokens))
-    return Required(*result)
-
-
-def parse_expr(tokens, options):
-    """expr ::= seq ( '|' seq )* ;"""
-    seq = parse_seq(tokens, options)
-    if tokens.current() != '|':
-        return seq
-    result = [Required(*seq)] if len(seq) > 1 else seq
-    while tokens.current() == '|':
-        tokens.move()
-        seq = parse_seq(tokens, options)
-        result += [Required(*seq)] if len(seq) > 1 else seq
-    return [Either(*result)] if len(result) > 1 else result
-
-
-def parse_seq(tokens, options):
-    """seq ::= ( atom [ '...' ] )* ;"""
-    result = []
-    while tokens.current() not in [None, ']', ')', '|']:
-        atom = parse_atom(tokens, options)
-        if tokens.current() == '...':
-            atom = [OneOrMore(*atom)]
-            tokens.move()
-        result += atom
-    return result
-
-
-def parse_atom(tokens, options):
-    """atom ::= '(' expr ')' | '[' expr ']' | 'options'
-             | long | shorts | argument | command ;
-    """
-    token = tokens.current()
-    result = []
-    if token in '([':
-        tokens.move()
-        matching, pattern = {'(': [')', Required], '[': [']', Optional]}[token]
-        result = pattern(*parse_expr(tokens, options))
-        if tokens.move() != matching:
-            raise tokens.error("unmatched '%s'" % token)
-        return [result]
-    elif token == 'options':
-        tokens.move()
-        return [AnyOptions()]
-    elif token.startswith('--') and token != '--':
-        return parse_long(tokens, options)
-    elif token.startswith('-') and token not in ('-', '--'):
-        return parse_shorts(tokens, options)
-    elif token.startswith('<') and token.endswith('>') or token.isupper():
-        return [Argument(tokens.move())]
-    else:
-        return [Command(tokens.move())]
-
-
-def parse_argv(tokens, options, options_first=False):
-    """Parse command-line argument vector.
-
-    If options_first:
-        argv ::= [ long | shorts ]* [ argument ]* [ '--' [ argument ]* ] ;
-    else:
-        argv ::= [ long | shorts | argument ]* [ '--' [ argument ]* ] ;
-
-    """
-    parsed = []
-    while tokens.current() is not None:
-        if tokens.current() == '--':
-            return parsed + [Argument(None, v) for v in tokens]
-        elif tokens.current().startswith('--'):
-            parsed += parse_long(tokens, options)
-        elif tokens.current().startswith('-') and tokens.current() != '-':
-            parsed += parse_shorts(tokens, options)
-        elif options_first:
-            return parsed + [Argument(None, v) for v in tokens]
-        else:
-            parsed.append(Argument(None, tokens.move()))
-    return parsed
-
-
-def parse_defaults(doc):
-    # in python < 2.7 you can't pass flags=re.MULTILINE
-    split = re.split('\n *(<\S+?>|-\S+?)', doc)[1:]
-    split = [s1 + s2 for s1, s2 in zip(split[::2], split[1::2])]
-    options = [Option.parse(s) for s in split if s.startswith('-')]
-    #arguments = [Argument.parse(s) for s in split if s.startswith('<')]
-    #return options, arguments
-    return options
-
-
-def printable_usage(doc):
-    # in python < 2.7 you can't pass flags=re.IGNORECASE
-    usage_split = re.split(r'([Uu][Ss][Aa][Gg][Ee]:)', doc)
-    if len(usage_split) < 3:
-        raise DocoptLanguageError('"usage:" (case-insensitive) not found.')
-    if len(usage_split) > 3:
-        raise DocoptLanguageError('More than one "usage:" (case-insensitive).')
-    return re.split(r'\n\s*\n', ''.join(usage_split[1:]))[0].strip()
-
-
-def formal_usage(printable_usage):
-    pu = printable_usage.split()[1:]  # split and drop "usage:"
-    return '( ' + ' '.join(') | (' if s == pu[0] else s for s in pu[1:]) + ' )'
-
-
-def extras(help, version, options, doc):
-    if help and any((o.name in ('-h', '--help')) and o.value for o in options):
-        print(doc.strip("\n"))
-        sys.exit()
-    if version and any(o.name == '--version' and o.value for o in options):
-        print(version)
-        sys.exit()
-
-
-class Dict(dict):
-    def __repr__(self):
-        return '{%s}' % ',\n '.join('%r: %r' % i for i in sorted(self.items()))
-
-
-def docopt(doc, argv=None, help=True, version=None, options_first=False):
-    """Parse `argv` based on command-line interface described in `doc`.
-
-    `docopt` creates your command-line interface based on its
-    description that you pass as `doc`. Such description can contain
-    --options, <positional-argument>, commands, which could be
-    [optional], (required), (mutually | exclusive) or repeated...
-
-    Parameters
-    ----------
-    doc : str
-        Description of your command-line interface.
-    argv : list of str, optional
-        Argument vector to be parsed. sys.argv[1:] is used if not
-        provided.
-    help : bool (default: True)
-        Set to False to disable automatic help on -h or --help
-        options.
-    version : any object
-        If passed, the object will be printed if --version is in
-        `argv`.
-    options_first : bool (default: False)
-        Set to True to require options preceed positional arguments,
-        i.e. to forbid options and positional arguments intermix.
-
-    Returns
-    -------
-    args : dict
-        A dictionary, where keys are names of command-line elements
-        such as e.g. "--verbose" and "<path>", and values are the
-        parsed values of those elements.
-
-    Example
-    -------
-    >>> from docopt import docopt
-    >>> doc = '''
-    Usage:
-        my_program tcp <host> <port> [--timeout=<seconds>]
-        my_program serial <port> [--baud=<n>] [--timeout=<seconds>]
-        my_program (-h | --help | --version)
-
-    Options:
-        -h, --help  Show this screen and exit.
-        --baud=<n>  Baudrate [default: 9600]
-    '''
-    >>> argv = ['tcp', '127.0.0.1', '80', '--timeout', '30']
-    >>> docopt(doc, argv)
-    {'--baud': '9600',
-     '--help': False,
-     '--timeout': '30',
-     '--version': False,
-     '<host>': '127.0.0.1',
-     '<port>': '80',
-     'serial': False,
-     'tcp': True}
-
-    See also
-    --------
-    * For video introduction see http://docopt.org
-    * Full documentation is available in README.rst as well as online
-      at https://github.com/docopt/docopt#readme
-
-    """
-    if argv is None:
-        argv = sys.argv[1:]
-    DocoptExit.usage = printable_usage(doc)
-    options = parse_defaults(doc)
-    pattern = parse_pattern(formal_usage(DocoptExit.usage), options)
-    # [default] syntax for argument is disabled
-    #for a in pattern.flat(Argument):
-    #    same_name = [d for d in arguments if d.name == a.name]
-    #    if same_name:
-    #        a.value = same_name[0].value
-    argv = parse_argv(TokenStream(argv, DocoptExit), list(options),
-                      options_first)
-    pattern_options = set(pattern.flat(Option))
-    for ao in pattern.flat(AnyOptions):
-        doc_options = parse_defaults(doc)
-        ao.children = list(set(doc_options) - pattern_options)
-        #if any_options:
-        #    ao.children += [Option(o.short, o.long, o.argcount)
-        #                    for o in argv if type(o) is Option]
-    extras(help, version, argv, doc)
-    matched, left, collected = pattern.fix().match(argv)
-    if matched and left == []:  # better error message if left?
-        return Dict((a.name, a.value) for a in (pattern.flat() + collected))
-    raise DocoptExit()
--- a/include/.gitignore
+++ b/include/.gitignore
@ -5,3 +5,4 @@ zconf.h
 zlib.h
 zmq_utils.h
 f77_zmq_free.h
+f77_zmq.h
--- a/include/f77_zmq.h
+++ b/include/f77_zmq.h
@ -1,617 +0,0 @@
-      integer EADDRINUSE
-      integer EADDRNOTAVAIL
-      integer EAFNOSUPPORT
-      integer ECONNABORTED
-      integer ECONNREFUSED
-      integer ECONNRESET
-      integer EFSM
-      integer EHOSTUNREACH
-      integer EINPROGRESS
-      integer EMSGSIZE
-      integer EMTHREAD
-      integer ENETDOWN
-      integer ENETRESET
-      integer ENETUNREACH
-      integer ENOBUFS
-      integer ENOCOMPATPROTO
-      integer ENOTCONN
-      integer ENOTSOCK
-      integer ENOTSUP
-      integer EPROTONOSUPPORT
-      integer ETERM
-      integer ETIMEDOUT
-      integer ZMQ_AFFINITY
-      integer ZMQ_BACKLOG
-      integer ZMQ_BINDTODEVICE
-      integer ZMQ_BLOCKY
-      integer ZMQ_CHANNEL
-      integer ZMQ_CLIENT
-      integer ZMQ_CONFLATE
-      integer ZMQ_CONNECT_RID
-      integer ZMQ_CONNECT_ROUTING_ID
-      integer ZMQ_CONNECT_TIMEOUT
-      integer ZMQ_CURRENT_EVENT_VERSION
-      integer ZMQ_CURRENT_EVENT_VERSION_DRAFT
-      integer ZMQ_CURVE
-      integer ZMQ_CURVE_PUBLICKEY
-      integer ZMQ_CURVE_SECRETKEY
-      integer ZMQ_CURVE_SERVER
-      integer ZMQ_CURVE_SERVERKEY
-      integer ZMQ_DEALER
-      integer ZMQ_DEFINED_STDINT
-      integer ZMQ_DELAY_ATTACH_ON_CONNECT
-      integer ZMQ_DGRAM
-      integer ZMQ_DISCONNECT_MSG
-      integer ZMQ_DISH
-      integer ZMQ_DONTWAIT
-      integer ZMQ_EVENTS
-      integer ZMQ_EVENT_ACCEPTED
-      integer ZMQ_EVENT_ACCEPT_FAILED
-      integer ZMQ_EVENT_ALL
-      integer ZMQ_EVENT_ALL_V1
-      integer ZMQ_EVENT_ALL_V2
-      integer ZMQ_EVENT_BIND_FAILED
-      integer ZMQ_EVENT_CLOSED
-      integer ZMQ_EVENT_CLOSE_FAILED
-      integer ZMQ_EVENT_CONNECTED
-      integer ZMQ_EVENT_CONNECT_DELAYED
-      integer ZMQ_EVENT_CONNECT_RETRIED
-      integer ZMQ_EVENT_DISCONNECTED
-      integer ZMQ_EVENT_HANDSHAKE_FAILED_AUTH
-      integer ZMQ_EVENT_HANDSHAKE_FAILED_NO_DETAIL
-      integer ZMQ_EVENT_HANDSHAKE_FAILED_PROTOCOL
-      integer ZMQ_EVENT_HANDSHAKE_SUCCEEDED
-      integer ZMQ_EVENT_LISTENING
-      integer ZMQ_EVENT_MONITOR_STOPPED
-      integer ZMQ_EVENT_PIPES_STATS
-      integer ZMQ_FAIL_UNROUTABLE
-      integer ZMQ_FD
-      integer ZMQ_FORWARDER
-      integer ZMQ_GATHER
-      integer ZMQ_GROUP_MAX_LENGTH
-      integer ZMQ_GSSAPI
-      integer ZMQ_GSSAPI_NT_HOSTBASED
-      integer ZMQ_GSSAPI_NT_KRB5_PRINCIPAL
-      integer ZMQ_GSSAPI_NT_USER_NAME
-      integer ZMQ_GSSAPI_PLAINTEXT
-      integer ZMQ_GSSAPI_PRINCIPAL
-      integer ZMQ_GSSAPI_PRINCIPAL_NAMETYPE
-      integer ZMQ_GSSAPI_SERVER
-      integer ZMQ_GSSAPI_SERVICE_PRINCIPAL
-      integer ZMQ_GSSAPI_SERVICE_PRINCIPAL_NAMETYPE
-      integer ZMQ_HANDSHAKE_IVL
-      integer ZMQ_HAS_CAPABILITIES
-      integer ZMQ_HAUSNUMERO
-      integer ZMQ_HEARTBEAT_IVL
-      integer ZMQ_HEARTBEAT_TIMEOUT
-      integer ZMQ_HEARTBEAT_TTL
-      integer ZMQ_HELLO_MSG
-      integer ZMQ_IDENTITY
-      integer ZMQ_IMMEDIATE
-      integer ZMQ_INVERT_MATCHING
-      integer ZMQ_IN_BATCH_SIZE
-      integer ZMQ_IO_THREADS
-      integer ZMQ_IO_THREADS_DFLT
-      integer ZMQ_IPC_FILTER_GID
-      integer ZMQ_IPC_FILTER_PID
-      integer ZMQ_IPC_FILTER_UID
-      integer ZMQ_IPV4ONLY
-      integer ZMQ_IPV6
-      integer ZMQ_LAST_ENDPOINT
-      integer ZMQ_LINGER
-      integer ZMQ_LOOPBACK_FASTPATH
-      integer ZMQ_MAXMSGSIZE
-      integer ZMQ_MAX_MSGSZ
-      integer ZMQ_MAX_SOCKETS
-      integer ZMQ_MAX_SOCKETS_DFLT
-      integer ZMQ_MECHANISM
-      integer ZMQ_METADATA
-      integer ZMQ_MORE
-      integer ZMQ_MSG_T_SIZE
-      integer ZMQ_MULTICAST_HOPS
-      integer ZMQ_MULTICAST_LOOP
-      integer ZMQ_MULTICAST_MAXTPDU
-      integer ZMQ_NOBLOCK
-      integer ZMQ_NOTIFY_CONNECT
-      integer ZMQ_NOTIFY_DISCONNECT
-      integer ZMQ_NULL
-      integer ZMQ_ONLY_FIRST_SUBSCRIBE
-      integer ZMQ_OUT_BATCH_SIZE
-      integer ZMQ_PAIR
-      integer ZMQ_PEER
-      integer ZMQ_PLAIN
-      integer ZMQ_PLAIN_PASSWORD
-      integer ZMQ_PLAIN_SERVER
-      integer ZMQ_PLAIN_USERNAME
-      integer ZMQ_POLLERR
-      integer ZMQ_POLLIN
-      integer ZMQ_POLLITEMS_DFLT
-      integer ZMQ_POLLOUT
-      integer ZMQ_POLLPRI
-      integer ZMQ_PRIORITY
-      integer ZMQ_PROBE_ROUTER
-      integer ZMQ_PROTOCOL_ERROR_WS_UNSPECIFIED
-      integer ZMQ_PROTOCOL_ERROR_ZAP_BAD_REQUEST_ID
-      integer ZMQ_PROTOCOL_ERROR_ZAP_BAD_VERSION
-      integer ZMQ_PROTOCOL_ERROR_ZAP_INVALID_METADATA
-      integer ZMQ_PROTOCOL_ERROR_ZAP_INVALID_STATUS_CODE
-      integer ZMQ_PROTOCOL_ERROR_ZAP_MALFORMED_REPLY
-      integer ZMQ_PROTOCOL_ERROR_ZAP_UNSPECIFIED
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_CRYPTOGRAPHIC
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_METADATA
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_SEQUENCE
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_KEY_EXCHANGE
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_ERROR
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_HELLO
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_INITIATE
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_MESSAGE
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_READY
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_UNSPECIFIED
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_WELCOME
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MECHANISM_MISMATCH
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_UNEXPECTED_COMMAND
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_UNSPECIFIED
-      integer ZMQ_PTR
-      integer ZMQ_PUB
-      integer ZMQ_PULL
-      integer ZMQ_PUSH
-      integer ZMQ_QUEUE
-      integer ZMQ_RADIO
-      integer ZMQ_RATE
-      integer ZMQ_RCVBUF
-      integer ZMQ_RCVHWM
-      integer ZMQ_RCVMORE
-      integer ZMQ_RCVTIMEO
-      integer ZMQ_RECONNECT_IVL
-      integer ZMQ_RECONNECT_IVL_MAX
-      integer ZMQ_RECONNECT_STOP
-      integer ZMQ_RECONNECT_STOP_AFTER_DISCONNECT
-      integer ZMQ_RECONNECT_STOP_CONN_REFUSED
-      integer ZMQ_RECONNECT_STOP_HANDSHAKE_FAILED
-      integer ZMQ_RECOVERY_IVL
-      integer ZMQ_REP
-      integer ZMQ_REQ
-      integer ZMQ_REQ_CORRELATE
-      integer ZMQ_REQ_RELAXED
-      integer ZMQ_ROUTER
-      integer ZMQ_ROUTER_BEHAVIOR
-      integer ZMQ_ROUTER_HANDOVER
-      integer ZMQ_ROUTER_MANDATORY
-      integer ZMQ_ROUTER_NOTIFY
-      integer ZMQ_ROUTER_RAW
-      integer ZMQ_ROUTING_ID
-      integer ZMQ_SCATTER
-      integer ZMQ_SERVER
-      integer ZMQ_SHARED
-      integer ZMQ_SNDBUF
-      integer ZMQ_SNDHWM
-      integer ZMQ_SNDMORE
-      integer ZMQ_SNDTIMEO
-      integer ZMQ_SOCKET_LIMIT
-      integer ZMQ_SOCKS_PASSWORD
-      integer ZMQ_SOCKS_PROXY
-      integer ZMQ_SOCKS_USERNAME
-      integer ZMQ_SRCFD
-      integer ZMQ_STREAM
-      integer ZMQ_STREAMER
-      integer ZMQ_STREAM_NOTIFY
-      integer ZMQ_SUB
-      integer ZMQ_SUBSCRIBE
-      integer ZMQ_TCP_ACCEPT_FILTER
-      integer ZMQ_TCP_KEEPALIVE
-      integer ZMQ_TCP_KEEPALIVE_CNT
-      integer ZMQ_TCP_KEEPALIVE_IDLE
-      integer ZMQ_TCP_KEEPALIVE_INTVL
-      integer ZMQ_TCP_MAXRT
-      integer ZMQ_THREAD_AFFINITY_CPU_ADD
-      integer ZMQ_THREAD_AFFINITY_CPU_REMOVE
-      integer ZMQ_THREAD_NAME_PREFIX
-      integer ZMQ_THREAD_PRIORITY
-      integer ZMQ_THREAD_PRIORITY_DFLT
-      integer ZMQ_THREAD_SAFE
-      integer ZMQ_THREAD_SCHED_POLICY
-      integer ZMQ_THREAD_SCHED_POLICY_DFLT
-      integer ZMQ_TOS
-      integer ZMQ_TYPE
-      integer ZMQ_UNSUBSCRIBE
-      integer ZMQ_USE_FD
-      integer ZMQ_VERSION
-      integer ZMQ_VERSION_MAJOR
-      integer ZMQ_VERSION_MINOR
-      integer ZMQ_VERSION_PATCH
-      integer ZMQ_VMCI_BUFFER_MAX_SIZE
-      integer ZMQ_VMCI_BUFFER_MIN_SIZE
-      integer ZMQ_VMCI_BUFFER_SIZE
-      integer ZMQ_VMCI_CONNECT_TIMEOUT
-      integer ZMQ_WSS_CERT_PEM
-      integer ZMQ_WSS_HOSTNAME
-      integer ZMQ_WSS_KEY_PEM
-      integer ZMQ_WSS_TRUST_PEM
-      integer ZMQ_WSS_TRUST_SYSTEM
-      integer ZMQ_XPUB
-      integer ZMQ_XPUB_MANUAL
-      integer ZMQ_XPUB_MANUAL_LAST_VALUE
-      integer ZMQ_XPUB_NODROP
-      integer ZMQ_XPUB_VERBOSE
-      integer ZMQ_XPUB_VERBOSER
-      integer ZMQ_XPUB_WELCOME_MSG
-      integer ZMQ_XREP
-      integer ZMQ_XREQ
-      integer ZMQ_XSUB
-      integer ZMQ_ZAP_DOMAIN
-      integer ZMQ_ZAP_ENFORCE_DOMAIN
-      integer ZMQ_ZERO_COPY_RECV
-      parameter(EADDRINUSE=156384717)
-      parameter(EADDRNOTAVAIL=156384718)
-      parameter(EAFNOSUPPORT=156384723)
-      parameter(ECONNABORTED=156384725)
-      parameter(ECONNREFUSED=156384719)
-      parameter(ECONNRESET=156384726)
-      parameter(EFSM=156384763)
-      parameter(EHOSTUNREACH=156384729)
-      parameter(EINPROGRESS=156384720)
-      parameter(EMSGSIZE=156384722)
-      parameter(EMTHREAD=156384766)
-      parameter(ENETDOWN=156384716)
-      parameter(ENETRESET=156384730)
-      parameter(ENETUNREACH=156384724)
-      parameter(ENOBUFS=156384715)
-      parameter(ENOCOMPATPROTO=156384764)
-      parameter(ENOTCONN=156384727)
-      parameter(ENOTSOCK=156384721)
-      parameter(ENOTSUP=156384713)
-      parameter(EPROTONOSUPPORT=156384714)
-      parameter(ETERM=156384765)
-      parameter(ETIMEDOUT=156384728)
-      parameter(ZMQ_AFFINITY=4)
-      parameter(ZMQ_BACKLOG=19)
-      parameter(ZMQ_BINDTODEVICE=92)
-      parameter(ZMQ_BLOCKY=70)
-      parameter(ZMQ_CHANNEL=20)
-      parameter(ZMQ_CLIENT=13)
-      parameter(ZMQ_CONFLATE=54)
-      parameter(ZMQ_CONNECT_RID=61)
-      parameter(ZMQ_CONNECT_ROUTING_ID=61)
-      parameter(ZMQ_CONNECT_TIMEOUT=79)
-      parameter(ZMQ_CURRENT_EVENT_VERSION=1)
-      parameter(ZMQ_CURRENT_EVENT_VERSION_DRAFT=2)
-      parameter(ZMQ_CURVE=2)
-      parameter(ZMQ_CURVE_PUBLICKEY=48)
-      parameter(ZMQ_CURVE_SECRETKEY=49)
-      parameter(ZMQ_CURVE_SERVER=47)
-      parameter(ZMQ_CURVE_SERVERKEY=50)
-      parameter(ZMQ_DEALER=5)
-      parameter(ZMQ_DEFINED_STDINT=1)
-      parameter(ZMQ_DELAY_ATTACH_ON_CONNECT=39)
-      parameter(ZMQ_DGRAM=18)
-      parameter(ZMQ_DISCONNECT_MSG=111)
-      parameter(ZMQ_DISH=15)
-      parameter(ZMQ_DONTWAIT=1)
-      parameter(ZMQ_EVENTS=15)
-      parameter(ZMQ_EVENT_ACCEPTED=32)
-      parameter(ZMQ_EVENT_ACCEPT_FAILED=64)
-      parameter(ZMQ_EVENT_ALL=65535)
-      parameter(ZMQ_EVENT_ALL_V1=65535)
-      parameter(ZMQ_EVENT_ALL_V2=131071)
-      parameter(ZMQ_EVENT_BIND_FAILED=16)
-      parameter(ZMQ_EVENT_CLOSED=128)
-      parameter(ZMQ_EVENT_CLOSE_FAILED=256)
-      parameter(ZMQ_EVENT_CONNECTED=1)
-      parameter(ZMQ_EVENT_CONNECT_DELAYED=2)
-      parameter(ZMQ_EVENT_CONNECT_RETRIED=4)
-      parameter(ZMQ_EVENT_DISCONNECTED=512)
-      parameter(ZMQ_EVENT_HANDSHAKE_FAILED_AUTH=16384)
-      parameter(ZMQ_EVENT_HANDSHAKE_FAILED_NO_DETAIL=2048)
-      parameter(ZMQ_EVENT_HANDSHAKE_FAILED_PROTOCOL=8192)
-      parameter(ZMQ_EVENT_HANDSHAKE_SUCCEEDED=4096)
-      parameter(ZMQ_EVENT_LISTENING=8)
-      parameter(ZMQ_EVENT_MONITOR_STOPPED=1024)
-      parameter(ZMQ_EVENT_PIPES_STATS=65536)
-      parameter(ZMQ_FAIL_UNROUTABLE=33)
-      parameter(ZMQ_FD=14)
-      parameter(ZMQ_FORWARDER=2)
-      parameter(ZMQ_GATHER=16)
-      parameter(ZMQ_GROUP_MAX_LENGTH=255)
-      parameter(ZMQ_GSSAPI=3)
-      parameter(ZMQ_GSSAPI_NT_HOSTBASED=0)
-      parameter(ZMQ_GSSAPI_NT_KRB5_PRINCIPAL=2)
-      parameter(ZMQ_GSSAPI_NT_USER_NAME=1)
-      parameter(ZMQ_GSSAPI_PLAINTEXT=65)
-      parameter(ZMQ_GSSAPI_PRINCIPAL=63)
-      parameter(ZMQ_GSSAPI_PRINCIPAL_NAMETYPE=90)
-      parameter(ZMQ_GSSAPI_SERVER=62)
-      parameter(ZMQ_GSSAPI_SERVICE_PRINCIPAL=64)
-      parameter(ZMQ_GSSAPI_SERVICE_PRINCIPAL_NAMETYPE=91)
-      parameter(ZMQ_HANDSHAKE_IVL=66)
-      parameter(ZMQ_HAS_CAPABILITIES=1)
-      parameter(ZMQ_HAUSNUMERO=156384712)
-      parameter(ZMQ_HEARTBEAT_IVL=75)
-      parameter(ZMQ_HEARTBEAT_TIMEOUT=77)
-      parameter(ZMQ_HEARTBEAT_TTL=76)
-      parameter(ZMQ_HELLO_MSG=110)
-      parameter(ZMQ_IDENTITY=5)
-      parameter(ZMQ_IMMEDIATE=39)
-      parameter(ZMQ_INVERT_MATCHING=74)
-      parameter(ZMQ_IN_BATCH_SIZE=101)
-      parameter(ZMQ_IO_THREADS=1)
-      parameter(ZMQ_IO_THREADS_DFLT=1)
-      parameter(ZMQ_IPC_FILTER_GID=60)
-      parameter(ZMQ_IPC_FILTER_PID=58)
-      parameter(ZMQ_IPC_FILTER_UID=59)
-      parameter(ZMQ_IPV4ONLY=31)
-      parameter(ZMQ_IPV6=42)
-      parameter(ZMQ_LAST_ENDPOINT=32)
-      parameter(ZMQ_LINGER=17)
-      parameter(ZMQ_LOOPBACK_FASTPATH=94)
-      parameter(ZMQ_MAXMSGSIZE=22)
-      parameter(ZMQ_MAX_MSGSZ=5)
-      parameter(ZMQ_MAX_SOCKETS=2)
-      parameter(ZMQ_MAX_SOCKETS_DFLT=1023)
-      parameter(ZMQ_MECHANISM=43)
-      parameter(ZMQ_METADATA=95)
-      parameter(ZMQ_MORE=1)
-      parameter(ZMQ_MSG_T_SIZE=6)
-      parameter(ZMQ_MULTICAST_HOPS=25)
-      parameter(ZMQ_MULTICAST_LOOP=96)
-      parameter(ZMQ_MULTICAST_MAXTPDU=84)
-      parameter(ZMQ_NOBLOCK=1)
-      parameter(ZMQ_NOTIFY_CONNECT=1)
-      parameter(ZMQ_NOTIFY_DISCONNECT=2)
-      parameter(ZMQ_NULL=0)
-      parameter(ZMQ_ONLY_FIRST_SUBSCRIBE=108)
-      parameter(ZMQ_OUT_BATCH_SIZE=102)
-      parameter(ZMQ_PAIR=0)
-      parameter(ZMQ_PEER=19)
-      parameter(ZMQ_PLAIN=1)
-      parameter(ZMQ_PLAIN_PASSWORD=46)
-      parameter(ZMQ_PLAIN_SERVER=44)
-      parameter(ZMQ_PLAIN_USERNAME=45)
-      parameter(ZMQ_POLLERR=4)
-      parameter(ZMQ_POLLIN=1)
-      parameter(ZMQ_POLLITEMS_DFLT=16)
-      parameter(ZMQ_POLLOUT=2)
-      parameter(ZMQ_POLLPRI=8)
-      parameter(ZMQ_PRIORITY=112)
-      parameter(ZMQ_PROBE_ROUTER=51)
-      parameter(ZMQ_PROTOCOL_ERROR_WS_UNSPECIFIED=805306368)
-      parameter(ZMQ_PROTOCOL_ERROR_ZAP_BAD_REQUEST_ID=536870914)
-      parameter(ZMQ_PROTOCOL_ERROR_ZAP_BAD_VERSION=536870915)
-      parameter(ZMQ_PROTOCOL_ERROR_ZAP_INVALID_METADATA=536870917)
-      parameter(ZMQ_PROTOCOL_ERROR_ZAP_INVALID_STATUS_CODE=536870916)
-      parameter(ZMQ_PROTOCOL_ERROR_ZAP_MALFORMED_REPLY=536870913)
-      parameter(ZMQ_PROTOCOL_ERROR_ZAP_UNSPECIFIED=536870912)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_CRYPTOGRAPHIC=285212673)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_METADATA=268435480)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_SEQUENCE=268435458)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_KEY_EXCHANGE=268435459)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_ERROR=268435477)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_HELLO=268435475)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_INITIATE=268435476)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_MESSAGE=268435474)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_READY=268435478)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_UNSPECIFIED=268435473)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_WELCOME=268435479)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_MECHANISM_MISMATCH=285212674)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_UNEXPECTED_COMMAND=268435457)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_UNSPECIFIED=268435456)
-      parameter(ZMQ_PTR=8)
-      parameter(ZMQ_PUB=1)
-      parameter(ZMQ_PULL=7)
-      parameter(ZMQ_PUSH=8)
-      parameter(ZMQ_QUEUE=3)
-      parameter(ZMQ_RADIO=14)
-      parameter(ZMQ_RATE=8)
-      parameter(ZMQ_RCVBUF=12)
-      parameter(ZMQ_RCVHWM=24)
-      parameter(ZMQ_RCVMORE=13)
-      parameter(ZMQ_RCVTIMEO=27)
-      parameter(ZMQ_RECONNECT_IVL=18)
-      parameter(ZMQ_RECONNECT_IVL_MAX=21)
-      parameter(ZMQ_RECONNECT_STOP=109)
-      parameter(ZMQ_RECONNECT_STOP_AFTER_DISCONNECT=3)
-      parameter(ZMQ_RECONNECT_STOP_CONN_REFUSED=1)
-      parameter(ZMQ_RECONNECT_STOP_HANDSHAKE_FAILED=2)
-      parameter(ZMQ_RECOVERY_IVL=9)
-      parameter(ZMQ_REP=4)
-      parameter(ZMQ_REQ=3)
-      parameter(ZMQ_REQ_CORRELATE=52)
-      parameter(ZMQ_REQ_RELAXED=53)
-      parameter(ZMQ_ROUTER=6)
-      parameter(ZMQ_ROUTER_BEHAVIOR=33)
-      parameter(ZMQ_ROUTER_HANDOVER=56)
-      parameter(ZMQ_ROUTER_MANDATORY=33)
-      parameter(ZMQ_ROUTER_NOTIFY=97)
-      parameter(ZMQ_ROUTER_RAW=41)
-      parameter(ZMQ_ROUTING_ID=5)
-      parameter(ZMQ_SCATTER=17)
-      parameter(ZMQ_SERVER=12)
-      parameter(ZMQ_SHARED=3)
-      parameter(ZMQ_SNDBUF=11)
-      parameter(ZMQ_SNDHWM=23)
-      parameter(ZMQ_SNDMORE=2)
-      parameter(ZMQ_SNDTIMEO=28)
-      parameter(ZMQ_SOCKET_LIMIT=3)
-      parameter(ZMQ_SOCKS_PASSWORD=100)
-      parameter(ZMQ_SOCKS_PROXY=68)
-      parameter(ZMQ_SOCKS_USERNAME=99)
-      parameter(ZMQ_SRCFD=2)
-      parameter(ZMQ_STREAM=11)
-      parameter(ZMQ_STREAMER=1)
-      parameter(ZMQ_STREAM_NOTIFY=73)
-      parameter(ZMQ_SUB=2)
-      parameter(ZMQ_SUBSCRIBE=6)
-      parameter(ZMQ_TCP_ACCEPT_FILTER=38)
-      parameter(ZMQ_TCP_KEEPALIVE=34)
-      parameter(ZMQ_TCP_KEEPALIVE_CNT=35)
-      parameter(ZMQ_TCP_KEEPALIVE_IDLE=36)
-      parameter(ZMQ_TCP_KEEPALIVE_INTVL=37)
-      parameter(ZMQ_TCP_MAXRT=80)
-      parameter(ZMQ_THREAD_AFFINITY_CPU_ADD=7)
-      parameter(ZMQ_THREAD_AFFINITY_CPU_REMOVE=8)
-      parameter(ZMQ_THREAD_NAME_PREFIX=9)
-      parameter(ZMQ_THREAD_PRIORITY=3)
-      parameter(ZMQ_THREAD_PRIORITY_DFLT=-1)
-      parameter(ZMQ_THREAD_SAFE=81)
-      parameter(ZMQ_THREAD_SCHED_POLICY=4)
-      parameter(ZMQ_THREAD_SCHED_POLICY_DFLT=-1)
-      parameter(ZMQ_TOS=57)
-      parameter(ZMQ_TYPE=16)
-      parameter(ZMQ_UNSUBSCRIBE=7)
-      parameter(ZMQ_USE_FD=89)
-      parameter(ZMQ_VERSION=40304)
-      parameter(ZMQ_VERSION_MAJOR=4)
-      parameter(ZMQ_VERSION_MINOR=3)
-      parameter(ZMQ_VERSION_PATCH=4)
-      parameter(ZMQ_VMCI_BUFFER_MAX_SIZE=87)
-      parameter(ZMQ_VMCI_BUFFER_MIN_SIZE=86)
-      parameter(ZMQ_VMCI_BUFFER_SIZE=85)
-      parameter(ZMQ_VMCI_CONNECT_TIMEOUT=88)
-      parameter(ZMQ_WSS_CERT_PEM=104)
-      parameter(ZMQ_WSS_HOSTNAME=106)
-      parameter(ZMQ_WSS_KEY_PEM=103)
-      parameter(ZMQ_WSS_TRUST_PEM=105)
-      parameter(ZMQ_WSS_TRUST_SYSTEM=107)
-      parameter(ZMQ_XPUB=9)
-      parameter(ZMQ_XPUB_MANUAL=71)
-      parameter(ZMQ_XPUB_MANUAL_LAST_VALUE=98)
-      parameter(ZMQ_XPUB_NODROP=69)
-      parameter(ZMQ_XPUB_VERBOSE=40)
-      parameter(ZMQ_XPUB_VERBOSER=78)
-      parameter(ZMQ_XPUB_WELCOME_MSG=72)
-      parameter(ZMQ_XREP=6)
-      parameter(ZMQ_XREQ=5)
-      parameter(ZMQ_XSUB=10)
-      parameter(ZMQ_ZAP_DOMAIN=55)
-      parameter(ZMQ_ZAP_ENFORCE_DOMAIN=93)
-      parameter(ZMQ_ZERO_COPY_RECV=10)
-      integer              f77_zmq_bind
-      external             f77_zmq_bind
-      integer              f77_zmq_close
-      external             f77_zmq_close
-      integer              f77_zmq_connect
-      external             f77_zmq_connect
-      integer              f77_zmq_ctx_destroy
-      external             f77_zmq_ctx_destroy
-      integer              f77_zmq_ctx_get
-      external             f77_zmq_ctx_get
-      integer*8            f77_zmq_ctx_new
-      external             f77_zmq_ctx_new
-      integer              f77_zmq_ctx_set
-      external             f77_zmq_ctx_set
-      integer              f77_zmq_ctx_shutdown
-      external             f77_zmq_ctx_shutdown
-      integer              f77_zmq_ctx_term
-      external             f77_zmq_ctx_term
-      integer              f77_zmq_disconnect
-      external             f77_zmq_disconnect
-      integer              f77_zmq_errno
-      external             f77_zmq_errno
-      integer              f77_zmq_getsockopt
-      external             f77_zmq_getsockopt
-      integer              f77_zmq_microsleep
-      external             f77_zmq_microsleep
-      integer              f77_zmq_msg_close
-      external             f77_zmq_msg_close
-      integer              f77_zmq_msg_copy
-      external             f77_zmq_msg_copy
-      integer              f77_zmq_msg_copy_from_data
-      external             f77_zmq_msg_copy_from_data
-      integer              f77_zmq_msg_copy_to_data
-      external             f77_zmq_msg_copy_to_data
-      integer              f77_zmq_msg_copy_to_data8
-      external             f77_zmq_msg_copy_to_data8
-      integer*8            f77_zmq_msg_data
-      external             f77_zmq_msg_data
-      integer*8            f77_zmq_msg_data_new
-      external             f77_zmq_msg_data_new
-      integer              f77_zmq_msg_destroy
-      external             f77_zmq_msg_destroy
-      integer              f77_zmq_msg_destroy_data
-      external             f77_zmq_msg_destroy_data
-      integer              f77_zmq_msg_get
-      external             f77_zmq_msg_get
-      character*(64)       f77_zmq_msg_gets
-      external             f77_zmq_msg_gets
-      integer              f77_zmq_msg_init
-      external             f77_zmq_msg_init
-      integer              f77_zmq_msg_init_data
-      external             f77_zmq_msg_init_data
-      integer              f77_zmq_msg_init_size
-      external             f77_zmq_msg_init_size
-      integer              f77_zmq_msg_more
-      external             f77_zmq_msg_more
-      integer              f77_zmq_msg_move
-      external             f77_zmq_msg_move
-      integer*8            f77_zmq_msg_new
-      external             f77_zmq_msg_new
-      integer              f77_zmq_msg_recv
-      external             f77_zmq_msg_recv
-      integer*8            f77_zmq_msg_recv8
-      external             f77_zmq_msg_recv8
-      integer              f77_zmq_msg_send
-      external             f77_zmq_msg_send
-      integer*8            f77_zmq_msg_send8
-      external             f77_zmq_msg_send8
-      integer              f77_zmq_msg_set
-      external             f77_zmq_msg_set
-      integer              f77_zmq_msg_size
-      external             f77_zmq_msg_size
-      integer*8            f77_zmq_msg_size8
-      external             f77_zmq_msg_size8
-      integer              f77_zmq_poll
-      external             f77_zmq_poll
-      integer              f77_zmq_pollitem_destroy
-      external             f77_zmq_pollitem_destroy
-      integer*8            f77_zmq_pollitem_new
-      external             f77_zmq_pollitem_new
-      integer              f77_zmq_pollitem_revents
-      external             f77_zmq_pollitem_revents
-      integer              f77_zmq_pollitem_set_events
-      external             f77_zmq_pollitem_set_events
-      integer              f77_zmq_pollitem_set_socket
-      external             f77_zmq_pollitem_set_socket
-      integer              f77_zmq_proxy
-      external             f77_zmq_proxy
-      integer              f77_zmq_proxy_steerable
-      external             f77_zmq_proxy_steerable
-      integer              f77_zmq_recv
-      external             f77_zmq_recv
-      integer*8            f77_zmq_recv8
-      external             f77_zmq_recv8
-      integer              f77_zmq_send
-      external             f77_zmq_send
-      integer*8            f77_zmq_send8
-      external             f77_zmq_send8
-      integer              f77_zmq_send_const
-      external             f77_zmq_send_const
-      integer*8            f77_zmq_send_const8
-      external             f77_zmq_send_const8
-      integer              f77_zmq_setsockopt
-      external             f77_zmq_setsockopt
-      integer*8            f77_zmq_socket
-      external             f77_zmq_socket
-      integer              f77_zmq_socket_monitor
-      external             f77_zmq_socket_monitor
-      character*(64)       f77_zmq_strerror
-      external             f77_zmq_strerror
-      integer              f77_zmq_term
-      external             f77_zmq_term
-      integer              f77_zmq_unbind
-      external             f77_zmq_unbind
-      integer              f77_zmq_version
-      external             f77_zmq_version
-      integer              pthread_create
-      external             pthread_create
-      integer              pthread_create_arg
-      external             pthread_create_arg
-      integer              pthread_detach
-      external             pthread_detach
-      integer              pthread_join
-      external             pthread_join
--- a/ocaml/Message.ml
+++ b/ocaml/Message.ml
@ -63,11 +63,11 @@ end

 module Connect_msg : sig
  type t = Tcp | Inproc | Ipc
-  val create : typ:string -> t
+  val create : string -> t
  val to_string : t -> string
 end = struct
  type t = Tcp | Inproc | Ipc
-  let create ~typ =
+  let create typ =
    match typ with
    | "tcp" -> Tcp
    | "inproc" -> Inproc
@ -515,9 +515,9 @@ let of_string s =
    | Connect_ socket ->
        Connect (Connect_msg.create socket)
    | NewJob_ { state ; push_address_tcp ; push_address_inproc } ->
-        Newjob (Newjob_msg.create push_address_tcp push_address_inproc state)
+        Newjob (Newjob_msg.create ~address_tcp:push_address_tcp ~address_inproc:push_address_inproc ~state)
    | EndJob_ state  ->
-        Endjob (Endjob_msg.create state)
+        Endjob (Endjob_msg.create ~state)
    | GetData_ { state ; client_id ; key } ->
        GetData (GetData_msg.create ~client_id ~state ~key)
    | PutData_ { state ; client_id ; key } ->
--- a/ocaml/TaskServer.ml
+++ b/ocaml/TaskServer.ml
@ -776,7 +776,7 @@ let run ~port =
      Zmq.Socket.create zmq_context Zmq.Socket.rep
    in
    Zmq.Socket.set_linger_period rep_socket 1_000_000;
-    bind_socket "REP" rep_socket port;
+    bind_socket ~socket_type:"REP" ~socket:rep_socket ~port;

    let initial_program_state =
    {   queue = Queuing_system.create () ;
--- a/ocaml/qp_run.ml
+++ b/ocaml/qp_run.ml
@ -110,7 +110,7 @@ let run slave ?prefix exe ezfio_file =
  let task_thread =
     let thread =
      Thread.create ( fun () ->
-         TaskServer.run port_number )
+         TaskServer.run ~port:port_number )
     in
     thread ();
  in
--- a/scripts/compilation/qp_create_ninja
+++ b/scripts/compilation/qp_create_ninja
@ -121,6 +121,7 @@ def ninja_create_env_variable(pwd_config_file):

    l_string.append("LIB = {0} ".format(str_lib))

+    l_string.append("CONFIG_FILE = {0}".format(pwd_config_file))
    l_string.append("")

    return l_string
--- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
@ -19,11 +19,11 @@ subroutine phi_j_erf_mu_r_xyz_phi(i,j,mu_in, C_center, xyz_ints)
  return
 endif
 n_pt_in = n_pt_max_integrals
- ! j 
+ ! j
 num_A = ao_nucl(j)
 power_A(1:3)= ao_power(j,1:3)
 A_center(1:3) = nucl_coord(num_A,1:3)
- ! i 
+ ! i
 num_B = ao_nucl(i)
 power_B(1:3)= ao_power(i,1:3)
 B_center(1:3) = nucl_coord(num_B,1:3)
@ -33,19 +33,19 @@ subroutine phi_j_erf_mu_r_xyz_phi(i,j,mu_in, C_center, xyz_ints)
  do m=1,ao_prim_num(i)
    beta = ao_expo_ordered_transp(m,i)
    do mm = 1, 3
-     ! (x phi_i ) * phi_j 
+     ! (x phi_i ) * phi_j
     ! x * (x - B_x)^b_x = b_x (x - B_x)^b_x + 1 * (x - B_x)^{b_x+1}
     !
     ! first contribution :: B_x (x - B_x)^b_x :: usual integral multiplied by B_x
     power_B_tmp = power_B
-     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)  
+     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)
     xyz_ints(mm) += contrib * B_center(mm) * ao_coef_normalized_ordered_transp(l,j)             &
-                                            * ao_coef_normalized_ordered_transp(m,i) 
-     ! second contribution :: 1 * (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1 
+                                            * ao_coef_normalized_ordered_transp(m,i)
+     ! second contribution :: 1 * (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1
     power_B_tmp(mm) += 1
-     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)  
+     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)
     xyz_ints(mm) += contrib * 1.d0        * ao_coef_normalized_ordered_transp(l,j)             &
-                                           * ao_coef_normalized_ordered_transp(m,i) 
+                                           * ao_coef_normalized_ordered_transp(m,i)
    enddo
  enddo
 enddo
@ -58,7 +58,7 @@ double precision function phi_j_erf_mu_r_phi(i, j, mu_in, C_center)
  BEGIN_DOC
  ! phi_j_erf_mu_r_phi  = int dr phi_j(r) [erf(mu  |r - C|)/|r-C|]  phi_i(r)
  END_DOC
- 
+
  implicit none
  integer,          intent(in) :: i,j
  double precision, intent(in) :: mu_in, C_center(3)
@ -77,24 +77,24 @@ double precision function phi_j_erf_mu_r_phi(i, j, mu_in, C_center)

  n_pt_in = n_pt_max_integrals

-  ! j 
+  ! j
  num_A         = ao_nucl(j)
  power_A(1:3)  = ao_power(j,1:3)
  A_center(1:3) = nucl_coord(num_A,1:3)

-  ! i 
+  ! i
  num_B         = ao_nucl(i)
  power_B(1:3)  = ao_power(i,1:3)
  B_center(1:3) = nucl_coord(num_B,1:3)
- 
+
  do l = 1, ao_prim_num(j)
   alpha = ao_expo_ordered_transp(l,j)
   do m = 1, ao_prim_num(i)
     beta = ao_expo_ordered_transp(m,i)

-     contrib = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)  
+     contrib = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)

-     phi_j_erf_mu_r_phi += contrib * ao_coef_normalized_ordered_transp(l,j) *  ao_coef_normalized_ordered_transp(m,i) 
+     phi_j_erf_mu_r_phi += contrib * ao_coef_normalized_ordered_transp(l,j) *  ao_coef_normalized_ordered_transp(m,i)
   enddo
  enddo

@ -124,11 +124,11 @@ subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints)
  return
 endif
 n_pt_in = n_pt_max_integrals
- ! j 
+ ! j
 num_A = ao_nucl(j)
 power_A(1:3)= ao_power(j,1:3)
 A_center(1:3) = nucl_coord(num_A,1:3)
- ! i 
+ ! i
 num_B = ao_nucl(i)
 power_B(1:3)= ao_power(i,1:3)
 B_center(1:3) = nucl_coord(num_B,1:3)
@ -141,7 +141,7 @@ subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints)
    call erfc_mu_gauss_xyz(C_center,delta,mu,A_center,B_center,power_A,power_B,alpha,beta,n_pt_in,xyz_ints)
    do mm = 1, 4
     gauss_ints(mm) += xyz_ints(mm)  * ao_coef_normalized_ordered_transp(l,j)             &
-                                     * ao_coef_normalized_ordered_transp(m,i) 
+                                     * ao_coef_normalized_ordered_transp(m,i)
    enddo
  enddo
 enddo
@ -161,7 +161,7 @@ subroutine erf_mu_gauss_ij_ao(i, j, mu, C_center, delta, gauss_ints)
  integer,          intent(in)  :: i, j
  double precision, intent(in)  :: mu, C_center(3), delta
  double precision, intent(out) :: gauss_ints
- 
+
  integer                       :: n_pt_in, l, m
  integer                       :: num_A, power_A(3), num_b, power_B(3)
  double precision              :: alpha, beta, A_center(3), B_center(3), coef
@ -177,16 +177,16 @@ subroutine erf_mu_gauss_ij_ao(i, j, mu, C_center, delta, gauss_ints)

  n_pt_in = n_pt_max_integrals

-  ! j 
+  ! j
  num_A         = ao_nucl(j)
  power_A(1:3)  = ao_power(j,1:3)
  A_center(1:3) = nucl_coord(num_A,1:3)

-  ! i 
+  ! i
  num_B         = ao_nucl(i)
  power_B(1:3)  = ao_power(i,1:3)
  B_center(1:3) = nucl_coord(num_B,1:3)
- 
+
  do l = 1, ao_prim_num(j)
   alpha = ao_expo_ordered_transp(l,j)
   do m = 1, ao_prim_num(i)
@ -219,7 +219,7 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints)
  !
  END_DOC

-  include 'utils/constants.include.F'                                                                                                                                  
+  include 'utils/constants.include.F'

  implicit none

@ -274,7 +274,82 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints)
 end subroutine NAI_pol_x_mult_erf_ao

 ! ---
+subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points)

+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao, n_points
+  double precision, intent(in)  :: mu_in, C_center(n_points,3)
+  double precision, intent(out) :: ints(n_points,3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in
+  integer                       :: power_xA(3), m, ipoint
+  double precision              :: A_center(3), B_center(3), alpha, beta, coef
+  double precision, allocatable :: integral(:)
+  double precision              :: NAI_pol_mult_erf
+
+  ints = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12) then
+    return
+  endif
+
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  allocate(integral(n_points))
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      power_xA = power_A
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        beta = ao_expo_ordered_transp(j,j_ao)
+        coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points)
+        do ipoint=1,n_points
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points)
+        do ipoint=1,n_points
+          ints(ipoint,m) += A_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_v
+
+! ---
 subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints)

  BEGIN_DOC
@ -289,7 +364,7 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen
  !
  END_DOC

-  include 'utils/constants.include.F'                                                                                                                                  
+  include 'utils/constants.include.F'

  implicit none

@ -333,7 +408,7 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen

      do j = 1, ao_prim_num(j_ao)
        alphaj = ao_expo_ordered_transp                   (j,j_ao)
-        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao) 
+        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao)

        ! First term = (x-Ax)**(ax+1)
        integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj &
@ -351,6 +426,91 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen

 end subroutine NAI_pol_x_mult_erf_ao_with1s

+!--
+
+subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_center, ints, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)   :: i_ao, j_ao, n_points
+  double precision, intent(in)   :: beta, B_center(n_points,3), mu_in, C_center(n_points,3)
+  double precision, intent(out)  :: ints(n_points,3)
+
+  integer                        :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m
+  double precision               :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi
+
+  integer                        :: ipoint
+  double precision, allocatable :: integral(:)
+
+  if(beta .lt. 1d-10) then
+    call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points)
+    return
+  endif
+
+  ints(:,:) = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then
+    return
+  endif
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  allocate(integral(n_points))
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA     = power_Ai
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        alphaj = ao_expo_ordered_transp                   (j,j_ao)
+        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, &
+            alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points)
+        do ipoint = 1, n_points
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, &
+          alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points)
+        do ipoint = 1, n_points
+          ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_with1s
+
+
 ! ---

 subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints)
@ -361,7 +521,7 @@ subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints)
  !
  ! if m == 1 X(m) = x, m == 1 X(m) = y, m == 1 X(m) = z
  END_DOC
- include 'utils/constants.include.F'                                                                                                                                  
+ include 'utils/constants.include.F'
 integer, intent(in) :: i_ao,j_ao,m
 double precision, intent(in) :: mu_in, C_center(3)
 double precision, intent(out):: ints
--- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
@ -22,15 +22,15 @@ subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints)
 power_B(1:3)= ao_power(j,1:3)
 B_center(1:3) = nucl_coord(num_B,1:3)
 do l=1,ao_prim_num(i)
-  alpha = ao_expo_ordered_transp(l,i)     
+  alpha = ao_expo_ordered_transp(l,i)
  do k=1,ao_prim_num(j)
-   beta = ao_expo_ordered_transp(k,j)     
+   beta = ao_expo_ordered_transp(k,j)
   call overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints_tmp)
   do m = 1, 3
    gauss_ints(m) += gauss_ints_tmp(m) *  ao_coef_normalized_ordered_transp(l,i)             &
-                                       *  ao_coef_normalized_ordered_transp(k,j) 
+                                       *  ao_coef_normalized_ordered_transp(k,j)
   enddo
-  enddo 
+  enddo
 enddo
 end

@ -61,13 +61,13 @@ double precision function overlap_gauss_xyz_r12_ao_specific(D_center,delta,i,j,m
 power_B(1:3)= ao_power(j,1:3)
 B_center(1:3) = nucl_coord(num_B,1:3)
 do l=1,ao_prim_num(i)
-  alpha = ao_expo_ordered_transp(l,i)     
+  alpha = ao_expo_ordered_transp(l,i)
  do k=1,ao_prim_num(j)
-   beta = ao_expo_ordered_transp(k,j)     
+   beta = ao_expo_ordered_transp(k,j)
   gauss_int = overlap_gauss_xyz_r12_specific(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,mx)
   overlap_gauss_xyz_r12_ao_specific = gauss_int *  ao_coef_normalized_ordered_transp(l,i)             &
-                                                 *  ao_coef_normalized_ordered_transp(k,j) 
-  enddo 
+                                                 *  ao_coef_normalized_ordered_transp(k,j)
+  enddo
 enddo
 end

@ -90,13 +90,13 @@ subroutine overlap_gauss_r12_all_ao(D_center,delta,aos_ints)
   power_B(1:3)= ao_power(j,1:3)
   B_center(1:3) = nucl_coord(num_B,1:3)
   do l=1,ao_prim_num(i)
-    alpha = ao_expo_ordered_transp(l,i)     
+    alpha = ao_expo_ordered_transp(l,i)
    do k=1,ao_prim_num(j)
-     beta = ao_expo_ordered_transp(k,j)     
+     beta = ao_expo_ordered_transp(k,j)
     analytical_j = overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
     aos_ints(j,i) += analytical_j *  ao_coef_normalized_ordered_transp(l,i)             &
-                                   *  ao_coef_normalized_ordered_transp(k,j) 
-    enddo 
+                                   *  ao_coef_normalized_ordered_transp(k,j)
+    enddo
   enddo
  enddo
 enddo
@ -114,7 +114,7 @@ double precision function overlap_gauss_r12_ao(D_center, delta, i, j)
  implicit none
  integer,          intent(in) :: i, j
  double precision, intent(in) :: D_center(3), delta
- 
+
  integer                      :: power_A(3), power_B(3), l, k
  double precision             :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j

@ -133,23 +133,75 @@ double precision function overlap_gauss_r12_ao(D_center, delta, i, j)
  B_center(1:3) = nucl_coord(ao_nucl(j),1:3)

  do l = 1, ao_prim_num(i)
-    alpha = ao_expo_ordered_transp           (l,i)     
-    coef1 = ao_coef_normalized_ordered_transp(l,i) 
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)

    do k = 1, ao_prim_num(j)
      beta = ao_expo_ordered_transp(k,j)
-      coef = coef1 * ao_coef_normalized_ordered_transp(k,j) 
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)

      if(dabs(coef) .lt. 1d-12) cycle

      analytical_j = overlap_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)

      overlap_gauss_r12_ao += coef * analytical_j
-    enddo 
+    enddo
  enddo

 end function overlap_gauss_r12_ao

+! --
+
+subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points)
+
+  BEGIN_DOC
+  ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2}
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i, j, n_points
+  double precision, intent(in) :: D_center(n_points,3), delta
+  double precision, intent(out) :: resv(n_points)
+
+  integer                      :: power_A(3), power_B(3), l, k
+  double precision             :: A_center(3), B_center(3), alpha, beta, coef, coef1 
+  double precision, allocatable :: analytical_j(:)
+
+  double precision, external   :: overlap_gauss_r12
+  integer                        :: ipoint
+
+  resv(:) = 0.d0
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  power_A(1:3) = ao_power(i,1:3)
+  power_B(1:3) = ao_power(j,1:3)
+
+  A_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  B_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  allocate(analytical_j(n_points))
+  do l = 1, ao_prim_num(i)
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      beta = ao_expo_ordered_transp(k,j)
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)
+
+      if(dabs(coef) .lt. 1d-12) cycle
+
+      call overlap_gauss_r12_v(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta, analytical_j, n_points)
+      do ipoint=1, n_points
+        resv(ipoint) = resv(ipoint) + coef*analytical_j(ipoint)
+      enddo
+    enddo
+  enddo
+  deallocate(analytical_j)
+
+end
+
 ! ---

 double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, delta, i, j)
@ -163,14 +215,13 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center,
  implicit none
  integer,          intent(in) :: i, j
  double precision, intent(in) :: B_center(3), beta, D_center(3), delta
- 
+
  integer                      :: power_A1(3), power_A2(3), l, k
  double precision             :: A1_center(3), A2_center(3), alpha1, alpha2, coef1, coef12, analytical_j
  double precision             :: G_center(3), gama, fact_g, gama_inv

  double precision, external   :: overlap_gauss_r12, overlap_gauss_r12_ao

-  ASSERT(beta .gt. 0.d0)
  if(beta .lt. 1d-10) then
    overlap_gauss_r12_ao_with1s = overlap_gauss_r12_ao(D_center, delta, i, j)
    return
@ -204,22 +255,118 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center,
  A2_center(1:3) = nucl_coord(ao_nucl(j),1:3)

  do l = 1, ao_prim_num(i)
-    alpha1 = ao_expo_ordered_transp                    (l,i)     
+    alpha1 = ao_expo_ordered_transp                    (l,i)
    coef1  = fact_g * ao_coef_normalized_ordered_transp(l,i)
    if(dabs(coef1) .lt. 1d-12) cycle

    do k = 1, ao_prim_num(j)
      alpha2 = ao_expo_ordered_transp                   (k,j)
-      coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) 
+      coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j)
      if(dabs(coef12) .lt. 1d-12) cycle

      analytical_j = overlap_gauss_r12(G_center, gama, A1_center, A2_center, power_A1, power_A2, alpha1, alpha2)

      overlap_gauss_r12_ao_with1s += coef12 * analytical_j
-    enddo 
+    enddo
  enddo

 end function overlap_gauss_r12_ao_with1s

 ! ---

+subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, resv, n_points)
+  BEGIN_DOC
+  !
+  ! \int dr AO_i(r) AO_j(r) e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2}
+  ! using an array of D_centers.
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, j, n_points
+  double precision, intent(in)  :: B_center(3), beta, D_center(n_points,3), delta
+  double precision, intent(out) :: resv(n_points)
+
+  integer                      :: power_A1(3), power_A2(3), l, k
+  double precision             :: A1_center(3), A2_center(3), alpha1, alpha2, coef1
+  double precision             :: coef12, coef12f
+  double precision             :: gama, gama_inv
+  double precision             :: bg, dg, bdg
+
+  integer :: ipoint
+
+  double precision, allocatable :: fact_g(:), G_center(:,:), analytical_j(:)
+
+  if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+    return
+  endif
+
+  ASSERT(beta .gt. 0.d0)
+
+  if(beta .lt. 1d-10) then
+    call overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points)
+    return
+  endif
+
+  resv(:) = 0.d0
+
+  ! e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} = fact_g e^{-gama |r - G|^2}
+
+  gama        = beta + delta
+  gama_inv    = 1.d0 / gama
+
+  power_A1(1:3) = ao_power(i,1:3)
+  power_A2(1:3) = ao_power(j,1:3)
+
+  A1_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  A2_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) )
+
+  bg = beta * gama_inv
+  dg = delta * gama_inv
+  bdg = bg * delta 
+  do ipoint=1,n_points
+    G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1)
+    G_center(ipoint,2) = bg * B_center(2) + dg * D_center(ipoint,2)
+    G_center(ipoint,3) = bg * B_center(3) + dg * D_center(ipoint,3)
+    fact_g(ipoint) = bdg * ( &
+          (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1))  &
+        + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2))  &
+        + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
+
+    if(fact_g(ipoint) < 10d0) then
+      fact_g(ipoint) = dexp(-fact_g(ipoint))
+    else
+      fact_g(ipoint) = 0.d0
+    endif
+
+  enddo
+
+    ! ---
+
+  do l = 1, ao_prim_num(i)
+    alpha1 = ao_expo_ordered_transp                    (l,i)
+    coef1  = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      alpha2 = ao_expo_ordered_transp                   (k,j)
+      coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j)
+      if(dabs(coef12) .lt. 1d-12) cycle
+
+      call overlap_gauss_r12_v(G_center, gama, A1_center,&
+            A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points)
+
+      do ipoint=1,n_points
+        coef12f = coef12 * fact_g(ipoint)
+        resv(ipoint) += coef12f * analytical_j(ipoint)
+      end do
+
+    enddo
+  enddo
+  deallocate (fact_g, G_center, analytical_j )
+
+
+end
+
+
--- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@ -11,60 +11,65 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n

  implicit none
  integer                       :: i, j, ipoint, i_1s, i_fit
-  double precision              :: r(3), int_fit, expo_fit, coef_fit
+  double precision              :: r(3), expo_fit, coef_fit
  double precision              :: coef, beta, B_center(3)
  double precision              :: tmp
  double precision              :: wall0, wall1

+  double precision, allocatable :: int_fit_v(:)
  double precision, external    :: overlap_gauss_r12_ao_with1s

-  provide mu_erf final_grid_points j1b_pen
+  provide mu_erf final_grid_points_transp j1b_pen
  call wall_time(wall0)

-  int2_grad1u2_grad2u2_j1b2 = 0.d0
+ int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0

- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
- !$OMP          final_grid_points, n_max_fit_slat,                  &
- !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,         &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
- !$OMP          List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2)
- !$OMP DO
-  !do ipoint = 1, 10
-  do ipoint = 1, n_points_final_grid
-    r(1) = final_grid_points(1,ipoint)
-    r(2) = final_grid_points(2,ipoint)
-    r(3) = final_grid_points(3,ipoint)
+ !$OMP PARALLEL DEFAULT (NONE)                                       &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp)                &
+     !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size,&
+     !$OMP          final_grid_points_transp, n_max_fit_slat,               &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
+     !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,    &
+     !$OMP          List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,&
+     !$OMP          ao_overlap_abs)

-    do i = 1, ao_num
-      do j = i, ao_num
+ allocate(int_fit_v(n_points_final_grid))
+ !$OMP DO SCHEDULE(dynamic)
+ do i = 1, ao_num
+   do j = i, ao_num

-        tmp = 0.d0
-        do i_1s = 1, List_all_comb_b3_size
+     if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+       cycle
+     endif

-          coef        = List_all_comb_b3_coef  (i_1s)
-          beta        = List_all_comb_b3_expo  (i_1s)
-          B_center(1) = List_all_comb_b3_cent(1,i_1s)
-          B_center(2) = List_all_comb_b3_cent(2,i_1s)
-          B_center(3) = List_all_comb_b3_cent(3,i_1s)
+     do i_1s = 1, List_all_comb_b3_size

-          do i_fit = 1, n_max_fit_slat
+       coef        = List_all_comb_b3_coef  (i_1s)
+       beta        = List_all_comb_b3_expo  (i_1s)
+       B_center(1) = List_all_comb_b3_cent(1,i_1s)
+       B_center(2) = List_all_comb_b3_cent(2,i_1s)
+       B_center(3) = List_all_comb_b3_cent(3,i_1s)

-            expo_fit = expo_gauss_1_erf_x_2(i_fit)
-            coef_fit = coef_gauss_1_erf_x_2(i_fit)
-            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+       do i_fit = 1, n_max_fit_slat

-            tmp += -0.25d0 * coef * coef_fit * int_fit
-          enddo
-        enddo
+         expo_fit = expo_gauss_1_erf_x_2(i_fit)
+         coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef

-        int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = tmp
-      enddo
-    enddo
-  enddo
+         call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
+               expo_fit, i, j, int_fit_v, n_points_final_grid)
+
+         do ipoint = 1, n_points_final_grid
+           int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint)
+         enddo
+
+       enddo
+
+     enddo
+   enddo
+ enddo
 !$OMP END DO
+ deallocate(int_fit_v)
 !$OMP END PARALLEL

  do ipoint = 1, n_points_final_grid
@ -78,7 +83,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
  call wall_time(wall1)
  print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0

-END_PROVIDER 
+END_PROVIDER

 ! ---

@ -91,61 +96,60 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final
  END_DOC

  implicit none
-  integer                       :: i, j, ipoint, i_1s, i_fit
-  double precision              :: r(3), int_fit, expo_fit, coef_fit
-  double precision              :: coef, beta, B_center(3), tmp
-  double precision              :: wall0, wall1
+  integer                        :: i, j, ipoint, i_1s, i_fit
+  double precision               :: r(3), expo_fit, coef_fit
+  double precision               :: coef, beta, B_center(3), tmp
+  double precision               :: wall0, wall1
+  double precision, allocatable  :: int_fit_v(:)

-  double precision, external    :: overlap_gauss_r12_ao_with1s
+  double precision, external     :: overlap_gauss_r12_ao_with1s

-  provide mu_erf final_grid_points j1b_pen
+  provide mu_erf final_grid_points_transp j1b_pen
  call wall_time(wall0)

-  int2_u2_j1b2 = 0.d0
+  int2_u2_j1b2(:,:,:) = 0.d0

- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
- !$OMP          final_grid_points, n_max_fit_slat,                  &
- !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
- !$OMP          List_all_comb_b3_cent, int2_u2_j1b2)
- !$OMP DO
-  !do ipoint = 1, 10
-  do ipoint = 1, n_points_final_grid
-    r(1) = final_grid_points(1,ipoint)
-    r(2) = final_grid_points(2,ipoint)
-    r(3) = final_grid_points(3,ipoint)
+  !$OMP PARALLEL DEFAULT (NONE)                                      &
+      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+      !$OMP          coef_fit, expo_fit, int_fit_v)                  &
+      !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size,&
+      !$OMP          final_grid_points_transp, n_max_fit_slat,       &
+      !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,       &
+      !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,   &
+      !$OMP          List_all_comb_b3_cent, int2_u2_j1b2)
+  allocate(int_fit_v(n_points_final_grid))
+  !$OMP DO SCHEDULE(dynamic)
+  do i = 1, ao_num
+    do j = i, ao_num

-    do i = 1, ao_num
-      do j = i, ao_num
+      do i_1s = 1, List_all_comb_b3_size

-        tmp = 0.d0
-        do i_1s = 1, List_all_comb_b3_size
+        coef        = List_all_comb_b3_coef  (i_1s)
+        beta        = List_all_comb_b3_expo  (i_1s)
+        B_center(1) = List_all_comb_b3_cent(1,i_1s)
+        B_center(2) = List_all_comb_b3_cent(2,i_1s)
+        B_center(3) = List_all_comb_b3_cent(3,i_1s)

-          coef        = List_all_comb_b3_coef  (i_1s)
-          beta        = List_all_comb_b3_expo  (i_1s)
-          B_center(1) = List_all_comb_b3_cent(1,i_1s)
-          B_center(2) = List_all_comb_b3_cent(2,i_1s)
-          B_center(3) = List_all_comb_b3_cent(3,i_1s)
+        do i_fit = 1, n_max_fit_slat

-          do i_fit = 1, n_max_fit_slat
+          expo_fit = expo_gauss_j_mu_x_2(i_fit)
+          coef_fit = coef_gauss_j_mu_x_2(i_fit) * coef

-            expo_fit = expo_gauss_j_mu_x_2(i_fit)
-            coef_fit = coef_gauss_j_mu_x_2(i_fit)
-            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+          call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
+                expo_fit, i, j, int_fit_v, n_points_final_grid)

-            tmp += coef * coef_fit * int_fit
+          do ipoint = 1, n_points_final_grid
+            int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint)
          enddo
+
        enddo

-        int2_u2_j1b2(j,i,ipoint) = tmp
      enddo
    enddo
  enddo
- !$OMP END DO
- !$OMP END PARALLEL
+  !$OMP END DO
+  deallocate(int_fit_v)
+  !$OMP END PARALLEL

  do ipoint = 1, n_points_final_grid
    do i = 2, ao_num
@ -158,7 +162,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final
  call wall_time(wall1)
  print*, ' wall time for int2_u2_j1b2', wall1 - wall0

-END_PROVIDER 
+END_PROVIDER

 ! ---

@ -171,84 +175,95 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
  END_DOC

  implicit none
-  integer          :: i, j, ipoint, i_1s, i_fit
-  double precision :: r(3), int_fit(3), expo_fit, coef_fit
-  double precision :: coef, beta, B_center(3), dist
-  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
-  double precision :: tmp_x, tmp_y, tmp_z
-  double precision :: wall0, wall1
+  integer                        :: i, j, ipoint, i_1s, i_fit
+  double precision               :: r(3), expo_fit, coef_fit
+  double precision               :: coef, beta, B_center(3)
+  double precision               :: alpha_1s, alpha_1s_inv, expo_coef_1s, coef_tmp
+  double precision               :: tmp_x, tmp_y, tmp_z
+  double precision               :: wall0, wall1
+  double precision, allocatable  :: int_fit_v(:,:), dist(:), centr_1s(:,:)

-  provide mu_erf final_grid_points j1b_pen
+  provide mu_erf final_grid_points_transp j1b_pen
  call wall_time(wall0)

-  int2_u_grad1u_x_j1b2 = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
- !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
- !$OMP          tmp_x, tmp_y, tmp_z)                                & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
- !$OMP          final_grid_points, n_max_fit_slat,                  &
- !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
- !$OMP          List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
- !$OMP DO
+  allocate(dist(n_points_final_grid), centr_1s(n_points_final_grid,3))
  do ipoint = 1, n_points_final_grid
-    r(1) = final_grid_points(1,ipoint)
-    r(2) = final_grid_points(2,ipoint)
-    r(3) = final_grid_points(3,ipoint)
+    r(1) = final_grid_points_transp(ipoint,1)
+    r(2) = final_grid_points_transp(ipoint,2)
+    r(3) = final_grid_points_transp(ipoint,3)

-    do i = 1, ao_num
-      do j = i, ao_num
+    dist(ipoint) = (B_center(1) - r(1)) * (B_center(1) - r(1))  &
+                 + (B_center(2) - r(2)) * (B_center(2) - r(2))  &
+                 + (B_center(3) - r(3)) * (B_center(3) - r(3))
+  enddo

-        tmp_x = 0.d0
-        tmp_y = 0.d0
-        tmp_z = 0.d0
-        do i_1s = 1, List_all_comb_b3_size
+  int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0

-          coef        = List_all_comb_b3_coef  (i_1s)
-          beta        = List_all_comb_b3_expo  (i_1s)
-          B_center(1) = List_all_comb_b3_cent(1,i_1s)
-          B_center(2) = List_all_comb_b3_cent(2,i_1s)
-          B_center(3) = List_all_comb_b3_cent(3,i_1s)
-          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
-                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
-                      + (B_center(3) - r(3)) * (B_center(3) - r(3)) 
+  !$OMP PARALLEL DEFAULT (NONE)                                      &
+      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+      !$OMP          coef_fit, expo_fit, int_fit_v, alpha_1s,        &
+      !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, &
+      !$OMP          tmp_x, tmp_y, tmp_z)                            &
+      !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size,&
+      !$OMP          final_grid_points_transp, n_max_fit_slat, dist, &
+      !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,   &
+      !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,   &
+      !$OMP          List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
+  allocate(int_fit_v(n_points_final_grid,3))

-          do i_fit = 1, n_max_fit_slat
+  do i_1s = 1, List_all_comb_b3_size

-            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
-            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+    coef        = List_all_comb_b3_coef  (i_1s)
+    beta        = List_all_comb_b3_expo  (i_1s)
+    B_center(1) = List_all_comb_b3_cent(1,i_1s)
+    B_center(2) = List_all_comb_b3_cent(2,i_1s)
+    B_center(3) = List_all_comb_b3_cent(3,i_1s)

-            alpha_1s     = beta + expo_fit
-            alpha_1s_inv = 1.d0 / alpha_1s 
+    do i_fit = 1, n_max_fit_slat

-            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
-            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
-            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+      expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+      coef_fit = coef_gauss_j_mu_1_erf(i_fit) * coef

-            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
-            !if(expo_coef_1s .gt. 80.d0) cycle
-            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            !if(dabs(coef_tmp) .lt. 1d-10) cycle
-            
-            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
+      alpha_1s     = beta + expo_fit
+      alpha_1s_inv = 1.d0 / alpha_1s

-            tmp_x += coef_tmp * int_fit(1)
-            tmp_y += coef_tmp * int_fit(2)
-            tmp_z += coef_tmp * int_fit(3)
+      do ipoint = 1, n_points_final_grid
+        r(1) = final_grid_points_transp(ipoint,1)
+        r(2) = final_grid_points_transp(ipoint,2)
+        r(3) = final_grid_points_transp(ipoint,3)
+
+        centr_1s(ipoint,1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+        centr_1s(ipoint,2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+        centr_1s(ipoint,3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+      enddo
+
+      expo_coef_1s = beta * expo_fit * alpha_1s_inv 
+      !$OMP BARRIER
+      !$OMP DO SCHEDULE(dynamic)
+      do i = 1, ao_num
+        do j = i, ao_num
+          call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s,&
+              1.d+9, final_grid_points_transp, int_fit_v, n_points_final_grid)
+
+          do ipoint = 1, n_points_final_grid
+            coef_tmp = coef_fit * dexp(-expo_coef_1s* dist(ipoint))
+            int2_u_grad1u_x_j1b2(1,j,i,ipoint) = &
+              int2_u_grad1u_x_j1b2(1,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,1)
+            int2_u_grad1u_x_j1b2(2,j,i,ipoint) = &
+              int2_u_grad1u_x_j1b2(2,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,2)
+            int2_u_grad1u_x_j1b2(3,j,i,ipoint) = &
+              int2_u_grad1u_x_j1b2(3,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,3)
          enddo
        enddo
-
-        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = tmp_x
-        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = tmp_y
-        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = tmp_z
      enddo
+      !$OMP END DO NOWAIT
+
    enddo
  enddo
- !$OMP END DO
- !$OMP END PARALLEL
+  deallocate(int_fit_v)
+  !$OMP END PARALLEL
+
+  deallocate(dist)

  do ipoint = 1, n_points_final_grid
    do i = 2, ao_num
@ -263,7 +278,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
  call wall_time(wall1)
  print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0

-END_PROVIDER 
+END_PROVIDER

 ! ---

@ -291,11 +306,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
 !$OMP PARALLEL DEFAULT (NONE)                                      &
 !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
 !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
- !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, &
 !$OMP          final_grid_points, n_max_fit_slat,                  &
 !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
+ !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       &
 !$OMP          List_all_comb_b3_cent, int2_u_grad1u_j1b2)
 !$OMP DO
  do ipoint = 1, n_points_final_grid
@ -323,7 +338,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
            coef_fit = coef_gauss_j_mu_1_erf(i_fit)

            alpha_1s     = beta + expo_fit
-            alpha_1s_inv = 1.d0 / alpha_1s 
+            alpha_1s_inv = 1.d0 / alpha_1s
            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
@ -332,7 +347,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
            !if(expo_coef_1s .gt. 80.d0) cycle
            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
            !if(dabs(coef_tmp) .lt. 1d-10) cycle
-            
+
            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)

            tmp += coef_tmp * int_fit
@ -357,7 +372,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
  call wall_time(wall1)
  print*, ' wall time for int2_u_grad1u_j1b2', wall1 - wall0

-END_PROVIDER 
+END_PROVIDER

 ! ---

--- a/src/ao_many_one_e_ints/listj1b.irp.f
+++ b/src/ao_many_one_e_ints/listj1b.irp.f
@ -63,7 +63,6 @@ END_PROVIDER
      tmp_cent_z               += tmp_alphaj * nucl_coord(j,3)
    enddo

-    ASSERT(List_all_comb_b2_expo(i) .gt. 0d0)
    if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle

    List_all_comb_b2_cent(1,i) = tmp_cent_x / List_all_comb_b2_expo(i) 
@ -177,8 +176,8 @@ END_PROVIDER

    enddo

-    ASSERT(List_all_comb_b3_expo(i) .gt. 0d0)
    if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle
+    ASSERT(List_all_comb_b3_expo(i) .gt. 0d0)

    List_all_comb_b3_cent(1,i) = List_all_comb_b3_cent(1,i) / List_all_comb_b3_expo(i) 
    List_all_comb_b3_cent(2,i) = List_all_comb_b3_cent(2,i) / List_all_comb_b3_expo(i)
--- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
+++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
@ -1,67 +1,139 @@
-
 double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
  BEGIN_DOC
  ! Computes the following integral :
  !
-  ! .. math::
-  ! 
+  ! .. math                      ::
+  !
  !   \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
  !
  END_DOC

- implicit none
+  implicit none
  include 'constants.include.F'
- double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D" 
- double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
- integer, intent(in)             :: power_A(3),power_B(3)
+  double precision, intent(in)   :: D_center(3), delta  ! pure gaussian "D"
+  double precision, intent(in)   :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)            :: power_A(3),power_B(3)

- double precision  :: overlap_x,overlap_y,overlap_z,overlap
- ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
- double precision  :: A_new(0:max_dim,3)! new polynom 
- double precision  :: A_center_new(3)   ! new center
- integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
- double precision  :: alpha_new         ! new exponent
- double precision  :: fact_a_new        ! constant factor
- double precision  :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr
- integer           :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1
- dim1=100
- thr = 1.d-10
- d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  double precision               :: overlap_x,overlap_y,overlap_z,overlap
+  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+  double precision               :: A_new(0:max_dim,3)! new polynom
+  double precision               :: A_center_new(3)   ! new center
+  integer                        :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+  double precision               :: alpha_new         ! new exponent
+  double precision               :: fact_a_new        ! constant factor
+  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr
+  integer                        :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1
+  dim1=100
+  thr = 1.d-10
+  d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0

- ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order                                
- call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
-                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
- ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
- accu = 0.d0
- do lx = 0, iorder_a_new(1)
-  coefx = A_new(lx,1)
-  if(dabs(coefx).lt.thr)cycle
-  iorder_tmp(1) = lx
-  do ly = 0, iorder_a_new(2)
-   coefy = A_new(ly,2)
-   coefxy = coefx * coefy 
-   if(dabs(coefxy).lt.thr)cycle
-   iorder_tmp(2) = ly
-   do lz = 0, iorder_a_new(3)
-    coefz = A_new(lz,3)
-    coefxyz = coefxy * coefz 
-    if(dabs(coefxyz).lt.thr)cycle
-    iorder_tmp(3) = lz
-    call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
-    accu += coefxyz * overlap
-   enddo
+  ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+  call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
+      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+  accu = 0.d0
+  do lx = 0, iorder_a_new(1)
+    coefx = A_new(lx,1)
+    if(dabs(coefx).lt.thr)cycle
+    iorder_tmp(1) = lx
+    do ly = 0, iorder_a_new(2)
+      coefy = A_new(ly,2)
+      coefxy = coefx * coefy
+      if(dabs(coefxy).lt.thr)cycle
+      iorder_tmp(2) = ly
+      do lz = 0, iorder_a_new(3)
+        coefz = A_new(lz,3)
+        coefxyz = coefxy * coefz
+        if(dabs(coefxyz).lt.thr)cycle
+        iorder_tmp(3) = lz
+        call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
+        accu += coefxyz * overlap
+      enddo
+    enddo
  enddo
- enddo
- overlap_gauss_r12 = fact_a_new * accu 
+  overlap_gauss_r12 = fact_a_new * accu
 end

+!---
+
+subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   using an array of D_centers
+  !
+  END_DOC
+
+  implicit none
+  include 'constants.include.F'
+  integer, intent(in)            :: n_points
+  double precision, intent(in)   :: D_center(n_points,3), delta  ! pure gaussian "D"
+  double precision, intent(in)   :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)            :: power_A(3),power_B(3)
+  double precision, intent(out)  :: rvec(n_points)
+
+  double precision, allocatable  :: overlap(:)
+  double precision               :: overlap_x, overlap_y, overlap_z
+
+  integer                        :: maxab
+  integer, allocatable           :: iorder_a_new(:)
+  double precision, allocatable  :: A_new(:,:,:), A_center_new(:,:)
+  double precision, allocatable  :: fact_a_new(:)
+  double precision               :: alpha_new
+  double precision               :: accu,thr, coefxy
+  integer                        :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1, ipoint
+
+  dim1=100
+  thr = 1.d-10
+  d(:) = 0
+
+  maxab = maxval(power_A(1:3))
+
+  allocate (A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), &
+            fact_a_new(n_points), iorder_a_new(3), overlap(n_points) )
+
+  call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, &
+        alpha_new, fact_a_new, iorder_a_new , delta, alpha, d, power_A, &
+        D_center, A_center, n_points)
+
+  do ipoint=1,n_points
+    rvec(ipoint) = 0.d0
+  enddo
+
+  do lx = 0, iorder_a_new(1)
+    iorder_tmp(1) = lx
+    do ly = 0, iorder_a_new(2)
+      iorder_tmp(2) = ly
+      do lz = 0, iorder_a_new(3)
+        iorder_tmp(3) = lz
+        call overlap_gaussian_xyz_v(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap,dim1,n_points)
+        do ipoint=1,n_points
+          rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * &
+                                        A_new(ipoint,ly,2) * &
+                                        A_new(ipoint,lz,3) * overlap(ipoint)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  do ipoint=1,n_points
+    rvec(ipoint) = rvec(ipoint) * fact_a_new(ipoint)
+  enddo
+  deallocate(A_new, A_center_new, fact_a_new, iorder_a_new, overlap)
+end
+
+!---
+!---

 subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints)
  BEGIN_DOC
  ! Computes the following integral :
  !
  ! .. math::
-  ! 
+  !
  !   gauss_ints(m) = \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
  !
  ! with m == 1 ==> x, m == 2 ==> y, m == 3 ==> z
@ -69,14 +141,14 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_

 implicit none
  include 'constants.include.F'
- double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D" 
+ double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D"
 double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
 integer, intent(in)             :: power_A(3),power_B(3)
 double precision, intent(out)   :: gauss_ints(3)

 double precision  :: overlap_x,overlap_y,overlap_z,overlap
 ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
- double precision  :: A_new(0:max_dim,3)! new polynom 
+ double precision  :: A_new(0:max_dim,3)! new polynom
 double precision  :: A_center_new(3)   ! new center
 integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
 integer           :: power_B_new(3)
@ -88,8 +160,8 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_
 thr = 1.d-10
 d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0

- ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order                                
- call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
+ ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , &
                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
 ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
 gauss_ints = 0.d0
@ -99,12 +171,12 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_
  iorder_tmp(1) = lx
  do ly = 0, iorder_a_new(2)
   coefy = A_new(ly,2)
-   coefxy = coefx * coefy 
+   coefxy = coefx * coefy
   if(dabs(coefxy).lt.thr)cycle
   iorder_tmp(2) = ly
   do lz = 0, iorder_a_new(3)
    coefz = A_new(lz,3)
-    coefxyz = coefxy * coefz 
+    coefxyz = coefxy * coefz
    if(dabs(coefxyz).lt.thr)cycle
    iorder_tmp(3) = lz
    do m = 1, 3
@ -129,7 +201,7 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center
  ! Computes the following integral :
  !
  ! .. math::
-  ! 
+  !
  !    \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
  !
  ! with mx == 1 ==> x, mx == 2 ==> y, mx == 3 ==> z
@ -137,13 +209,13 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center

 implicit none
  include 'constants.include.F'
- double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D" 
+ double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D"
 double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
 integer, intent(in)             :: power_A(3),power_B(3),mx

 double precision  :: overlap_x,overlap_y,overlap_z,overlap
 ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
- double precision  :: A_new(0:max_dim,3)! new polynom 
+ double precision  :: A_new(0:max_dim,3)! new polynom
 double precision  :: A_center_new(3)   ! new center
 integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
 integer           :: power_B_new(3)
@ -155,8 +227,8 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center
 thr = 1.d-10
 d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0

- ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order                                
- call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
+ ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , &
                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
 ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
 overlap_gauss_xyz_r12_specific = 0.d0
@ -166,12 +238,12 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center
  iorder_tmp(1) = lx
  do ly = 0, iorder_a_new(2)
   coefy = A_new(ly,2)
-   coefxy = coefx * coefy 
+   coefxy = coefx * coefy
   if(dabs(coefxy).lt.thr)cycle
   iorder_tmp(2) = ly
   do lz = 0, iorder_a_new(3)
    coefz = A_new(lz,3)
-    coefxyz = coefxy * coefz 
+    coefxyz = coefxy * coefz
    if(dabs(coefxyz).lt.thr)cycle
    iorder_tmp(3) = lz
    m = mx
--- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
@ -124,7 +124,7 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B,
  ! Computes the following integral :
  !
  ! .. math::
-  ! 
+  !
  !   \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
  !   \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$.
  !
@ -197,6 +197,92 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B,

 end function NAI_pol_mult_erf

+! ---
+subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, res_v, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !   \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+  integer,          intent(in) :: n_pt_in, n_points
+  integer,          intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: C_center(n_points,3), A_center(3), B_center(3), alpha, beta, mu_in
+  double precision, intent(out) :: res_v(n_points)
+
+  integer                      :: i, n_pt, n_pt_out, ipoint
+  double precision             :: P_center(3)
+  double precision             :: d(0:n_pt_in), coeff, dist, const, factor
+  double precision             :: const_factor, dist_integral
+  double precision             :: accu, p_inv, p, rho, p_inv_2
+  double precision             :: p_new
+
+  double precision             :: rint
+
+  p       = alpha + beta
+  p_inv   = 1.d0 / p
+  p_inv_2 = 0.5d0 * p_inv
+  rho     = alpha * beta * p_inv
+  p_new  = mu_in / dsqrt(p + mu_in * mu_in)
+
+  dist          = 0.d0
+  do i = 1, 3
+    P_center(i)    = (alpha * A_center(i) + beta * B_center(i)) * p_inv
+    dist          += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i))
+  enddo
+
+  do ipoint=1,n_points
+    dist_integral = 0.d0
+    do i = 1, 3
+      dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i))
+    enddo
+    const_factor = dist * rho
+    if(const_factor > 80.d0) then
+      res_V(ipoint) = 0.d0
+      cycle
+    endif
+
+    factor = dexp(-const_factor)
+    coeff  = dtwo_pi * factor * p_inv * p_new
+
+    n_pt  =  2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) )
+    const = p * dist_integral * p_new * p_new
+    if(n_pt == 0) then
+      res_v(ipoint) = coeff * rint(0, const)
+      cycle
+    endif
+
+    do i = 0, n_pt_in
+      d(i) = 0.d0
+    enddo
+    p_new = p_new * p_new
+    call give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center(ipoint,1:3)&
+        , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center)
+
+    if(n_pt_out < 0) then
+      res_v(ipoint) = 0.d0
+      cycle
+    endif
+
+    ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+    accu = 0.d0
+    do i = 0, n_pt_out, 2
+      accu += d(i) * rint(i/2, const)
+    enddo
+    res_v(ipoint) = accu * coeff
+  enddo
+
+end
+
 ! ---

 double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 &
@ -207,7 +293,7 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A
  ! Computes the following integral :
  !
  ! .. math::
-  ! 
+  !
  !   \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2)
  !   \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2)
  !   \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2)
@ -312,6 +398,131 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A

 end function NAI_pol_mult_erf_with1s

+!--
+
+subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2&
+      , beta, B_center, C_center, n_pt_in, mu_in, res_v, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2)
+  !   \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2)
+  !   \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2)
+  !   \exp(-\beta (r - B)^2)
+  !   \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+  integer,          intent(in)   :: n_pt_in, n_points
+  integer,          intent(in)   :: power_A1(3), power_A2(3)
+  double precision, intent(in)   :: C_center(n_points,3), A1_center(3), A2_center(3), B_center(n_points,3)
+  double precision, intent(in)   :: alpha1, alpha2, beta, mu_in
+  double precision, intent(out)  :: res_v(n_points)
+
+  integer                        :: i, n_pt, n_pt_out, ipoint
+  double precision               :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12
+  double precision               :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor
+  double precision               :: dist_integral
+  double precision               :: d(0:n_pt_in), coeff, const, factor
+  double precision               :: accu
+  double precision               :: p_new, p_new2
+
+  double precision               :: rint
+
+
+  ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2}
+  alpha12       = alpha1 + alpha2
+  alpha12_inv   = 1.d0 / alpha12
+  alpha12_inv_2 = 0.5d0 * alpha12_inv
+  rho12         = alpha1 * alpha2 * alpha12_inv
+  A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv
+  A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv
+  A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv
+  dist12        = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1))&
+      + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))&
+      + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3))
+
+  const_factor12 = dist12 * rho12
+
+  if(const_factor12 > 80.d0) then
+    res_v(:) = 0.d0
+    return
+  endif
+
+    ! ---
+
+  ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2}
+  p           = alpha12 + beta
+  p_inv       = 1.d0 / p
+  p_inv_2     = 0.5d0 * p_inv
+  rho         = alpha12 * beta * p_inv
+  p_new  = mu_in / dsqrt(p + mu_in * mu_in)
+  p_new2 = p_new * p_new
+  n_pt  =  2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) &
+              + power_A1(3) + power_A2(3) )
+
+  do ipoint=1,n_points
+
+    P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv
+    P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv
+    P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv
+    dist   = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))&
+        + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))&
+        + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3))
+
+    const_factor = const_factor12 + dist * rho
+    if(const_factor > 80.d0) then
+      res_v(ipoint) = 0.d0
+      cycle
+    endif
+
+    dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))&
+        + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))&
+        + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3))
+
+    ! ---
+
+    factor = dexp(-const_factor)
+    coeff  = dtwo_pi * factor * p_inv * p_new
+
+    const = p * dist_integral * p_new2
+    if(n_pt == 0) then
+      res_v(ipoint) = coeff * rint(0, const)
+      cycle
+    endif
+
+    do i = 0, n_pt_in
+      d(i) = 0.d0
+    enddo
+
+    !TODO: VECTORIZE HERE
+    call give_polynomial_mult_center_one_e_erf_opt(                  &
+        A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3)&
+        , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center,1)
+
+    if(n_pt_out < 0) then
+      res_v(ipoint) = 0.d0
+      cycle
+    endif
+
+    ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+    accu = 0.d0
+    do i = 0, n_pt_out, 2
+      accu += d(i) * rint(i/2, const)
+    enddo
+    res_v(ipoint) = accu * coeff
+  end do
+
+end
+
+! ---
 ! ---

 subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center &
@ -432,10 +643,11 @@ end subroutine give_polynomial_mult_center_one_e_erf_opt
 ! ---


+
 subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,&
      power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in)
  BEGIN_DOC
-  ! Returns the explicit polynomial in terms of the $t$ variable of the 
+  ! Returns the explicit polynomial in terms of the $t$ variable of the
  ! following polynomial:
  !
  ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$.
--- a/src/ao_one_e_ints/pseudopot.f90
+++ b/src/ao_one_e_ints/pseudopot.f90
@ -1095,9 +1095,9 @@ double precision function overlap_orb_ylm_grid(nptsgrid,r_orb,npower_orb,center_
 implicit none
 !! PSEUDOS
 integer nptsgridmax,nptsgrid
-double precision coefs_pseudo,ptsgrid
 parameter(nptsgridmax=50)
-common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3)
+double precision coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3)
+common/pseudos/coefs_pseudo,ptsgrid
 !!!!!
 integer npower_orb(3),l,m,i
 double precision x,g_orb,two_pi,dx,dphi,term,orb_phi,ylm_real,sintheta,r_orb,phi,center_orb(3)
@ -1235,10 +1235,10 @@ end
      subroutine initpseudos(nptsgrid)
      implicit none
      integer nptsgridmax,nptsgrid,ik
-      double precision coefs_pseudo,ptsgrid
      double precision p,q,r,s
      parameter(nptsgridmax=50)
-      common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3)
+      double precision :: coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3)
+      common/pseudos/coefs_pseudo,ptsgrid

      p=1.d0/dsqrt(2.d0)
      q=1.d0/dsqrt(3.d0)
--- a/src/becke_numerical_grid/grid_becke_vector.irp.f
+++ b/src/becke_numerical_grid/grid_becke_vector.irp.f
@ -58,3 +58,18 @@ END_PROVIDER
  enddo

 END_PROVIDER
+
+
+BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)]
+  implicit none
+  BEGIN_DOC
+  !  final_grid_points_transp(j,1:3) = (/ x, y, z /) of the jth grid point
+  END_DOC
+  integer :: i
+  do i=1,n_points_final_grid
+    final_grid_points_transp(i,1) = final_grid_points(1,i)
+    final_grid_points_transp(i,2) = final_grid_points(2,i)
+    final_grid_points_transp(i,3) = final_grid_points(3,i)
+  enddo
+END_PROVIDER
+
--- a/src/bitmask/core_inact_act_virt.irp.f
+++ b/src/bitmask/core_inact_act_virt.irp.f
@ -173,10 +173,7 @@ BEGIN_PROVIDER [integer, n_core_inact_act_orb ]
  END_DOC
  n_core_inact_act_orb = (n_core_orb + n_inact_orb + n_act_orb)
 END_PROVIDER
- 

-
- 
 BEGIN_PROVIDER [ integer(bit_kind), core_bitmask , (N_int,2) ]
   implicit none
   BEGIN_DOC
@ -443,5 +440,4 @@ BEGIN_PROVIDER [integer, list_all_but_del_orb, (n_all_but_del_orb)]
  endif
 enddo

-END_PROVIDER 
-
+END_PROVIDER
--- a/src/cis/cis.irp.f
+++ b/src/cis/cis.irp.f
@ -79,6 +79,6 @@ subroutine run
  call ezfio_set_cis_energy(CI_energy)
  psi_coef = ci_eigenvectors
  SOFT_TOUCH psi_coef
-  call save_wavefunction_truncated(thresh_save_wf)
+  call save_wavefunction_truncated(save_threshold)

 end
--- a/src/csf/cfgCI_interface.f90
+++ b/src/csf/cfgCI_interface.f90
@ -46,6 +46,24 @@ module cfunctions
           real   (kind=C_DOUBLE ),intent(out) :: csftodetmatrix(rowsmax,colsmax)
         end subroutine getCSFtoDETTransformationMatrix
      end interface
+       interface
+         subroutine gramSchmidt(A, m, n, B) bind(C, name='gramSchmidt')
+           import C_INT32_T, C_INT64_T, C_DOUBLE
+           integer(kind=C_INT32_T),value,intent(in) :: m
+           integer(kind=C_INT32_T),value,intent(in) :: n
+           real (kind=C_DOUBLE ),intent(in) :: A(m,n)
+           real (kind=C_DOUBLE ),intent(out) :: B(m,n)
+         end subroutine gramSchmidt
+      end interface
+       interface
+         subroutine gramSchmidt_qp(A, m, n, B) bind(C, name='gramSchmidt_qp')
+           import C_INT32_T, C_INT64_T, C_DOUBLE
+           integer(kind=C_INT32_T),value,intent(in) :: m
+           integer(kind=C_INT32_T),value,intent(in) :: n
+           real (kind=C_DOUBLE ),intent(in) :: A(m,n)
+           real (kind=C_DOUBLE ),intent(out) :: B(m,n)
+         end subroutine gramSchmidt_qp
+      end interface
 end module cfunctions

 subroutine f_dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC) &
--- a/src/csf/cfgCI_utils.c
+++ b/src/csf/cfgCI_utils.c
@ -1,5 +1,6 @@
 #include <stdint.h>
 #include <stdio.h>
+#include <assert.h>
 #include "tree_utils.h"

 void int_to_bin_digit(int64_t in, int count, int* out)
@ -28,19 +29,19 @@ void getncsfs1(int *inpnsomo, int *inpms, int *outncsfs){
    int nsomo = *inpnsomo;
    int ms = *inpms;
    int nparcoupl = (nsomo + ms)/2;
-    *outncsfs = binom(nsomo, nparcoupl);
+    *outncsfs = binom((double)nsomo, (double)nparcoupl);
 }

 void getncsfs(int NSOMO, int MS, int *outncsfs){
-    int nparcoupl = (NSOMO + MS)/2;
-    int nparcouplp1 = ((NSOMO + MS)/2)+1;
+    int nparcoupl = (NSOMO + MS)/2; // n_alpha
+    int nparcouplp1 = ((NSOMO + MS)/2)+1; // n_alpha + 1
    double tmpndets=0.0;
    if(NSOMO == 0){
        (*outncsfs) = 1;
        return;
    }
-    tmpndets = binom(NSOMO, nparcoupl);
-    (*outncsfs) = round(tmpndets - binom(NSOMO, nparcouplp1));
+    tmpndets = binom((double)NSOMO, (double)nparcoupl);
+    (*outncsfs) = round(tmpndets - binom((double)NSOMO, (double)nparcouplp1));
 }

 #include <stdint.h>
@ -252,6 +253,27 @@ void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOM
    buildTreeDriver(bftree, *NSOMO, MS, NBF);
 }

+void ortho_qr_csf(double *overlapMatrix, int lda, double *orthoMatrix, int rows, int cols);
+
+// QR to orthogonalize CSFs does not work
+//void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix){
+//  int i,j;
+//  //for(j=0;j<cols;++j){
+//  //  for(i=0;i<rows;++i){
+//  //    printf(" %3.2f ",overlapMatrix[j*rows + i]);
+//  //  }
+//  //  printf("\n");
+//  //}
+//  // Call the function ortho_qr from qp
+//  ortho_qr_csf(overlapMatrix, rows, orthoMatrix, rows, cols);
+//  //for(j=0;j<cols;++j){
+//  //  for(i=0;i<rows;++i){
+//  //    printf(" %3.2f ",orthoMatrix[j*rows + i]);
+//  //  }
+//  //  printf("\n");
+//  //}
+//}
+
 void gramSchmidt(double *overlapMatrix, int rows, int cols, double *orthoMatrix){

    // vector
@ -341,8 +363,12 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl
         Get BFtoDeterminant Matrix
    ************************************/

-    printf("In convertcsftodet\n");
+
+    //printf(" --- In convet ----\n");
    convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI);
+    //printf(" --- done bf det basis ---- row=%d col=%d\n",rowsbftodetI,colsbftodetI);
+
+    //printRealMatrix(bftodetmatrixI,rowsbftodetI,colsbftodetI);

    int rowsI = 0;
    int colsI = 0;
@ -350,6 +376,8 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl
    //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
    getOverlapMatrix_withDet(bftodetmatrixI, rowsbftodetI, colsbftodetI, Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);

+    //printf("Overlap matrix\n");
+    //printRealMatrix(overlapMatrixI,rowsI,colsI);

    /***********************************
         Get Orthonormalization Matrix
@ -359,6 +387,9 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl

    gramSchmidt(overlapMatrixI, rowsI, colsI, orthoMatrixI);

+    //printf("Ortho matrix\n");
+    //printRealMatrix(orthoMatrixI,rowsI,colsI);
+
    /***********************************
         Get Final CSF to Det Matrix
    ************************************/
@ -1340,11 +1371,11 @@ void getbftodetfunction(Tree *dettree, int NSOMO, int MS, int *BF1, double *rowv
    for(int i = 0; i < npairs; i++){
        for(int j = 0; j < NSOMO; j++) {
            inpdet[j] = detslist[i*NSOMO + j];
-            printf(" %d ",inpdet[j]);
+            //printf(" %d ",inpdet[j]);
        }
-        printf("\n");
+        //printf("\n");
        findAddofDetDriver(dettree, NSOMO, inpdet, &addr);
-        printf("(%d) - addr  = %d\n",i,addr);
+        //printf("(%d) - addr  = %d\n",i,addr);
        // Calculate the phase for cfg to QP2 conversion
        //get_phase_cfg_to_qp_inpList(inpdet, NSOMO, &phase_cfg_to_qp);
        //rowvec[addr] = 1.0 * phaselist[i]*phase_cfg_to_qp/sqrt(fac);
@ -1363,12 +1394,23 @@ void getbftodetfunction(Tree *dettree, int NSOMO, int MS, int *BF1, double *rowv
 void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int *rows, int *cols){

    int NSOMO=0;
+    //printf("before getSetBits Isomo=%ld, NSOMO=%ld\n",Isomo,NSOMO);
    getSetBits(Isomo, &NSOMO);
+    //printf("Isomo=%ld, NSOMO=%ld\n",Isomo,NSOMO);
    int ndets = 0;
    int NBF = 0;
-    double dNSOMO = NSOMO*1.0;
-    double nalpha = (NSOMO + MS)/2.0;
-    ndets = (int)binom(dNSOMO, nalpha);
+    //double dNSOMO = NSOMO*1.0;
+    // MS = alpha_num - beta_num
+    int nalpha = (NSOMO + MS)/2;
+    //printf(" in convertbftodet : MS=%d nalpha=%3.2f\n",MS,nalpha);
+    //ndets = (int)binom(dNSOMO, nalpha);
+    if(NSOMO > 0){
+      ndets = (int)binom((double)NSOMO, (double)nalpha);
+    }
+    else if(NSOMO == 0){
+      ndets = 1;
+    }
+    else printf("Something is wrong in calcMEdetpair\n");

    Tree dettree = (Tree){  .rootNode = NULL, .NBF = -1 };
    dettree.rootNode = malloc(sizeof(Node));
@ -1389,16 +1431,6 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int *
    }
    else{

-    //int addr = -1;
-    //int inpdet[NSOMO];
-    //inpdet[0] = 1;
-    //inpdet[1] = 1;
-    //inpdet[2] = 1;
-    //inpdet[3] = 0;
-    //inpdet[4] = 0;
-    //inpdet[5] = 0;
-
-    //findAddofDetDriver(&dettree, NSOMO, inpdet, &addr);

    int detlist[ndets];
    getDetlistDriver(&dettree, NSOMO, detlist);
@ -1411,6 +1443,9 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int *
    generateAllBFs(Isomo, MS, &bftree, &NBF, &NSOMO);

    // Initialize transformation matrix
+    //printf("MS=%d NBF=%d ndets=%d NSOMO=%d\n",MS,NBF,ndets,NSOMO);
+    assert( NBF > 0);
+    assert( ndets > 0);
    (*bftodetmatrixptr) = malloc(NBF*ndets*sizeof(double));
    (*rows) = NBF;
    (*cols) = ndets;
@ -1465,9 +1500,10 @@ void convertBFtoDetBasisWithArrayDims(int64_t Isomo, int MS, int rowsmax, int co
    getSetBits(Isomo, &NSOMO);
    int ndets = 0;
    int NBF = 0;
-    double dNSOMO = NSOMO*1.0;
-    double nalpha = (NSOMO + MS)/2.0;
-    ndets = (int)binom(dNSOMO, nalpha);
+    //double dNSOMO = NSOMO*1.0;
+    //double nalpha = (NSOMO + MS)/2.0;
+    int nalpha = (NSOMO + MS)/2;
+    ndets = (int)binom((double)NSOMO, (double)nalpha);

    Tree dettree = (Tree){  .rootNode = NULL, .NBF = -1 };
    dettree.rootNode = malloc(sizeof(Node));
@ -1551,6 +1587,7 @@ void getApqIJMatrixDims(int64_t Isomo, int64_t Jsomo, int64_t MS, int32_t *rowso
    getncsfs(NSOMOJ, MS, &NBFJ);
    (*rowsout) = NBFI;
    (*colsout) = NBFJ;
+  //exit(0);
 }

 void getApqIJMatrixDriver(int64_t Isomo, int64_t Jsomo, int orbp, int orbq, int64_t MS, int64_t NMO, double **CSFICSFJApqIJptr, int *rowsout, int *colsout){
@ -1669,6 +1706,7 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in

    int rowsbftodetI, colsbftodetI;

+    //printf(" 1Calling convertBFtoDetBasis Isomo=%ld MS=%ld\n",Isomo,MS);
    convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI);

    // Fill matrix
@ -1676,8 +1714,14 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
    int colsI = 0;

    //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
-    //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
+    //printf("Isomo=%ld\n",Isomo);
    getOverlapMatrix_withDet(bftodetmatrixI, rowsbftodetI, colsbftodetI, Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
+    if(Isomo == 0){
+      rowsI = 1;
+      colsI = 1;
+    }
+
+    //printf("Isomo=%ld\n",Isomo);

    orthoMatrixI = malloc(rowsI*colsI*sizeof(double));

@ -1689,6 +1733,7 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in

    int rowsbftodetJ, colsbftodetJ;

+    //printf(" 2Calling convertBFtoDetBasis Jsomo=%ld MS=%ld\n",Jsomo,MS);
    convertBFtoDetBasis(Jsomo, MS, &bftodetmatrixJ, &rowsbftodetJ, &colsbftodetJ);

    int rowsJ = 0;
@ -1696,6 +1741,10 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
    // Fill matrix
    //getOverlapMatrix(Jsomo, MS, &overlapMatrixJ, &rowsJ, &colsJ, &NSOMO);
    getOverlapMatrix_withDet(bftodetmatrixJ, rowsbftodetJ, colsbftodetJ, Jsomo, MS, &overlapMatrixJ, &rowsJ, &colsJ, &NSOMO);
+    if(Jsomo == 0){
+      rowsJ = 1;
+      colsJ = 1;
+    }

    orthoMatrixJ = malloc(rowsJ*colsJ*sizeof(double));

@ -1713,18 +1762,25 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in

    int transA=false;
    int transB=false;
+    //printf("1Calling blas\n");
+    //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsbftodetI,colsbftodetI,rowsA,colsA);
    callBlasMatxMat(bftodetmatrixI, rowsbftodetI, colsbftodetI, ApqIJ, rowsA, colsA, bfIApqIJ, transA, transB);
+    //printf("done\n");

    // now transform I in csf basis
    double *CSFIApqIJ = malloc(rowsI*colsA*sizeof(double));
    transA = false;
    transB = false;
+    //printf("2Calling blas\n");
+    //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,colsI,colsI,colsA);
    callBlasMatxMat(orthoMatrixI, rowsI, colsI, bfIApqIJ, colsI, colsA, CSFIApqIJ, transA, transB);

    // now transform J in BF basis
    double *CSFIbfJApqIJ = malloc(rowsI*rowsbftodetJ*sizeof(double));
    transA = false;
    transB = true;
+    //printf("3Calling blas\n");
+    //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,colsA,rowsbftodetJ,colsbftodetJ);
    callBlasMatxMat(CSFIApqIJ, rowsI, colsA, bftodetmatrixJ, rowsbftodetJ, colsbftodetJ, CSFIbfJApqIJ, transA, transB);

    // now transform J in CSF basis
@ -1735,13 +1791,14 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
    double *tmpCSFICSFJApqIJ = malloc(rowsI*rowsJ*sizeof(double));
    transA = false;
    transB = true;
+    //printf("4Calling blas\n");
+    //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,rowsbftodetJ,rowsJ,colsJ);
    callBlasMatxMat(CSFIbfJApqIJ, rowsI, rowsbftodetJ, orthoMatrixJ, rowsJ, colsJ, tmpCSFICSFJApqIJ, transA, transB);
    // Transfer to actual buffer in Fortran order
    for(int i = 0; i < rowsI; i++)
        for(int j = 0; j < rowsJ; j++)
            CSFICSFJApqIJ[j*rowsI + i] = tmpCSFICSFJApqIJ[i*rowsJ + j];

-
    // Garbage collection
    free(overlapMatrixI);
    free(overlapMatrixJ);
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@ -1,3 +1,592 @@
+use bitmasks
+
+ BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*(mo_num))]
+&BEGIN_PROVIDER [ integer, NalphaIcfg_list, (N_configuration) ]
+  implicit none
+  !use bitmasks
+  BEGIN_DOC
+  ! Documentation for alphasI
+  ! Returns the associated alpha's for
+  ! the input configuration Icfg.
+  END_DOC
+
+  integer                        :: idxI ! The id of the Ith CFG
+  integer(bit_kind)              :: Icfg(N_int,2)
+  integer                        :: NalphaIcfg
+  logical,dimension(:,:),allocatable :: tableUniqueAlphas
+  integer                        :: listholes(mo_num)
+  integer                        :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+  integer                        :: nholes
+  integer                        :: nvmos
+  integer                        :: listvmos(mo_num)
+  integer                        :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+  integer*8                      :: Idomo
+  integer*8                      :: Isomo
+  integer*8                      :: Jdomo
+  integer*8                      :: Jsomo
+  integer*8                      :: diffSOMO
+  integer*8                      :: diffDOMO
+  integer*8                      :: xordiffSOMODOMO
+  integer                        :: ndiffSOMO
+  integer                        :: ndiffDOMO
+  integer                        :: nxordiffSOMODOMO
+  integer                        :: ndiffAll
+  integer                        :: i,ii
+  integer                        :: j,jj
+  integer                        :: k,kk
+  integer                        :: kstart
+  integer                        :: kend
+  integer                        :: Nsomo_I
+  integer                        :: hole
+  integer                        :: p
+  integer                        :: q
+  integer                        :: countalphas
+  logical                        :: pqAlreadyGenQ
+  logical                        :: pqExistsQ
+  logical                        :: ppExistsQ
+  integer*8                      :: MS
+
+  double precision               :: t0, t1
+  call wall_time(t0)
+
+  MS = elec_alpha_num-elec_beta_num
+
+  allocate(tableUniqueAlphas(mo_num,mo_num))
+  NalphaIcfg_list = 0
+
+  do idxI = 1, N_configuration
+
+    Icfg  = psi_configuration(:,:,idxI)
+
+    Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+    Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+
+    ! find out all pq holes possible
+    nholes = 0
+    ! holes in SOMO
+    do ii = 1,n_act_orb
+      i = list_act(ii)
+      if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then
+        nholes += 1
+        listholes(nholes) = i
+        holetype(nholes) = 1
+      endif
+    end do
+    ! holes in DOMO
+    do ii = 1,n_act_orb
+      i = list_act(ii)
+      if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then
+        nholes += 1
+        listholes(nholes) = i
+        holetype(nholes) = 2
+      endif
+    end do
+
+    ! find vmos
+    listvmos = -1
+    vmotype = -1
+    nvmos = 0
+    do ii = 1,n_act_orb
+      i = list_act(ii)
+      if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then
+        if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then
+          nvmos += 1
+          listvmos(nvmos) = i
+          vmotype(nvmos) = 1
+        else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then
+          nvmos += 1
+          listvmos(nvmos) = i
+          vmotype(nvmos) = 2
+        end if
+      end if
+    end do
+
+    tableUniqueAlphas = .FALSE.
+
+    ! Now find the allowed (p,q) excitations
+    Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+    Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+    Nsomo_I = POPCNT(Isomo)
+    if(Nsomo_I .EQ. 0) then
+      kstart = 1
+    else
+      kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))
+    endif
+    kend = idxI-1
+
+    do i = 1,nholes
+      p = listholes(i)
+      do j = 1,nvmos
+        q = listvmos(j)
+        if(p .EQ. q) cycle
+        if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+          ! SOMO -> VMO
+          Jsomo = IBCLR(Isomo,p-1)
+          Jsomo = IBSET(Jsomo,q-1)
+          Jdomo = Idomo
+          kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+          kend = idxI-1
+        else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+          ! SOMO -> SOMO
+          Jsomo = IBCLR(Isomo,p-1)
+          Jsomo = IBCLR(Jsomo,q-1)
+          Jdomo = IBSET(Idomo,q-1)
+          ! Check for Minimal alpha electrons (MS)
+          if(POPCNT(Jsomo).ge.MS)then
+            kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
+            kend = idxI-1
+          else
+            cycle
+          endif
+        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+          ! DOMO -> VMO
+          Jsomo = IBSET(Isomo,p-1)
+          Jsomo = IBSET(Jsomo,q-1)
+          Jdomo = IBCLR(Idomo,p-1)
+          kstart = cfg_seniority_index(Nsomo_I)
+          kend = idxI-1
+        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+          ! DOMO -> SOMO
+          Jsomo = IBSET(Isomo,p-1)
+          Jsomo = IBCLR(Jsomo,q-1)
+          Jdomo = IBCLR(Idomo,p-1)
+          Jdomo = IBSET(Jdomo,q-1)
+          kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+          kend = idxI-1
+        else
+          print*,"Something went wrong in obtain_associated_alphaI"
+        endif
+        ! Check for Minimal alpha electrons (MS)
+        if(POPCNT(Jsomo).lt.MS)then
+          cycle
+        endif
+
+        ! Again, we don't have to search from 1
+        ! we just use seniority to find the
+        ! first index with NSOMO - 2 to NSOMO + 2
+        ! this is what is done in kstart, kend
+
+        pqAlreadyGenQ = .FALSE.
+        ! First check if it can be generated before
+        do k = kstart, kend
+          diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)))
+          ndiffSOMO = POPCNT(diffSOMO)
+          if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle
+          diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)))
+          xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+          ndiffDOMO = POPCNT(diffDOMO)
+          nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+          nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+          !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then
+          if((ndiffSOMO+ndiffDOMO) .EQ. 0) then
+            pqAlreadyGenQ = .TRUE.
+            ppExistsQ = .TRUE.
+            EXIT
+          endif
+          if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+            pqAlreadyGenQ = .TRUE.
+            EXIT
+          endif
+        end do
+
+        if(pqAlreadyGenQ) cycle
+
+        pqExistsQ = .FALSE.
+
+        if(.NOT. pqExistsQ) then
+          tableUniqueAlphas(p,q) = .TRUE.
+        endif
+      end do
+    end do
+
+    !print *,tableUniqueAlphas(:,:)
+
+    ! prune list of alphas
+    Isomo = Icfg(1,1)
+    Idomo = Icfg(1,2)
+    Jsomo = Icfg(1,1)
+    Jdomo = Icfg(1,2)
+    NalphaIcfg = 0
+    do i = 1, nholes
+      p = listholes(i)
+      do j = 1, nvmos
+        q = listvmos(j)
+        if(p .EQ. q) cycle
+        if(tableUniqueAlphas(p,q)) then
+          if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+            ! SOMO -> VMO
+            Jsomo = IBCLR(Isomo,p-1)
+            Jsomo = IBSET(Jsomo,q-1)
+            Jdomo = Idomo
+          else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+            ! SOMO -> SOMO
+            Jsomo = IBCLR(Isomo,p-1)
+            Jsomo = IBCLR(Jsomo,q-1)
+            Jdomo = IBSET(Idomo,q-1)
+            if(POPCNT(Jsomo).ge.MS)then
+              kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
+              kend = idxI-1
+            else
+              cycle
+            endif
+          else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+            ! DOMO -> VMO
+            Jsomo = IBSET(Isomo,p-1)
+            Jsomo = IBSET(Jsomo,q-1)
+            Jdomo = IBCLR(Idomo,p-1)
+          else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+            ! DOMO -> SOMO
+            Jsomo = IBSET(Isomo,p-1)
+            Jsomo = IBCLR(Jsomo,q-1)
+            Jdomo = IBCLR(Idomo,p-1)
+            Jdomo = IBSET(Jdomo,q-1)
+          else
+            print*,"Something went wrong in obtain_associated_alphaI"
+          endif
+
+          ! SOMO
+          !print *,i,j,"|",NalphaIcfg, Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+          if(POPCNT(Jsomo) .ge. NSOMOMin) then
+            NalphaIcfg += 1
+            alphasIcfg_list(1,1,idxI,NalphaIcfg) = Jsomo
+            alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+            NalphaIcfg_list(idxI) = NalphaIcfg
+          endif
+        endif
+      end do
+    end do
+
+    ! Check if this Icfg has been previously generated as a mono
+    ppExistsQ = .False.
+    Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1))
+    Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2))
+    kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+    do k = kstart, idxI-1
+      diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k)))
+      ndiffSOMO = POPCNT(diffSOMO)
+      if (ndiffSOMO /= 2) cycle
+      diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k)))
+      xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+      ndiffDOMO = POPCNT(diffDOMO)
+      nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+      if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4)) then
+        ppExistsQ = .TRUE.
+        EXIT
+      endif
+    end do
+    ! Diagonal part (pp,qq)
+    if(nholes > 0 .AND. (.NOT. ppExistsQ))then
+      ! SOMO
+      if(POPCNT(Jsomo) .ge. NSOMOMin) then
+        NalphaIcfg += 1
+        alphasIcfg_list(1,1,idxI,NalphaIcfg) = Icfg(1,1)
+        alphasIcfg_list(1,2,idxI,NalphaIcfg) = Icfg(1,2)
+        NalphaIcfg_list(idxI) = NalphaIcfg
+      endif
+    endif
+
+    NalphaIcfg = 0
+  enddo ! end loop idxI
+  call wall_time(t1)
+  print *, 'Preparation : ', t1 - t0
+
+END_PROVIDER
+
+  subroutine obtain_associated_alphaI(idxI, Icfg, alphasIcfg, NalphaIcfg)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for alphasI
+  ! Returns the associated alpha's for
+  ! the input configuration Icfg.
+  END_DOC
+
+  integer,intent(in)                 :: idxI ! The id of the Ith CFG
+  integer(bit_kind),intent(in)       :: Icfg(N_int,2)
+  integer,intent(out)                :: NalphaIcfg
+  integer(bit_kind),intent(out)      :: alphasIcfg(N_int,2,*)
+  logical,dimension(:,:),allocatable :: tableUniqueAlphas
+  integer                            :: listholes(mo_num)
+  integer                            :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+  integer                            :: nholes
+  integer                            :: nvmos
+  integer                            :: listvmos(mo_num)
+  integer                            :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+  integer*8                          :: Idomo
+  integer*8                          :: Isomo
+  integer*8                          :: Jdomo
+  integer*8                          :: Jsomo
+  integer*8                          :: diffSOMO
+  integer*8                          :: diffDOMO
+  integer*8                          :: xordiffSOMODOMO
+  integer                            :: ndiffSOMO
+  integer                            :: ndiffDOMO
+  integer                            :: nxordiffSOMODOMO
+  integer                            :: ndiffAll
+  integer                            :: i, ii
+  integer                            :: j, jj
+  integer                            :: k, kk
+  integer                            :: kstart
+  integer                            :: kend
+  integer                            :: Nsomo_I
+  integer                            :: hole
+  integer                            :: p
+  integer                            :: q
+  integer                            :: countalphas
+  logical                            :: pqAlreadyGenQ
+  logical                            :: pqExistsQ
+  logical                            :: ppExistsQ
+  Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+  Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+  !print*,"Input cfg"
+  !call debug_spindet(Isomo,1)
+  !call debug_spindet(Idomo,1)
+
+  ! find out all pq holes possible
+  nholes = 0
+  ! holes in SOMO
+  do ii = 1,n_act_orb
+    i = list_act(ii)
+     if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then
+        nholes += 1
+        listholes(nholes) = i
+        holetype(nholes) = 1
+     endif
+  end do
+  ! holes in DOMO
+  do ii = 1,n_act_orb
+    i = list_act(ii)
+     if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then
+        nholes += 1
+        listholes(nholes) = i
+        holetype(nholes) = 2
+     endif
+  end do
+
+  ! find vmos
+  listvmos = -1
+  vmotype = -1
+  nvmos = 0
+  do ii = 1,n_act_orb
+    i = list_act(ii)
+     !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
+     if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0) then
+        nvmos += 1
+        listvmos(nvmos) = i
+        vmotype(nvmos) = 1
+     else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0 ) then
+        nvmos += 1
+        listvmos(nvmos) = i
+        vmotype(nvmos) = 2
+     end if
+  end do
+
+  !print *,"Nvmo=",nvmos
+  !print *,listvmos
+  !print *,vmotype
+
+  allocate(tableUniqueAlphas(mo_num,mo_num))
+  tableUniqueAlphas = .FALSE.
+
+  ! Now find the allowed (p,q) excitations
+  Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+  Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+  Nsomo_I = POPCNT(Isomo)
+  if(Nsomo_I .EQ. 0) then
+    kstart = 1
+  else
+    kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))
+  endif
+  kend = idxI-1
+  !print *,"Isomo"
+  !call debug_spindet(Isomo,1)
+  !call debug_spindet(Idomo,1)
+
+  !print *,"Nholes=",nholes," Nvmos=",nvmos, " idxi=",idxI
+  !do i = 1,nholes
+  !   print *,i,"->",listholes(i)
+  !enddo
+  !do i = 1,nvmos
+  !   print *,i,"->",listvmos(i)
+  !enddo
+
+  do i = 1,nholes
+     p = listholes(i)
+     do j = 1,nvmos
+        q = listvmos(j)
+        if(p .EQ. q) cycle
+        if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+           ! SOMO -> VMO
+           Jsomo = IBCLR(Isomo,p-1)
+           Jsomo = IBSET(Jsomo,q-1)
+           Jdomo = Idomo
+           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+           kend = idxI-1
+        else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+           ! SOMO -> SOMO
+           Jsomo = IBCLR(Isomo,p-1)
+           Jsomo = IBCLR(Jsomo,q-1)
+           Jdomo = IBSET(Idomo,q-1)
+           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
+           kend = idxI-1
+        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+           ! DOMO -> VMO
+           Jsomo = IBSET(Isomo,p-1)
+           Jsomo = IBSET(Jsomo,q-1)
+           Jdomo = IBCLR(Idomo,p-1)
+           kstart = cfg_seniority_index(Nsomo_I)
+           kend = idxI-1
+        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+           ! DOMO -> SOMO
+           Jsomo = IBSET(Isomo,p-1)
+           Jsomo = IBCLR(Jsomo,q-1)
+           Jdomo = IBCLR(Idomo,p-1)
+           Jdomo = IBSET(Jdomo,q-1)
+           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+           kend = idxI-1
+        else
+           print*,"Something went wrong in obtain_associated_alphaI"
+        endif
+
+        ! Again, we don't have to search from 1
+        ! we just use seniortiy to find the
+        ! first index with NSOMO - 2 to NSOMO + 2
+        ! this is what is done in kstart, kend
+
+        pqAlreadyGenQ = .FALSE.
+        ! First check if it can be generated before
+        do k = kstart, kend
+           diffSOMO = IEOR(Jsomo,iand(act_bitmask(1,1),psi_configuration(1,1,k)))
+           ndiffSOMO = POPCNT(diffSOMO)
+           if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle
+           diffDOMO = IEOR(Jdomo,iand(act_bitmask(1,1),psi_configuration(1,2,k)))
+           xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+           ndiffDOMO = POPCNT(diffDOMO)
+           nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+           nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+           !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then
+           if((ndiffSOMO+ndiffDOMO) .EQ. 0) then
+              pqAlreadyGenQ = .TRUE.
+              ppExistsQ = .TRUE.
+              EXIT
+           endif
+           if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+              pqAlreadyGenQ = .TRUE.
+              !EXIT
+              !ppExistsQ = .TRUE.
+              !print *,i,k,ndiffSOMO,ndiffDOMO
+              !call debug_spindet(Jsomo,1)
+              !call debug_spindet(Jdomo,1)
+              !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)),1)
+              !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)),1)
+              EXIT
+           endif
+        end do
+
+        !print *,"(,",p,",",q,")",pqAlreadyGenQ
+
+        if(pqAlreadyGenQ) cycle
+
+        pqExistsQ = .FALSE.
+        ! now check if this exists in the selected list
+        !do k = idxI+1, N_configuration
+        !   diffSOMO = IEOR(OR(reunion_of_act_virt_bitmask(1,1),Jsomo),psi_configuration(1,1,k))
+        !   diffDOMO = IEOR(OR(reunion_of_act_virt_bitmask(1,1),Jdomo),psi_configuration(1,2,k))
+        !   ndiffSOMO = POPCNT(diffSOMO)
+        !   ndiffDOMO = POPCNT(diffDOMO)
+        !   if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
+        !      pqExistsQ = .TRUE.
+        !      EXIT
+        !   endif
+        !end do
+
+        if(.NOT. pqExistsQ) then
+           tableUniqueAlphas(p,q) = .TRUE.
+           !print *,p,q
+           !call debug_spindet(Jsomo,1)
+           !call debug_spindet(Jdomo,1)
+        endif
+     end do
+  end do
+
+  !print *,tableUniqueAlphas(:,:)
+
+  ! prune list of alphas
+  Isomo = Icfg(1,1)
+  Idomo = Icfg(1,2)
+  Jsomo = Icfg(1,1)
+  Jdomo = Icfg(1,2)
+  NalphaIcfg = 0
+  do i = 1, nholes
+     p = listholes(i)
+     do j = 1, nvmos
+        q = listvmos(j)
+        if(p .EQ. q) cycle
+        if(tableUniqueAlphas(p,q)) then
+           if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+              ! SOMO -> VMO
+              Jsomo = IBCLR(Isomo,p-1)
+              Jsomo = IBSET(Jsomo,q-1)
+              Jdomo = Idomo
+           else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+              ! SOMO -> SOMO
+              Jsomo = IBCLR(Isomo,p-1)
+              Jsomo = IBCLR(Jsomo,q-1)
+              Jdomo = IBSET(Idomo,q-1)
+           else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+              ! DOMO -> VMO
+              Jsomo = IBSET(Isomo,p-1)
+              Jsomo = IBSET(Jsomo,q-1)
+              Jdomo = IBCLR(Idomo,p-1)
+           else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+              ! DOMO -> SOMO
+              Jsomo = IBSET(Isomo,p-1)
+              Jsomo = IBCLR(Jsomo,q-1)
+              Jdomo = IBCLR(Idomo,p-1)
+              Jdomo = IBSET(Jdomo,q-1)
+           else
+              print*,"Something went wrong in obtain_associated_alphaI"
+           endif
+
+           ! SOMO
+           NalphaIcfg += 1
+           !print *,i,j,"|",NalphaIcfg
+           alphasIcfg(1,1,NalphaIcfg) = Jsomo
+           alphasIcfg(1,2,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+           !print *,"I = ",idxI, " Na=",NalphaIcfg," - ",Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+        endif
+     end do
+  end do
+
+  ! Check if this Icfg has been previously generated as a mono
+  ppExistsQ = .False.
+  Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+  Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+  do k = 1, idxI-1
+     diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k)))
+     diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k)))
+     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+     ndiffSOMO = POPCNT(diffSOMO)
+     ndiffDOMO = POPCNT(diffDOMO)
+     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+     if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+        ppExistsQ = .TRUE.
+        EXIT
+     endif
+  end do
+  ! Diagonal part (pp,qq)
+  if(nholes > 0 .AND. (.NOT. ppExistsQ))then
+     ! SOMO
+     NalphaIcfg += 1
+     !print *,p,q,"|",holetype(i),vmotype(j),NalphaIcfg
+     !call debug_spindet(Idomo,1)
+     !call debug_spindet(Jdomo,1)
+     alphasIcfg(1,1,NalphaIcfg) = Icfg(1,1)
+     alphasIcfg(1,2,NalphaIcfg) = Icfg(1,2)
+  endif
+
+  end subroutine
+
  function getNSOMO(Icfg) result(NSOMO)
    implicit none
    integer(bit_kind),intent(in)   :: Icfg(N_int,2)
@ -8,98 +597,3 @@
       NSOMO += POPCNT(Icfg(i,1))
    enddo
  end function getNSOMO
-
-subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmodel)
-  implicit none
-  BEGIN_DOC
-  ! This function converts the orbital ids
-  ! in real space to those used in model space
-  ! in order to identify the matrices required
-  ! for the calculation of MEs.
-  !
-  ! The type of excitations are ordered as follows:
-  ! Type 1 - SOMO -> SOMO
-  ! Type 2 - DOMO -> VMO
-  ! Type 3 - SOMO -> VMO
-  ! Type 4 - DOMO -> SOMO
-  END_DOC
-  integer(bit_kind),intent(in)         :: Ialpha(N_int,2)
-  integer(bit_kind),intent(in)         :: Jcfg(N_int,2)
-  integer,intent(in)                   :: p,q
-  integer,intent(in)                   :: extype
-  integer,intent(out)                  :: pmodel,qmodel
-  integer*8                            :: Isomo
-  integer*8                            :: Idomo
-  integer*8                            :: Jsomo
-  integer*8                            :: Jdomo
-  integer*8                            :: mask
-  integer*8                            :: Isomotmp
-  integer*8                            :: Jsomotmp
-  integer                              :: pos0,pos0prev
-
-  ! TODO Flag (print) when model space indices is > 64
-  Isomo = Ialpha(1,1)
-  Idomo = Ialpha(1,2)
-  Jsomo = Jcfg(1,1)
-  Jdomo = Jcfg(1,2)
-  pos0prev = 0
-  pmodel = p
-  qmodel = q
-
-  if(p .EQ. q) then
-     pmodel = 1
-     qmodel = 1
-  else
-     !print *,"input pq=",p,q,"extype=",extype
-     !call debug_spindet(Isomo,1)
-     !call debug_spindet(Idomo,1)
-     !call debug_spindet(Jsomo,1)
-     !call debug_spindet(Jdomo,1)
-     select case(extype)
-       case (1)
-          ! SOMO -> SOMO
-          ! remove all domos
-          !print *,"type -> SOMO -> SOMO"
-          mask = ISHFT(1_8,p) - 1
-          Isomotmp = IAND(Isomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
-          mask = ISHFT(1_8,q) - 1
-          Isomotmp = IAND(Isomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
-       case (2)
-          ! DOMO -> VMO
-          ! remove all domos except one at p
-          !print *,"type -> DOMO -> VMO"
-          mask = ISHFT(1_8,p) - 1
-          Jsomotmp = IAND(Jsomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
-          mask = ISHFT(1_8,q) - 1
-          Jsomotmp = IAND(Jsomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
-       case (3)
-          ! SOMO -> VMO
-          !print *,"type -> SOMO -> VMO"
-          !Isomo = IEOR(Isomo,Jsomo)
-          mask = ISHFT(1_8,p) - 1
-          Isomo = IAND(Isomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
-          mask = ISHFT(1_8,q) - 1
-          Jsomo = IAND(Jsomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
-       case (4)
-          ! DOMO -> SOMO
-          ! remove all domos except one at p
-          !print *,"type -> DOMO -> SOMO"
-          !Isomo = IEOR(Isomo,Jsomo)
-          mask = ISHFT(1_8,p) - 1
-          Jsomo = IAND(Jsomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
-          mask = ISHFT(1_8,q) - 1
-          Isomo = IAND(Isomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
-       case default
-          print *,"something is wrong in convertOrbIdsToModelSpaceIds"
-     end select
-  endif
-     !print *,p,q,"model ids=",pmodel,qmodel
-end subroutine convertOrbIdsToModelSpaceIds
--- a/src/csf/configurations.irp.f
+++ b/src/csf/configurations.irp.f
@ -458,8 +458,9 @@ end

 END_PROVIDER

- BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num) ]
+ BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num+2) ]
 &BEGIN_PROVIDER [ integer, cfg_nsomo_max ]
+&BEGIN_PROVIDER [ integer, cfg_nsomo_min ]
  implicit none
  BEGIN_DOC
 ! Returns the index in psi_configuration of the first cfg with
@ -467,9 +468,10 @@ END_PROVIDER
 !
 ! cfg_nsomo_max : Max number of SOMO in the current wave function
 END_DOC
- integer :: i, k, s, sold
+ integer :: i, k, s, sold, soldmin
 cfg_seniority_index(:) = -1
 sold = -1
+ soldmin = 2000
 cfg_nsomo_max = 0
 do i=1,N_configuration
   s = 0
@ -482,6 +484,10 @@ END_PROVIDER
     cfg_seniority_index(s) = i
     cfg_nsomo_max = s
   endif
+   if (soldmin .GT. s ) then
+     soldmin = s
+     cfg_nsomo_min = s
+   endif
 enddo
 END_PROVIDER

@ -743,41 +749,112 @@ BEGIN_PROVIDER [ integer(bit_kind), dominant_dets_of_cfgs, (N_int,2,N_dominant_d
 enddo
 END_PROVIDER

-subroutine binary_search_cfg(cfgInp,addcfg)
+subroutine binary_search_cfg(cfgInp,addcfg,bit_tmp)
  use bitmasks
  implicit none
  BEGIN_DOC
  ! Documentation for binary_search
-  ! 
-  ! Does a binary search to find 
+  !
+  ! Does a binary search to find
  ! the address of a configuration in a list of
  ! configurations.
  END_DOC
  integer(bit_kind), intent(in)  :: cfgInp(N_int,2)
  integer          , intent(out) :: addcfg
-  integer :: i,j,k,r,l
-  integer*8 :: key, key2
-  logical :: found
-  !integer*8, allocatable :: bit_tmp(:)
-  !integer*8, external            :: configuration_search_key
+  integer*8,          intent(in) :: bit_tmp(0:N_configuration+1)

-  !allocate(bit_tmp(0:N_configuration))
-  !bit_tmp(0) = 0
-  do i=1,N_configuration
-    !bit_tmp(i) = configuration_search_key(psi_configuration(1,1,i),N_int)
-    found = .True.
-    do k=1,N_int
-      found = found .and. (psi_configuration(k,1,i) == cfgInp(k,1)) &
-                    .and. (psi_configuration(k,2,i) == cfgInp(k,2))
-    enddo
-    if (found) then
-      addcfg = i
-      exit
+  logical                        :: found
+  integer                        :: l, r, j, k
+  integer*8                      :: key
+
+  integer*8, external            :: configuration_search_key
+
+  key = configuration_search_key(cfgInp,N_int)
+
+  ! Binary search
+  l = 0
+  r = N_configuration+1
+IRP_IF WITHOUT_SHIFTRL
+  j = ishft(r-l,-1)
+IRP_ELSE
+  j = shiftr(r-l,1)
+IRP_ENDIF
+  do while (j>=1)
+    j = j+l
+    if (bit_tmp(j) == key) then
+      ! Find 1st element which matches the key
+      if (j > 1) then
+        do while (j>1 .and. bit_tmp(j-1) == key)
+          j = j-1
+        enddo
+      endif
+      ! Find correct element matching the key
+      do while (bit_tmp(j) == key)
+        found = .True.
+        do k=1,N_int
+          found = found .and. (psi_configuration(k,1,j) == cfgInp(k,1))&
+                        .and. (psi_configuration(k,2,j) == cfgInp(k,2))
+        enddo
+        if (found) then
+          addcfg = j
+          return
+        endif
+        j = j+1
+      enddo
+      addcfg = -1
+      return
+    else if (bit_tmp(j) > key) then
+      r = j
+    else
+      l = j
    endif
+IRP_IF WITHOUT_SHIFTRL
+    j = ishft(r-l,-1)
+IRP_ELSE
+    j = shiftr(r-l,1)
+IRP_ENDIF
  enddo

+  addcfg = -1
+  return
+
 end subroutine

+!subroutine binary_search_cfg(cfgInp,addcfg)
+!  use bitmasks
+!  implicit none
+!  BEGIN_DOC
+!  ! Documentation for binary_search
+!  ! 
+!  ! Does a binary search to find 
+!  ! the address of a configuration in a list of
+!  ! configurations.
+!  END_DOC
+!  integer(bit_kind), intent(in)  :: cfgInp(N_int,2)
+!  integer          , intent(out) :: addcfg
+!  integer :: i,j,k,r,l
+!  integer*8 :: key, key2
+!  logical :: found
+!  !integer*8, allocatable :: bit_tmp(:)
+!  !integer*8, external            :: configuration_search_key
+!
+!  !allocate(bit_tmp(0:N_configuration))
+!  !bit_tmp(0) = 0
+!  do i=1,N_configuration
+!    !bit_tmp(i) = configuration_search_key(psi_configuration(1,1,i),N_int)
+!    found = .True.
+!    do k=1,N_int
+!      found = found .and. (psi_configuration(k,1,i) == cfgInp(k,1)) &
+!                    .and. (psi_configuration(k,2,i) == cfgInp(k,2))
+!    enddo
+!    if (found) then
+!      addcfg = i
+!      exit
+!    endif
+!  enddo
+!
+!end subroutine
+!
 BEGIN_PROVIDER [ integer, psi_configuration_to_psi_det, (2,N_configuration) ]
 &BEGIN_PROVIDER [ integer, psi_configuration_to_psi_det_data, (N_det) ]

--- a/src/csf/conversion.irp.f
+++ b/src/csf/conversion.irp.f
@ -1,3 +1,16 @@
+BEGIN_PROVIDER [ double precision, psi_csf_coef, (N_csf, N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Wafe function in CSF basis
+ END_DOC
+
+ double precision, allocatable :: buffer(:,:)
+ allocate ( buffer(N_det, N_states) )
+ buffer(1:N_det, 1:N_states) = psi_coef(1:N_det, 1:N_states)
+ call convertWFfromDETtoCSF(N_states, buffer, psi_csf_coef)
+END_PROVIDER
+
+
 subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out)
  use cfunctions
  use bitmasks
@ -12,7 +25,7 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out)
  double precision, intent(out)  :: psi_coef_cfg_out(n_CSF,N_st)
  integer*8                      :: Isomo, Idomo, mask
  integer(bit_kind)              :: Ialpha(N_int) ,Ibeta(N_int)
-  integer                        :: rows, cols, i, j, k
+  integer                        :: rows, cols, i, j, k, salpha
  integer                        :: startdet, enddet
  integer                        :: ndetI
  integer                        :: getNSOMO
@ -26,6 +39,8 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out)

  integer s, bfIcfg
  integer countcsf
+  integer MS
+  MS = elec_alpha_num-elec_beta_num
  countcsf = 0
  phasedet = 1.0d0
  do i = 1,N_configuration
@ -44,12 +59,19 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out)
      enddo
    enddo

-    s = 0
+    s = 0 ! s == total number of SOMOs
    do k=1,N_int
      if (psi_configuration(k,1,i) == 0_bit_kind) cycle
      s = s + popcnt(psi_configuration(k,1,i))
    enddo
-    bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1))))
+
+    if(iand(s,1) .EQ. 0) then
+      salpha = (s + MS)/2
+      bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1))))
+    else
+      salpha = (s + MS)/2
+      bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1))))
+    endif

    ! perhaps blocking with CFGs of same seniority
    ! can be more efficient
@ -80,7 +102,7 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det)
  double precision,intent(in)    :: psi_coef_cfg_in(n_CSF,N_st)
  double precision,intent(out)   :: psi_coef_det(N_det,N_st)
  double precision               :: tmp_psi_coef_det(maxDetDimPerBF,N_st)
-  integer                        :: s, bfIcfg
+  integer                        :: s, bfIcfg, salpha
  integer                        :: countcsf
  integer(bit_kind)              :: Ialpha(N_int), Ibeta(N_int)
  integer                        :: rows, cols, i, j, k
@ -91,6 +113,8 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det)
  double precision,allocatable   :: tempCoeff (:,:)
  double precision               :: phasedet
  integer                        :: idx
+  integer MS
+  MS = elec_alpha_num-elec_beta_num

  countcsf = 0

@ -104,7 +128,8 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det)
      if (psi_configuration(k,1,i) == 0_bit_kind) cycle
      s = s + popcnt(psi_configuration(k,1,i))
    enddo
-    bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1))))
+    salpha = (s + MS)/2
+    bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1))))

    allocate(tempCoeff(bfIcfg,N_st))

--- a/src/csf/create_excitations.irp.f
+++ b/src/csf/create_excitations.irp.f
@ -226,7 +226,7 @@ subroutine generate_all_singles_cfg(cfg,singles,n_singles,Nint)
  enddo
 end

-subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint)
+subroutine generate_all_singles_cfg_with_type(bit_tmp,cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint)
  implicit none
  use bitmasks
  BEGIN_DOC
@ -238,6 +238,7 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin
  ! ex_type_singles : on output contains type of excitations  :
  !
  END_DOC
+  integer*8, intent(in)          :: bit_tmp(0:N_configuration+1)
  integer, intent(in)            :: Nint
  integer, intent(inout)         :: n_singles
  integer, intent(out)           :: idxs_singles(*)
@ -248,20 +249,26 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin
  integer(bit_kind)              :: Jdet(Nint,2)

  integer           :: i,k, n_singles_ma, i_hole, i_particle, ex_type, addcfg
+  integer           :: ii,kk
  integer(bit_kind) :: single(Nint,2)
  logical           :: i_ok

+
  n_singles = 0
  !TODO
  !Make list of Somo  and Domo for holes
  !Make list of Unocc and Somo for particles
-  do i_hole = 1+n_core_orb, n_core_orb + n_act_orb
-    do i_particle = 1+n_core_orb, n_core_orb + n_act_orb
+  !do i_hole = 1+n_core_orb, n_core_orb + n_act_orb
+  do ii = 1, n_act_orb
+    i_hole = list_act(ii)
+    !do i_particle = 1+n_core_orb, n_core_orb + n_act_orb
+    do kk = 1, n_act_orb
+      i_particle = list_act(kk)
      if(i_hole .EQ. i_particle) cycle
      addcfg = -1
      call do_single_excitation_cfg_with_type(cfgInp,single,i_hole,i_particle,ex_type,i_ok)
      if (i_ok) then
-        call binary_search_cfg(single,addcfg)
+        call binary_search_cfg(single,addcfg,bit_tmp)
        if(addcfg .EQ. -1) cycle
        n_singles = n_singles + 1
        do k=1,Nint
--- a/src/csf/obtain_I_foralpha.irp.f
+++ b/src/csf/obtain_I_foralpha.irp.f
@ -0,0 +1,397 @@
+subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, nconnectedI,ntotalconnectedI)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for obtain_connected_I_foralpha
+  ! This function returns all those selected configurations
+  ! which are connected to the input configuration
+  ! givenI by a single excitation.
+  !
+  ! The type of excitations are ordered as follows:
+  ! Type 1 - SOMO -> SOMO
+  ! Type 2 - DOMO -> VMO
+  ! Type 3 - SOMO -> VMO
+  ! Type 4 - DOMO -> SOMO
+  !
+  ! Order of operators
+  ! \alpha> = a^\dag_p a_q |I> = E_pq |I>
+  END_DOC
+  integer          ,intent(in)             :: idxI
+  integer(bit_kind),intent(in)             :: givenI(N_int,2)
+  integer(bit_kind),intent(out)            :: connectedI(N_int,2,*)
+  integer          ,intent(out)            :: idxs_connectedI(*)
+  integer,intent(out)                      :: nconnectedI
+  integer,intent(out)                      :: ntotalconnectedI
+  integer*8                                :: Idomo
+  integer*8                                :: Isomo
+  integer*8                                :: Jdomo
+  integer*8                                :: Jsomo
+  integer*8                                :: IJsomo
+  integer*8                                :: diffSOMO
+  integer*8                                :: diffDOMO
+  integer*8                                :: xordiffSOMODOMO
+  integer                                  :: ndiffSOMO
+  integer                                  :: ndiffDOMO
+  integer                                  :: nxordiffSOMODOMO
+  integer :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes
+  integer :: listholes(mo_num)
+  integer :: holetype(mo_num)
+  integer :: end_index
+  integer :: Nsomo_I
+
+  ! 
+  ! 2 2 1 1 0 0 : 1 1 0 0 0 0
+  !               0 0 1 1 0 0
+  !
+  ! 2 1 1 1 1 0 : 1 0 0 0 0 0
+  !               0 1 1 1 1 0
+  !xorS           0 1 0 0 1 0 : 2
+  !xorD           0 1 0 0 0 0 : 1 
+  !xorSD          0 0 0 0 1 0 : 1
+  !                           -----
+  !                             4
+  ! 1 1 1 1 1 1 : 0 0 0 0 0 0
+  !               1 1 1 1 1 1
+  !               1 1 0 0 1 1 : 4
+  !               1 1 0 0 0 0 : 2 
+  !               0 0 0 0 1 1 : 2
+  !                           -----
+  !                             8
+  !
+
+  nconnectedI = 0
+  ntotalconnectedI = 0
+  end_index = N_configuration
+
+  ! Since CFGs are sorted wrt to seniority
+  ! we don't have to search the full CFG list
+  Isomo = givenI(1,1)
+  Idomo = givenI(1,2)
+  Nsomo_I = POPCNT(Isomo)
+  end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_I+6,elec_num))-1)
+  if(end_index .LT. 0) end_index= N_configuration
+  !end_index = N_configuration
+  !print *,"Start and End = ",idxI, end_index
+
+
+  p = 0
+  q = 0
+  do i=idxI,end_index
+     !if(.True.) then
+     !  nconnectedI += 1
+     !  connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+     !  idxs_connectedI(nconnectedI)=i
+     !  cycle
+     !endif
+     Isomo = givenI(1,1)
+     Idomo = givenI(1,2)
+     Jsomo = psi_configuration(1,1,i)
+     Jdomo = psi_configuration(1,2,i)
+     diffSOMO = IEOR(Isomo,Jsomo)
+     ndiffSOMO = POPCNT(diffSOMO)
+     diffDOMO = IEOR(Idomo,Jdomo)
+     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+     ndiffDOMO = POPCNT(diffDOMO)
+     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+     nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+     if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+       !-------
+       ! MONO |
+       !-------
+        nconnectedI += 1
+        connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+        idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((nxordiffSOMODOMO .EQ. 8) .AND. ndiffSOMO .EQ. 4) then
+       !----------------------------
+       ! DOMO -> VMO + DOMO -> VMO |
+       !----------------------------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((nxordiffSOMODOMO .EQ. 6) .AND. ndiffSOMO .EQ. 2) then
+       !----------------------------
+       ! DOUBLE
+       !----------------------------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((nxordiffSOMODOMO .EQ. 2) .AND. ndiffSOMO .EQ. 3) then
+       !-----------------
+       ! DOUBLE
+       !-----------------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 0) then
+       !-----------------
+       ! DOUBLE
+       !-----------------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
+       !--------
+       ! I = I |
+       !--------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)= i
+        ! find out all pq holes possible
+        nholes = 0
+        ! holes in SOMO
+        Isomo = psi_configuration(1,1,i)
+        Idomo = psi_configuration(1,2,i)
+        do iii = 1,n_act_orb
+          ii = list_act(iii)
+           if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 1
+           endif
+        end do
+        ! holes in DOMO
+        do iii = 1,n_act_orb
+          ii = list_act(iii)
+           if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 2
+           endif
+        end do
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)*nholes)
+     endif
+  end do
+
+end subroutine obtain_connected_J_givenI
+
+subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for obtain_connected_I_foralpha
+  ! This function returns all those selected configurations
+  ! which are connected to the input configuration
+  ! Ialpha by a single excitation.
+  !
+  ! The type of excitations are ordered as follows:
+  ! Type 1 - SOMO -> SOMO
+  ! Type 2 - DOMO -> VMO
+  ! Type 3 - SOMO -> VMO
+  ! Type 4 - DOMO -> SOMO
+  !
+  ! Order of operators
+  ! \alpha> = a^\dag_p a_q |I> = E_pq |I>
+  END_DOC
+  integer          ,intent(in)             :: idxI
+  integer(bit_kind),intent(in)             :: Ialpha(N_int,2)
+  integer(bit_kind),intent(out)            :: connectedI(N_int,2,*)
+  integer          ,intent(out)            :: idxs_connectedI(*)
+  integer,intent(out)                      :: nconnectedI
+  integer,intent(out)                      :: excitationIds(2,*)
+  integer,intent(out)                      :: excitationTypes(*)
+  real*8 ,intent(out)                      :: diagfactors(*)
+  integer*8                                :: Idomo
+  integer*8                                :: Isomo
+  integer*8                                :: Jdomo
+  integer*8                                :: Jsomo
+  integer*8                                :: IJsomo
+  integer*8                                :: diffSOMO
+  integer*8                                :: diffDOMO
+  integer*8                                :: xordiffSOMODOMO
+  integer                                  :: ndiffSOMO
+  integer                                  :: ndiffDOMO
+  integer                        :: nxordiffSOMODOMO
+  integer                        :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes
+  integer                        :: listholes(mo_num)
+  integer                        :: holetype(mo_num)
+  integer                        :: end_index
+  integer                        :: Nsomo_alpha
+  integer*8                      :: MS
+  MS = elec_alpha_num-elec_beta_num
+
+  nconnectedI = 0
+  end_index = N_configuration
+
+  ! Since CFGs are sorted wrt to seniority
+  ! we don't have to search the full CFG list
+  Isomo = Ialpha(1,1)
+  Idomo = Ialpha(1,2)
+  Nsomo_alpha = POPCNT(Isomo)
+  end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1)
+  if(end_index .LT. 0) end_index= N_configuration
+  end_index = N_configuration
+
+
+  p = 0
+  q = 0
+  if (N_int > 1) stop 'obtain_connected_i_foralpha : N_int > 1'
+  do i=idxI,end_index
+     Isomo = Ialpha(1,1)
+     Idomo = Ialpha(1,2)
+     Jsomo = psi_configuration(1,1,i)
+     Jdomo = psi_configuration(1,2,i)
+     ! Check for Minimal alpha electrons (MS)
+     if(POPCNT(Isomo).lt.MS)then
+       cycle
+     endif
+     diffSOMO = IEOR(Isomo,Jsomo)
+     ndiffSOMO = POPCNT(diffSOMO)
+     !if(idxI.eq.1)then
+     !  print *," \t idxI=",i," diffS=",ndiffSOMO," popJs=", POPCNT(Jsomo)," popIs=",POPCNT(Isomo)
+     !endif
+     diffDOMO = IEOR(Idomo,Jdomo)
+     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+     ndiffDOMO = POPCNT(diffDOMO)
+     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+     nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+     if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+        select case(ndiffDOMO)
+        case (0)
+           ! SOMO -> VMO
+           !print *,"obt SOMO -> VMO"
+           extyp = 3
+           IJsomo = IEOR(Isomo, Jsomo)
+!IRP_IF WITHOUT_TRAILZ
+!           p = (popcnt(ieor( IAND(Isomo,IJsomo) , IAND(Isomo,IJsomo) -1))-1) + 1
+!IRP_ELSE
+           p = TRAILZ(IAND(Isomo,IJsomo)) + 1
+!IRP_ENDIF
+           IJsomo = IBCLR(IJsomo,p-1)
+!IRP_IF WITHOUT_TRAILZ
+!           q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1
+!IRP_ELSE
+           q = TRAILZ(IJsomo) + 1
+!IRP_ENDIF
+        case (1)
+           ! DOMO -> VMO
+           ! or
+           ! SOMO -> SOMO
+           nsomoJ = POPCNT(Jsomo)
+           nsomoalpha = POPCNT(Isomo)
+           if(nsomoJ .GT. nsomoalpha) then
+              ! DOMO -> VMO
+              !print *,"obt DOMO -> VMO"
+              extyp = 2
+!IRP_IF WITHOUT_TRAILZ
+!              p = (popcnt(ieor( IEOR(Idomo,Jdomo),IEOR(Idomo,Jdomo) -1))-1) + 1
+!IRP_ELSE
+              p = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+!IRP_ENDIF
+              Isomo = IEOR(Isomo, Jsomo)
+              Isomo = IBCLR(Isomo,p-1)
+!IRP_IF WITHOUT_TRAILZ
+!              q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1
+!IRP_ELSE
+              q = TRAILZ(Isomo) + 1
+!IRP_ENDIF
+           else
+              ! SOMO -> SOMO
+              !print *,"obt SOMO -> SOMO"
+              extyp = 1
+!IRP_IF WITHOUT_TRAILZ
+!              q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1
+!IRP_ELSE
+              q = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+!IRP_ENDIF
+              Isomo = IEOR(Isomo, Jsomo)
+              Isomo = IBCLR(Isomo,q-1)
+!IRP_IF WITHOUT_TRAILZ
+!              p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1
+!IRP_ELSE
+              p = TRAILZ(Isomo) + 1
+!IRP_ENDIF
+              ! Check for Minimal alpha electrons (MS)
+              !if(POPCNT(Isomo).lt.MS)then
+              !  cycle
+              !endif
+           end if
+        case (2)
+           ! DOMO -> SOMO
+           !print *,"obt DOMO -> SOMO"
+           extyp = 4
+           IJsomo = IEOR(Isomo, Jsomo)
+!IRP_IF WITHOUT_TRAILZ
+!           p = (popcnt(ieor( IAND(Jsomo,IJsomo), IAND(Jsomo,IJsomo)-1))-1) + 1
+!IRP_ELSE
+           p = TRAILZ(IAND(Jsomo,IJsomo)) + 1
+!IRP_ENDIF
+           IJsomo = IBCLR(IJsomo,p-1)
+!IRP_IF WITHOUT_TRAILZ
+!           q = (popcnt(ieor( IJsomo , IJsomo -1))-1) + 1
+!IRP_ELSE
+           q = TRAILZ(IJsomo) + 1
+!IRP_ENDIF
+        case default
+           print *,"something went wront in get connectedI"
+        end select
+        starti = psi_config_data(i,1)
+        endi   = psi_config_data(i,2)
+        nconnectedI += 1
+        do k=1,N_int
+          connectedI(k,1,nconnectedI) = psi_configuration(k,1,i)
+          connectedI(k,2,nconnectedI) = psi_configuration(k,2,i)
+        enddo
+        idxs_connectedI(nconnectedI)=starti
+        excitationIds(1,nconnectedI)=p
+        excitationIds(2,nconnectedI)=q
+        excitationTypes(nconnectedI) = extyp
+        diagfactors(nconnectedI) = 1.0d0
+     else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
+        ! find out all pq holes possible
+        nholes = 0
+        ! holes in SOMO
+        Isomo = psi_configuration(1,1,i)
+        Idomo = psi_configuration(1,2,i)
+        do iii = 1,n_act_orb
+          ii = list_act(iii)
+           if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 1
+           endif
+        end do
+        ! holes in DOMO
+        do iii = 1,n_act_orb
+          ii = list_act(iii)
+           if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 2
+           endif
+        end do
+
+        do k=1,nholes
+           p = listholes(k)
+           q = p
+           extyp = 1
+           if(holetype(k) .EQ. 1) then
+              starti = psi_config_data(i,1)
+              endi   = psi_config_data(i,2)
+              nconnectedI += 1
+              connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+              idxs_connectedI(nconnectedI)=starti
+              excitationIds(1,nconnectedI)=p
+              excitationIds(2,nconnectedI)=q
+              excitationTypes(nconnectedI) = extyp
+              diagfactors(nconnectedI) = 1.0d0
+           else
+              starti = psi_config_data(i,1)
+              endi   = psi_config_data(i,2)
+              nconnectedI += 1
+              connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+              idxs_connectedI(nconnectedI)=starti
+              excitationIds(1,nconnectedI)=p
+              excitationIds(2,nconnectedI)=q
+              excitationTypes(nconnectedI) = extyp
+              diagfactors(nconnectedI) = 2.0d0
+           endif
+        enddo
+     endif
+  end do
+
+end subroutine obtain_connected_I_foralpha
--- a/src/csf/sigma_vector.irp.f
+++ b/src/csf/sigma_vector.irp.f
--- a/src/csf/tree_utils.c
+++ b/src/csf/tree_utils.c
@ -1,3 +1,4 @@
+#include <assert.h>
 #include "tree_utils.h"

 void buildTree(Tree *bftree,
@ -52,6 +53,7 @@ void buildTreeDriver(Tree *bftree, int NSOMO, int MS, int *NBF){
    int icpl  = 0; // keep track of the ith ms (cannot be -ve)
    int addr  = 0; // Counts the total BF's

+    assert(bftree->rootNode->addr == 0);
    buildTree(bftree, &(bftree->rootNode), isomo, izeros, icpl, NSOMO, MS);

    *NBF = bftree->rootNode->addr;
@ -264,6 +266,8 @@ void genDetBasis(Tree *dettree, int Isomo, int MS, int *ndets){
    int NSOMO=0;
    getSetBits(Isomo, &NSOMO);
    genDetsDriver(dettree, NSOMO, MS, ndets);
+    // Closed shell case
+    if(NSOMO==0) (*ndets) = 1;

 }

@ -311,3 +315,13 @@ void callBlasMatxMat(double *A, int rowA, int colA, double *B, int rowB, int col
            break;
    }
 }
+
+void printRealMatrix(double *orthoMatrix, int rows, int cols){
+  int i,j;
+  for(i=0;i<rows;++i){
+    for(j=0;j<cols;++j){
+      printf(" %3.5f ",orthoMatrix[i*cols + j]);
+    }
+    printf("\n");
+  }
+}
--- a/src/csf/tree_utils.h
+++ b/src/csf/tree_utils.h
@ -47,6 +47,7 @@ void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOM
 void getSetBits(int64_t n, int *nsetbits);
 void getOverlapMatrix(int64_t Isomo, int64_t MS, double **overlapMatrixptr, int *rows, int *cols, int *NSOMOout);
 void getOverlapMatrix_withDet(double *bftodetmatrixI, int rowsbftodetI, int colsbftodetI, int64_t Isomo, int64_t MS, double **overlapMatrixI, int *rowsI, int *colsI, int *NSOMO);
+void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix);
 void gramSchmidt(double *overlapMatrix, int rows, int cols, double *orthoMatrix);


--- a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f
+++ b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f
@ -27,7 +27,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_d
  double precision,  intent(in)   :: H_jj(sze),Dress_jj(sze)
  double precision, intent(inout) :: u_in(sze,N_st_diag_in)
  double precision, intent(out)   :: energies(N_st)
-  external hcalc
+  external                        :: hcalc

  integer                        :: iter, N_st_diag
  integer                        :: i,j,k,l,m
@ -207,7 +207,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_d
  enddo
  ! Normalize all states 
  do k=1,N_st_diag
-    call normalize(u_in(1,k),sze)
+    call normalize(u_in(:,k),sze)
  enddo

  ! Copy from the guess input "u_in" to the working vectors "U"
@ -238,10 +238,10 @@ subroutine davidson_general_ext_rout(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_d
          call ortho_qr(U,size(U,1),sze,shift2)
          !    it does W = H U with W(sze,N_st_diag),U(sze,N_st_diag)
          !    where sze is the size of the vector, N_st_diag is the number of states 
-          call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag,sze)
+          call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag,sze)
          ! Compute then the DIAGONAL PART OF THE DRESSING 
          ! <i|W_k> += Dress_jj(i) * <i|U>
-          call dressing_diag_uv(W(1,shift+1),U(1,shift+1),Dress_jj,N_st_diag_in,sze)
+          call dressing_diag_uv(W(:,shift+1),U(:,shift+1),Dress_jj,N_st_diag_in,sze)
      else
         ! Already computed in update below
         continue
@ -303,9 +303,9 @@ subroutine davidson_general_ext_rout(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_d
      ! --------------------------------------------------

      call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
+          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1))
      call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
+          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1))

      ! Compute residual vector and davidson step
      ! -----------------------------------------
@ -319,7 +319,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_d
        enddo

        if (k <= N_st) then
-          residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
+          residual_norm(k) = u_dot_u(U(:,shift2+k),sze)
          to_print(1,k) = lambda(k) 
          to_print(2,k) = residual_norm(k)
        endif
--- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f
+++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f
@ -31,7 +31,8 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
  double precision, intent(inout) :: u_in(sze,N_st_diag)
  double precision, intent(out)   :: energies(N_st_diag)
  logical, intent(out)            :: converged
-  external hcalc
+
+  external                        :: hcalc

  double precision, allocatable   :: H_jj_tmp(:)
  ASSERT (N_st > 0)
@ -224,7 +225,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
    u_in(k,k) = u_in(k,k) + 10.d0
  enddo
  do k=1,N_st_diag_in
-    call normalize(u_in(1,k),sze)
+    call normalize(u_in(:,k),sze)
  enddo

  do k=1,N_st_diag_in
@ -248,10 +249,10 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
      if ((iter > 1).or.(itertot == 1)) then
        ! Compute |W_k> = \sum_i |i><i|H|u_k>
        ! -----------------------------------
-        call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag_in,sze)
+        call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag_in,sze)
        ! Compute then the DIAGONAL PART OF THE DRESSING 
        ! <i|W_k> += Dress_jj(i) * <i|U>
-        call dressing_diag_uv(W(1,shift+1),U(1,shift+1),Dress_jj,N_st_diag_in,sze)
+        call dressing_diag_uv(W(:,shift+1),U(:,shift+1),Dress_jj,N_st_diag_in,sze)
      else
         ! Already computed in update below
         continue
@ -275,20 +276,20 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
 !
 !          call dgemm('T','N', N_st, N_st_diag_in, sze, 1.d0, &
 !            psi_coef, size(psi_coef,1), &
-!            U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+!            U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
 !
 !          call dgemm('N','N', sze, N_st_diag_in, N_st, 1.0d0, &
 !            Dressing_vec, size(Dressing_vec,1), s_tmp, size(s_tmp,1), &
-!            1.d0, W(1,shift+1), size(W,1))
+!            1.d0, W(:,shift+1), size(W,1))
 !
 !
 !          call dgemm('T','N', N_st, N_st_diag_in, sze, 1.d0, &
 !            Dressing_vec, size(Dressing_vec,1), &
-!            U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+!            U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
 !
 !          call dgemm('N','N', sze, N_st_diag_in, N_st, 1.0d0, &
 !            psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), &
-!            1.d0, W(1,shift+1), size(W,1))
+!            1.d0, W(:,shift+1), size(W,1))
 !
        endif

@ -376,9 +377,9 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
      ! --------------------------------------------------

      call dgemm('N','N', sze, N_st_diag_in, shift2,                    &
-          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
+          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1))
      call dgemm('N','N', sze, N_st_diag_in, shift2,                    &
-          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
+          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1))

      ! Compute residual vector and davidson step
      ! -----------------------------------------
@ -392,7 +393,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
        enddo

        if (k <= N_st) then
-          residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
+          residual_norm(k) = u_dot_u(U(:,shift2+k),sze)
          to_print(1,k) = lambda(k) 
          to_print(2,k) = residual_norm(k)
        endif
--- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f
+++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f
@ -214,7 +214,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di
  enddo
  ! Normalize all states 
  do k=1,N_st_diag
-    call normalize(u_in(1,k),sze)
+    call normalize(u_in(:,k),sze)
  enddo
  ! Copy from the guess input "u_in" to the working vectors "U"

@ -244,7 +244,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di
          call ortho_qr(U,size(U,1),sze,shift2)
          !    it does W = H U with W(sze,N_st_diag),U(sze,N_st_diag)
          !    where sze is the size of the vector, N_st_diag is the number of states 
-          call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag,sze)
+          call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag,sze)
      else
         ! Already computed in update below
         continue
@ -268,20 +268,20 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di
          stop
 !          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
 !            psi_coef, size(psi_coef,1), &
-!            U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+!            U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
 !
 !          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
 !            dressing_vec, size(dressing_vec,1), s_tmp, size(s_tmp,1), &
-!            1.d0, W(1,shift+1), size(W,1))
+!            1.d0, W(:,shift+1), size(W,1))
 !
 !
 !          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
 !            dressing_vec, size(dressing_vec,1), &
-!            U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+!            U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
 !
 !          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
 !            psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), &
-!            1.d0, W(1,shift+1), size(W,1))
+!            1.d0, W(:,shift+1), size(W,1))

        endif
      endif
@ -370,9 +370,9 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di
      ! --------------------------------------------------

      call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
+          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1))
      call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
+          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1))

      ! Compute residual vector and davidson step
      ! -----------------------------------------
@ -386,7 +386,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di
        enddo

        if (k <= N_st) then
-          residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
+          residual_norm(k) = u_dot_u(U(:,shift2+k),sze)
          to_print(1,k) = lambda(k) 
          to_print(2,k) = residual_norm(k)
        endif
--- a/src/dav_general_mat/dav_ext_rout.irp.f
+++ b/src/dav_general_mat/dav_ext_rout.irp.f
@ -196,7 +196,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co
  enddo
  ! Normalize all states 
  do k=1,N_st_diag
-    call normalize(u_in(1,k),sze)
+    call normalize(u_in(:,k),sze)
  enddo

  ! Copy from the guess input "u_in" to the working vectors "U"
@ -226,7 +226,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co
          call ortho_qr(U,size(U,1),sze,shift2)
          !    it does W = H U with W(sze,N_st_diag),U(sze,N_st_diag)
          !    where sze is the size of the vector, N_st_diag is the number of states 
-          call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag,sze)
+          call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag,sze)
      else
         ! Already computed in update below
         continue
@ -288,9 +288,9 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co
      ! --------------------------------------------------

      call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
+          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1))
      call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
+          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1))

      ! Compute residual vector and davidson step
      ! -----------------------------------------
@ -304,7 +304,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co
        enddo

        if (k <= N_st) then
-          residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
+          residual_norm(k) = u_dot_u(U(:,shift2+k),sze)
          to_print(1,k) = lambda(k) 
          to_print(2,k) = residual_norm(k)
        endif
--- a/src/dav_general_mat/dav_general.irp.f
+++ b/src/dav_general_mat/dav_general.irp.f
@ -206,7 +206,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
  enddo
  ! Normalize all states 
  do k=1,N_st_diag
-    call normalize(u_in(1,k),sze)
+    call normalize(u_in(:,k),sze)
  enddo

  ! Copy from the guess input "u_in" to the working vectors "U"
@ -236,8 +236,8 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
          call ortho_qr(U,size(U,1),sze,shift2)
          call ortho_qr(U,size(U,1),sze,shift2)

-!            call H_S2_u_0_nstates_openmp(W(1,shift+1),U(1,shift+1),N_st_diag,sze)
-            call hpsi(W(1,shift+1),U(1,shift+1),N_st_diag,sze,h_mat)
+!            call H_S2_u_0_nstates_openmp(W(:,shift+1),U(:,shift+1),N_st_diag,sze)
+            call hpsi(W(:,shift+1),U(:,shift+1),N_st_diag,sze,h_mat)
      else
         ! Already computed in update below
         continue
@ -299,9 +299,9 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
      ! --------------------------------------------------

      call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
+          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1))
      call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
+          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1))

      ! Compute residual vector and davidson step
      ! -----------------------------------------
@ -315,7 +315,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
        enddo

        if (k <= N_st) then
-          residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
+          residual_norm(k) = u_dot_u(U(:,shift2+k),sze)
          to_print(1,k) = lambda(k) 
          to_print(2,k) = residual_norm(k)
        endif
--- a/src/davidson/diagonalization_hcfg.irp.f
+++ b/src/davidson/diagonalization_hcfg.irp.f
@ -0,0 +1,624 @@
+subroutine davidson_diag_h_cfg(dets_in,u_in,dim_in,energies,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Davidson diagonalization.
+  !
+  ! dets_in : bitmasks corresponding to determinants
+  !
+  ! u_in : guess coefficients on the various states. Overwritten
+  !   on exit
+  !
+  ! dim_in : leftmost dimension of u_in
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  END_DOC
+  integer, intent(in)            :: dim_in, sze, sze_csf, N_st, N_st_diag, Nint
+  integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
+  double precision, intent(inout) :: u_in(dim_in,N_st_diag)
+  double precision, intent(out)  :: energies(N_st_diag)
+  integer, intent(in)            :: dressing_state
+  logical, intent(out)           :: converged
+  double precision, allocatable  :: H_jj(:)
+
+  double precision, external     :: diag_H_mat_elem, diag_S_mat_elem
+  integer                        :: i,k
+  ASSERT (N_st > 0)
+  ASSERT (sze > 0)
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+  PROVIDE mo_two_e_integrals_in_map
+  allocate(H_jj(sze))
+
+  H_jj(1) = diag_h_mat_elem(dets_in(1,1,1),Nint)
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP  SHARED(sze,H_jj, dets_in,Nint)                    &
+      !$OMP  PRIVATE(i)
+  !$OMP DO SCHEDULE(static)
+  do i=2,sze
+    H_jj(i)  = diag_H_mat_elem(dets_in(1,1,i),Nint)
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  if (dressing_state > 0) then
+    do k=1,N_st
+      do i=1,sze
+        H_jj(i)  += u_in(i,k) * dressing_column_h(i,k)
+      enddo
+    enddo
+  endif
+
+  call davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged)
+  deallocate(H_jj)
+end
+
+
+subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag_in,Nint,dressing_state,converged)
+  use bitmasks
+  use mmap_module
+  implicit none
+  BEGIN_DOC
+  ! Davidson diagonalization with specific diagonal elements of the H matrix
+  !
+  ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson
+  !
+  ! dets_in : bitmasks corresponding to determinants
+  !
+  ! u_in : guess coefficients on the various states. Overwritten
+  !   on exit
+  !
+  ! dim_in : leftmost dimension of u_in
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > sze
+  !
+  END_DOC
+  integer, intent(in)            :: dim_in, sze, sze_csf, N_st, N_st_diag_in, Nint
+  integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
+  double precision,  intent(in)  :: H_jj(sze)
+  integer, intent(in)            :: dressing_state
+  double precision, intent(inout) :: u_in(dim_in,N_st_diag_in)
+  double precision, intent(out)  :: energies(N_st_diag_in)
+
+  integer                        :: iter, N_st_diag
+  integer                        :: i,j,k,l,m,kk,ii,ll
+  logical, intent(inout)         :: converged
+
+  double precision, external     :: u_dot_v, u_dot_u
+
+  integer                        :: k_pairs, kl
+
+  integer                        :: iter2, itertot
+  double precision, allocatable  :: y(:,:), h(:,:), lambda(:)
+  double precision, allocatable  :: s_tmp(:,:)
+  double precision               :: diag_h_mat_elem
+  double precision, allocatable  :: residual_norm(:)
+  character*(16384)              :: write_buffer
+  double precision               :: to_print(2,N_st)
+  double precision               :: cpu, wall
+  integer                        :: shift, shift2, itermax, istate
+  double precision               :: r1, r2, alpha
+  logical                        :: state_ok(N_st_diag_in*davidson_sze_max)
+  integer                        :: nproc_target
+  integer                        :: order(N_st_diag_in)
+  double precision               :: cmax
+  double precision, allocatable  :: U(:,:), U_csf(:,:), overlap(:,:)
+  double precision, allocatable  :: tmpU(:,:), tmpW(:,:)
+  double precision, pointer      :: W(:,:), W_csf(:,:)
+  logical                        :: disk_based
+  double precision               :: energy_shift(N_st_diag_in*davidson_sze_max)
+
+  include 'constants.include.F'
+
+  N_st_diag = N_st_diag_in
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda
+  if (N_st_diag*3 > sze) then
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_full to ', N_st_diag*3
+    stop -1
+  endif
+
+  itermax = max(2,min(davidson_sze_max, sze/N_st_diag))+1
+  itertot = 0
+
+  if (state_following) then
+    allocate(overlap(N_st_diag*itermax, N_st_diag*itermax))
+  else
+    allocate(overlap(1,1))  ! avoid 'if' for deallocate
+  endif
+  overlap = 0.d0
+
+  PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2 threshold_davidson_from_pt2
+
+  call write_time(6)
+  write(6,'(A)') ''
+  write(6,'(A)') 'Davidson Diagonalization'
+  write(6,'(A)') '------------------------'
+  write(6,'(A)') ''
+
+  ! Find max number of cores to fit in memory
+  ! -----------------------------------------
+
+  nproc_target = nproc
+  double precision :: rss
+  integer :: maxab
+  maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
+
+  m=1
+  disk_based = .False.
+  call resident_memory(rss)
+  do
+    r1 = 8.d0 *                                   &! bytes
+         ( dble(sze)*(N_st_diag)                  &! U
+         + dble(sze_csf)*(N_st_diag*itermax)      &! U_csf
+         + dble(sze)*(N_st_diag)                  &! W
+         + dble(sze_csf)*(N_st_diag*itermax)      &! W_csf
+         + 3.0d0*(N_st_diag*itermax)**2           &! h,y,s_tmp
+         + 1.d0*(N_st_diag*itermax)               &! lambda
+         + 1.d0*(N_st_diag)                       &! residual_norm
+                                                   ! In H_u_0_nstates_zmq
+         + 2.d0*(N_st_diag*N_det)                 &! u_t, v_t, on collector
+         + 2.d0*(N_st_diag*N_det)                 &! u_t, v_t, on slave
+         + 0.5d0*maxab                            &! idx0 in H_u_0_nstates_openmp_work_*
+         + nproc_target *                         &! In OMP section
+           ( 1.d0*(N_int*maxab)                   &! buffer
+           + 3.5d0*(maxab) )                      &! singles_a, singles_b, doubles, idx
+         ) / 1024.d0**3
+
+    if (nproc_target == 0) then
+      call check_mem(r1,irp_here)
+      nproc_target = 1
+      exit
+    endif
+
+    if (r1+rss < qp_max_mem) then
+      exit
+    endif
+
+    if (itermax > 4) then
+      itermax = itermax - 1
+    else if (m==1.and.disk_based_davidson) then
+      m=0
+      disk_based = .True.
+      itermax = 6
+    else
+      nproc_target = nproc_target - 1
+    endif
+
+  enddo
+  nthreads_davidson = nproc_target
+  TOUCH nthreads_davidson
+  call write_int(6,N_st,'Number of states')
+  call write_int(6,N_st_diag,'Number of states in diagonalization')
+  call write_int(6,sze,'Number of determinants')
+  call write_int(6,sze_csf,'Number of CSFs')
+  call write_int(6,nproc_target,'Number of threads for diagonalization')
+  call write_double(6, r1, 'Memory(Gb)')
+  if (disk_based) then
+    print *, 'Using swap space to reduce RAM'
+  endif
+
+  !---------------
+
+  write(6,'(A)') ''
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = 'Iter'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//'       Energy        Residual '
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+
+
+  if (disk_based) then
+    ! Create memory-mapped files for W and S
+    type(c_ptr) :: ptr_w, ptr_s
+    integer :: fd_s, fd_w
+    call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),&
+        8, fd_w, .False., ptr_w)
+    call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/))
+  else
+    allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax))
+  endif
+
+  allocate(                                                          &
+      ! Large
+      U(sze,N_st_diag),                                              &
+      U_csf(sze_csf,N_st_diag*itermax),                              &
+
+      ! Small
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      s_tmp(N_st_diag*itermax,N_st_diag*itermax),                    &
+      residual_norm(N_st_diag),                                      &
+      lambda(N_st_diag*itermax))
+
+
+  h = 0.d0
+  U = 0.d0
+  y = 0.d0
+  s_tmp = 0.d0
+
+
+  ASSERT (N_st > 0)
+  ASSERT (N_st_diag >= N_st)
+  ASSERT (sze > 0)
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+
+  ! Davidson iterations
+  ! ===================
+
+  converged = .False.
+  call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),U_csf(1,1))
+  do k=N_st+1,N_st_diag
+    do i=1,sze_csf
+        call random_number(r1)
+        call random_number(r2)
+        r1 = dsqrt(-2.d0*dlog(r1))
+        r2 = dtwo_pi*r2
+        U_csf(i,k) = r1*dcos(r2) * u_csf(i,k-N_st)
+    enddo
+    U_csf(k,k) = u_csf(k,k) + 10.d0
+  enddo
+  do k=1,N_st_diag
+    call normalize(U_csf(1,k),sze_csf)
+  enddo
+  call convertWFfromCSFtoDET(N_st_diag,U_csf(1,1),U(1,1))
+
+  do while (.not.converged)
+    itertot = itertot+1
+    if (itertot == 8) then
+      exit
+    endif
+
+    do iter=1,itermax-1
+
+      shift  = N_st_diag*(iter-1)
+      shift2 = N_st_diag*iter
+
+!      if ((iter > 1).or.(itertot == 1)) then
+        ! Compute |W_k> = \sum_i |i><i|H|u_k>
+        ! -----------------------------------
+
+        !call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift+1),U)
+        PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals
+        if ((sze > 100000).and.distributed_davidson) then
+            !call H_u_0_nstates_zmq   (W,U,N_st_diag,sze)
+            allocate(tmpW(N_st_diag,sze_csf))
+            allocate(tmpU(N_st_diag,sze_csf))
+            do kk=1,N_st_diag
+              do ii=1,sze_csf
+                tmpU(kk,ii) = U_csf(ii,shift+kk)
+              enddo
+            enddo
+            call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1)
+            do kk=1,N_st_diag
+              do ii=1,sze_csf
+                W_csf(ii,shift+kk)=tmpW(kk,ii)
+              enddo
+            enddo
+            deallocate(tmpW)
+            deallocate(tmpU)
+        else
+            !call H_u_0_nstates_openmp(W,U,N_st_diag,sze)
+            allocate(tmpW(N_st_diag,sze_csf))
+            allocate(tmpU(N_st_diag,sze_csf))
+            do kk=1,N_st_diag
+              do ii=1,sze_csf
+                tmpU(kk,ii) = U_csf(ii,shift+kk)
+              enddo
+            enddo
+            !tmpU     =0.0d0
+            !tmpU(1,2)=1.0d0
+            double precision               :: irp_rdtsc
+            double precision               :: ticks_0, ticks_1
+            integer*8                      :: irp_imax
+            irp_imax = 1
+            !ticks_0 = irp_rdtsc()
+            call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1)
+            !ticks_1 = irp_rdtsc()
+            !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----"
+            do kk=1,N_st_diag
+              do ii=1,sze_csf
+                W_csf(ii,shift+kk)=tmpW(kk,ii)
+              enddo
+            enddo
+
+            !U_csf = 0.0d0
+            !U_csf(1,1) = 1.0d0
+            !u_in = 0.0d0
+            !call convertWFfromCSFtoDET(N_st_diag,tmpU,U2)
+            !call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze)
+            !call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1))
+            !do i=1,sze_csf
+            !  print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+            !  if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then
+            !    print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+            !  endif
+            !end do
+            !stop
+            deallocate(tmpW)
+            deallocate(tmpU)
+        endif
+!      else
+!         ! Already computed in update below
+!         continue
+!      endif
+
+      if (dressing_state > 0) then
+
+        if (N_st == 1) then
+
+          l = dressed_column_idx(1)
+          double precision :: f
+          f = 1.0d0/psi_coef(l,1)
+          do istate=1,N_st_diag
+            do i=1,sze
+              W(i,istate) += dressing_column_h(i,1) *f * U(l,istate)
+              W(l,istate) += dressing_column_h(i,1) *f * U(i,istate)
+            enddo
+
+          enddo
+
+        else
+
+          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
+            psi_coef, size(psi_coef,1), &
+            U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+
+          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
+            dressing_column_h, size(dressing_column_h,1), s_tmp, size(s_tmp,1), &
+            1.d0, W(1,1), size(W,1))
+
+
+          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
+            dressing_column_h, size(dressing_column_h,1), &
+            U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+
+          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
+            psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), &
+            1.d0, W(1,1), size(W,1))
+
+        endif
+      endif
+
+      !call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1))
+
+      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
+      ! -------------------------------------------
+
+      call dgemm('T','N', shift2, shift2, sze_csf,                       &
+          1.d0, U_csf, size(U_csf,1), W_csf, size(W_csf,1),                          &
+          0.d0, h, size(h,1))
+      call dgemm('T','N', shift2, shift2, sze_csf,                       &
+          1.d0, U_csf, size(U_csf,1), U_csf, size(U_csf,1),                          &
+          0.d0, s_tmp, size(s_tmp,1))
+
+      ! Diagonalize h
+      ! ---------------
+
+       integer :: lwork, info
+       double precision, allocatable :: work(:)
+
+       y = h
+       lwork = -1
+       allocate(work(1))
+       call dsygv(1,'V','U',shift2,y,size(y,1), &
+          s_tmp,size(s_tmp,1), lambda, work,lwork,info)
+       lwork = int(work(1))
+       deallocate(work)
+       allocate(work(lwork))
+       call dsygv(1,'V','U',shift2,y,size(y,1), &
+          s_tmp,size(s_tmp,1), lambda, work,lwork,info)
+       deallocate(work)
+       if (info /= 0) then
+         stop 'DSYGV Diagonalization failed'
+       endif
+
+      ! Compute Energy for each eigenvector
+      ! -----------------------------------
+
+      call dgemm('N','N',shift2,shift2,shift2,                       &
+          1.d0, h, size(h,1), y, size(y,1),                          &
+          0.d0, s_tmp, size(s_tmp,1))
+
+      call dgemm('T','N',shift2,shift2,shift2,                       &
+          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+          0.d0, h, size(h,1))
+
+      do k=1,shift2
+        lambda(k) = h(k,k)
+      enddo
+
+      if (state_following) then
+
+        overlap = -1.d0
+        do i=1,shift2
+          do k=1,shift2
+            overlap(k,i) = dabs(y(k,i))
+          enddo
+        enddo
+        do k=1,N_st
+          cmax = -1.d0
+          do i=1,N_st
+            if (overlap(i,k) > cmax) then
+              cmax = overlap(i,k)
+              order(k) = i
+            endif
+          enddo
+          do i=1,N_st_diag
+            overlap(order(k),i) = -1.d0
+          enddo
+        enddo
+        overlap = y
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            y(1:shift2,k) = overlap(1:shift2,l)
+          endif
+        enddo
+        do k=1,N_st
+          overlap(k,1) = lambda(k)
+        enddo
+
+      endif
+
+
+      ! Express eigenvectors of h in the csf basis
+      ! ------------------------------------------
+
+      call dgemm('N','N', sze_csf, N_st_diag, shift2,                    &
+          1.d0, U_csf, size(U_csf,1), y, size(y,1), 0.d0, U_csf(1,shift2+1), size(U_csf,1))
+      call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift2+1),U)
+
+      call dgemm('N','N', sze_csf, N_st_diag, shift2,                    &
+          1.d0, W_csf, size(W_csf,1), y, size(y,1), 0.d0, W_csf(1,shift2+1), size(W_csf,1))
+      call convertWFfromCSFtoDET(N_st_diag,W_csf(1,shift2+1),W)
+
+      ! Compute residual vector and davidson step
+      ! -----------------------------------------
+
+      !if (without_diagonal) then
+      !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
+      do k=1,N_st_diag
+        do i=1,sze
+           U(i,k) =  (lambda(k) * U(i,k) - W(i,k) )      &
+              /max(H_jj(i) - lambda (k),1.d-2)
+        enddo
+      enddo
+      !$OMP END PARALLEL DO
+      !else
+      !  !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
+      !  do k=1,N_st_diag
+      !    do i=1,sze
+      !       U(i,k) =  (lambda(k) * U(i,k) - W(i,k) )  
+      !    enddo
+      !  enddo
+      !  !$OMP END PARALLEL DO
+      !endif
+
+      do k=1,N_st
+        residual_norm(k) = u_dot_u(U(1,k),sze)
+        to_print(1,k) = lambda(k) + nuclear_repulsion
+        to_print(2,k) = residual_norm(k)
+      enddo
+      call convertWFfromDETtoCSF(N_st_diag,U,U_csf(1,shift2+1))
+
+      if ((itertot>1).and.(iter == 1)) then
+        !don't print
+        continue
+      else
+        write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st)
+      endif
+
+      ! Check convergence
+      if (iter > 1) then
+        if (threshold_davidson_from_pt2) then
+          converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2
+        else
+          converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson
+        endif
+      endif
+
+      do k=1,N_st
+        if (residual_norm(k) > 1.d8) then
+          print *, 'Davidson failed'
+          stop -1
+        endif
+      enddo
+      if (converged) then
+        exit
+      endif
+
+      logical, external :: qp_stop
+      if (qp_stop()) then
+        converged = .True.
+        exit
+      endif
+
+
+    enddo
+
+    ! Re-contract U
+    ! -------------
+
+    call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0,      &
+        W_csf, size(W_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
+    do k=1,N_st_diag
+      do i=1,sze_csf
+        W_csf(i,k) = u_in(i,k)
+      enddo
+    enddo
+    call convertWFfromCSFtoDET(N_st_diag,W_csf,W)
+
+    call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0,      &
+        U_csf, size(U_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
+    do k=1,N_st_diag
+      do i=1,sze_csf
+        U_csf(i,k) = u_in(i,k)
+      enddo
+    enddo
+    call convertWFfromCSFtoDET(N_st_diag,U_csf,U)
+
+  enddo
+
+
+  call nullify_small_elements(sze,N_st_diag,U,size(U,1),threshold_davidson_pt2)
+  do k=1,N_st_diag
+    do i=1,sze
+      u_in(i,k) = U(i,k)
+    enddo
+  enddo
+
+  do k=1,N_st_diag
+    energies(k) = lambda(k)
+  enddo
+  write_buffer = '======'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ ==========='
+  enddo
+  write(6,'(A)') trim(write_buffer)
+  write(6,'(A)') ''
+  call write_time(6)
+
+  if (disk_based)then
+    ! Remove temp files
+    integer, external :: getUnitAndOpen
+    call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 8, fd_w, ptr_w )
+    fd_w = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_w','r')
+    close(fd_w,status='delete')
+  else
+    deallocate(W, W_csf)
+  endif
+
+  deallocate (                                                       &
+      residual_norm,                                                 &
+      U, U_csf, overlap,                                             &
+      h, y, s_tmp,                                                   &
+      lambda                                                         &
+      )
+  FREE nthreads_davidson
+end
+
+
+
+
+
+
+
--- a/src/davidson/diagonalization_hcsf_dressed.irp.f
+++ b/src/davidson/diagonalization_hcsf_dressed.irp.f
@ -89,7 +89,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
  double precision, intent(out)  :: energies(N_st_diag_in)

  integer                        :: iter, N_st_diag
-  integer                        :: i,j,k,l,m
+  integer                        :: i,j,k,l,m,kk
  logical, intent(inout)         :: converged

  double precision, external     :: u_dot_v, u_dot_u
--- a/src/davidson/diagonalization_hs2_dressed.irp.f
+++ b/src/davidson/diagonalization_hs2_dressed.irp.f
@ -154,7 +154,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
  character*(16384)              :: write_buffer
  double precision               :: to_print(3,N_st)
  double precision               :: cpu, wall
-  integer                        :: shift, shift2, itermax, istate
+  integer                        :: shift, shift2, itermax, istate, ii
  double precision               :: r1, r2, alpha
  logical                        :: state_ok(N_st_diag_in*davidson_sze_max)
  integer                        :: nproc_target
@ -361,7 +361,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
        if ((sze > 100000).and.distributed_davidson) then
            call H_S2_u_0_nstates_zmq   (W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze)
        else
+            double precision               :: irp_rdtsc
+            double precision               :: ticks_0, ticks_1
+            integer*8                      :: irp_imax
+            irp_imax = 1
+            !ticks_0 = irp_rdtsc()
            call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze)
+            !ticks_1 = irp_rdtsc()
+            !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----"
        endif
        S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag))
      else
--- a/src/davidson/diagonalize_ci.irp.f
+++ b/src/davidson/diagonalize_ci.irp.f
@ -1,9 +1,20 @@
+BEGIN_PROVIDER [ character*(3), sigma_vector_algorithm ]
+ implicit none
+ BEGIN_DOC
+ ! If 'det', use <Psi_det|H|Psi_det> in Davidson
+ !
+ ! If 'cfg', use <Psi_csf|H|Psi_csf> in Davidson
+ END_DOC
+ sigma_vector_algorithm = 'det'
+ !sigma_vector_algorithm = 'cfg'
+END_PROVIDER

 BEGIN_PROVIDER [ double precision, CI_energy, (N_states_diag) ]
  implicit none
  BEGIN_DOC
  ! :c:data:`n_states` lowest eigenvalues of the |CI| matrix
  END_DOC
+  PROVIDE distributed_davidson

  integer                        :: j
  character*(8)                  :: st
@ -61,7 +72,7 @@ END_PROVIDER
   if (diag_algorithm == 'Davidson') then

     if (do_csf) then
-!      if (sigma_vector_algorithm == 'det') then
+       if (sigma_vector_algorithm == 'det') then
         call davidson_diag_H_csf (psi_det,                  &
                                   CI_eigenvectors,          &
                                   size(CI_eigenvectors,1),  &
@ -73,14 +84,14 @@ END_PROVIDER
                                   N_int,                    &
                                   0,                        &
                                   converged)
-!       else if (sigma_vector_algorithm == 'cfg') then
-!       call davidson_diag_H_csf(psi_det,CI_eigenvectors, &
-!         size(CI_eigenvectors,1),CI_electronic_energy,               &
-!         N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged)
-!       else
-!         print *, irp_here
-!         stop 'bug'
-!      endif
+       else if (sigma_vector_algorithm == 'cfg') then
+          call davidson_diag_H_cfg(psi_det,CI_eigenvectors, &
+          size(CI_eigenvectors,1),CI_electronic_energy,               &
+          N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged)
+       else
+          print *, irp_here
+          stop 'bug'
+       endif
     else
       call davidson_diag_HS2(psi_det,                  &
                              CI_eigenvectors,          &
--- a/src/determinants/EZFIO.cfg
+++ b/src/determinants/EZFIO.cfg
@ -136,9 +136,8 @@ doc: If |true|, discard any Slater determinants with an interaction smaller than
 interface: ezfio,provider,ocaml
 default: False

-
-[thresh_save_wf]
+[save_threshold]
 type: Threshold
-doc: Thresholds to save wave function
+doc: Cut-off to apply to the CI coefficients when the wave function is stored
 interface: ezfio,provider,ocaml
-default: 1.e-15
+default: 1.e-14
--- a/src/mo_localization/EZFIO.cfg
+++ b/src/mo_localization/EZFIO.cfg
@ -0,0 +1,54 @@
+[localization_method]
+type: character*(32)
+doc: Method for the orbital localization. boys : Foster-Boys, pipek : Pipek-Mezey.
+interface: ezfio,provider,ocaml
+default: boys
+
+[localization_max_nb_iter]
+type: integer
+doc: Maximal number of iterations for the orbital localization.
+interface: ezfio,provider,ocaml
+default: 1000
+
+[localization_use_hessian]
+type: logical
+doc: If true, it uses the trust region algorithm with the gradient and the diagonal of the hessian. Else it computes the rotation between each pair of MOs that should be applied to maximize/minimize the localization criterion. The last option requieres a way smaller amount of memory but is not easy to converge. 
+interface: ezfio,provider,ocaml
+default: true
+
+[security_mo_class]
+type: logical
+doc: If true, call abort if the number of active orbital or the number of core + active orbitals is equal to the number of molecular orbitals, else uses the actual mo_class. It is a security if you forget to set the mo_class before the localization.
+interface: ezfio,provider,ocaml
+default: true
+
+[thresh_loc_max_elem_grad]
+type: double precision
+doc: Threshold for the convergence, the localization exits when the largest element in the gradient is smaller than thresh_localization_max_elem_grad.
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[kick_in_mos]
+type: logical
+doc: If True, it applies a rotation of an angle angle_pre_rot between the MOs of a same mo_class before the localization.
+interface: ezfio,provider,ocaml
+default: true
+
+[angle_pre_rot]
+type: double precision
+doc: To define the angle for the rotation of the MOs before the localization (in rad).
+interface: ezfio,provider,ocaml
+default: 0.1
+
+[sort_mos_by_e]
+type: logical
+doc: If True, the MOs are sorted using the diagonal elements of the Fock matrix.
+interface: ezfio,provider,ocaml
+default: false
+
+[debug_hf]
+type: logical
+doc: If True, prints the HF energy before/after the different steps of the localization. Only for debugging.
+interface: ezfio,provider,ocaml
+default: false
+
--- a/src/mo_localization/NEED
+++ b/src/mo_localization/NEED
@ -0,0 +1,2 @@
+hartree_fock
+utils_trust_region
--- a/src/mo_localization/README.md
+++ b/src/mo_localization/README.md
@ -0,0 +1,108 @@
+# mo_localization
+
+Some parameters can be changed with qp edit in the mo_localization section
+(cf below). Similarly for the trust region parameters in the 
+utils_trust_region section. The localization without the trust region 
+is not available for the moment.  
+
+The irf.f files can be generated from the org ones using emacs. 
+If you modify the .org files, don't forget to do (you need emacs):  
+``` 
+./TANGLE_org_mode.sh  
+ninja  
+```  
+
+# Orbital localisation
+To localize the MOs:  
+```
+qp run localization  
+```
+After that the ezfio directory contains the localized MOs  
+ 
+But to do so the mo_class must be defined before, run 
+```
+qp set_mo_class -q
+```
+for more information or  
+```
+qp set_mo_class -c [] -a [] -v [] -i [] -d [] 
+```
+to set the mo classes. We don't care about the name of the   
+mo classes. The algorithm just localizes all the MOs of  
+a given class between them, for all the classes, except the deleted MOs.  
+
+If you just on kind of mo class to localize all the MOs between them  
+you have to put:
+```
+qp set mo_localization security_mo_class false
+```
+
+Before the localization, a kick is done for each mo class  
+(except the deleted ones) to break the MOs. This is done by   
+doing a rotation between the MOs.
+This feature can be removed by setting:
+```
+qp set mo_localization kick_in_mos false
+```
+and the default angle for the rotation can be changed with:
+```
+qp set mo_localization angle_pre_rot 1e-3 # or something else
+```
+
+After the localization, the MOs of each class (except the deleted ones)  
+can be sorted between them using the diagonal elements of  
+the fock matrix with:
+```
+qp set mo_localization sort_mos_by_e true
+```
+
+You can check the Hartree-Fock energy before/during/after the localization  
+by putting (only for debugging):
+```
+qp set mo_localization debug_hf true 
+```
+
+## Foster-Boys & Pipek-Mezey
+Foster-Boys:  
+``` 
+qp set mo_localization localization_method boys 
+``` 
+ 
+Pipek-Mezey:  
+``` 
+qp set mo_localization localization_method pipek 
+``` 
+
+# Break the spatial symmetry of the MOs
+To break the spatial symmetry of the MOs:   
+```
+qp run break_spatial_sym
+```
+The default angle for the rotations is too big for this kind of
+application, a value between 1e-3 and 1e-6 should break the spatial
+symmetry with just a small change in the energy:
+```
+qp set mo_localization angle_pre_rot 1e-3
+``` 
+
+# With or without hessian + trust region
+With hessian +  trust region
+```
+qp set mo_localization localisation_use_hessian true
+```
+It uses the trust region algorithm with the diagonal of the hessian of the
+localization criterion with respect to the MO rotations.
+
+Without the hessian and the trust region
+```
+qp set mo_localization localisation_use_hessian false
+```
+By doing so it does not require to store the hessian but the
+convergence is not easy, in particular for virtual MOs.
+It seems that it not possible to converge with Pipek-Mezey
+localization with this approach.
+
+# Further improvements: 
+- Cleaner repo 
+- Correction of the errors in the documentations 
+- option with/without trust region 
--- a/src/mo_localization/TANGLE_org_mode.sh
+++ b/src/mo_localization/TANGLE_org_mode.sh
@ -0,0 +1,7 @@
+#!/bin/sh   
+
+list='ls *.org'
+for element in $list    
+do   
+		emacs --batch $element -f org-babel-tangle
+done
--- a/src/mo_localization/break_spatial_sym.irp.f
+++ b/src/mo_localization/break_spatial_sym.irp.f
@ -0,0 +1,42 @@
+! ! A small program to break the spatial symmetry of the MOs.
+
+! ! You have to defined your MO classes or set security_mo_class to false
+! ! with:
+! ! qp set orbital_optimization security_mo_class false
+
+! ! The default angle for the rotations is too big for this kind of
+! ! application, a value between 1e-3 and 1e-6 should break the spatial
+! ! symmetry with just a small change in the energy. 
+
+
+program break_spatial_sym
+
+  !BEGIN_DOC
+  ! Break the symmetry of the MOs with a rotation
+  !END_DOC
+
+  implicit none
+
+  kick_in_mos = .True.
+  TOUCH kick_in_mos
+
+  print*, 'Security mo_class:', security_mo_class
+
+  ! The default mo_classes are setted only if the MOs to localize are not specified
+  if (security_mo_class .and. (dim_list_act_orb == mo_num .or. &
+      dim_list_core_orb + dim_list_act_orb == mo_num)) then
+
+    print*, 'WARNING'
+    print*, 'You must set different mo_class with qp set_mo_class'
+    print*, 'If you want to kick all the orbitals:'
+    print*, 'qp set orbital_optimization security_mo_class false'
+    print*, ''
+    print*, 'abort'
+
+    call abort
+  
+  endif
+  
+  call apply_pre_rotation
+  
+end
--- a/src/mo_localization/break_spatial_sym.org
+++ b/src/mo_localization/break_spatial_sym.org
@ -0,0 +1,43 @@
+! A small program to break the spatial symmetry of the MOs.
+
+! You have to defined your MO classes or set security_mo_class to false
+! with:
+! qp set orbital_optimization security_mo_class false
+
+! The default angle for the rotations is too big for this kind of
+! application, a value between 1e-3 and 1e-6 should break the spatial
+! symmetry with just a small change in the energy. 
+
+#+BEGIN_SRC f90 :comments org :tangle break_spatial_sym.irp.f
+program break_spatial_sym
+
+  !BEGIN_DOC
+  ! Break the symmetry of the MOs with a rotation
+  !END_DOC
+
+  implicit none
+
+  kick_in_mos = .True.
+  TOUCH kick_in_mos
+
+  print*, 'Security mo_class:', security_mo_class
+
+  ! The default mo_classes are setted only if the MOs to localize are not specified
+  if (security_mo_class .and. (dim_list_act_orb == mo_num .or. &
+      dim_list_core_orb + dim_list_act_orb == mo_num)) then
+
+    print*, 'WARNING'
+    print*, 'You must set different mo_class with qp set_mo_class'
+    print*, 'If you want to kick all the orbitals:'
+    print*, 'qp set orbital_optimization security_mo_class false'
+    print*, ''
+    print*, 'abort'
+
+    call abort
+  
+  endif
+  
+  call apply_pre_rotation
+  
+end
+#+END_SRC
--- a/src/mo_localization/debug_gradient_loc.irp.f
+++ b/src/mo_localization/debug_gradient_loc.irp.f
@ -0,0 +1,62 @@
+program debug_gradient_loc
+
+  !BEGIN_DOC
+  ! Check if the gradient is correct
+  !END_DOC
+
+  implicit none
+
+  integer :: list_size, n
+  integer, allocatable :: list(:)
+  double precision, allocatable :: v_grad(:), v_grad2(:)
+  double precision :: norm, max_elem, threshold, max_error
+  integer :: i, nb_error
+
+  threshold = 1d-12
+
+  list = list_act
+  list_size = dim_list_act_orb
+
+  n = list_size*(list_size-1)/2
+  
+  allocate(v_grad(n),v_grad2(n))
+
+  if (localization_method == 'boys') then
+    print*,'Foster-Boys'
+    call gradient_FB(n,list_size,list,v_grad,max_elem,norm)
+    call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm)
+  elseif (localization_method == 'pipek') then
+    print*,'Pipek-Mezey'
+    call gradient_PM(n,list_size,list,v_grad,max_elem,norm)
+    call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) 
+  else
+    print*,'Unknown localization_method, please select boys or pipek'
+    call abort
+  endif
+ 
+  do i = 1, n
+    print*,i,v_grad(i)
+  enddo
+
+  v_grad = v_grad - v_grad2
+
+  nb_error = 0
+  max_elem = 0d0
+
+  do i = 1, n
+    if (dabs(v_grad(i)) > threshold) then
+      print*,v_grad(i)
+      nb_error = nb_error + 1
+      if (dabs(v_grad(i)) > max_elem) then
+        max_elem = v_grad(i)
+      endif
+    endif
+  enddo
+
+  print*,'Threshold error', threshold
+  print*, 'Nb error', nb_error
+  print*,'Max error', max_elem
+
+  deallocate(v_grad,v_grad2)
+ 
+end
--- a/src/mo_localization/debug_gradient_loc.org
+++ b/src/mo_localization/debug_gradient_loc.org
@ -0,0 +1,64 @@
+#+BEGIN_SRC f90 :comments org :tangle debug_gradient_loc.irp.f
+program debug_gradient_loc
+
+  !BEGIN_DOC
+  ! Check if the gradient is correct
+  !END_DOC
+
+  implicit none
+
+  integer :: list_size, n
+  integer, allocatable :: list(:)
+  double precision, allocatable :: v_grad(:), v_grad2(:)
+  double precision :: norm, max_elem, threshold, max_error
+  integer :: i, nb_error
+
+  threshold = 1d-12
+
+  list = list_act
+  list_size = dim_list_act_orb
+
+  n = list_size*(list_size-1)/2
+  
+  allocate(v_grad(n),v_grad2(n))
+
+  if (localization_method == 'boys') then
+    print*,'Foster-Boys'
+    call gradient_FB(n,list_size,list,v_grad,max_elem,norm)
+    call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm)
+  elseif (localization_method == 'pipek') then
+    print*,'Pipek-Mezey'
+    call gradient_PM(n,list_size,list,v_grad,max_elem,norm)
+    call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) 
+  else
+    print*,'Unknown localization_method, please select boys or pipek'
+    call abort
+  endif
+ 
+  do i = 1, n
+    print*,i,v_grad(i)
+  enddo
+
+  v_grad = v_grad - v_grad2
+
+  nb_error = 0
+  max_elem = 0d0
+
+  do i = 1, n
+    if (dabs(v_grad(i)) > threshold) then
+      print*,v_grad(i)
+      nb_error = nb_error + 1
+      if (dabs(v_grad(i)) > max_elem) then
+        max_elem = v_grad(i)
+      endif
+    endif
+  enddo
+
+  print*,'Threshold error', threshold
+  print*, 'Nb error', nb_error
+  print*,'Max error', max_elem
+
+  deallocate(v_grad,v_grad2)
+ 
+end
+#+END_SRC
--- a/src/mo_localization/debug_hessian_loc.irp.f
+++ b/src/mo_localization/debug_hessian_loc.irp.f
@ -0,0 +1,62 @@
+program debug_hessian_loc
+
+  !BEGIN_DOC
+  ! Check if the hessian is correct
+  !END_DOC
+  
+  implicit none
+
+  integer :: list_size, n
+  integer, allocatable :: list(:)
+  double precision, allocatable :: H(:,:), H2(:,:)
+  double precision :: threshold, max_error, max_elem
+  integer :: i, nb_error
+
+  threshold = 1d-12
+
+  list = list_act
+  list_size = dim_list_act_orb
+
+  n = list_size*(list_size-1)/2
+  
+  allocate(H(n,n),H2(n,n))
+
+  if (localization_method == 'boys') then
+    print*,'Foster-Boys'
+    call hessian_FB(n,list_size,list,H)
+    call hessian_FB_omp(n,list_size,list,H2)
+  elseif(localization_method == 'pipek') then
+    print*,'Pipek-Mezey'
+    call hessian_PM(n,list_size,list,H)
+    call hessian_PM(n,list_size,list,H2)
+  else
+    print*,'Unknown localization_method, please select boys or pipek'
+    call abort
+  endif
+ 
+  do i = 1, n
+    print*,i,H(i,i)
+  enddo
+
+  H = H - H2
+
+  nb_error = 0
+  max_elem = 0d0
+
+  do i = 1, n
+    if (dabs(H(i,i)) > threshold) then
+      print*,H(i,i)
+      nb_error = nb_error + 1
+      if (dabs(H(i,i)) > max_elem) then
+        max_elem = H(i,i)
+      endif
+    endif
+  enddo
+
+  print*,'Threshold error', threshold
+  print*, 'Nb error', nb_error
+  print*,'Max error', max_elem
+
+  deallocate(H,H2)
+  
+end
--- a/src/mo_localization/debug_hessian_loc.org
+++ b/src/mo_localization/debug_hessian_loc.org
@ -0,0 +1,64 @@
+#+BEGIN_SRC f90 :comments org :tangle debug_hessian_loc.irp.f
+program debug_hessian_loc
+
+  !BEGIN_DOC
+  ! Check if the hessian is correct
+  !END_DOC
+  
+  implicit none
+
+  integer :: list_size, n
+  integer, allocatable :: list(:)
+  double precision, allocatable :: H(:,:), H2(:,:)
+  double precision :: threshold, max_error, max_elem
+  integer :: i, nb_error
+
+  threshold = 1d-12
+
+  list = list_act
+  list_size = dim_list_act_orb
+
+  n = list_size*(list_size-1)/2
+  
+  allocate(H(n,n),H2(n,n))
+
+  if (localization_method == 'boys') then
+    print*,'Foster-Boys'
+    call hessian_FB(n,list_size,list,H)
+    call hessian_FB_omp(n,list_size,list,H2)
+  elseif(localization_method == 'pipek') then
+    print*,'Pipek-Mezey'
+    call hessian_PM(n,list_size,list,H)
+    call hessian_PM(n,list_size,list,H2)
+  else
+    print*,'Unknown localization_method, please select boys or pipek'
+    call abort
+  endif
+ 
+  do i = 1, n
+    print*,i,H(i,i)
+  enddo
+
+  H = H - H2
+
+  nb_error = 0
+  max_elem = 0d0
+
+  do i = 1, n
+    if (dabs(H(i,i)) > threshold) then
+      print*,H(i,i)
+      nb_error = nb_error + 1
+      if (dabs(H(i,i)) > max_elem) then
+        max_elem = H(i,i)
+      endif
+    endif
+  enddo
+
+  print*,'Threshold error', threshold
+  print*, 'Nb error', nb_error
+  print*,'Max error', max_elem
+
+  deallocate(H,H2)
+  
+end
+#+END_SRC
--- a/src/mo_localization/kick_the_mos.irp.f
+++ b/src/mo_localization/kick_the_mos.irp.f
@ -0,0 +1,31 @@
+program kick_the_mos
+
+  !BEGIN_DOC
+  ! To do a small rotation of the MOs
+  !END_DOC
+
+  implicit none
+
+  kick_in_mos = .True.
+  TOUCH kick_in_mos
+
+  print*, 'Security mo_class:', security_mo_class
+
+  ! The default mo_classes are setted only if the MOs to localize are not specified
+  if (security_mo_class .and. (dim_list_act_orb == mo_num .or. &
+      dim_list_core_orb + dim_list_act_orb == mo_num)) then
+
+    print*, 'WARNING'
+    print*, 'You must set different mo_class with qp set_mo_class'
+    print*, 'If you want to kick all the orbital:'
+    print*, 'qp set Orbital_optimization security_mo_class false'
+    print*, ''
+    print*, 'abort'
+
+    call abort
+  
+  endif
+  
+  call apply_pre_rotation
+  
+end
--- a/src/mo_localization/kick_the_mos.org
+++ b/src/mo_localization/kick_the_mos.org
@ -0,0 +1,33 @@
+#+BEGIN_SRC f90 :comments org :tangle kick_the_mos.irp.f
+program kick_the_mos
+
+  !BEGIN_DOC
+  ! To do a small rotation of the MOs
+  !END_DOC
+
+  implicit none
+
+  kick_in_mos = .True.
+  TOUCH kick_in_mos
+
+  print*, 'Security mo_class:', security_mo_class
+
+  ! The default mo_classes are setted only if the MOs to localize are not specified
+  if (security_mo_class .and. (dim_list_act_orb == mo_num .or. &
+      dim_list_core_orb + dim_list_act_orb == mo_num)) then
+
+    print*, 'WARNING'
+    print*, 'You must set different mo_class with qp set_mo_class'
+    print*, 'If you want to kick all the orbital:'
+    print*, 'qp set Orbital_optimization security_mo_class false'
+    print*, ''
+    print*, 'abort'
+
+    call abort
+  
+  endif
+  
+  call apply_pre_rotation
+  
+end
+#+END_SRC
--- a/src/mo_localization/localization.irp.f
+++ b/src/mo_localization/localization.irp.f
@ -0,0 +1,531 @@
+program localization
+ call run_localization
+end
+
+
+
+
+! Variables:
+! | pre_rot(mo_num, mo_num)   | double precision | Matrix for the pre rotation                       |
+! | R(mo_num,mo_num)          | double precision | Rotation matrix                                   |
+! | tmp_R(:,:)                | double precision | Rottation matrix in a subsapce                    |
+! | prev_mos(ao_num, mo_num)  | double precision | Previous mo_coef                                  |
+! | spatial_extent(mo_num)    | double precision | Spatial extent of the orbitals                    |
+! | criterion                 | double precision | Localization criterion                            |
+! | prev_criterion            | double precision | Previous criterion                                |
+! | criterion_model           | double precision | Estimated next criterion                          |
+! | rho                       | double precision | Ratio to measure the agreement between the model  |
+! |                           |                  | and the reality                                   |
+! | delta                     | double precision | Radisu of the trust region                        |
+! | norm_grad                 | double precision | Norm of the gradient                              |
+! | info                      | integer          | for dsyev from Lapack                             |
+! | max_elem                  | double precision | maximal element in the gradient                   |
+! | v_grad(:)                 | double precision | Gradient                                          |
+! | H(:,:)                    | double precision | Hessian (diagonal)                                |
+! | e_val(:)                  | double precision | Eigenvalues of the hessian                        |
+! | W(:,:)                    | double precision | Eigenvectors of the hessian                       |
+! | tmp_x(:)                  | double precision | Step in 1D (in a subaspace)                       |
+! | tmp_m_x(:,:)              | double precision | Step in 2D (in a subaspace)                       |
+! | tmp_list(:)               | double precision | List of MOs in a mo_class                         |
+! | i,j,k                     | integer          | Indexes in the full MO space                      |
+! | tmp_i, tmp_j, tmp_k       | integer          | Indexes in a subspace                             |
+! | l                         | integer          | Index for the mo_class                            |
+! | key(:)                    | integer          | Key to sort the eigenvalues of the hessian        |
+! | nb_iter                   | integer          | Number of iterations                              |
+! | must_exit                 | logical          | To exit the trust region loop                     |
+! | cancel_step               | logical          | To cancel a step                                  |
+! | not_*converged            | logical          | To localize the different mo classes              |
+! | t*                        | double precision | To measure the time                               |
+! | n                         | integer          | mo_num*(mo_num-1)/2, number of orbital parameters |
+! | tmp_n                     | integer          | dim_subspace*(dim_subspace-1)/2                   |
+! |                           |                  | Number of dimension in the subspace               |
+
+! Variables in qp_edit for the localization:
+! | localization_method      |
+! | localization_max_nb_iter |
+! | default_mo_class         |
+! | thresh_loc_max_elem_grad |
+! | kick_in_mos              |
+! | angle_pre_rot            |
+
+! + all the variables for the trust region
+
+! Cf. qp_edit orbital optimization
+
+
+subroutine run_localization
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Orbital localization
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  double precision, allocatable :: pre_rot(:,:), R(:,:)
+  double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:)
+  double precision              :: criterion, norm_grad
+  integer                       :: i,j,k,l,p, tmp_i, tmp_j, tmp_k
+  integer                       :: info
+  integer                       :: n, tmp_n, tmp_list_size
+  double precision, allocatable :: v_grad(:), H(:,:), tmp_m_x(:,:), tmp_x(:),W(:,:),e_val(:)
+  double precision              :: max_elem, t1, t2, t3, t4, t5, t6
+  integer, allocatable          :: tmp_list(:), key(:)
+  double precision              :: prev_criterion, rho, delta, criterion_model
+  integer                       :: nb_iter, nb_sub_iter
+  logical                       :: not_converged, not_core_converged
+  logical                       :: not_act_converged, not_inact_converged, not_virt_converged
+  logical                       :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation
+
+  n = mo_num*(mo_num-1)/2
+
+  ! Allocation
+  allocate(spatial_extent(mo_num))
+  allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num))
+  allocate(prev_mos(ao_num, mo_num))
+
+  ! Locality before the localization
+  call compute_spatial_extent(spatial_extent)
+
+  ! Choice of the method (with qp_edit)
+  print*,''
+  print*,'Localization method:',localization_method
+  if (localization_method == 'boys') then
+    print*,'Foster-Boys localization'
+  elseif (localization_method == 'pipek') then
+    print*,'Pipek-Mezey localization'
+  else
+    print*,'Unknown localization_method, please select boys or pipek'
+    call abort
+  endif
+  print*,''
+
+  ! Localization criterion (FB, PM, ...) for each mo_class
+  print*,'### Before the pre rotation'
+  
+  ! Debug
+  if (debug_hf) then
+    print*,'HF energy:', HF_energy
+  endif
+  
+  do l = 1, 4
+    if (l==1) then ! core
+      tmp_list_size = dim_list_core_orb
+    elseif (l==2) then ! act
+      tmp_list_size = dim_list_act_orb
+    elseif (l==3) then ! inact
+      tmp_list_size = dim_list_inact_orb
+    else ! virt
+      tmp_list_size = dim_list_virt_orb
+    endif
+
+     ! Allocation tmp array  
+    allocate(tmp_list(tmp_list_size))
+
+    ! To give the list of MOs in a mo_class
+    if (l==1) then ! core
+      tmp_list = list_core
+    elseif (l==2) then
+      tmp_list = list_act
+    elseif (l==3) then
+      tmp_list = list_inact
+    else
+      tmp_list = list_virt
+    endif
+
+    if (tmp_list_size >= 2) then
+      call criterion_localization(tmp_list_size, tmp_list,criterion)
+      print*,'Criterion:', criterion, mo_class(tmp_list(1))
+    endif
+
+    deallocate(tmp_list)    
+
+  enddo
+
+  ! Debug
+  !print*,'HF', HF_energy
+
+  print*, 'Security mo_class:', security_mo_class
+
+  ! The default mo_classes are setted only if the MOs to localize are not specified
+  if (security_mo_class .and. (n_act_orb == mo_num .or. &
+      n_core_orb + n_act_orb == mo_num)) then
+
+    print*, 'WARNING'
+    print*, 'You must set different mo_class with qp set_mo_class'
+    print*, 'If you want to localize all the orbitals:'
+    print*, 'qp set Orbital_optimization security_mo_class false'
+    print*, ''
+    print*, 'abort'
+
+    call abort
+  
+  endif
+
+! Loc
+
+  ! Pre rotation, to give a little kick in the MOs
+  call apply_pre_rotation()
+
+  ! Criterion after the pre rotation
+  ! Localization criterion (FB, PM, ...) for each mo_class
+  print*,'### After the pre rotation'
+  
+  ! Debug
+  if (debug_hf) then
+    touch mo_coef
+    print*,'HF energy:', HF_energy
+  endif
+  
+  do l = 1, 4
+    if (l==1) then ! core
+      tmp_list_size = dim_list_core_orb
+    elseif (l==2) then ! act
+      tmp_list_size = dim_list_act_orb
+    elseif (l==3) then ! inact
+      tmp_list_size = dim_list_inact_orb
+    else ! virt
+      tmp_list_size = dim_list_virt_orb
+    endif
+    
+    if (tmp_list_size >= 2) then
+      ! Allocation tmp array  
+      allocate(tmp_list(tmp_list_size))
+
+      ! To give the list of MOs in a mo_class
+      if (l==1) then ! core
+        tmp_list = list_core
+      elseif (l==2) then
+        tmp_list = list_act
+      elseif (l==3) then
+        tmp_list = list_inact
+      else
+        tmp_list = list_virt
+      endif
+
+      call criterion_localization(tmp_list_size, tmp_list,criterion)
+      print*,'Criterion:', criterion, trim(mo_class(tmp_list(1)))
+      
+      deallocate(tmp_list)    
+    endif
+    
+  enddo
+
+  ! Debug
+  !print*,'HF', HF_energy
+
+  print*,''
+  print*,'========================'
+  print*,'  Orbital localization'
+  print*,'========================'
+  print*,'' 
+
+  !Initialization
+  not_converged = .TRUE.
+
+  ! To do the localization only if there is at least 2 MOs
+  if (dim_list_core_orb >= 2) then
+    not_core_converged = .TRUE.
+  else
+    not_core_converged = .FALSE. 
+  endif
+  
+  if (dim_list_act_orb >= 2) then
+    not_act_converged = .TRUE.
+  else
+    not_act_converged = .FALSE.
+  endif
+
+  if (dim_list_inact_orb >= 2) then
+    not_inact_converged = .TRUE.
+  else
+    not_inact_converged = .FALSE.
+  endif
+
+  if (dim_list_virt_orb >= 2) then
+    not_virt_converged = .TRUE.
+  else
+    not_virt_converged = .FALSE.
+  endif
+ 
+  ! Loop over the mo_classes
+  do l = 1, 4
+
+    if (l==1) then ! core
+      not_converged = not_core_converged
+      tmp_list_size = dim_list_core_orb
+    elseif (l==2) then ! act
+      not_converged = not_act_converged
+      tmp_list_size = dim_list_act_orb
+    elseif (l==3) then ! inact
+      not_converged = not_inact_converged
+      tmp_list_size = dim_list_inact_orb
+    else ! virt
+      not_converged = not_virt_converged
+      tmp_list_size = dim_list_virt_orb
+    endif
+
+    ! Next iteration if converged = true 
+    if (.not. not_converged) then
+      cycle
+    endif
+ 
+    ! Allocation tmp array  
+    allocate(tmp_list(tmp_list_size))
+
+    ! To give the list of MOs in a mo_class
+    if (l==1) then ! core
+      tmp_list = list_core
+    elseif (l==2) then
+      tmp_list = list_act
+    elseif (l==3) then
+      tmp_list = list_inact
+    else
+      tmp_list = list_virt
+    endif
+
+    ! Display
+    if (not_converged) then
+      print*,''
+      print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###'
+      print*,''
+    endif
+
+    ! Size for the 2D -> 1D transformation 
+    tmp_n = tmp_list_size * (tmp_list_size - 1)/2
+
+    ! Without hessian + trust region 
+    if (.not. localization_use_hessian) then
+       
+      ! Allocation of temporary arrays
+      allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size))
+      allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n))
+
+      ! Criterion
+      call criterion_localization(tmp_list_size, tmp_list, prev_criterion)
+
+      ! Init
+      nb_iter = 0
+      delta = 1d0
+
+      !Loop
+      do while (not_converged)
+         
+        print*,''
+        print*,'***********************'
+        print*,'Iteration', nb_iter
+        print*,'***********************'
+        print*,''
+        
+        ! Angles of rotation
+        call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem)
+        tmp_m_x = - tmp_m_x * delta
+
+        ! Rotation submatrix
+        call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, &
+             info, enforce_step_cancellation)
+
+        ! To ensure that the rotation matrix is unitary
+        if (enforce_step_cancellation) then
+          print*, 'Step cancellation, too large error in the rotation matrix'
+          delta = delta * 0.5d0
+          cycle
+        else
+          delta = min(delta * 2d0, 1d0)
+        endif
+
+        ! Full rotation matrix and application of the rotation
+        call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R)
+        call apply_mo_rotation(R, prev_mos)
+
+        ! Update the needed data
+        call update_data_localization()
+
+        ! New criterion
+        call criterion_localization(tmp_list_size, tmp_list, criterion)
+        print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion
+        print*,'Max elem :', max_elem
+        print*,'Delta    :', delta
+        
+        nb_iter = nb_iter + 1
+
+        ! Exit
+        if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then
+           not_converged = .False.
+        endif
+      enddo
+
+      ! Save the changes
+      call update_data_localization()
+      call save_mos()
+      TOUCH mo_coef
+
+      ! Deallocate
+      deallocate(v_grad, tmp_m_x, tmp_list)
+      deallocate(tmp_R, tmp_x)
+
+    ! Trust region
+    else
+    
+      ! Allocation of temporary arrays
+      allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size))
+      allocate(tmp_R(tmp_list_size, tmp_list_size))
+      allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n))
+
+      ! ### Initialization ###
+      delta = 0d0 ! can be deleted (normally)
+      nb_iter = 0 ! Must start at 0 !!!
+      rho = 0.5d0 ! Must be 0.5
+
+      ! Compute the criterion before the loop
+      call criterion_localization(tmp_list_size, tmp_list, prev_criterion)
+
+      ! Loop until the convergence
+      do while (not_converged)
+
+        print*,''
+        print*,'***********************'
+        print*,'Iteration', nb_iter
+        print*,'***********************'
+        print*,''
+  
+        ! Gradient
+        call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+        ! Diagonal hessian
+        call hessian_localization(tmp_n, tmp_list_size, tmp_list, H)
+        
+        ! Diagonalization of the diagonal hessian by hands
+        !call diagonalization_hessian(tmp_n,H,e_val,w)
+        do i = 1, tmp_n
+          e_val(i) = H(i,i)
+        enddo
+
+        ! Key list for dsort
+        do i = 1, tmp_n 
+          key(i) = i
+        enddo
+
+        ! Sort of the eigenvalues
+        call dsort(e_val, key, tmp_n)
+
+        ! Eigenvectors
+        W = 0d0
+        do i = 1, tmp_n
+          j = key(i)
+          W(j,i) = 1d0
+        enddo
+
+        ! To enter in the loop just after
+        cancel_step = .True.
+        nb_sub_iter = 0 
+
+        ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho
+        do while (cancel_step)
+          print*,'-----------------------------'
+          print*, mo_class(tmp_list(1))
+          print*,'Iteration:', nb_iter
+          print*,'Sub iteration:', nb_sub_iter
+          print*,'-----------------------------'
+
+          ! Hessian,gradient,Criterion -> x 
+          call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, &
+               rho, nb_iter, delta, criterion_model, tmp_x, must_exit)
+
+          ! Internal loop exit condition
+          if (must_exit) then
+            print*,'trust_region_step_w_expected_e sent: Exit'
+            exit 
+          endif
+
+          ! 1D tmp -> 2D tmp 
+          call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x)
+
+          ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size)
+          call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, &
+               info, enforce_step_cancellation)
+
+          if (enforce_step_cancellation) then
+            print*, 'Step cancellation, too large error in the rotation matrix'
+            rho = 0d0
+            cycle
+          endif
+
+          ! tmp_R to R, subspace to full space
+          call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R)
+        
+          ! Rotation of the MOs
+          call apply_mo_rotation(R, prev_mos)   
+
+          ! Update the things related to mo_coef
+          call update_data_localization()
+
+          ! Update the criterion
+          call criterion_localization(tmp_list_size, tmp_list, criterion)
+          print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion
+
+          ! Criterion -> step accepted or rejected 
+          call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, &
+               criterion_model, rho, cancel_step)
+
+          ! Cancellation of the step, previous MOs
+          if (cancel_step) then
+            mo_coef = prev_mos
+          endif
+
+          nb_sub_iter = nb_sub_iter + 1
+        enddo
+        !call save_mos() !### depend of the time for 1 iteration
+
+        ! To exit the external loop if must_exti = .True.
+        if (must_exit) then
+          exit
+        endif 
+
+        ! Step accepted, nb iteration + 1
+        nb_iter = nb_iter + 1
+
+        ! External loop exit conditions
+        if (DABS(max_elem) < thresh_loc_max_elem_grad) then
+          not_converged = .False.
+        endif
+        if (nb_iter > localization_max_nb_iter) then
+          not_converged = .False.
+        endif
+      enddo
+
+      ! Deallocation of temporary arrays
+      deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key)
+      
+      ! Save the MOs
+      call save_mos()
+      TOUCH mo_coef
+ 
+      ! Debug
+      if (debug_hf) then
+        touch mo_coef
+        print*,'HF energy:', HF_energy
+      endif
+      
+    endif
+  enddo
+
+
+  TOUCH mo_coef 
+
+  ! To sort the MOs using the diagonal elements of the Fock matrix
+  if (sort_mos_by_e) then
+    call run_sort_by_fock_energies()
+  endif
+
+  ! Debug
+  if (debug_hf) then
+    touch mo_coef
+    print*,'HF energy:', HF_energy
+  endif
+
+  ! Locality after the localization
+  call compute_spatial_extent(spatial_extent)
+
+end
--- a/src/mo_localization/localization.org
+++ b/src/mo_localization/localization.org
--- a/src/mo_localization/localization_sub.irp.f
+++ b/src/mo_localization/localization_sub.irp.f
--- a/src/mo_two_e_ints/core_quantities.irp.f
+++ b/src/mo_two_e_ints/core_quantities.irp.f
@ -59,3 +59,45 @@ BEGIN_PROVIDER [ double precision, h_core_ri, (mo_num, mo_num) ]
 enddo
 END_PROVIDER

+
+BEGIN_PROVIDER [ double precision, h_act_ri, (mo_num, mo_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Active Hamiltonian with 3-index exchange integrals:
+ !
+ ! $\tilde{h}{pq} = h_{pq} - \frac{1}{2}\sum_{k} g(pk,kq)$
+ END_DOC
+
+ integer :: i,j, k
+ integer :: p,q, r
+ ! core-core contribution
+ h_act_ri = core_fock_operator
+ !print *,' Bef----hact(1,14)=',h_act_ri(4,14)
+ ! act-act contribution
+ do p=1,n_act_orb
+   j=list_act(p)
+   do q=1,n_act_orb
+     i=list_act(q)
+     h_act_ri(i,j) = mo_one_e_integrals(i,j)
+   enddo
+   do r=1,n_act_orb
+     k=list_act(r)
+     do q=1,n_act_orb
+       i=list_act(q)
+       h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j)
+     enddo
+   enddo
+ enddo
+ ! core-act contribution
+ !do p=1,n_act_orb
+ !  j=list_core(p)
+ !  do k=1,n_core_orb
+ !    do q=1,n_act_orb
+ !      i=list_act(q)
+ !      h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j)
+ !    enddo
+ !  enddo
+ !enddo
+ !print *,' Aft----hact(1,14)=',h_act_ri(4,14), mo_one_e_integrals(4,14)
+END_PROVIDER
+
--- a/src/utils/integration.irp.f
+++ b/src/utils/integration.irp.f
@ -56,7 +56,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
  !               * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 )
  !               * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 )
  !
-  ! WARNING ::: IF fact_k is too smal then: 
+  ! WARNING ::: IF fact_k is too smal then:
  ! returns a "s" function centered in zero
  ! with an inifinite exponent and a zero polynom coef
  END_DOC
@ -86,13 +86,11 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
  !DIR$ FORCEINLINE
  call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center)
  if (fact_k < thresh) then
-    ! IF fact_k is too smal then: 
+    ! IF fact_k is too smal then:
    ! returns a "s" function centered in zero
    ! with an inifinite exponent and a zero polynom coef
    P_center = 0.d0
    p = 1.d+15
-    P_new = 0.d0
-    iorder = 0
    fact_k = 0.d0
    return
  endif
@ -129,6 +127,91 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,

 end

+!---
+
+subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorder,alpha,beta,a,b,A_center,B_center,n_points)
+  BEGIN_DOC
+  ! Transforms the product of
+  !          (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta)
+  ! into
+  !        fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 )
+  !               * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 )
+  !               * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 )
+  !
+  ! WARNING                      :: : IF fact_k is too smal then:
+  ! returns a "s" function centered in zero
+  ! with an inifinite exponent and a zero polynom coef
+  END_DOC
+  implicit none
+  include 'constants.include.F'
+  integer, intent(in)            :: n_points, ldp
+  integer, intent(in)            :: a(3),b(3)         ! powers : (x-xa)**a_x = (x-A(1))**a(1)
+  double precision, intent(in)   :: alpha, beta       ! exponents
+  double precision, intent(in)   :: A_center(n_points,3) ! A center
+  double precision, intent(in)   :: B_center (3)         ! B center
+  double precision, intent(out)  :: P_center(n_points,3) ! new center
+  double precision, intent(out)  :: p                 ! new exponent
+  double precision, intent(out)  :: fact_k(n_points)            ! constant factor
+  double precision, intent(out)  :: P_new(n_points,0:ldp,3)! polynomial
+  integer, intent(out)           :: iorder(3)         ! i_order(i) = order of the polynomials
+
+  double precision, allocatable  :: P_a(:,:,:), P_b(:,:,:)
+
+  integer                        :: n_new,i,j, ipoint, lda, ldb, xyz
+
+  call gaussian_product_v(alpha,A_center,beta,B_center,fact_k,p,P_center,n_points)
+
+  if ( ior(ior(b(1),b(2)),b(3)) == 0 ) then  ! b == (0,0,0)
+
+    lda = maxval(a)
+    ldb = 0
+    allocate(P_a(n_points,0:lda,3), P_b(n_points,0:0,3))
+
+    call recentered_poly2_v0(P_a,lda,A_center,P_center,a,P_b,B_center,P_center,n_points)
+
+    iorder(1:3) = a(1:3)
+    do ipoint=1,n_points
+      do xyz=1,3
+        P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz)
+        do i=1,a(xyz)
+          P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz)
+        enddo
+      enddo
+    enddo
+
+    return
+
+  endif
+
+  lda = maxval(a)
+  ldb = maxval(b)
+  allocate(P_a(n_points,0:lda,3), P_b(n_points,0:ldb,3))
+
+  call recentered_poly2_v(P_a,lda,A_center,P_center,a,P_b,ldb,B_center,P_center,b,n_points)
+
+  iorder(1:3) = a(1:3) + b(1:3)
+
+  do xyz=1,3
+    if (b(xyz) == 0) then
+      do ipoint=1,n_points
+        P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz)
+        do i=1,a(xyz)
+          P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz)
+        enddo
+      enddo
+    else
+      do i=0,iorder(xyz)
+        do ipoint=1,n_points
+          P_new(ipoint,i,xyz) = 0.d0
+        enddo
+      enddo
+      call multiply_poly_v(P_a(1,0,xyz), a(xyz),P_b(1,0,xyz),b(xyz),P_new(1,0,xyz),ldp,n_points)
+    endif
+  enddo
+
+end
+
+!-

 subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim)
  BEGIN_DOC
@ -231,6 +314,59 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp)
  xp(3) = (a*xa(3)+b*xb(3))*p_inv
 end subroutine

+!---
+subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points)
+  implicit none
+  BEGIN_DOC
+  ! Gaussian product in 1D.
+  ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2}
+  ! Using multiple A centers
+  END_DOC
+
+  integer, intent(in)            :: n_points
+  double precision, intent(in)   :: a,b         ! Exponents
+  double precision, intent(in)   :: xa(n_points,3),xb(3) ! Centers
+  double precision, intent(out)  :: p ! New exponent
+  double precision, intent(out)  :: xp(n_points,3)       ! New center
+  double precision, intent(out)  :: k(n_points) ! Constant
+
+  double precision               :: p_inv
+
+  integer :: ipoint
+  ASSERT (a>0.)
+  ASSERT (b>0.)
+
+  double precision               :: xab(3), ab, ap, bp, bpxb(3)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab
+
+  p = a+b
+  p_inv = 1.d0/(a+b)
+  ab = a*b*p_inv
+  ap = a*p_inv
+  bp = b*p_inv
+  bpxb(1) = bp*xb(1)
+  bpxb(2) = bp*xb(2)
+  bpxb(3) = bp*xb(3)
+
+  do ipoint=1,n_points
+    xab(1) = xa(ipoint,1)-xb(1)
+    xab(2) = xa(ipoint,2)-xb(2)
+    xab(3) = xa(ipoint,3)-xb(3)
+    k(ipoint) = ab*(xab(1)*xab(1)+xab(2)*xab(2)+xab(3)*xab(3))
+    if (k(ipoint) > 40.d0) then
+      k(ipoint)=0.d0
+      xp(ipoint,1) = 0.d0
+      xp(ipoint,2) = 0.d0
+      xp(ipoint,3) = 0.d0
+    else
+      k(ipoint) = dexp(-k(ipoint))
+      xp(ipoint,1) = ap*xa(ipoint,1)+bpxb(1)
+      xp(ipoint,2) = ap*xa(ipoint,2)+bpxb(2)
+      xp(ipoint,3) = ap*xa(ipoint,3)+bpxb(3)
+    endif
+  enddo
+end subroutine
+



@ -269,6 +405,46 @@ subroutine gaussian_product_x(a,xa,b,xb,k,p,xp)
 end subroutine


+!-
+
+subroutine gaussian_product_x_v(a,xa,b,xb,k,p,xp,n_points)
+  implicit none
+  BEGIN_DOC
+  ! Gaussian product in 1D with multiple xa
+  ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2}
+  END_DOC
+
+  integer, intent(in) :: n_points
+  double precision  , intent(in) :: a,b      ! Exponents
+  double precision  , intent(in) :: xa(n_points),xb    ! Centers
+  double precision  , intent(out) :: p(n_points)       ! New exponent
+  double precision  , intent(out) :: xp(n_points) ! New center
+  double precision  , intent(out) :: k(n_points)       ! Constant
+
+  double precision               :: p_inv
+  integer :: ipoint
+
+  ASSERT (a>0.)
+  ASSERT (b>0.)
+
+  double precision               :: xab, ab
+
+  p = a+b
+  p_inv = 1.d0/(a+b)
+  ab = a*b*p_inv
+  do ipoint = 1, n_points
+    xab = xa(ipoint)-xb
+    k(ipoint) = ab*xab*xab
+    if (k(ipoint) > 40.d0) then
+      k(ipoint)=0.d0
+      cycle
+    endif
+    k(ipoint) = exp(-k(ipoint))
+    xp(ipoint) = (a*xa(ipoint)+b*xb)*p_inv
+  enddo
+end subroutine
+
+



@ -313,6 +489,45 @@ subroutine multiply_poly(b,nb,c,nc,d,nd)

 end

+subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points)
+  implicit none
+  BEGIN_DOC
+  ! Multiply pairs of polynomials
+  ! D(t) += B(t)*C(t)
+  END_DOC
+
+  integer, intent(in)            :: nb, nc, n_points
+  integer, intent(in)            :: nd
+  double precision, intent(in)   :: b(n_points,0:nb), c(n_points,0:nc)
+  double precision, intent(inout) :: d(n_points,0:nd)
+
+  integer                        :: ib, ic, id, k, ipoint
+  if (nd < nb+nc) then
+     print *, nd,  nb, nc
+     print *, irp_here, ': nd < nb+nc'
+     stop 1
+  endif
+
+  do ic = 0,nc
+    do ipoint=1, n_points
+      d(ipoint,ic) = d(ipoint,ic) + c(ipoint,ic) * b(ipoint,0)
+    enddo
+  enddo
+
+  do ib=1,nb
+    do ipoint=1, n_points
+      d(ipoint, ib) = d(ipoint, ib) + c(ipoint,0) * b(ipoint, ib)
+    enddo
+    do ic = 1,nc
+      do ipoint=1, n_points
+        d(ipoint, ib+ic) = d(ipoint, ib+ic) + c(ipoint,ic) * b(ipoint, ib)
+      enddo
+    enddo
+  enddo
+
+end
+
+
 subroutine add_poly(b,nb,c,nc,d,nd)
  implicit none
  BEGIN_DOC
@ -404,22 +619,152 @@ subroutine recentered_poly2(P_new,x_A,x_P,a,P_new2,x_B,x_Q,b)
  do i =  minab+1,min(b,20)
    P_new2(i) =  binom_transp(b-i,b) * pows_b(b-i)
  enddo
-  do i =  101,a
+  do i =  21,a
    P_new(i) =  binom_func(a,a-i) * pows_a(a-i)
  enddo
-  do i =  101,b
+  do i =  21,b
    P_new2(i) =  binom_func(b,b-i) * pows_b(b-i)
  enddo
 end

-subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) 
+!-
+subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points)
+  implicit none
+  BEGIN_DOC
+  ! Recenter two polynomials
+  END_DOC
+  integer, intent(in)            :: a(3),b(3), n_points, lda, ldb
+  double precision, intent(in)   :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3)
+  double precision, intent(out)  :: P_new(n_points,0:lda,3),P_new2(n_points,0:ldb,3)
+  double precision               :: binom_func
+  integer                        :: i,j,k,l, minab(3), maxab(3),ipoint, xyz
+  double precision, allocatable  :: pows_a(:,:), pows_b(:,:)
+  double precision :: fa, fb
+
+  maxab(1:3) = max(a(1:3),b(1:3))
+  minab(1:3) = max(min(a(1:3),b(1:3)),(/0,0,0/))
+
+  allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) )
+
+
+  do xyz=1,3
+    if ((a(xyz)<0).or.(b(xyz)<0) ) cycle
+    do ipoint=1,n_points
+      pows_a(ipoint,0) = 1.d0
+      pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz))
+      pows_b(ipoint,0) = 1.d0
+      pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz))
+    enddo
+    do i =  2,maxab(xyz)
+      do ipoint=1,n_points
+        pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1)
+        pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1)
+      enddo
+    enddo
+    do ipoint=1,n_points
+      P_new (ipoint,0,xyz) =  pows_a(ipoint,a(xyz))
+      P_new2(ipoint,0,xyz) =  pows_b(ipoint,b(xyz))
+    enddo
+    do i =  1,min(minab(xyz),20)
+      fa =  binom_transp(a(xyz)-i,a(xyz))
+      fb =  binom_transp(b(xyz)-i,b(xyz))
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+        P_new2(ipoint,i,xyz) =  fb * pows_b(ipoint,b(xyz)-i)
+      enddo
+    enddo
+    do i =  minab(xyz)+1,min(a(xyz),20)
+      fa =  binom_transp(a(xyz)-i,a(xyz))
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+    do i =  minab(xyz)+1,min(b(xyz),20)
+      fb =  binom_transp(b(xyz)-i,b(xyz))
+      do ipoint=1,n_points
+        P_new2(ipoint,i,xyz) =  fb * pows_b(ipoint,b(xyz)-i)
+      enddo
+    enddo
+    do i =  21,a(xyz)
+      fa =  binom_func(a(xyz),a(xyz)-i)
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+    do i =  21,b(xyz)
+      fb = binom_func(b(xyz),b(xyz)-i)
+      do ipoint=1,n_points
+        P_new2(ipoint,i,xyz) =  fb * pows_b(ipoint,b(xyz)-i)
+      enddo
+    enddo
+  enddo
+end
+
+
+subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points)
+  implicit none
+  BEGIN_DOC
+  ! Recenter two polynomials. Special case for b=(0,0,0)
+  END_DOC
+  integer, intent(in)            :: a(3), n_points, lda
+  double precision, intent(in)   :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3)
+  double precision, intent(out)  :: P_new(n_points,0:lda,3),P_new2(n_points,3)
+  double precision               :: binom_func
+  integer                        :: i,j,k,l, xyz, ipoint, maxab(3)
+  double precision, allocatable  :: pows_a(:,:), pows_b(:,:)
+  double precision               :: fa
+
+  maxab(1:3) = max(a(1:3),(/0,0,0/))
+
+  allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) )
+
+  do xyz=1,3
+    if (a(xyz)<0) cycle
+    do ipoint=1,n_points
+      pows_a(ipoint,0) = 1.d0
+      pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz))
+      pows_b(ipoint,0) = 1.d0
+      pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz))
+    enddo
+    do i = 2,maxab(xyz)
+      do ipoint=1,n_points
+        pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1)
+        pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1)
+      enddo
+    enddo
+    do ipoint=1,n_points
+      P_new (ipoint,0,xyz) =  pows_a(ipoint,a(xyz))
+      P_new2(ipoint,xyz) =  pows_b(ipoint,0)
+    enddo
+    do i =  1,min(a(xyz),20)
+      fa = binom_transp(a(xyz)-i,a(xyz))
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+    do i = 21,a(xyz)
+      fa = binom_func(a(xyz),a(xyz)-i)
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+
+  enddo !xyz
+
+  deallocate(pows_a, pows_b)
+end
+
+!--
+!--
+
+subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol)

  BEGIN_DOC
-  ! 
+  !
  ! Transform the pol centerd on A:
-  !       [ \sum_i ax_i (x-x_A)^i ] [ \sum_j ay_j (y-y_A)^j ] [ \sum_k az_k (z-z_A)^k ] 
+  !       [ \sum_i ax_i (x-x_A)^i ] [ \sum_j ay_j (y-y_A)^j ] [ \sum_k az_k (z-z_A)^k ]
  ! to a pol centered on B
-  !       [ \sum_i bx_i (x-x_B)^i ] [ \sum_j by_j (y-y_B)^j ] [ \sum_k bz_k (z-z_B)^k ] 
+  !       [ \sum_i bx_i (x-x_B)^i ] [ \sum_j by_j (y-y_B)^j ] [ \sum_k bz_k (z-z_B)^k ]
  !
  END_DOC

@ -437,7 +782,7 @@ subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol)

  do i = 1, 3
    Lmax = iorder(i)
-    call pol_modif_center_x( A_center(i), B_center(i), Lmax, A_pol(0:Lmax, i), B_pol(0:Lmax, i) ) 
+    call pol_modif_center_x( A_center(i), B_center(i), Lmax, A_pol(0:Lmax, i), B_pol(0:Lmax, i) )
  enddo

  return
@ -445,14 +790,14 @@ end subroutine pol_modif_center



-subroutine pol_modif_center_x(A_center, B_center, iorder, A_pol, B_pol) 
+subroutine pol_modif_center_x(A_center, B_center, iorder, A_pol, B_pol)

  BEGIN_DOC
-  ! 
+  !
  ! Transform the pol centerd on A:
-  !       [ \sum_i ax_i (x-x_A)^i ] 
+  !       [ \sum_i ax_i (x-x_A)^i ]
  ! to a pol centered on B
-  !       [ \sum_i bx_i (x-x_B)^i ] 
+  !       [ \sum_i bx_i (x-x_B)^i ]
  !
  ! bx_i = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) j! / [ i! (j-i)! ]
  !      = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) binom_func(j,i)
@ -591,7 +936,7 @@ double precision function rint_sum(n_pt_out,rho,d1)
      u_inv=1.d0/dsqrt(rho)
      u=rho*u_inv
      rint_sum=0.5d0*u_inv*sqpi*derf(u) *d1(0)
-!      print *, 0, d1(0), 0.5d0*u_inv*sqpi*derf(u) 
+!      print *, 0, d1(0), 0.5d0*u_inv*sqpi*derf(u)
    endif

    do i=2,n_pt_out,2
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@ -1136,6 +1136,104 @@ subroutine ortho_svd(A,LDA,m,n)

 end

+! QR to orthonormalize CSFs does not work :-(
+!subroutine ortho_qr_withB(A,LDA,B,m,n)
+!  implicit none
+!  BEGIN_DOC
+!  ! Orthogonalization using Q.R factorization
+!  !
+!  ! A : Overlap Matrix
+!  !
+!  ! LDA : leftmost dimension of A
+!  !
+!  ! m : Number of rows of A
+!  !
+!  ! n : Number of columns of A
+!  !
+!  ! B : Output orthogonal basis
+!  !
+!  END_DOC
+!  integer, intent(in)            :: m,n, LDA
+!  double precision, intent(inout) :: A(LDA,n)
+!  double precision, intent(inout) :: B(LDA,n)
+!
+!  integer                        :: LWORK, INFO
+!  integer, allocatable           :: jpvt(:)
+!  double precision, allocatable  :: TAU(:), WORK(:)
+!  double precision, allocatable  :: C(:,:)
+!  double precision :: norm
+!  integer :: i,j
+!
+!  allocate (TAU(min(m,n)), WORK(1))
+!  allocate (jpvt(n))
+!  !print *," In function ortho"
+!  B = A
+!
+!  jpvt(1:n)=1
+!
+!  LWORK=-1
+!  call dgeqp3( m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
+!
+!  ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+!  LWORK=max(n,int(WORK(1)))
+!
+!  deallocate(WORK)
+!  allocate(WORK(LWORK))
+!  call dgeqp3(m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
+!  print *,A
+!  print *,jpvt
+!  deallocate(WORK,TAU)
+!  !stop
+!
+!  !LWORK=-1
+!  !call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
+!  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+!  !LWORK=max(n,int(WORK(1)))
+!
+!  !deallocate(WORK)
+!  !allocate(WORK(LWORK))
+!  !call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
+!
+!  !LWORK=-1
+!  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
+!  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+!  !LWORK=max(n,int(WORK(1)))
+!
+!  !deallocate(WORK)
+!  !allocate(WORK(LWORK))
+!  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
+!  !
+!  !allocate(C(LDA,n))
+!  !call dgemm('N','N',m,n,n,1.0d0,B,LDA,A,LDA,0.0d0,C,LDA)
+!  !norm = 0.0d0
+!  !B = 0.0d0
+!  !!print *,C
+!  !do i=1,m
+!  !  norm = 0.0d0
+!  !  do j=1,n
+!  !    norm = norm + C(j,i)*C(j,i)
+!  !  end do
+!  !  norm = 1.0d0/dsqrt(norm)
+!  !  do j=1,n
+!  !    B(j,i) = C(j,i)
+!  !  end do
+!  !end do
+!  !print *,B
+!
+!
+!  !deallocate(WORK,TAU)
+!end
+
+!subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf")
+!  use iso_c_binding
+!  integer(c_int32_t), value      :: LDA
+!  integer(c_int32_t), value      :: m
+!  integer(c_int32_t), value      :: n
+!  integer(c_int16_t)             :: A(LDA,n)
+!  integer(c_int16_t)             :: B(LDA,n)
+!  call ortho_qr_withB(A,LDA,B,m,n)
+!end subroutine ortho_qr_csf
+
 subroutine ortho_qr(A,LDA,m,n)
  implicit none
  BEGIN_DOC
--- a/src/utils/loc.f
+++ b/src/utils/loc.f
@ -1,6 +1,6 @@
 c************************************************************************
      subroutine maxovl(n,m,s,t,w)
-C 
+C
 C     This subprogram contains an iterative procedure to find the
 C     unitary transformation of a set of n vectors which maximizes
 C     the sum of their square overlaps with a set of m reference
@ -10,7 +10,7 @@ C     S: overlap matrix <ref|vec>
 C     T: rotation matrix
 C     W: new overlap matrix
 C
-C 
+C
      implicit real*8(a-h,o-y),logical*1(z)
 !      parameter (id1=700)
 !      dimension s(id1,id1),t(id1,id1),w(id1,id1)
@ -29,23 +29,26 @@ C      conv=1.d-6
     * 5x,'following the principle of maximum overlap with a set of',
     * i3,' reference vectors'/5x,'required convergence on rotation ',
     * 'angle =',f13.10///5x,'Starting overlap matrix'/)
-      do 6 i=1,m
-      write (6,145) i
-    6 write (6,150) (s(i,j),j=1,n)
+      do i=1,m
+        write (6,145) i
+        write (6,150) (s(i,j),j=1,n)
+      end do
    8 mm=m-1
      if (m.lt.n) mm=m
      iter=0
-      do 20 j=1,n
-      do 16 i=1,n
-      t(i,j)=0.d0
-   16 continue
-      do 18 i=1,m
-   18 w(i,j)=s(i,j)
-   20 t(j,j)=1.d0
+      do j=1,n
+        do i=1,n
+          t(i,j)=0.d0
+        end do
+        do i=1,m
+          w(i,j)=s(i,j)
+        enddo
+        t(j,j)=1.d0
+      enddo
      sum=0.d0
-      do 10 i=1,m
-      sum=sum+s(i,i)*s(i,i)
-   10 continue
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      end do
      sum=sum/m
      if (zprt) write (6,12) sum
   12 format (//5x,'Average square overlap =',f10.6)
@ -54,18 +57,18 @@ C      conv=1.d-6
      j=1
   21 if (j.ge.last) goto 30
      sum=0.d0
-                                                                
-      do 22 i=1,n
-   22 sum=sum+s(i,j)*s(i,j)
+      do i=1,n
+        sum=sum+s(i,j)*s(i,j)
+      enddo
      if (sum.gt.small) goto 28
-      do 24 i=1,n
-      sij=s(i,j)
-      s(i,j)=-s(i,last)
-      s(i,last)=sij
-      tij=t(i,j)
-      t(i,j)=-t(i,last)
-      t(i,last)=tij
-   24 continue
+      do i=1,n
+        sij=s(i,j)
+        s(i,j)=-s(i,last)
+        s(i,last)=sij
+        tij=t(i,j)
+        t(i,j)=-t(i,last)
+        t(i,last)=tij
+      end do
      last=last-1
      goto 21
   28 j=j+1
@ -101,17 +104,18 @@ C      conv=1.d-6
      sine=1.d0
   34 delta=sine*(a*sine+b*cosine)
      if (zprt.and.delta.lt.0.d0) write (6,71) i,j,a,b,sine,cosine,delta
-      do 35 k=1,m
-      p=s(k,i)*cosine-s(k,j)*sine
-      q=s(k,i)*sine+s(k,j)*cosine
-      s(k,i)=p
-   35 s(k,j)=q
-      do 40 k=1,n
-      p=t(k,i)*cosine-t(k,j)*sine
-      q=t(k,i)*sine+t(k,j)*cosine
-      t(k,i)=p
-      t(k,j)=q
-   40 continue
+      do k=1,m
+        p=s(k,i)*cosine-s(k,j)*sine
+        q=s(k,i)*sine+s(k,j)*cosine
+        s(k,i)=p
+        s(k,j)=q
+      enddo
+      do k=1,n
+        p=t(k,i)*cosine-t(k,j)*sine
+        q=t(k,i)*sine+t(k,j)*cosine
+        t(k,i)=p
+        t(k,j)=q
+      enddo
   45 d=dabs(sine)
      if (d.le.amax) goto 50
      imax=i
@ -132,43 +136,50 @@ C      conv=1.d-6
     * 'in subroutine maxovl ***'//)
      stop
  100 continue
-      do 120 j=1,n
-      if (s(j,j).gt.0.d0) goto 120
-      do 105 i=1,m
-  105 s(i,j)=-s(i,j)
-      do 110 i=1,n
-  110 t(i,j)=-t(i,j)
-  120 continue
+      do j=1,n
+        if (s(j,j).gt.0.d0) cycle
+        do i=1,m
+          s(i,j)=-s(i,j)
+        enddo
+        do i=1,n
+          t(i,j)=-t(i,j)
+        enddo
+      enddo
      sum=0.d0
-      do 125 i=1,m
-  125 sum=sum+s(i,i)*s(i,i)
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      enddo
      sum=sum/m
-      do 122 i=1,m
-      do 122 j=1,n
-      sw=s(i,j)
-      s(i,j)=w(i,j)
-  122 w(i,j)=sw
+      do i=1,m
+        do j=1,n
+          sw=s(i,j)
+          s(i,j)=w(i,j)
+          w(i,j)=sw
+        enddo
+      enddo
      if (.not.zprt) return
      write (6,12) sum
      write (6,130)
  130 format (//5x,'transformation matrix')
-      do 140 i=1,n
-      write (6,145) i
-  140 write (6,150) (t(i,j),j=1,n)
+      do i=1,n
+        write (6,145) i
+        write (6,150) (t(i,j),j=1,n)
+      enddo
  145 format (i8)
  150 format (2x,10f12.8)
      write (6,160)
  160 format (//5x,'new overlap matrix'/)
-      do 170 i=1,m
-      write (6,145) i
-  170 write (6,150) (w(i,j),j=1,n)
+      do i=1,m
+        write (6,145) i
+        write (6,150) (w(i,j),j=1,n)
+      enddo
      return
      end


 c************************************************************************
      subroutine maxovl_no_print(n,m,s,t,w)
-C 
+C
 C     This subprogram contains an iterative procedure to find the
 C     unitary transformation of a set of n vectors which maximizes
 C     the sum of their square overlaps with a set of m reference
@ -178,7 +189,7 @@ C     S: overlap matrix <ref|vec>
 C     T: rotation matrix
 C     W: new overlap matrix
 C
-C 
+C
      implicit real*8(a-h,o-y),logical*1(z)
      parameter (id1=300)
      dimension s(id1,id1),t(id1,id1),w(id1,id1)
@ -193,17 +204,19 @@ C      conv=1.d-6
    8 mm=m-1
      if (m.lt.n) mm=m
      iter=0
-      do 20 j=1,n
-      do 16 i=1,n
-      t(i,j)=0.d0
-   16 continue
-      do 18 i=1,m
-   18 w(i,j)=s(i,j)
-   20 t(j,j)=1.d0
+      do j=1,n
+        do i=1,n
+          t(i,j)=0.d0
+        enddo
+        do i=1,m
+          w(i,j)=s(i,j)
+        enddo
+        t(j,j)=1.d0
+      enddo
      sum=0.d0
-      do 10 i=1,m
-      sum=sum+s(i,i)*s(i,i)
-   10 continue
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      enddo
      sum=sum/m
   12 format (//5x,'Average square overlap =',f10.6)
      if (n.eq.1) goto 100
@ -211,18 +224,19 @@ C      conv=1.d-6
      j=1
   21 if (j.ge.last) goto 30
      sum=0.d0
-                                                                
-      do 22 i=1,n
-   22 sum=sum+s(i,j)*s(i,j)
+
+      do i=1,n
+        sum=sum+s(i,j)*s(i,j)
+      enddo
      if (sum.gt.small) goto 28
-      do 24 i=1,n
-      sij=s(i,j)
-      s(i,j)=-s(i,last)
-      s(i,last)=sij
-      tij=t(i,j)
-      t(i,j)=-t(i,last)
-      t(i,last)=tij
-   24 continue
+      do i=1,n
+        sij=s(i,j)
+        s(i,j)=-s(i,last)
+        s(i,last)=sij
+        tij=t(i,j)
+        t(i,j)=-t(i,last)
+        t(i,last)=tij
+      end do
      last=last-1
      goto 21
   28 j=j+1
@ -232,50 +246,52 @@ C      conv=1.d-6
      jmax=0
      dmax=0.d0
      amax=0.d0
-      do 60 i=1,mm
-      ip=i+1
-      do 50 j=ip,n
-      a=s(i,j)*s(i,j)-s(i,i)*s(i,i)
-      b=-s(i,i)*s(i,j)
-      if (j.gt.m) goto 31
-      a=a+s(j,i)*s(j,i)-s(j,j)*s(j,j)
-      b=b+s(j,i)*s(j,j)
-   31 b=b+b
-      if (a.eq.0.d0) goto 32
-      ba=b/a
-      if (dabs(ba).gt.small) goto 32
-      if (a.gt.0.d0) goto 33
-      tang=-0.5d0*ba
-      cosine=1.d0/dsqrt(1.d0+tang*tang)
-      sine=tang*cosine
-      goto 34
-   32 tang=0.d0
-      if (b.ne.0.d0) tang=(a+dsqrt(a*a+b*b))/b
-      cosine=1.d0/dsqrt(1.d0+tang*tang)
-      sine=tang*cosine
-      goto 34
-   33 cosine=0.d0
-      sine=1.d0
-   34 delta=sine*(a*sine+b*cosine)
-      do 35 k=1,m
-      p=s(k,i)*cosine-s(k,j)*sine
-      q=s(k,i)*sine+s(k,j)*cosine
-      s(k,i)=p
-   35 s(k,j)=q
-      do 40 k=1,n
-      p=t(k,i)*cosine-t(k,j)*sine
-      q=t(k,i)*sine+t(k,j)*cosine
-      t(k,i)=p
-      t(k,j)=q
-   40 continue
-   45 d=dabs(sine)
-      if (d.le.amax) goto 50
-      imax=i
-      jmax=j
-      amax=d
-      dmax=delta
-   50 continue
-   60 continue
+      do i=1,mm
+        ip=i+1
+        do j=ip,n
+          a=s(i,j)*s(i,j)-s(i,i)*s(i,i)
+          b=-s(i,i)*s(i,j)
+          if (j.gt.m) goto 31
+          a=a+s(j,i)*s(j,i)-s(j,j)*s(j,j)
+          b=b+s(j,i)*s(j,j)
+  31      b=b+b
+          if (a.eq.0.d0) goto 32
+          ba=b/a
+          if (dabs(ba).gt.small) goto 32
+          if (a.gt.0.d0) goto 33
+          tang=-0.5d0*ba
+          cosine=1.d0/dsqrt(1.d0+tang*tang)
+          sine=tang*cosine
+          goto 34
+  32      tang=0.d0
+          if (b.ne.0.d0) tang=(a+dsqrt(a*a+b*b))/b
+          cosine=1.d0/dsqrt(1.d0+tang*tang)
+          sine=tang*cosine
+          goto 34
+  33      cosine=0.d0
+          sine=1.d0
+  34      delta=sine*(a*sine+b*cosine)
+          do k=1,m
+            p=s(k,i)*cosine-s(k,j)*sine
+            q=s(k,i)*sine+s(k,j)*cosine
+            s(k,i)=p
+            s(k,j)=q
+          enddo
+          do k=1,n
+            p=t(k,i)*cosine-t(k,j)*sine
+            q=t(k,i)*sine+t(k,j)*cosine
+            t(k,i)=p
+            t(k,j)=q
+          enddo
+  45      d=dabs(sine)
+          if (d.le.amax) goto 50
+          imax=i
+          jmax=j
+          amax=d
+          dmax=delta
+  50      continue
+        end do
+      end do
   70 format (' iter=',i4,' largest rotation=',f12.8,
     * ', vectors',i3,' and',i3,', incr. of diag. squares=',g12.5)
   71 format (' i,j,a,b,sin,cos,delta =',2i3,5f10.5)
@ -285,22 +301,27 @@ C      conv=1.d-6
     * 'in subroutine maxovl ***'//)
      stop
  100 continue
-      do 120 j=1,n
-      if (s(j,j).gt.0.d0) goto 120
-      do 105 i=1,m
-  105 s(i,j)=-s(i,j)
-      do 110 i=1,n
-  110 t(i,j)=-t(i,j)
-  120 continue
+      do j=1,n
+        if (s(j,j).gt.0.d0) cycle
+        do i=1,m
+          s(i,j)=-s(i,j)
+        enddo
+        do i=1,n
+          t(i,j)=-t(i,j)
+        enddo
+      enddo
      sum=0.d0
-      do 125 i=1,m
-  125 sum=sum+s(i,i)*s(i,i)
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      enddo
      sum=sum/m
-      do 122 i=1,m
-      do 122 j=1,n
-      sw=s(i,j)
-      s(i,j)=w(i,j)
-  122 w(i,j)=sw
+      do i=1,m
+        do j=1,n
+          sw=s(i,j)
+          s(i,j)=w(i,j)
+          w(i,j)=sw
+        enddo
+      enddo
      return
      end

--- a/src/utils/map_module.f90
+++ b/src/utils/map_module.f90
@ -238,11 +238,11 @@ subroutine cache_map_sort(map)
      iorder(i) = i
    enddo
    if (cache_key_kind == 2) then
-      call i2radix_sort(map%key,iorder,map%n_elements,-1)
+      call i2sort(map%key,iorder,map%n_elements,-1)
    else if (cache_key_kind == 4) then
-      call iradix_sort(map%key,iorder,map%n_elements,-1)
+      call isort(map%key,iorder,map%n_elements,-1)
    else if (cache_key_kind == 8) then
-      call i8radix_sort(map%key,iorder,map%n_elements,-1)
+      call i8sort(map%key,iorder,map%n_elements,-1)
    endif
    if (integral_kind == 4) then
      call set_order(map%value,iorder,map%n_elements)
--- a/src/utils/one_e_integration.irp.f
+++ b/src/utils/one_e_integration.irp.f
@ -92,9 +92,9 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,&
  overlap = overlap_x * overlap_y * overlap_z

 end
-              
+
 ! ---
-              
+
 subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, lower_exp_val, dx, nx)

  BEGIN_DOC
@ -151,4 +151,71 @@ subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, over
 end


+! ---

+subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,&
+      power_B,overlap,dim, n_points)
+  implicit none
+  BEGIN_DOC
+  !.. math::
+  !
+  !   S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2)  (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx \\
+  !   S = S_x S_y S_z
+  !
+  END_DOC
+  include 'constants.include.F'
+  integer,intent(in)             :: dim, n_points
+  double precision,intent(in)    :: A_center(n_points,3),B_center(3)  ! center of the x1 functions
+  double precision, intent(in)   :: alpha,beta
+  integer,intent(in)             :: power_A(3), power_B(3) ! power of the x1 functions
+  double precision, intent(out)  :: overlap(n_points)
+  double precision               :: F_integral_tab(0:max_dim)
+  double precision               :: p, overlap_x, overlap_y, overlap_z
+  double precision, allocatable  :: P_new(:,:,:),P_center(:,:),fact_p(:), fact_pp(:), pp(:)
+  integer                        :: iorder_p(3), ipoint, ldp
+  integer                        :: nmax
+  double precision               :: F_integral
+
+  ldp = maxval( power_A(1:3) + power_B(1:3) )
+  allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points), &
+           fact_pp(n_points), pp(n_points))
+
+  call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,n_points)
+
+  nmax = maxval(iorder_p)
+  do i=0, nmax
+    F_integral_tab(i) = F_integral(i,p)
+  enddo
+
+  integer                        :: i
+
+  call gaussian_product_v(alpha,A_center,beta,B_center,fact_pp,pp,P_center,n_points)
+
+  do ipoint=1,n_points
+    if(fact_p(ipoint).lt.1d-20)then
+      overlap(ipoint) = 1.d-10
+      cycle
+    endif
+
+    overlap_x = P_new(ipoint,0,1) * F_integral_tab(0)
+    do i = 1,iorder_p(1)
+      overlap_x = overlap_x + P_new(ipoint,i,1) * F_integral_tab(i)
+    enddo
+
+    overlap_y = P_new(ipoint,0,2) * F_integral_tab(0)
+    do i = 1,iorder_p(2)
+      overlap_y = overlap_y + P_new(ipoint,i,2) * F_integral_tab(i)
+    enddo
+
+    overlap_z = P_new(ipoint,0,3) * F_integral_tab(0)
+    do i = 1,iorder_p(3)
+      overlap_z = overlap_z + P_new(ipoint,i,3) * F_integral_tab(i)
+    enddo
+
+    overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_pp(ipoint)
+  enddo
+
+  deallocate(P_new, P_center, fact_p, pp, fact_pp)
+end
+
+! ---
--- a/src/utils/qsort.c
+++ b/src/utils/qsort.c
@ -0,0 +1,373 @@
+/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */
+#include <stdlib.h>
+#include <stdint.h>
+
+struct int16_t_comp {
+  int16_t    x;
+  int32_t i;
+};
+
+int compare_int16_t( const void * l, const void * r )
+{
+  const int16_t * restrict _l= l;
+  const int16_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int16_t_comp), compare_int16_t);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int16_t_noidx(int16_t* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int16_t), compare_int16_t);
+}
+
+
+struct int16_t_comp_big {
+  int16_t    x;
+  int64_t i;
+};
+
+int compare_int16_t_big( const void * l, const void * r )
+{
+  const int16_t * restrict _l= l;
+  const int16_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int16_t_comp_big), compare_int16_t_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int16_t_noidx_big(int16_t* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int16_t), compare_int16_t_big);
+}
+
+
+struct int32_t_comp {
+  int32_t    x;
+  int32_t i;
+};
+
+int compare_int32_t( const void * l, const void * r )
+{
+  const int32_t * restrict _l= l;
+  const int32_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int32_t_comp), compare_int32_t);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int32_t_noidx(int32_t* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int32_t), compare_int32_t);
+}
+
+
+struct int32_t_comp_big {
+  int32_t    x;
+  int64_t i;
+};
+
+int compare_int32_t_big( const void * l, const void * r )
+{
+  const int32_t * restrict _l= l;
+  const int32_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int32_t_comp_big), compare_int32_t_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int32_t_noidx_big(int32_t* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int32_t), compare_int32_t_big);
+}
+
+
+struct int64_t_comp {
+  int64_t    x;
+  int32_t i;
+};
+
+int compare_int64_t( const void * l, const void * r )
+{
+  const int64_t * restrict _l= l;
+  const int64_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int64_t_comp), compare_int64_t);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int64_t_noidx(int64_t* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int64_t), compare_int64_t);
+}
+
+
+struct int64_t_comp_big {
+  int64_t    x;
+  int64_t i;
+};
+
+int compare_int64_t_big( const void * l, const void * r )
+{
+  const int64_t * restrict _l= l;
+  const int64_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int64_t_comp_big), compare_int64_t_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int64_t_noidx_big(int64_t* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int64_t), compare_int64_t_big);
+}
+
+
+struct double_comp {
+  double    x;
+  int32_t i;
+};
+
+int compare_double( const void * l, const void * r )
+{
+  const double * restrict _l= l;
+  const double * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct double_comp* A = malloc(isize * sizeof(struct double_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct double_comp), compare_double);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_double_noidx(double* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(double), compare_double);
+}
+
+
+struct double_comp_big {
+  double    x;
+  int64_t i;
+};
+
+int compare_double_big( const void * l, const void * r )
+{
+  const double * restrict _l= l;
+  const double * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct double_comp_big), compare_double_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_double_noidx_big(double* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(double), compare_double_big);
+}
+
+
+struct float_comp {
+  float    x;
+  int32_t i;
+};
+
+int compare_float( const void * l, const void * r )
+{
+  const float * restrict _l= l;
+  const float * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct float_comp* A = malloc(isize * sizeof(struct float_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct float_comp), compare_float);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_float_noidx(float* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(float), compare_float);
+}
+
+
+struct float_comp_big {
+  float    x;
+  int64_t i;
+};
+
+int compare_float_big( const void * l, const void * r )
+{
+  const float * restrict _l= l;
+  const float * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct float_comp_big), compare_float_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_float_noidx_big(float* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(float), compare_float_big);
+}
+/* Generated C file:1 ends here */
--- a/src/utils/qsort.org
+++ b/src/utils/qsort.org
@ -0,0 +1,169 @@
+#+TITLE: Quick sort binding for Fortran
+
+* C template
+
+#+NAME: c_template
+#+BEGIN_SRC c
+struct TYPE_comp_big {
+  TYPE    x;
+  int32_t i;
+};
+
+int compare_TYPE_big( const void * l, const void * r )
+{
+  const TYPE * restrict _l= l;
+  const TYPE * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct TYPE_comp_big), compare_TYPE_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_TYPE_noidx_big(TYPE* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(TYPE), compare_TYPE_big);
+}
+#+END_SRC
+
+* Fortran template
+
+#+NAME:f_template
+#+BEGIN_SRC f90
+subroutine Lsort_big_c(A, iorder, isize) bind(C, name="qsort_TYPE_big")
+  use iso_c_binding
+  integer(c_int32_t), value :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_TYPE)         :: A(isize)
+end subroutine Lsort_big_c
+
+subroutine Lsort_noidx_big_c(A, isize) bind(C, name="qsort_TYPE_noidx_big")
+  use iso_c_binding
+  integer(c_int32_t), value :: isize
+  real   (c_TYPE)         :: A(isize)
+end subroutine Lsort_noidx_big_c
+
+#+END_SRC
+
+#+NAME:f_template2
+#+BEGIN_SRC f90
+subroutine Lsort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_TYPE)         :: A(isize)
+  call Lsort_big_c(A, iorder, isize)
+end subroutine Lsort_big
+
+subroutine Lsort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  real   (c_TYPE)    :: A(isize)
+  call Lsort_noidx_big_c(A, isize)
+end subroutine Lsort_noidx_big
+
+#+END_SRC
+
+* Python scripts for type replacements
+
+#+NAME: replaced
+#+begin_src python :results output :noweb yes
+data = """
+<<c_template>>
+"""
+for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
+    print( data.replace("TYPE", typ).replace("_big", "") )
+    print( data.replace("int32_t", "int64_t").replace("TYPE", typ) )
+#+end_src
+
+#+NAME: replaced_f
+#+begin_src python :results output :noweb yes
+data = """
+<<f_template>>
+"""
+c1 = {
+    "int16_t": "i2",
+    "int32_t": "i",
+    "int64_t": "i8",
+    "double": "d",
+    "float": ""
+}
+c2 = {
+    "int16_t": "integer",
+    "int32_t": "integer",
+    "int64_t": "integer",
+    "double": "real",
+    "float": "real"
+}
+
+for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") )
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) )
+#+end_src
+
+#+NAME: replaced_f2
+#+begin_src python :results output :noweb yes
+data = """
+<<f_template2>>
+"""
+c1 = {
+    "int16_t": "i2",
+    "int32_t": "i",
+    "int64_t": "i8",
+    "double": "d",
+    "float": ""
+}
+c2 = {
+    "int16_t": "integer",
+    "int32_t": "integer",
+    "int64_t": "integer",
+    "double": "real",
+    "float": "real"
+}
+
+for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") )
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) )
+#+end_src
+
+* Generated C file
+  
+#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes
+#include <stdlib.h>
+#include <stdint.h>
+<<replaced()>>
+#+END_SRC
+
+* Generated Fortran file
+
+#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes
+module qsort_module
+  use iso_c_binding
+  
+  interface
+     <<replaced_f()>>
+  end interface
+
+end module qsort_module
+
+<<replaced_f2()>>
+
+#+END_SRC
+
--- a/src/utils/qsort_module.f90
+++ b/src/utils/qsort_module.f90
@ -0,0 +1,347 @@
+module qsort_module
+  use iso_c_binding
+  
+  interface
+     
+     subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_c
+     
+     subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_noidx_c
+     
+     
+     
+     subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_big_c
+     
+     subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_noidx_big_c
+     
+     
+     
+     subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_c
+     
+     subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_noidx_c
+     
+     
+     
+     subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_big_c
+     
+     subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_noidx_big_c
+     
+     
+     
+     subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_c
+     
+     subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_noidx_c
+     
+     
+     
+     subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_big_c
+     
+     subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_noidx_big_c
+     
+     
+     
+     subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_c
+     
+     subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_noidx_c
+     
+     
+     
+     subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_big_c
+     
+     subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_noidx_big_c
+     
+     
+     
+     subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       real   (c_float)         :: A(isize)
+     end subroutine sort_c
+     
+     subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       real   (c_float)         :: A(isize)
+     end subroutine sort_noidx_c
+     
+     
+     
+     subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       real   (c_float)         :: A(isize)
+     end subroutine sort_big_c
+     
+     subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       real   (c_float)         :: A(isize)
+     end subroutine sort_noidx_big_c
+     
+     
+     
+  end interface
+
+end module qsort_module
+
+
+subroutine i2sort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  integer   (c_int16_t)         :: A(isize)
+  call i2sort_c(A, iorder, isize)
+end subroutine i2sort
+
+subroutine i2sort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  integer   (c_int16_t)    :: A(isize)
+  call i2sort_noidx_c(A, isize)
+end subroutine i2sort_noidx
+
+
+
+subroutine i2sort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  integer   (c_int16_t)         :: A(isize)
+  call i2sort_big_c(A, iorder, isize)
+end subroutine i2sort_big
+
+subroutine i2sort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  integer   (c_int16_t)    :: A(isize)
+  call i2sort_noidx_big_c(A, isize)
+end subroutine i2sort_noidx_big
+
+
+
+subroutine isort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  integer   (c_int32_t)         :: A(isize)
+  call isort_c(A, iorder, isize)
+end subroutine isort
+
+subroutine isort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  integer   (c_int32_t)    :: A(isize)
+  call isort_noidx_c(A, isize)
+end subroutine isort_noidx
+
+
+
+subroutine isort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  integer   (c_int32_t)         :: A(isize)
+  call isort_big_c(A, iorder, isize)
+end subroutine isort_big
+
+subroutine isort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  integer   (c_int32_t)    :: A(isize)
+  call isort_noidx_big_c(A, isize)
+end subroutine isort_noidx_big
+
+
+
+subroutine i8sort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  integer   (c_int64_t)         :: A(isize)
+  call i8sort_c(A, iorder, isize)
+end subroutine i8sort
+
+subroutine i8sort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  integer   (c_int64_t)    :: A(isize)
+  call i8sort_noidx_c(A, isize)
+end subroutine i8sort_noidx
+
+
+
+subroutine i8sort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  integer   (c_int64_t)         :: A(isize)
+  call i8sort_big_c(A, iorder, isize)
+end subroutine i8sort_big
+
+subroutine i8sort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  integer   (c_int64_t)    :: A(isize)
+  call i8sort_noidx_big_c(A, isize)
+end subroutine i8sort_noidx_big
+
+
+
+subroutine dsort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_double)         :: A(isize)
+  call dsort_c(A, iorder, isize)
+end subroutine dsort
+
+subroutine dsort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  real   (c_double)    :: A(isize)
+  call dsort_noidx_c(A, isize)
+end subroutine dsort_noidx
+
+
+
+subroutine dsort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  real   (c_double)         :: A(isize)
+  call dsort_big_c(A, iorder, isize)
+end subroutine dsort_big
+
+subroutine dsort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  real   (c_double)    :: A(isize)
+  call dsort_noidx_big_c(A, isize)
+end subroutine dsort_noidx_big
+
+
+
+subroutine sort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_float)         :: A(isize)
+  call sort_c(A, iorder, isize)
+end subroutine sort
+
+subroutine sort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  real   (c_float)    :: A(isize)
+  call sort_noidx_c(A, isize)
+end subroutine sort_noidx
+
+
+
+subroutine sort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  real   (c_float)         :: A(isize)
+  call sort_big_c(A, iorder, isize)
+end subroutine sort_big
+
+subroutine sort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  real   (c_float)    :: A(isize)
+  call sort_noidx_big_c(A, isize)
+end subroutine sort_noidx_big
--- a/src/utils/sort.irp.f
+++ b/src/utils/sort.irp.f
@ -1,222 +1,4 @@
 BEGIN_TEMPLATE
- subroutine insertion_$Xsort (x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the insertion sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  $type                          :: xtmp
-  integer                        :: i, i0, j, jmax
-
-  do i=2,isize
-    xtmp = x(i)
-    i0 = iorder(i)
-    j=i-1
-    do while (j>0)
-      if ((x(j) <= xtmp)) exit
-      x(j+1) = x(j)
-      iorder(j+1) = iorder(j)
-      j=j-1
-    enddo
-    x(j+1) = xtmp
-    iorder(j+1) = i0
-  enddo
- end subroutine insertion_$Xsort
-
- subroutine quick_$Xsort(x, iorder, isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the quicksort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer, external              :: omp_get_num_threads
-  call rec_$X_quicksort(x,iorder,isize,1,isize,nproc)
- end
-
- recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level)
-  implicit none
-  integer, intent(in)            :: isize, first, last, level
-  integer,intent(inout)          :: iorder(isize)
-  $type, intent(inout)           :: x(isize)
-  $type                          :: c, tmp
-  integer                        :: itmp
-  integer                        :: i, j
-
-  if(isize<2)return
-
-  c = x( shiftr(first+last,1) )
-  i = first
-  j = last
-  do
-    do while (x(i) < c)
-      i=i+1
-    end do
-    do while (c < x(j))
-      j=j-1
-    end do
-    if (i >= j) exit
-    tmp  = x(i)
-    x(i) = x(j)
-    x(j) = tmp
-    itmp      = iorder(i)
-    iorder(i) = iorder(j)
-    iorder(j) = itmp
-    i=i+1
-    j=j-1
-  enddo
-  if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then
-    if (first < i-1) then
-      call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2)
-    endif
-    if (j+1 < last) then
-      call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2)
-    endif
-  else
-    if (first < i-1) then
-      call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2)
-    endif
-    if (j+1 < last) then
-      call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2)
-    endif
-  endif
- end
-
- subroutine heap_$Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the heap sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-
-  integer                        :: i, k, j, l, i0
-  $type                          :: xtemp
-
-  l = isize/2+1
-  k = isize
-  do while (.True.)
-    if (l>1) then
-      l=l-1
-      xtemp = x(l)
-      i0 = iorder(l)
-    else
-      xtemp = x(k)
-      i0 = iorder(k)
-      x(k) = x(1)
-      iorder(k) = iorder(1)
-      k = k-1
-      if (k == 1) then
-        x(1) = xtemp
-        iorder(1) = i0
-        exit
-      endif
-    endif
-    i=l
-    j = shiftl(l,1)
-    do while (j<k)
-      if ( x(j) < x(j+1) ) then
-        j=j+1
-      endif
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    enddo
-    if (j==k) then
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    endif
-    x(i) = xtemp
-    iorder(i) = i0
-  enddo
- end subroutine heap_$Xsort
-
- subroutine heap_$Xsort_big(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the heap sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  ! This is a version for very large arrays where the indices need
-  ! to be in integer*8 format
-  END_DOC
-  integer*8,intent(in)           :: isize
-  $type,intent(inout)            :: x(isize)
-  integer*8,intent(inout)        :: iorder(isize)
-
-  integer*8                      :: i, k, j, l, i0
-  $type                          :: xtemp
-
-  l = isize/2+1
-  k = isize
-  do while (.True.)
-    if (l>1) then
-      l=l-1
-      xtemp = x(l)
-      i0 = iorder(l)
-    else
-      xtemp = x(k)
-      i0 = iorder(k)
-      x(k) = x(1)
-      iorder(k) = iorder(1)
-      k = k-1
-      if (k == 1) then
-        x(1) = xtemp
-        iorder(1) = i0
-        exit
-      endif
-    endif
-    i=l
-    j = shiftl(l,1)
-    do while (j<k)
-      if ( x(j) < x(j+1) ) then
-        j=j+1
-      endif
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    enddo
-    if (j==k) then
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    endif
-    x(i) = xtemp
-    iorder(i) = i0
-  enddo
-
- end subroutine heap_$Xsort_big

 subroutine sorted_$Xnumber(x,isize,n)
  implicit none
@ -250,220 +32,6 @@ SUBST [ X, type ]
 END_TEMPLATE


-!---------------------- INTEL
-IRP_IF INTEL
-
-BEGIN_TEMPLATE
- subroutine $Xsort(x,iorder,isize)
-  use intel
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  character, allocatable         :: tmp(:)
-  if (isize < 2) return
-  call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n)
-  allocate(tmp(n))
-  call ippsSortRadixIndexAscend_$ityp(x, $n, iorder, isize, tmp)
-  deallocate(tmp)
-  iorder(1:isize) = iorder(1:isize)+1
-  call $Xset_order(x,iorder,isize)
- end
-
- subroutine $Xsort_noidx(x,isize)
-  use intel
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer                        :: n
-  character, allocatable         :: tmp(:)
-  if (isize < 2) return
-  call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n)
-  allocate(tmp(n))
-  call ippsSortRadixAscend_$ityp_I(x, isize, tmp)
-  deallocate(tmp)
- end
-
-SUBST [ X, type, ityp, n, ippsz ]
-   ; real ; 32f ; 4 ; 13 ;;
- i ; integer ; 32s ; 4 ; 11 ;;
- i2 ; integer*2 ; 16s ; 2 ; 7 ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-!  call sorted_$Xnumber(x,isize,n)
-!  if (isize == n) then
-!    return
-!  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-!    call heap_$Xsort(x,iorder,isize)
-    call quick_$Xsort(x,iorder,isize)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
- d ; double precision ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-  call sorted_$Xnumber(x,isize,n)
-  if (isize == n) then
-    return
-  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-    call $Xradix_sort(x,iorder,isize,-1)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
- i8 ; integer*8 ;;
-END_TEMPLATE
-
-!---------------------- END INTEL
-IRP_ELSE
-!---------------------- NON-INTEL
-BEGIN_TEMPLATE
-
- subroutine $Xsort_noidx(x,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer, allocatable           :: iorder(:)
-  integer                        :: i
-  allocate(iorder(isize))
-  do i=1,isize
-   iorder(i)=i
-  enddo
-  call $Xsort(x,iorder,isize)
-  deallocate(iorder)
- end subroutine $Xsort_noidx
-
-SUBST [ X, type ]
-   ; real ;;
- d ; double precision ;;
- i ; integer ;;
- i8 ; integer*8 ;;
- i2 ; integer*2 ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-!  call sorted_$Xnumber(x,isize,n)
-!  if (isize == n) then
-!    return
-!  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-!    call heap_$Xsort(x,iorder,isize)
-    call quick_$Xsort(x,iorder,isize)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
-   ; real ;;
- d ; double precision ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-  call sorted_$Xnumber(x,isize,n)
-  if (isize == n) then
-    return
-  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-    call $Xradix_sort(x,iorder,isize,-1)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
- i ; integer ;;
- i8 ; integer*8 ;;
- i2 ; integer*2 ;;
-END_TEMPLATE
-
-IRP_ENDIF
-!---------------------- END NON-INTEL
-
-

 BEGIN_TEMPLATE
 subroutine $Xset_order(x,iorder,isize)
@ -489,47 +57,6 @@ BEGIN_TEMPLATE
  deallocate(xtmp)
 end

-SUBST [ X, type ]
-   ; real ;;
- d ; double precision ;;
- i ; integer ;;
- i8; integer*8 ;;
- i2; integer*2 ;;
-END_TEMPLATE
-
-
-BEGIN_TEMPLATE
- subroutine insertion_$Xsort_big (x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the insertion sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  ! This is a version for very large arrays where the indices need
-  ! to be in integer*8 format
-  END_DOC
-  integer*8,intent(in)           :: isize
-  $type,intent(inout)            :: x(isize)
-  integer*8,intent(inout)        :: iorder(isize)
-  $type                          :: xtmp
-  integer*8                      :: i, i0, j, jmax
-
-  do i=2_8,isize
-    xtmp = x(i)
-    i0 = iorder(i)
-    j = i-1_8
-    do while (j>0_8)
-      if (x(j)<=xtmp) exit
-      x(j+1_8) = x(j)
-      iorder(j+1_8) = iorder(j)
-      j = j-1_8
-    enddo
-    x(j+1_8) = xtmp
-    iorder(j+1_8) = i0
-  enddo
-
- end subroutine insertion_$Xsort_big
-
 subroutine $Xset_order_big(x,iorder,isize)
  implicit none
  BEGIN_DOC
@ -563,223 +90,3 @@ SUBST [ X, type ]
 END_TEMPLATE


-BEGIN_TEMPLATE
-
-recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix)
-  implicit none
-
-  BEGIN_DOC
-  ! Sort integer array x(isize) using the radix sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  ! iradix should be -1 in input.
-  END_DOC
-  integer*$int_type, intent(in)  :: isize
-  integer*$int_type, intent(inout) :: iorder(isize)
-  integer*$type, intent(inout)   :: x(isize)
-  integer, intent(in)            :: iradix
-  integer                        :: iradix_new
-  integer*$type, allocatable     :: x2(:), x1(:)
-  integer*$type                  :: i4               ! data type
-  integer*$int_type, allocatable :: iorder1(:),iorder2(:)
-  integer*$int_type              :: i0, i1, i2, i3, i ! index type
-  integer*$type                  :: mask
-  integer                        :: err
-  !DIR$ ATTRIBUTES ALIGN : 128   :: iorder1,iorder2, x2, x1
-
-  if (isize < 2) then
-    return
-  endif
-
-  if (iradix == -1) then ! Sort Positive and negative
-
-    allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to allocate arrays'
-      stop
-    endif
-
-    i1=1_$int_type
-    i2=1_$int_type
-    do i=1_$int_type,isize
-      if (x(i) < 0_$type) then
-        iorder1(i1) = iorder(i)
-        x1(i1) = -x(i)
-        i1 = i1+1_$int_type
-      else
-        iorder2(i2) = iorder(i)
-        x2(i2) = x(i)
-        i2 = i2+1_$int_type
-      endif
-    enddo
-    i1=i1-1_$int_type
-    i2=i2-1_$int_type
-
-    do i=1_$int_type,i2
-      iorder(i1+i) = iorder2(i)
-      x(i1+i) = x2(i)
-    enddo
-    deallocate(x2,iorder2,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x2, iorder2'
-      stop
-    endif
-
-
-    if (i1 > 1_$int_type) then
-      call $Xradix_sort$big(x1,iorder1,i1,-2)
-      do i=1_$int_type,i1
-        x(i) = -x1(1_$int_type+i1-i)
-        iorder(i) = iorder1(1_$int_type+i1-i)
-      enddo
-    endif
-
-    if (i2>1_$int_type) then
-      call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2)
-    endif
-
-    deallocate(x1,iorder1,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x1, iorder1'
-      stop
-    endif
-    return
-
-  else if (iradix == -2) then ! Positive
-
-    ! Find most significant bit
-
-    i0 = 0_$int_type
-    i4 = maxval(x)
-
-    iradix_new = max($integer_size-1-leadz(i4),1)
-    mask = ibset(0_$type,iradix_new)
-
-    allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to allocate arrays'
-      stop
-    endif
-
-    i1=1_$int_type
-    i2=1_$int_type
-
-    do i=1_$int_type,isize
-      if (iand(mask,x(i)) == 0_$type) then
-        iorder1(i1) = iorder(i)
-        x1(i1) = x(i)
-        i1 = i1+1_$int_type
-      else
-        iorder2(i2) = iorder(i)
-        x2(i2) = x(i)
-        i2 = i2+1_$int_type
-      endif
-    enddo
-    i1=i1-1_$int_type
-    i2=i2-1_$int_type
-
-    do i=1_$int_type,i1
-      iorder(i0+i) = iorder1(i)
-      x(i0+i) = x1(i)
-    enddo
-    i0 = i0+i1
-    i3 = i0
-    deallocate(x1,iorder1,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x1, iorder1'
-      stop
-    endif
-
-
-    do i=1_$int_type,i2
-      iorder(i0+i) = iorder2(i)
-      x(i0+i) = x2(i)
-    enddo
-    i0 = i0+i2
-    deallocate(x2,iorder2,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x2, iorder2'
-      stop
-    endif
-
-
-    if (i3>1_$int_type) then
-      call $Xradix_sort$big(x,iorder,i3,iradix_new-1)
-    endif
-
-    if (isize-i3>1_$int_type) then
-      call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1)
-    endif
-
-    return
-  endif
-
-  ASSERT (iradix >= 0)
-
-  if (isize < 48) then
-    call insertion_$Xsort$big(x,iorder,isize)
-    return
-  endif
-
-
-  allocate(x2(isize),iorder2(isize),stat=err)
-  if (err /= 0) then
-    print *,  irp_here, ': Unable to allocate arrays x1, iorder1'
-    stop
-  endif
-
-
-  mask = ibset(0_$type,iradix)
-  i0=1_$int_type
-  i1=1_$int_type
-
-  do i=1_$int_type,isize
-    if (iand(mask,x(i)) == 0_$type) then
-      iorder(i0) = iorder(i)
-      x(i0) = x(i)
-      i0 = i0+1_$int_type
-    else
-      iorder2(i1) = iorder(i)
-      x2(i1) = x(i)
-      i1 = i1+1_$int_type
-    endif
-  enddo
-  i0=i0-1_$int_type
-  i1=i1-1_$int_type
-
-  do i=1_$int_type,i1
-    iorder(i0+i) = iorder2(i)
-    x(i0+i) = x2(i)
-  enddo
-
-  deallocate(x2,iorder2,stat=err)
-  if (err /= 0) then
-    print *,  irp_here, ': Unable to allocate arrays x2, iorder2'
-    stop
-  endif
-
-
-  if (iradix == 0) then
-    return
-  endif
-
-
-  if (i1>1_$int_type) then
-    call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1)
-  endif
-  if (i0>1) then
-    call $Xradix_sort$big(x,iorder,i0,iradix-1)
-  endif
-
- end
-
-SUBST [ X, type, integer_size, is_big, big, int_type ]
- i  ; 4 ; 32 ; .False. ;      ; 4 ;;
- i8 ; 8 ; 64 ; .False. ;      ; 4 ;;
- i2 ; 2 ; 16 ; .False. ;      ; 4 ;;
- i  ; 4 ; 32 ; .True.  ; _big ; 8 ;;
- i8 ; 8 ; 64 ; .True.  ; _big ; 8 ;;
-END_TEMPLATE
-
-
-
--- a/src/utils/util.irp.f
+++ b/src/utils/util.irp.f
@ -430,3 +430,28 @@ subroutine lowercase(txt,n)
  enddo
 end

+subroutine v2_over_x(v,x,res)
+
+  !BEGIN_DOC
+  ! Two by two diagonalization to avoid the divergence in v^2/x when x goes to 0
+  !END_DOC
+
+  implicit none
+
+  double precision, intent(in)  :: v, x
+  double precision, intent(out) :: res
+
+  double precision :: delta_E, tmp, val
+
+  res = 0d0
+  delta_E = x
+  if (v == 0.d0) return
+
+  val = 2d0 * v
+  tmp = dsqrt(delta_E * delta_E + val * val)
+  if (delta_E < 0.d0) then
+      tmp = -tmp
+  endif
+  res = 0.5d0 * (tmp - delta_E)
+
+end
--- a/src/utils_trust_region/EZFIO.cfg
+++ b/src/utils_trust_region/EZFIO.cfg
@ -0,0 +1,89 @@
+[thresh_delta]
+type: double precision
+doc: Threshold to stop the optimization if the radius of the trust region delta < thresh_delta
+interface: ezfio,provider,ocaml
+default: 1.e-10
+
+[thresh_rho]
+type: double precision
+doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. thresh_rho)
+interface: ezfio,provider,ocaml
+default: 0.1
+
+[thresh_eig]
+type: double precision
+doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm
+interface: ezfio,provider,ocaml
+default: 1.e-12
+
+[thresh_model]
+type: double precision
+doc: If if ABS(criterion - criterion_model) < thresh_model, the program exit the trust region algorithm
+interface: ezfio,provider,ocaml
+default: 1.e-12
+
+[absolute_eig]
+type: logical
+doc: If True, the algorithm replace the eigenvalues of the hessian by their absolute value to compute the step (in the trust region)
+interface: ezfio,provider,ocaml
+default: false
+
+[thresh_wtg]
+type: double precision
+doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several order of magnitude to avoid numerical problem. If the research of the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[thresh_wtg2]
+type: double precision
+doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. There is no particular reason to put a different value that thresh_wtg, but it can be useful one day
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[avoid_saddle]
+type: logical
+doc: Test to avoid saddle point, active if true
+interface: ezfio,provider,ocaml
+default: false
+
+[version_avoid_saddle]
+type: integer
+doc: cf. trust region, not stable
+interface: ezfio,provider,ocaml
+default: 3
+
+[thresh_rho_2]
+type: double precision
+doc: Threshold for the step acceptance for the research of lambda in the trust region algorithm, if (rho_2 .geq. thresh_rho_2) the step is accepted, else the step is rejected
+interface: ezfio,provider,ocaml
+default: 0.1
+
+[thresh_cc]
+type: double precision
+doc: Threshold to stop the research of the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc)
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[thresh_model_2]
+type: double precision
+doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the research of the optimal lambda in the trust region algorithm it prints a warning
+interface: ezfio,provider,ocaml
+default: 1.e-12
+
+[version_lambda_search]
+type: integer
+doc: Research of the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 ->  1/||x||^2 - 1/delta^2 .eq. 0
+interface: ezfio,provider,ocaml
+default: 2
+
+[nb_it_max_lambda]
+type: integer
+doc: Maximal number of iterations for the research of the optimal lambda in the trust region algorithm
+interface: ezfio,provider,ocaml
+default: 100
+
+[nb_it_max_pre_search]
+type: integer
+doc: Maximal number of iterations for the pre-research of the optimal lambda in the trust region algorithm
+interface: ezfio,provider,ocaml
+default: 40
--- a/src/utils_trust_region/NEED
+++ b/src/utils_trust_region/NEED
@ -0,0 +1 @@
+hartree_fock
--- a/src/utils_trust_region/README.rst
+++ b/src/utils_trust_region/README.rst
@ -0,0 +1,5 @@
+============
+trust_region
+============
+
+The documentation can be found in the org files.
--- a/src/utils_trust_region/TANGLE_org_mode.sh
+++ b/src/utils_trust_region/TANGLE_org_mode.sh
@ -0,0 +1,7 @@
+#!/bin/sh   
+
+list='ls *.org'
+for element in $list    
+do   
+		emacs --batch $element -f org-babel-tangle
+done
--- a/src/utils_trust_region/algo_trust.irp.f
+++ b/src/utils_trust_region/algo_trust.irp.f
@ -0,0 +1,248 @@
+! Algorithm for the trust region
+
+! step_in_trust_region:
+! Computes the step in the trust region (delta)
+! (automatically sets at the iteration 0 and which evolves during the
+! process in function of the evolution of rho). The step is computing by
+! constraining its norm with a lagrange multiplier.
+! Since the calculation of the step is based on the Newton method, an
+! estimation of the gain in energy is given using the Taylors series
+! truncated at the second order (criterion_model).
+! If (DABS(criterion-criterion_model) < 1d-12) then 
+!   must_exit = .True.
+! else 
+!   must_exit = .False.
+
+! This estimation of the gain in energy is used by
+! is_step_cancel_trust_region to say if the step is accepted or cancelled. 
+
+! If the step must be cancelled, the calculation restart from the same
+! hessian and gradient and recomputes the step but in a smaller trust
+! region and so on until the step is accepted. If the step is accepted
+! the hessian and the gradient are recomputed to produce a new step.
+
+! Example:
+
+
+!  !### Initialization ###
+!  delta = 0d0
+!  nb_iter = 0 ! Must start at 0 !!!
+!  rho = 0.5d0
+!  not_converged = .True.
+!
+!  ! ### TODO ###
+!  ! Compute the criterion before the loop
+!  call #your_criterion(prev_criterion)
+!
+!  do while (not_converged)
+!    ! ### TODO ## 
+!    ! Call your gradient
+!    ! Call you hessian
+!    call #your_gradient(v_grad) (1D array)
+!    call #your_hessian(H) (2D array) 
+!
+!    ! ### TODO ###
+!    ! Diagonalization of the hessian 
+!    call diagonalization_hessian(n,H,e_val,w)
+!
+!    cancel_step = .True. ! To enter in the loop just after 
+!    ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
+!    do while (cancel_step)
+!
+!      ! Hessian,gradient,Criterion -> x 
+!      call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit)
+!
+!      if (must_exit) then
+!        ! ### Message ###
+!        ! if step_in_trust_region sets must_exit on true for numerical reasons
+!        print*,'algo_trust1 sends the message : Exit'
+!        !### exit ###
+!      endif
+!
+!      !### TODO ###  
+!      ! Compute x -> m_x
+!      ! Compute m_x -> R
+!      ! Apply R and keep the previous MOs...
+!      ! Update/touch 
+!      ! Compute the new criterion/energy -> criterion
+!
+!      call #your_routine_1D_to_2D_antisymmetric_array(x,m_x)
+!      call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R)
+!      call #your_routine_to_apply_the_rotation_matrix(R,prev_mos)
+!
+!      TOUCH #your_variables
+!      
+!      call #your_criterion(criterion)
+!
+!      ! Criterion -> step accepted or rejected 
+!      call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
+!
+!      ! ### TODO ###
+!      !if (cancel_step) then
+!      ! Cancel the previous step (mo_coef = prev_mos if you keep them...)
+!      !endif
+!      #if (cancel_step) then
+!        #mo_coef = prev_mos
+!      #endif
+!
+!    enddo
+!
+!    !call save_mos() !### depend of the time for 1 iteration
+!
+!    ! To exit the external loop if must_exit = .True.
+!    if (must_exit) then
+!      !### exit ###
+!    endif 
+!
+!    ! Step accepted, nb iteration + 1
+!    nb_iter = nb_iter + 1
+!
+!    ! ### TODO ###
+!    !if (###Conditions###) then
+!    ! no_converged = .False.
+!    !endif
+!    #if (#your_conditions) then
+!    #  not_converged = .False.
+!    #endif
+!
+!  enddo
+
+
+
+! Variables:
+
+! Input:
+! | n              | integer          | m*(m-1)/2                                                                      |
+! | m              | integer          | number of mo in the mo_class                                                   |
+! | H(n,n)         | double precision | Hessian                                                                        |
+! | v_grad(n)      | double precision | Gradient                                                                       |
+! | W(n,n)         | double precision | Eigenvectors of the hessian                                                    |
+! | e_val(n)       | double precision | Eigenvalues of the hessian                                                     |
+! | criterion      | double precision | Actual criterion                                                               |
+! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration |
+! | rho            | double precision | Given by is_step_cancel_trus_region                                            |
+! |                |                  | Agreement between the real function and the Taylor series (2nd order)          |
+! | nb_iter        | integer          | Actual number of iterations                                                    |
+
+! Input/output:
+! | delta           | double precision | Radius of the trust region                                                      |
+
+! Output:
+! | criterion_model | double precision | Predicted criterion after the rotation                                          |
+! | x(n)            | double precision | Step                                                                            |
+! | must_exit       | logical          | If the program must exit the loop                                               |
+
+
+subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the step and the expected criterion/energy after the step
+  END_DOC
+
+  implicit none
+
+  ! in
+  integer, intent(in)             :: n, nb_iter
+  double precision, intent(in)    :: H(n,n), W(n,n), v_grad(n)
+  double precision, intent(in)    :: rho, prev_criterion
+
+  ! inout
+  double precision, intent(inout) :: delta, e_val(n)
+
+  ! out
+  double precision, intent(out)   :: criterion_model, x(n)
+  logical, intent(out)            :: must_exit
+
+  ! internal
+  integer :: info
+
+  must_exit = .False.
+  
+  call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta)
+
+  call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model)
+
+  ! exit if DABS(prev_criterion - criterion_model) < 1d-12
+  if (DABS(prev_criterion - criterion_model) < thresh_model) then
+    print*,''
+    print*,'###############################################################################'
+    print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region'
+    print*,'###############################################################################'
+    print*,''
+    must_exit = .True.
+  endif
+
+  if (delta < thresh_delta) then
+    print*,''
+    print*,'##############################################'
+    print*,'Delta <', thresh_delta, 'stop the trust region'
+    print*,'##############################################'
+    print*,''
+    must_exit = .True.
+  endif
+
+  ! Add after the call to this subroutine, a statement:
+  ! "if (must_exit) then
+  !   exit
+  ! endif"
+  ! in order to exit the optimization loop
+
+end subroutine
+
+
+
+! Variables:
+
+! Input:
+! | nb_iter         | integer          | actual number of iterations                    |
+! | prev_criterion  | double precision | criterion before the application of the step x |
+! | criterion       | double precision | criterion after the application of the step x  |
+! | criterion_model | double precision | predicted criterion after the application of x |
+
+! Output:
+! | rho         | double precision | Agreement between the predicted criterion and the real new criterion |
+! | cancel_step | logical          | If the step must be cancelled                                        |
+
+
+subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute if the step should be cancelled
+  END_DOC
+
+  implicit none
+ 
+  ! in
+  double precision, intent(in)  :: prev_criterion, criterion, criterion_model
+  
+  ! inout
+  integer, intent(inout)        :: nb_iter
+
+  ! out
+  logical, intent(out)          :: cancel_step
+  double precision, intent(out) :: rho
+
+  ! Computes rho
+  call trust_region_rho(prev_criterion,criterion,criterion_model,rho)
+  
+  if (nb_iter == 0) then
+    nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled  
+  endif
+
+  ! If rho < thresh_rho -> give something in output to cancel the step
+  if (rho >= thresh_rho) then !0.1d0) then
+     ! The step is accepted
+     cancel_step = .False.
+  else
+     ! The step is rejected
+     cancel_step = .True.
+     print*, '***********************'
+     print*, 'Step cancel : rho <', thresh_rho
+     print*, '***********************'
+  endif
+  
+end subroutine
--- a/src/utils_trust_region/algo_trust.org
+++ b/src/utils_trust_region/algo_trust.org
@ -0,0 +1,593 @@
+* Algorithm for the trust region
+
+step_in_trust_region:
+Computes the step in the trust region (delta)
+(automatically sets at the iteration 0 and which evolves during the
+process in function of the evolution of rho). The step is computing by
+constraining its norm with a lagrange multiplier.
+Since the calculation of the step is based on the Newton method, an
+estimation of the gain in energy is given using the Taylors series
+truncated at the second order (criterion_model).
+If (DABS(criterion-criterion_model) < 1d-12) then 
+  must_exit = .True.
+else 
+  must_exit = .False.
+
+This estimation of the gain in energy is used by
+is_step_cancel_trust_region to say if the step is accepted or cancelled. 
+
+If the step must be cancelled, the calculation restart from the same
+hessian and gradient and recomputes the step but in a smaller trust
+region and so on until the step is accepted. If the step is accepted
+the hessian and the gradient are recomputed to produce a new step.
+
+Example:
+
+#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f
+!  !### Initialization ###
+!  delta = 0d0
+!  nb_iter = 0 ! Must start at 0 !!!
+!  rho = 0.5d0
+!  not_converged = .True.
+!
+!  ! ### TODO ###
+!  ! Compute the criterion before the loop
+!  call #your_criterion(prev_criterion)
+!
+!  do while (not_converged)
+!    ! ### TODO ## 
+!    ! Call your gradient
+!    ! Call you hessian
+!    call #your_gradient(v_grad) (1D array)
+!    call #your_hessian(H) (2D array) 
+!
+!    ! ### TODO ###
+!    ! Diagonalization of the hessian 
+!    call diagonalization_hessian(n,H,e_val,w)
+!
+!    cancel_step = .True. ! To enter in the loop just after 
+!    ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
+!    do while (cancel_step)
+!
+!      ! Hessian,gradient,Criterion -> x 
+!      call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit)
+!
+!      if (must_exit) then
+!        ! ### Message ###
+!        ! if step_in_trust_region sets must_exit on true for numerical reasons
+!        print*,'algo_trust1 sends the message : Exit'
+!        !### exit ###
+!      endif
+!
+!      !### TODO ###  
+!      ! Compute x -> m_x
+!      ! Compute m_x -> R
+!      ! Apply R and keep the previous MOs...
+!      ! Update/touch 
+!      ! Compute the new criterion/energy -> criterion
+!
+!      call #your_routine_1D_to_2D_antisymmetric_array(x,m_x)
+!      call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R)
+!      call #your_routine_to_apply_the_rotation_matrix(R,prev_mos)
+!
+!      TOUCH #your_variables
+!      
+!      call #your_criterion(criterion)
+!
+!      ! Criterion -> step accepted or rejected 
+!      call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
+!
+!      ! ### TODO ###
+!      !if (cancel_step) then
+!      ! Cancel the previous step (mo_coef = prev_mos if you keep them...)
+!      !endif
+!      #if (cancel_step) then
+!        #mo_coef = prev_mos
+!      #endif
+!
+!    enddo
+!
+!    !call save_mos() !### depend of the time for 1 iteration
+!
+!    ! To exit the external loop if must_exit = .True.
+!    if (must_exit) then
+!      !### exit ###
+!    endif 
+!
+!    ! Step accepted, nb iteration + 1
+!    nb_iter = nb_iter + 1
+!
+!    ! ### TODO ###
+!    !if (###Conditions###) then
+!    ! no_converged = .False.
+!    !endif
+!    #if (#your_conditions) then
+!    #  not_converged = .False.
+!    #endif
+!
+!  enddo
+#+END_SRC
+
+Variables:
+
+Input:
+| n              | integer          | m*(m-1)/2                                                                      |
+| m              | integer          | number of mo in the mo_class                                                   |
+| H(n,n)         | double precision | Hessian                                                                        |
+| v_grad(n)      | double precision | Gradient                                                                       |
+| W(n,n)         | double precision | Eigenvectors of the hessian                                                    |
+| e_val(n)       | double precision | Eigenvalues of the hessian                                                     |
+| criterion      | double precision | Actual criterion                                                               |
+| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration |
+| rho            | double precision | Given by is_step_cancel_trus_region                                            |
+|                |                  | Agreement between the real function and the Taylor series (2nd order)          |
+| nb_iter        | integer          | Actual number of iterations                                                    |
+
+Input/output:
+| delta           | double precision | Radius of the trust region                                                      |
+
+Output:
+| criterion_model | double precision | Predicted criterion after the rotation                                          |
+| x(n)            | double precision | Step                                                                            |
+| must_exit       | logical          | If the program must exit the loop                                               |
+
+#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f
+subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the step and the expected criterion/energy after the step
+  END_DOC
+
+  implicit none
+
+  ! in
+  integer, intent(in)             :: n, nb_iter
+  double precision, intent(in)    :: H(n,n), W(n,n), v_grad(n)
+  double precision, intent(in)    :: rho, prev_criterion
+
+  ! inout
+  double precision, intent(inout) :: delta, e_val(n)
+
+  ! out
+  double precision, intent(out)   :: criterion_model, x(n)
+  logical, intent(out)            :: must_exit
+
+  ! internal
+  integer :: info
+
+  must_exit = .False.
+  
+  call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta)
+
+  call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model)
+
+  ! exit if DABS(prev_criterion - criterion_model) < 1d-12
+  if (DABS(prev_criterion - criterion_model) < thresh_model) then
+    print*,''
+    print*,'###############################################################################'
+    print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region'
+    print*,'###############################################################################'
+    print*,''
+    must_exit = .True.
+  endif
+
+  if (delta < thresh_delta) then
+    print*,''
+    print*,'##############################################'
+    print*,'Delta <', thresh_delta, 'stop the trust region'
+    print*,'##############################################'
+    print*,''
+    must_exit = .True.
+  endif
+
+  ! Add after the call to this subroutine, a statement:
+  ! "if (must_exit) then
+  !   exit
+  ! endif"
+  ! in order to exit the optimization loop
+
+end subroutine
+#+END_SRC
+
+Variables:
+
+Input:
+| nb_iter         | integer          | actual number of iterations                    |
+| prev_criterion  | double precision | criterion before the application of the step x |
+| criterion       | double precision | criterion after the application of the step x  |
+| criterion_model | double precision | predicted criterion after the application of x |
+
+Output:
+| rho         | double precision | Agreement between the predicted criterion and the real new criterion |
+| cancel_step | logical          | If the step must be cancelled                                        |
+
+#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f
+subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute if the step should be cancelled
+  END_DOC
+
+  implicit none
+ 
+  ! in
+  double precision, intent(in)  :: prev_criterion, criterion, criterion_model
+  
+  ! inout
+  integer, intent(inout)        :: nb_iter
+
+  ! out
+  logical, intent(out)          :: cancel_step
+  double precision, intent(out) :: rho
+
+  ! Computes rho
+  call trust_region_rho(prev_criterion,criterion,criterion_model,rho)
+  
+  if (nb_iter == 0) then
+    nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled  
+  endif
+
+  ! If rho < thresh_rho -> give something in output to cancel the step
+  if (rho >= thresh_rho) then !0.1d0) then
+     ! The step is accepted
+     cancel_step = .False.
+  else
+     ! The step is rejected
+     cancel_step = .True.
+     print*, '***********************'
+     print*, 'Step cancel : rho <', thresh_rho
+     print*, '***********************'
+  endif
+  
+end subroutine
+#+END_SRC
+
+** Template for MOs
+#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt
+subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list)
+
+  implicit none
+
+  ! Variables
+
+  ! In
+  integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+
+  ! Out
+  ! Rien ou un truc pour savoir si ça c'est bien passé
+  
+  ! Internal
+  double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:)
+  double precision, allocatable :: prev_mos(:,:)
+  double precision              :: criterion, prev_criterion, criterion_model
+  double precision              :: delta, rho
+  logical                       :: not_converged, cancel_step, must_exit, enforce_step_cancellation
+  integer                       :: nb_iter, info, nb_sub_iter
+  integer                       :: i,j,tmp_i,tmp_j
+
+  allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size))
+  allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num))
+  allocate(prev_mos(ao_num, mo_num))
+
+  ! Provide the criterion, but unnecessary because it's done 
+  ! automatically
+  PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
+
+  ! Initialization
+  delta = 0d0 
+  nb_iter = 0 ! Must start at 0 !!!
+  rho = 0.5d0 ! Must start at 0.5 
+  not_converged = .True. ! Must be true
+
+  ! Compute the criterion before the loop
+  prev_criterion = C_PROVIDER
+
+  do while (not_converged)
+
+      print*,''
+      print*,'******************'
+      print*,'Iteration', nb_iter
+      print*,'******************'
+      print*,''
+
+      ! The new hessian and gradient are computed at the end of the previous iteration
+      ! Diagonalization of the hessian 
+      call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W)
+
+      cancel_step = .True. ! To enter in the loop just after
+      nb_sub_iter = 0
+
+      ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
+      do while (cancel_step)
+
+          print*,'-----------------------------'
+          print*,'Iteration:', nb_iter
+          print*,'Sub iteration:', nb_sub_iter
+          print*,'-----------------------------'
+
+          ! Hessian,gradient,Criterion -> x 
+          call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, &
+               prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit)
+
+          if (must_exit) then
+              ! if step_in_trust_region sets must_exit on true for numerical reasons
+              print*,'trust_region_step_w_expected_e sent the message : Exit'
+              exit
+          endif
+
+          ! 1D tmp -> 2D tmp 
+          call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x)
+
+          ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size)
+          call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation)
+
+          if (enforce_step_cancellation) then
+            print*, 'Forces the step cancellation, too large error in the rotation matrix'
+            rho = 0d0
+            cycle
+          endif
+
+          ! tmp_R to R, subspace to full space
+          call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R)
+
+          ! Rotation of the MOs
+          call apply_mo_rotation(R, prev_mos)
+
+          ! touch mo_coef
+          call clear_mo_map ! Only if you are using the bi-electronic integrals
+          ! mo_coef becomes valid
+          ! And avoid the recomputation of the providers which depend of mo_coef
+          TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
+
+          ! To update the other parameters if needed
+          call #update_parameters()
+
+          ! To enforce the program to provide new criterion after the update
+          ! of the parameters
+          FREE C_PROVIDER
+          PROVIDE C_PROVIDER
+          criterion = C_PROVIDER
+
+          ! Criterion -> step accepted or rejected 
+          call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step)
+
+          ! Cancellation of the step ?
+          if (cancel_step) then
+              ! Replacement by the previous MOs
+              mo_coef = prev_mos
+              ! call save_mos() ! depends of the time for 1 iteration
+
+              ! No need to clear_mo_map since we don't recompute the gradient and the hessian
+              ! mo_coef becomes valid
+              ! Avoid the recomputation of the providers which depend of mo_coef
+              TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER
+          else
+              ! The step is accepted:
+              ! criterion -> prev criterion
+
+              ! The replacement "criterion -> prev criterion" is already done
+              ! in trust_region_rho, so if the criterion does not have a reason
+              ! to change, it will change nothing for the criterion and will 
+              ! force the program to provide the new hessian, gradient and 
+              ! convergence criterion for the next iteration.
+              ! But in the case of orbital optimization we diagonalize the CI 
+              ! matrix after the "FREE" statement, so the criterion will change
+      
+              FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
+              PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
+              prev_criterion = C_PROVIDER
+              
+          endif      
+
+          nb_sub_iter = nb_sub_iter + 1
+      enddo
+
+      ! call save_mos() ! depends of the time for 1 iteration
+
+      ! To exit the external loop if must_exit = .True.
+      if (must_exit) then
+          exit
+      endif 
+
+      ! Step accepted, nb iteration + 1
+      nb_iter = nb_iter + 1
+
+      ! Provide the convergence criterion
+      ! Provide the gradient and the hessian for the next iteration
+      PROVIDE cc_PROVIDER
+
+      ! To exit
+      if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then
+        not_converged = .False.
+      endif
+
+      if (nb_iter > optimization_max_nb_iter) then
+        not_converged = .False.
+      endif
+
+      if (delta < thresh_delta) then
+        not_converged = .False.
+      endif
+
+  enddo
+
+  ! Save the final MOs
+  call save_mos()
+
+  ! Diagonalization of the hessian 
+  ! (To see the eigenvalues at the end of the optimization)
+  call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W)
+  
+  deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos)
+
+end
+#+END_SRC
+
+** Cartesian version
+#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt
+subroutine algo_trust_cartesian_template(tmp_n)
+
+  implicit none
+
+  ! Variables
+
+  ! In
+  integer, intent(in) :: tmp_n
+  
+  ! Out
+  ! Rien ou un truc pour savoir si ça c'est bien passé
+  
+  ! Internal
+  double precision, allocatable :: e_val(:), W(:,:), tmp_x(:)
+  double precision              :: criterion, prev_criterion, criterion_model
+  double precision              :: delta, rho
+  logical                       :: not_converged, cancel_step, must_exit
+  integer                       :: nb_iter, nb_sub_iter
+  integer                       :: i,j
+
+  allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n))
+
+  PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER
+
+  ! Initialization
+  delta = 0d0 
+  nb_iter = 0 ! Must start at 0 !!!
+  rho = 0.5d0 ! Must start at 0.5 
+  not_converged = .True. ! Must be true
+
+  ! Compute the criterion before the loop
+  prev_criterion = C_PROVIDER
+
+  do while (not_converged)
+
+      print*,''
+      print*,'******************'
+      print*,'Iteration', nb_iter
+      print*,'******************'
+      print*,''
+
+      if (nb_iter > 0) then
+        PROVIDE H_PROVIDER g_PROVIDER
+      endif
+
+      ! Diagonalization of the hessian 
+      call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W)
+
+      cancel_step = .True. ! To enter in the loop just after
+      nb_sub_iter = 0
+
+      ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
+      do while (cancel_step)
+
+          print*,'-----------------------------'
+          print*,'Iteration:', nb_iter
+          print*,'Sub iteration:', nb_sub_iter
+          print*,'-----------------------------'
+
+          ! Hessian,gradient,Criterion -> x 
+          call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, &
+               prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit)
+
+          if (must_exit) then
+              ! if step_in_trust_region sets must_exit on true for numerical reasons
+              print*,'trust_region_step_w_expected_e sent the message : Exit'
+              exit
+          endif
+
+          ! New coordinates, check the sign 
+          X_PROVIDER = X_PROVIDER - tmp_x
+
+          ! touch X_PROVIDER
+          TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER 
+
+          ! To update the other parameters if needed
+          call #update_parameters()
+
+          ! New criterion
+          PROVIDE C_PROVIDER ! Unnecessary
+          criterion = C_PROVIDER
+
+          ! Criterion -> step accepted or rejected 
+          call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step)
+
+          ! Cancel the previous step
+          if (cancel_step) then
+              ! Replacement by the previous coordinates, check the sign 
+              X_PROVIDER = X_PROVIDER + tmp_x
+
+              ! Avoid the recomputation of the hessian and the gradient
+              TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER
+          endif      
+
+          nb_sub_iter = nb_sub_iter + 1
+      enddo
+
+      ! To exit the external loop if must_exit = .True.
+      if (must_exit) then
+          exit
+      endif 
+
+      ! Step accepted, nb iteration + 1
+      nb_iter = nb_iter + 1
+
+      PROVIDE cc_PROVIDER
+
+      ! To exit
+      if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then
+        not_converged = .False.
+      endif
+
+      if (nb_iter > optimization_max_nb_iter) then
+        not_converged = .False.
+      endif
+
+      if (delta < thresh_delta) then
+        not_converged = .False.
+      endif
+
+  enddo
+  
+ deallocate(e_val, W, tmp_x)
+
+end
+#+END_SRC
+
+** Script template
+#+BEGIN_SRC bash :tangle script_template_mos.sh
+#!/bin/bash
+
+your_file=
+
+your_C_PROVIDER=
+your_H_PROVIDER=
+your_g_PROVIDER=
+your_cc_PROVIDER=
+
+sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file
+sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file
+sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file
+sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file
+#+END_SRC
+
+#+BEGIN_SRC bash :tangle script_template_xyz.sh
+#!/bin/bash
+
+your_file=
+
+your_C_PROVIDER=
+your_X_PROVIDER=
+your_H_PROVIDER=
+your_g_PROVIDER=
+your_cc_PROVIDER=
+
+sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file
+sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file
+sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file
+sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file
+sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file
+#+END_SRC
+
--- a/src/utils_trust_region/apply_mo_rotation.irp.f
+++ b/src/utils_trust_region/apply_mo_rotation.irp.f
@ -0,0 +1,85 @@
+! Apply MO rotation
+! Subroutine to apply the rotation matrix to the coefficients of the
+! MOs.
+
+! New MOs = Old MOs . Rotation matrix
+
+! *Compute the new MOs with the previous MOs and a rotation matrix*
+
+! Provided:
+! | mo_num                 | integer          | number of MOs           |
+! | ao_num                 | integer          | number of AOs           |
+! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs |
+
+! Intent in:
+! | R(mo_num,mo_num) | double precision | rotation matrix |
+
+! Intent out:
+! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation |
+
+! Internal:
+! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation |
+! | i,j                    | integer          | indexes                      |
+
+subroutine apply_mo_rotation(R,prev_mos)
+  
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the new MOs knowing the rotation matrix
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  double precision, intent(in)  :: R(mo_num,mo_num)
+
+  ! out 
+  double precision, intent(out) :: prev_mos(ao_num,mo_num)
+  
+  ! internal
+  double precision, allocatable :: new_mos(:,:)
+  integer                       :: i,j
+  double precision              :: t1,t2,t3
+
+  print*,''
+  print*,'---apply_mo_rotation---'
+
+  call wall_time(t1)  
+
+  ! Allocation
+  allocate(new_mos(ao_num,mo_num))
+  
+  ! Calculation
+
+  ! Product of old MOs (mo_coef) by Rotation matrix (R) 
+  call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1))
+
+  prev_mos = mo_coef
+  mo_coef = new_mos
+
+  !if (debug) then  
+  !  print*,'New mo_coef : '
+  !  do i = 1, mo_num
+  !    write(*,'(100(F10.5))') mo_coef(i,:)
+  !  enddo
+  !endif
+
+  ! Save the new MOs and change the label
+  mo_label = 'MCSCF'
+  !call save_mos
+  call ezfio_set_determinants_mo_label(mo_label)
+  
+  !print*,'Done, MOs saved'
+
+  ! Deallocation, end
+  deallocate(new_mos)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in apply mo rotation:', t3
+  print*,'---End apply_mo_rotation---'
+
+end subroutine
--- a/src/utils_trust_region/apply_mo_rotation.org
+++ b/src/utils_trust_region/apply_mo_rotation.org
@ -0,0 +1,86 @@
+* Apply MO rotation
+Subroutine to apply the rotation matrix to the coefficients of the
+MOs.
+
+New MOs = Old MOs . Rotation matrix
+
+*Compute the new MOs with the previous MOs and a rotation matrix*
+
+Provided:
+| mo_num                 | integer          | number of MOs           |
+| ao_num                 | integer          | number of AOs           |
+| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs |
+
+Intent in:
+| R(mo_num,mo_num) | double precision | rotation matrix |
+
+Intent out:
+| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation |
+
+Internal:
+| new_mos(ao_num,mo_num) | double precision | MOs after the rotation |
+| i,j                    | integer          | indexes                      |
+#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f
+subroutine apply_mo_rotation(R,prev_mos)
+  
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the new MOs knowing the rotation matrix
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  double precision, intent(in)  :: R(mo_num,mo_num)
+
+  ! out 
+  double precision, intent(out) :: prev_mos(ao_num,mo_num)
+  
+  ! internal
+  double precision, allocatable :: new_mos(:,:)
+  integer                       :: i,j
+  double precision              :: t1,t2,t3
+
+  print*,''
+  print*,'---apply_mo_rotation---'
+
+  call wall_time(t1)  
+
+  ! Allocation
+  allocate(new_mos(ao_num,mo_num))
+  
+  ! Calculation
+
+  ! Product of old MOs (mo_coef) by Rotation matrix (R) 
+  call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1))
+
+  prev_mos = mo_coef
+  mo_coef = new_mos
+
+  !if (debug) then  
+  !  print*,'New mo_coef : '
+  !  do i = 1, mo_num
+  !    write(*,'(100(F10.5))') mo_coef(i,:)
+  !  enddo
+  !endif
+
+  ! Save the new MOs and change the label
+  mo_label = 'MCSCF'
+  !call save_mos
+  call ezfio_set_determinants_mo_label(mo_label)
+  
+  !print*,'Done, MOs saved'
+
+  ! Deallocation, end
+  deallocate(new_mos)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in apply mo rotation:', t3
+  print*,'---End apply_mo_rotation---'
+
+end subroutine
+#+END_SRC
--- a/src/utils_trust_region/mat_to_vec_index.irp.f
+++ b/src/utils_trust_region/mat_to_vec_index.irp.f
@ -0,0 +1,61 @@
+! Matrix to vector index
+
+! *Compute the index i of a vector element from the indexes p,q of a
+! matrix element*
+
+! Lower diagonal matrix (p,q), p > q -> vector (i)
+
+! If a matrix is antisymmetric it can be reshaped as a vector. And the
+! vector can be reshaped as an antisymmetric matrix
+
+! \begin{align*}
+! \begin{pmatrix}
+! 0 & -1 & -2 & -4 \\
+! 1 & 0  & -3 & -5 \\
+! 2 & 3 & 0  & -6  \\
+! 4 & 5 & 6 & 0
+! \end{pmatrix}
+! \Leftrightarrow
+! \begin{pmatrix}
+! 1 & 2 & 3 & 4 & 5 & 6
+! \end{pmatrix}
+! \end{align*}
+
+! !!! Here the algorithm only work for the lower diagonal !!!
+
+! Input:
+! | p,q | integer | indexes of a matrix element in the lower diagonal |
+! |     |         | p > q, q -> column                                |
+! |     |         | p -> row,                                         |
+! |     |         | q -> column                                       |
+
+! Input:
+! | i | integer | corresponding index in the vector |
+
+
+subroutine mat_to_vec_index(p,q,i)
+
+  include 'pi.h'
+
+  implicit none
+  
+  ! Variables
+  
+  ! in
+  integer, intent(in) :: p,q
+  
+  ! out
+  integer, intent(out) :: i 
+
+  ! internal
+  integer :: a,b
+  double precision :: da
+
+  ! Calculation
+ 
+  a = p-1
+  b = a*(a-1)/2
+  
+  i = q+b
+
+end subroutine
--- a/src/utils_trust_region/mat_to_vec_index.org
+++ b/src/utils_trust_region/mat_to_vec_index.org
@ -0,0 +1,63 @@
+* Matrix to vector index
+
+*Compute the index i of a vector element from the indexes p,q of a
+matrix element*
+
+Lower diagonal matrix (p,q), p > q -> vector (i)
+
+If a matrix is antisymmetric it can be reshaped as a vector. And the
+vector can be reshaped as an antisymmetric matrix
+
+\begin{align*}
+\begin{pmatrix}
+0 & -1 & -2 & -4 \\
+1 & 0  & -3 & -5 \\
+2 & 3 & 0  & -6  \\
+4 & 5 & 6 & 0
+\end{pmatrix}
+\Leftrightarrow
+\begin{pmatrix}
+1 & 2 & 3 & 4 & 5 & 6
+\end{pmatrix}
+\end{align*}
+
+!!! Here the algorithm only work for the lower diagonal !!!
+
+Input:
+| p,q | integer | indexes of a matrix element in the lower diagonal |
+|     |         | p > q, q -> column                                |
+|     |         | p -> row,                                         |
+|     |         | q -> column                                       |
+
+Input:
+| i | integer | corresponding index in the vector |
+
+#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f
+subroutine mat_to_vec_index(p,q,i)
+
+  include 'pi.h'
+
+  implicit none
+  
+  ! Variables
+  
+  ! in
+  integer, intent(in) :: p,q
+  
+  ! out
+  integer, intent(out) :: i 
+
+  ! internal
+  integer :: a,b
+  double precision :: da
+
+  ! Calculation
+ 
+  a = p-1
+  b = a*(a-1)/2
+  
+  i = q+b
+
+end subroutine
+#+END_SRC
+
--- a/src/utils_trust_region/pi.h
+++ b/src/utils_trust_region/pi.h
@ -0,0 +1,2 @@
+  !logical, parameter :: debug=.False.
+  double precision, parameter :: pi = 3.1415926535897932d0
--- a/src/utils_trust_region/rotation_matrix.irp.f
+++ b/src/utils_trust_region/rotation_matrix.irp.f
@ -0,0 +1,443 @@
+! Rotation matrix
+
+! *Build a rotation matrix from an antisymmetric matrix*
+
+! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as :
+! $$
+! \textbf{R}=\exp(\textbf{A})
+! $$
+
+! So :
+! \begin{align*}
+! \textbf{R}=& \exp(\textbf{A}) \\
+! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\
+! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A}
+! \end{align*}
+
+! With :
+! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$
+! $\tau$ : $\sqrt{-x}$
+! $x$ : eigenvalues of $\textbf{A}^2$
+
+! Input:
+! | A(n,n) | double precision | antisymmetric matrix                                            |
+! | n      | integer          | number of columns of the A matrix                               |
+! | LDA    | integer          | specifies the leading dimension of A, must be at least max(1,n) |
+! | LDR    | integer          | specifies the leading dimension of R, must be at least max(1,n) |
+
+! Output:
+! | R(n,n) | double precision | Rotation matrix                                      |
+! | info   | integer          | if info = 0, the execution is successful             |
+! |        |                  | if info = k, the k-th parameter has an illegal value |
+! |        |                  | if info = -k, the algorithm failed                   |
+
+! Internal:
+! | B(n,n)        | double precision | B = A.A                                                       |
+! | work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork)                 |
+! | lwork         | integer          | dimension of the syev work array >= max(1, 3n-1)              |
+! | W(n,n)        | double precision | eigenvectors of B                                             |
+! | e_val(n)      | double precision | eigenvalues of B                                              |
+! | m_diag(n,n)   | double precision | diagonal matrix with the eigenvalues of B                     |
+! | cos_tau(n,n)  | double precision | diagonal matrix with cos(tau) values                          |
+! | sin_tau(n,n)  | double precision | diagonal matrix with sin cos(tau) values                      |
+! | tau_m1(n,n)   | double precision | diagonal matrix with (tau)^-1 values                          |
+! | part_1(n,n)   | double precision | matrix W.cos_tau.W^t                                          |
+! | part_1a(n,n)  | double precision | matrix cos_tau.W^t                                            |
+! | part_2(n,n)   | double precision | matrix W.tau_m1.sin_tau.W^t.A                                 |
+! | part_2a(n,n)  | double precision | matrix W^t.A                                                  |
+! | part_2b(n,n)  | double precision | matrix sin_tau.W^t.A                                          |
+! | part_2c(n,n)  | double precision | matrix tau_m1.sin_tau.W^t.A                                   |
+! | RR_t(n,n)     | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 |
+! | norm          | integer          | norm of R.R^t-1, must be equal to 0                           |
+! | i,j           | integer          | indexes                                                       |
+
+! Functions:
+! | dnrm2  | double precision | Lapack function, compute the norm of a matrix |
+! | disnan | logical          | Lapack function, check if an element is NaN   |
+
+
+
+subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Rotation matrix to rotate the molecular orbitals.
+  ! If the rotation is too large the transformation is not unitary and must be cancelled.
+  END_DOC
+
+  include 'pi.h'
+
+  ! Variables
+
+  ! in
+  integer, intent(in)             :: n,LDA,LDR
+  double precision, intent(inout) :: A(LDA,n)
+
+  ! out
+  double precision, intent(out)   :: R(LDR,n)
+  integer, intent(out)            :: info
+  logical, intent(out)            :: enforce_step_cancellation
+
+  ! internal
+  double precision, allocatable   :: B(:,:) 
+  double precision, allocatable   :: work(:,:) 
+  double precision, allocatable   :: W(:,:), e_val(:)
+  double precision, allocatable   :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:)
+  double precision, allocatable   :: part_1(:,:),part_1a(:,:)
+  double precision, allocatable   :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:)
+  double precision, allocatable   :: RR_t(:,:)
+  integer                         :: i,j
+  integer                         :: info2, lwork ! for dsyev
+  double precision                :: norm, max_elem, max_elem_A, t1,t2,t3
+
+  ! function
+  double precision              :: dnrm2
+  logical                       :: disnan
+
+  print*,''
+  print*,'---rotation_matrix---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(B(n,n))
+  allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n))
+  allocate(W(n,n),e_val(n))
+  allocate(part_1(n,n),part_1a(n,n))
+  allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n))
+  allocate(RR_t(n,n))
+
+! Pre-conditions
+
+! Initialization
+info=0
+enforce_step_cancellation = .False.
+
+! Size of matrix A must be at least 1 by 1
+if (n<1) then
+  info = 3
+  print*, 'WARNING: invalid parameter 5'
+  print*, 'n<1'
+  return
+endif
+
+! Leading dimension of A must be >= n
+if (LDA < n) then
+  info = 25
+  print*, 'WARNING: invalid parameter 2 or 5'
+  print*, 'LDA < n'
+  return
+endif
+
+! Leading dimension of A must be >= n
+if (LDR < n) then
+  info = 4
+  print*, 'WARNING: invalid parameter 4'
+  print*, 'LDR < n'
+  return
+endif
+
+! Matrix elements of A must by non-NaN
+do j = 1, n
+  do i = 1, n
+    if (disnan(A(i,j))) then
+      info=1
+      print*, 'WARNING: invalid parameter 1'
+      print*, 'NaN element in A matrix'
+      return
+    endif
+  enddo
+enddo
+
+do i = 1, n
+  if (A(i,i) /= 0d0) then
+    print*, 'WARNING: matrix A is not antisymmetric'
+    print*, 'Non 0 element on the diagonal', i, A(i,i)
+    call ABORT
+  endif
+enddo
+
+do j = 1, n
+  do i = 1, n
+    if (A(i,j)+A(j,i)>1d-16) then
+      print*, 'WANRING: matrix A is not antisymmetric'
+      print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i)
+      print*, 'diff:', A(i,j)+A(j,i)
+      call ABORT 
+    endif
+  enddo
+enddo
+
+! Fix for too big elements ! bad idea better to cancel if the error is too big
+!do j = 1, n
+!  do i = 1, n
+!    A(i,j) = mod(A(i,j),2d0*pi)
+!    if (dabs(A(i,j)) > pi) then
+!      A(i,j) = 0d0
+!    endif
+!  enddo
+!enddo
+
+max_elem_A = 0d0
+do j = 1, n
+  do i = 1, n
+    if (ABS(A(i,j)) > ABS(max_elem_A)) then
+      max_elem_A = A(i,j)
+    endif
+  enddo
+enddo
+print*,'max element in A', max_elem_A
+
+if (ABS(max_elem_A) > 2 * pi) then
+   print*,''
+   print*,'WARNING: ABS(max_elem_A) > 2 pi '
+   print*,''
+endif
+
+! B=A.A
+!     - Calculation of the matrix $\textbf{B} = \textbf{A}^2$
+!     - Diagonalization of $\textbf{B}$ 
+!     W, the eigenvectors
+!     e_val, the eigenvalues
+
+
+! Compute B=A.A
+
+call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1))
+
+! Copy B in W, diagonalization will put the eigenvectors in W
+W=B
+
+! Diagonalization of B
+! Eigenvalues -> e_val
+! Eigenvectors -> W
+lwork = 3*n-1
+allocate(work(lwork,n))
+
+print*,'Starting diagonalization ...'
+
+call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2)
+
+deallocate(work)
+
+if (info2 == 0) then
+   print*, 'Diagonalization : Done'
+elseif (info2 < 0) then
+   print*, 'WARNING: error in the diagonalization'
+   print*, 'Illegal value of the ', info2,'-th parameter'
+else
+   print*, "WARNING: Diagonalization failed to converge"
+endif
+
+! Tau^-1, cos(tau), sin(tau)
+!     $$\tau = \sqrt{-x}$$
+!     - Calculation of $\cos(\tau)$  $\Leftrightarrow$ $\cos(\sqrt{-x})$
+!     - Calculation of $\sin(\tau)$  $\Leftrightarrow$ $\sin(\sqrt{-x})$
+!     - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$
+!     These matrices are diagonals
+
+! Diagonal matrix m_diag
+do j = 1, n
+   if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems
+      e_val(j) = 0.d0
+   else
+      e_val(j) = - e_val(j)
+   endif
+enddo
+
+m_diag = 0.d0
+do i = 1, n
+   m_diag(i,i) = e_val(i)
+enddo
+
+! cos_tau
+do j = 1, n
+   do i = 1, n
+      if (i==j) then
+         cos_tau(i,j) = dcos(dsqrt(e_val(i)))
+      else
+         cos_tau(i,j) = 0d0
+      endif
+   enddo
+enddo
+
+! sin_tau
+do j = 1, n
+   do i = 1, n
+      if (i==j) then
+         sin_tau(i,j) = dsin(dsqrt(e_val(i)))
+      else
+         sin_tau(i,j) = 0d0
+      endif
+   enddo
+enddo
+
+! Debug, display the cos_tau and sin_tau matrix
+!if (debug) then
+!   print*, 'cos_tau'
+!   do i = 1, n
+!      print*, cos_tau(i,:)
+!   enddo
+!   print*, 'sin_tau'
+!   do i = 1, n
+!      print*, sin_tau(i,:)
+!   enddo
+!endif
+
+! tau^-1
+do j = 1, n
+   do i = 1, n
+      if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small
+         tau_m1(i,j) = 1d0/(dsqrt(e_val(i)))
+      else
+         tau_m1(i,j) = 0d0
+      endif
+   enddo
+enddo
+
+max_elem = 0d0
+do i = 1, n
+   if (ABS(tau_m1(i,i)) > ABS(max_elem)) then
+      max_elem = tau_m1(i,i)
+   endif
+enddo
+print*,'max elem tau^-1:', max_elem
+
+! Debug
+!print*,'eigenvalues:'
+!do i = 1, n 
+!  print*, e_val(i)
+!enddo
+
+!Debug, display tau^-1
+!if (debug) then
+!   print*, 'tau^-1'
+!   do i = 1, n
+!      print*,tau_m1(i,:)
+!   enddo
+!endif
+
+! Rotation matrix 
+!     \begin{align*}
+!     \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
+!     \end{align*}
+!     \begin{align*}
+!     \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger}
+!     \end{align*}
+!     \begin{align*}
+!     \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
+!     \end{align*}
+
+!     First:
+!     part_1 = dgemm(W, dgemm(cos_tau, W^t))
+!     part_1a = dgemm(cos_tau, W^t)
+!     part_1 = dgemm(W, part_1a)
+!     And:
+!     part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A))))
+!     part_2a = dgemm(W^t, A)
+!     part_2b = dgemm(sin_tau, part_2a)
+!     part_2c = dgemm(tau_m1, part_2b)
+!     part_2 = dgemm(W, part_2c)
+!     Finally:
+!     Rotation matrix, R = part_1+part_2
+
+!     If $R$ is a rotation matrix:
+!     $R.R^T=R^T.R=\textbf{1}$
+
+! part_1
+call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1))
+call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1))
+
+! part_2
+call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1))
+call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1))
+call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1))
+call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1))
+
+! Rotation matrix R
+R = part_1 + part_2
+
+! Matrix check
+! R.R^t and R^t.R must be equal to identity matrix
+do j = 1, n
+   do i=1,n
+      if (i==j) then
+         RR_t(i,j) = 1d0
+      else
+         RR_t(i,j) = 0d0
+      endif
+   enddo
+enddo
+
+call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1))
+
+norm = dnrm2(n*n,RR_t,1)
+print*, 'Rotation matrix check, norm R.R^T = ', norm 
+
+! Debug
+!if (debug) then
+!   print*, 'RR_t'
+!   do i = 1, n
+!      print*, RR_t(i,:)
+!   enddo
+!endif
+
+! Post conditions
+
+! Check if R.R^T=1
+max_elem = 0d0
+do j = 1, n
+   do i = 1, n 
+      if (ABS(RR_t(i,j)) > ABS(max_elem)) then
+         max_elem = RR_t(i,j)
+      endif
+   enddo
+enddo
+
+print*, 'Max error in R.R^T:', max_elem
+print*, 'e_val(1):', e_val(1)
+print*, 'e_val(n):', e_val(n)
+print*, 'max elem in A:', max_elem_A
+
+if (ABS(max_elem) > 1d-12) then
+  print*, 'WARNING: max error in R.R^T > 1d-12'
+  print*, 'Enforce the step cancellation'
+  enforce_step_cancellation = .True.
+endif
+
+! Matrix elements of R must by non-NaN
+do j = 1,n
+   do i = 1,LDR
+      if (disnan(R(i,j))) then
+         info = 666
+         print*, 'NaN in rotation matrix'
+         call ABORT
+      endif
+   enddo
+enddo
+
+! Display
+!if (debug) then
+!   print*,'Rotation matrix :'
+!   do i = 1, n
+!      write(*,'(100(F10.5))') R(i,:)
+!   enddo
+!endif
+
+! Deallocation, end
+
+deallocate(B)
+  deallocate(m_diag,cos_tau,sin_tau,tau_m1)
+  deallocate(W,e_val)
+  deallocate(part_1,part_1a)
+  deallocate(part_2,part_2a,part_2b,part_2c)
+  deallocate(RR_t)
+
+  call wall_time(t2)
+  t3 = t2-t1
+  print*,'Time in rotation matrix:', t3
+
+  print*,'---End rotation_matrix---'
+
+end subroutine
--- a/src/utils_trust_region/rotation_matrix.org
+++ b/src/utils_trust_region/rotation_matrix.org
@ -0,0 +1,454 @@
+* Rotation matrix
+
+*Build a rotation matrix from an antisymmetric matrix*
+
+Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as :
+$$
+\textbf{R}=\exp(\textbf{A})
+$$
+
+So :
+\begin{align*}
+\textbf{R}=& \exp(\textbf{A}) \\
+=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\
+=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A}
+\end{align*}
+
+With :
+$\textbf{W}$ : eigenvectors of $\textbf{A}^2$
+$\tau$ : $\sqrt{-x}$
+$x$ : eigenvalues of $\textbf{A}^2$
+
+Input:
+| A(n,n) | double precision | antisymmetric matrix                                            |
+| n      | integer          | number of columns of the A matrix                               |
+| LDA    | integer          | specifies the leading dimension of A, must be at least max(1,n) |
+| LDR    | integer          | specifies the leading dimension of R, must be at least max(1,n) |
+
+Output:
+| R(n,n) | double precision | Rotation matrix                                      |
+| info   | integer          | if info = 0, the execution is successful             |
+|        |                  | if info = k, the k-th parameter has an illegal value |
+|        |                  | if info = -k, the algorithm failed                   |
+
+Internal:
+| B(n,n)        | double precision | B = A.A                                                       |
+| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork)                 |
+| lwork         | integer          | dimension of the syev work array >= max(1, 3n-1)              |
+| W(n,n)        | double precision | eigenvectors of B                                             |
+| e_val(n)      | double precision | eigenvalues of B                                              |
+| m_diag(n,n)   | double precision | diagonal matrix with the eigenvalues of B                     |
+| cos_tau(n,n)  | double precision | diagonal matrix with cos(tau) values                          |
+| sin_tau(n,n)  | double precision | diagonal matrix with sin cos(tau) values                      |
+| tau_m1(n,n)   | double precision | diagonal matrix with (tau)^-1 values                          |
+| part_1(n,n)   | double precision | matrix W.cos_tau.W^t                                          |
+| part_1a(n,n)  | double precision | matrix cos_tau.W^t                                            |
+| part_2(n,n)   | double precision | matrix W.tau_m1.sin_tau.W^t.A                                 |
+| part_2a(n,n)  | double precision | matrix W^t.A                                                  |
+| part_2b(n,n)  | double precision | matrix sin_tau.W^t.A                                          |
+| part_2c(n,n)  | double precision | matrix tau_m1.sin_tau.W^t.A                                   |
+| RR_t(n,n)     | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 |
+| norm          | integer          | norm of R.R^t-1, must be equal to 0                           |
+| i,j           | integer          | indexes                                                       |
+
+Functions:
+| dnrm2  | double precision | Lapack function, compute the norm of a matrix |
+| disnan | logical          | Lapack function, check if an element is NaN   |
+
+
+#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Rotation matrix to rotate the molecular orbitals.
+  ! If the rotation is too large the transformation is not unitary and must be cancelled.
+  END_DOC
+
+  include 'pi.h'
+
+  ! Variables
+
+  ! in
+  integer, intent(in)             :: n,LDA,LDR
+  double precision, intent(inout) :: A(LDA,n)
+
+  ! out
+  double precision, intent(out)   :: R(LDR,n)
+  integer, intent(out)            :: info
+  logical, intent(out)            :: enforce_step_cancellation
+
+  ! internal
+  double precision, allocatable   :: B(:,:) 
+  double precision, allocatable   :: work(:,:) 
+  double precision, allocatable   :: W(:,:), e_val(:)
+  double precision, allocatable   :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:)
+  double precision, allocatable   :: part_1(:,:),part_1a(:,:)
+  double precision, allocatable   :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:)
+  double precision, allocatable   :: RR_t(:,:)
+  integer                         :: i,j
+  integer                         :: info2, lwork ! for dsyev
+  double precision                :: norm, max_elem, max_elem_A, t1,t2,t3
+
+  ! function
+  double precision              :: dnrm2
+  logical                       :: disnan
+
+  print*,''
+  print*,'---rotation_matrix---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(B(n,n))
+  allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n))
+  allocate(W(n,n),e_val(n))
+  allocate(part_1(n,n),part_1a(n,n))
+  allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n))
+  allocate(RR_t(n,n))
+#+END_SRC
+
+** Pre-conditions
+#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! Initialization
+  info=0
+  enforce_step_cancellation = .False.
+
+  ! Size of matrix A must be at least 1 by 1
+  if (n<1) then
+    info = 3
+    print*, 'WARNING: invalid parameter 5'
+    print*, 'n<1'
+    return
+  endif
+
+  ! Leading dimension of A must be >= n
+  if (LDA < n) then
+    info = 25
+    print*, 'WARNING: invalid parameter 2 or 5'
+    print*, 'LDA < n'
+    return
+  endif
+
+  ! Leading dimension of A must be >= n
+  if (LDR < n) then
+    info = 4
+    print*, 'WARNING: invalid parameter 4'
+    print*, 'LDR < n'
+    return
+  endif
+
+  ! Matrix elements of A must by non-NaN
+  do j = 1, n
+    do i = 1, n
+      if (disnan(A(i,j))) then
+        info=1
+        print*, 'WARNING: invalid parameter 1'
+        print*, 'NaN element in A matrix'
+        return
+      endif
+    enddo
+  enddo
+
+  do i = 1, n
+    if (A(i,i) /= 0d0) then
+      print*, 'WARNING: matrix A is not antisymmetric'
+      print*, 'Non 0 element on the diagonal', i, A(i,i)
+      call ABORT
+    endif
+  enddo
+
+  do j = 1, n
+    do i = 1, n
+      if (A(i,j)+A(j,i)>1d-16) then
+        print*, 'WANRING: matrix A is not antisymmetric'
+        print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i)
+        print*, 'diff:', A(i,j)+A(j,i)
+        call ABORT 
+      endif
+    enddo
+  enddo
+
+  ! Fix for too big elements ! bad idea better to cancel if the error is too big
+  !do j = 1, n
+  !  do i = 1, n
+  !    A(i,j) = mod(A(i,j),2d0*pi)
+  !    if (dabs(A(i,j)) > pi) then
+  !      A(i,j) = 0d0
+  !    endif
+  !  enddo
+  !enddo
+
+  max_elem_A = 0d0
+  do j = 1, n
+    do i = 1, n
+      if (ABS(A(i,j)) > ABS(max_elem_A)) then
+        max_elem_A = A(i,j)
+      endif
+    enddo
+  enddo
+  print*,'max element in A', max_elem_A
+
+  if (ABS(max_elem_A) > 2 * pi) then
+     print*,''
+     print*,'WARNING: ABS(max_elem_A) > 2 pi '
+     print*,''
+  endif
+
+#+END_SRC
+
+** Calculations
+
+*** B=A.A
+    - Calculation of the matrix $\textbf{B} = \textbf{A}^2$
+    - Diagonalization of $\textbf{B}$ 
+    W, the eigenvectors
+    e_val, the eigenvalues
+
+    #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! Compute B=A.A
+
+  call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1))
+
+  ! Copy B in W, diagonalization will put the eigenvectors in W
+  W=B
+
+  ! Diagonalization of B
+  ! Eigenvalues -> e_val
+  ! Eigenvectors -> W
+  lwork = 3*n-1
+  allocate(work(lwork,n))
+
+  print*,'Starting diagonalization ...'
+
+  call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2)
+
+  deallocate(work)
+
+  if (info2 == 0) then
+     print*, 'Diagonalization : Done'
+  elseif (info2 < 0) then
+     print*, 'WARNING: error in the diagonalization'
+     print*, 'Illegal value of the ', info2,'-th parameter'
+  else
+     print*, "WARNING: Diagonalization failed to converge"
+  endif
+    #+END_SRC
+
+*** Tau^-1, cos(tau), sin(tau)
+    $$\tau = \sqrt{-x}$$
+    - Calculation of $\cos(\tau)$  $\Leftrightarrow$ $\cos(\sqrt{-x})$
+    - Calculation of $\sin(\tau)$  $\Leftrightarrow$ $\sin(\sqrt{-x})$
+    - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$
+    These matrices are diagonals
+    #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! Diagonal matrix m_diag
+  do j = 1, n
+     if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems
+        e_val(j) = 0.d0
+     else
+        e_val(j) = - e_val(j)
+     endif
+  enddo
+
+  m_diag = 0.d0
+  do i = 1, n
+     m_diag(i,i) = e_val(i)
+  enddo
+
+  ! cos_tau
+  do j = 1, n
+     do i = 1, n
+        if (i==j) then
+           cos_tau(i,j) = dcos(dsqrt(e_val(i)))
+        else
+           cos_tau(i,j) = 0d0
+        endif
+     enddo
+  enddo
+
+  ! sin_tau
+  do j = 1, n
+     do i = 1, n
+        if (i==j) then
+           sin_tau(i,j) = dsin(dsqrt(e_val(i)))
+        else
+           sin_tau(i,j) = 0d0
+        endif
+     enddo
+  enddo
+
+  ! Debug, display the cos_tau and sin_tau matrix
+  !if (debug) then
+  !   print*, 'cos_tau'
+  !   do i = 1, n
+  !      print*, cos_tau(i,:)
+  !   enddo
+  !   print*, 'sin_tau'
+  !   do i = 1, n
+  !      print*, sin_tau(i,:)
+  !   enddo
+  !endif
+
+  ! tau^-1
+  do j = 1, n
+     do i = 1, n
+        if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small
+           tau_m1(i,j) = 1d0/(dsqrt(e_val(i)))
+        else
+           tau_m1(i,j) = 0d0
+        endif
+     enddo
+  enddo
+
+  max_elem = 0d0
+  do i = 1, n
+     if (ABS(tau_m1(i,i)) > ABS(max_elem)) then
+        max_elem = tau_m1(i,i)
+     endif
+  enddo
+  print*,'max elem tau^-1:', max_elem
+
+  ! Debug
+  !print*,'eigenvalues:'
+  !do i = 1, n 
+  !  print*, e_val(i)
+  !enddo
+
+  !Debug, display tau^-1
+  !if (debug) then
+  !   print*, 'tau^-1'
+  !   do i = 1, n
+  !      print*,tau_m1(i,:)
+  !   enddo
+  !endif
+    #+END_SRC
+
+*** Rotation matrix 
+    \begin{align*}
+    \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
+    \end{align*}
+    \begin{align*}
+    \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger}
+    \end{align*}
+    \begin{align*}
+    \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
+    \end{align*}
+
+    First:
+    part_1 = dgemm(W, dgemm(cos_tau, W^t))
+    part_1a = dgemm(cos_tau, W^t)
+    part_1 = dgemm(W, part_1a)
+    And:
+    part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A))))
+    part_2a = dgemm(W^t, A)
+    part_2b = dgemm(sin_tau, part_2a)
+    part_2c = dgemm(tau_m1, part_2b)
+    part_2 = dgemm(W, part_2c)
+    Finally:
+    Rotation matrix, R = part_1+part_2
+
+    If $R$ is a rotation matrix:
+    $R.R^T=R^T.R=\textbf{1}$
+    #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! part_1
+  call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1))
+  call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1))
+
+  ! part_2
+  call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1))
+  call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1))
+  call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1))
+  call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1))
+
+  ! Rotation matrix R
+  R = part_1 + part_2
+
+  ! Matrix check
+  ! R.R^t and R^t.R must be equal to identity matrix
+  do j = 1, n
+     do i=1,n
+        if (i==j) then
+           RR_t(i,j) = 1d0
+        else
+           RR_t(i,j) = 0d0
+        endif
+     enddo
+  enddo
+
+  call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1))
+
+  norm = dnrm2(n*n,RR_t,1)
+  print*, 'Rotation matrix check, norm R.R^T = ', norm 
+
+  ! Debug
+  !if (debug) then
+  !   print*, 'RR_t'
+  !   do i = 1, n
+  !      print*, RR_t(i,:)
+  !   enddo
+  !endif
+    #+END_SRC
+
+*** Post conditions
+    #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! Check if R.R^T=1
+  max_elem = 0d0
+  do j = 1, n
+     do i = 1, n 
+        if (ABS(RR_t(i,j)) > ABS(max_elem)) then
+           max_elem = RR_t(i,j)
+        endif
+     enddo
+  enddo
+
+  print*, 'Max error in R.R^T:', max_elem
+  print*, 'e_val(1):', e_val(1)
+  print*, 'e_val(n):', e_val(n)
+  print*, 'max elem in A:', max_elem_A
+
+  if (ABS(max_elem) > 1d-12) then
+    print*, 'WARNING: max error in R.R^T > 1d-12'
+    print*, 'Enforce the step cancellation'
+    enforce_step_cancellation = .True.
+  endif
+
+  ! Matrix elements of R must by non-NaN
+  do j = 1,n
+     do i = 1,LDR
+        if (disnan(R(i,j))) then
+           info = 666
+           print*, 'NaN in rotation matrix'
+           call ABORT
+        endif
+     enddo
+  enddo
+
+  ! Display
+  !if (debug) then
+  !   print*,'Rotation matrix :'
+  !   do i = 1, n
+  !      write(*,'(100(F10.5))') R(i,:)
+  !   enddo
+  !endif
+    #+END_SRC
+
+** Deallocation, end
+   #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  deallocate(B)
+  deallocate(m_diag,cos_tau,sin_tau,tau_m1)
+  deallocate(W,e_val)
+  deallocate(part_1,part_1a)
+  deallocate(part_2,part_2a,part_2b,part_2c)
+  deallocate(RR_t)
+
+  call wall_time(t2)
+  t3 = t2-t1
+  print*,'Time in rotation matrix:', t3
+
+  print*,'---End rotation_matrix---'
+
+end subroutine
+   #+END_SRC
+
--- a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f
+++ b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f
@ -0,0 +1,64 @@
+! Rotation matrix in a subspace to rotation matrix in the full space
+
+! Usually, we are using a list of MOs, for exemple the active ones. When
+! we compute a rotation matrix to rotate the MOs, we just compute a
+! rotation matrix for these MOs in order to reduce the size of the
+! matrix which has to be computed. Since the computation of a rotation
+! matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to
+! reuce the number of MOs involved.
+! After that we replace the rotation matrix in the full space by
+! building the elements of the rotation matrix in the full space from
+! the elements of the rotation matrix in the subspace and adding some 0
+! on the extradiagonal elements and some 1 on the diagonal elements,
+! for the MOs that are not involved in the rotation.
+
+! Provided:
+! | mo_num      | integer | Number of MOs                 |
+
+! Input:
+! | m           | integer          | Size of tmp_list, m <= mo_num   |
+! | tmp_list(m) | integer          | List of MOs                     |
+! | tmp_R(m,m)  | double precision | Rotation matrix in the space of |
+! |             |                  | the MOs containing by tmp_list  |
+
+! Output:
+! | R(mo_num,mo_num | double precision | Rotation matrix in the space |
+! |                 |                  | of all the MOs               |
+
+! Internal:
+! | i,j         | integer | indexes in the full space |
+! | tmp_i,tmp_j | integer | indexes in the subspace   |
+
+
+subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R)
+
+  BEGIN_DOC
+  ! Compute the full rotation matrix from a smaller one
+  END_DOC
+  
+  implicit none
+
+  ! in
+  integer, intent(in)           :: m, tmp_list(m)
+  double precision, intent(in)  :: tmp_R(m,m)
+  
+  ! out
+  double precision, intent(out) :: R(mo_num,mo_num)
+ 
+  ! internal
+  integer                       :: i,j,tmp_i,tmp_j
+
+  ! tmp_R to R, subspace to full space
+  R = 0d0
+  do i = 1, mo_num
+    R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital
+  enddo
+  do tmp_j = 1, m
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, m
+      i = tmp_list(tmp_i)
+      R(i,j) = tmp_R(tmp_i,tmp_j)
+    enddo
+  enddo
+ 
+end
--- a/src/utils_trust_region/sub_to_full_rotation_matrix.org
+++ b/src/utils_trust_region/sub_to_full_rotation_matrix.org
@ -0,0 +1,65 @@
+* Rotation matrix in a subspace to rotation matrix in the full space
+
+Usually, we are using a list of MOs, for exemple the active ones. When
+we compute a rotation matrix to rotate the MOs, we just compute a
+rotation matrix for these MOs in order to reduce the size of the
+matrix which has to be computed. Since the computation of a rotation
+matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to
+reuce the number of MOs involved.
+After that we replace the rotation matrix in the full space by
+building the elements of the rotation matrix in the full space from
+the elements of the rotation matrix in the subspace and adding some 0
+on the extradiagonal elements and some 1 on the diagonal elements,
+for the MOs that are not involved in the rotation.
+
+Provided:
+| mo_num      | integer | Number of MOs                 |
+
+Input:
+| m           | integer          | Size of tmp_list, m <= mo_num   |
+| tmp_list(m) | integer          | List of MOs                     |
+| tmp_R(m,m)  | double precision | Rotation matrix in the space of |
+|             |                  | the MOs containing by tmp_list  |
+
+Output:
+| R(mo_num,mo_num | double precision | Rotation matrix in the space |
+|                 |                  | of all the MOs               |
+
+Internal:
+| i,j         | integer | indexes in the full space |
+| tmp_i,tmp_j | integer | indexes in the subspace   |
+
+#+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f 
+subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R)
+
+  BEGIN_DOC
+  ! Compute the full rotation matrix from a smaller one
+  END_DOC
+  
+  implicit none
+
+  ! in
+  integer, intent(in)           :: m, tmp_list(m)
+  double precision, intent(in)  :: tmp_R(m,m)
+  
+  ! out
+  double precision, intent(out) :: R(mo_num,mo_num)
+ 
+  ! internal
+  integer                       :: i,j,tmp_i,tmp_j
+
+  ! tmp_R to R, subspace to full space
+  R = 0d0
+  do i = 1, mo_num
+    R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital
+  enddo
+  do tmp_j = 1, m
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, m
+      i = tmp_list(tmp_i)
+      R(i,j) = tmp_R(tmp_i,tmp_j)
+    enddo
+  enddo
+ 
+end
+#+END_SRC
--- a/src/utils_trust_region/trust_region_expected_e.irp.f
+++ b/src/utils_trust_region/trust_region_expected_e.irp.f
@ -0,0 +1,119 @@
+! Predicted energy : e_model
+
+! *Compute the energy predicted by the Taylor series*
+
+! The energy is predicted using a Taylor expansion truncated at te 2nd
+! order :
+
+! \begin{align*}
+! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2)
+! \end{align*}
+
+! Input:
+! | n           | integer          | m*(m-1)/2                |
+! | v_grad(n)   | double precision | gradient                 |
+! | H(n,n)      | double precision | hessian                  |
+! | x(n)        | double precision | Step in the trust region |
+! | prev_energy | double precision | previous energy          |
+
+! Output:
+! | e_model | double precision | predicted energy after the rotation of the MOs |
+
+! Internal:
+! | part_1  | double precision | v_grad^T.x    |
+! | part_2  | double precision | 1/2 . x^T.H.x |
+! | part_2a | double precision | H.x           |
+! | i,j     | integer          | indexes       |
+
+! Function:
+! | ddot | double precision | dot product (Lapack) |
+
+
+subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model)
+   
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the expected criterion/energy after the application of the step x
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: v_grad(n),H(n,n),x(n)
+  double precision, intent(in)  :: prev_energy
+
+  ! out
+  double precision, intent(out) :: e_model
+
+  ! internal
+  double precision              :: part_1, part_2, t1,t2,t3
+  double precision, allocatable :: part_2a(:)
+
+  integer                       :: i,j
+
+  !Function
+  double precision              :: ddot
+
+  print*,''
+  print*,'---Trust_e_model---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(part_2a(n))
+
+! Calculations
+
+! part_1 corresponds to the product g.x
+! part_2a corresponds to the product H.x
+! part_2 corresponds to the product 0.5*(x^T.H.x)
+
+! TODO: remove the dot products
+
+
+! Product v_grad.x
+  part_1 = ddot(n,v_grad,1,x,1)
+ 
+  !if (debug) then
+    print*,'g.x : ', part_1
+  !endif  
+
+  ! Product H.x
+  call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1)
+
+  ! Product 1/2 . x^T.H.x
+  part_2 = 0.5d0 * ddot(n,x,1,part_2a,1)
+
+  !if (debug) then
+    print*,'1/2*x^T.H.x : ', part_2 
+  !endif
+
+  print*,'prev_energy', prev_energy
+
+  ! Sum
+  e_model = prev_energy + part_1 + part_2
+
+  ! Writing the predicted energy
+  print*, 'Predicted energy after the rotation : ', e_model
+  print*, 'Previous energy - predicted energy:', prev_energy - e_model
+  
+  ! Can be deleted, already in another subroutine
+  if (DABS(prev_energy - e_model) < 1d-12 ) then 
+    print*,'WARNING: ABS(prev_energy - e_model) < 1d-12'
+  endif
+
+  ! Deallocation
+  deallocate(part_2a)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust e model:', t3
+
+  print*,'---End trust_e_model---'
+  print*,''
+ 
+end subroutine
--- a/src/utils_trust_region/trust_region_expected_e.org
+++ b/src/utils_trust_region/trust_region_expected_e.org
@ -0,0 +1,121 @@
+* Predicted energy : e_model
+
+*Compute the energy predicted by the Taylor series*
+
+The energy is predicted using a Taylor expansion truncated at te 2nd
+order :
+
+\begin{align*}
+E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2)
+\end{align*}
+
+Input:
+| n           | integer          | m*(m-1)/2                |
+| v_grad(n)   | double precision | gradient                 |
+| H(n,n)      | double precision | hessian                  |
+| x(n)        | double precision | Step in the trust region |
+| prev_energy | double precision | previous energy          |
+
+Output:
+| e_model | double precision | predicted energy after the rotation of the MOs |
+
+Internal:
+| part_1  | double precision | v_grad^T.x    |
+| part_2  | double precision | 1/2 . x^T.H.x |
+| part_2a | double precision | H.x           |
+| i,j     | integer          | indexes       |
+
+Function:
+| ddot | double precision | dot product (Lapack) |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f
+subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model)
+   
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the expected criterion/energy after the application of the step x
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: v_grad(n),H(n,n),x(n)
+  double precision, intent(in)  :: prev_energy
+
+  ! out
+  double precision, intent(out) :: e_model
+
+  ! internal
+  double precision              :: part_1, part_2, t1,t2,t3
+  double precision, allocatable :: part_2a(:)
+
+  integer                       :: i,j
+
+  !Function
+  double precision              :: ddot
+
+  print*,''
+  print*,'---Trust_e_model---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(part_2a(n))
+#+END_SRC
+
+** Calculations
+
+part_1 corresponds to the product g.x
+part_2a corresponds to the product H.x
+part_2 corresponds to the product 0.5*(x^T.H.x)
+
+TODO: remove the dot products
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f
+  ! Product v_grad.x
+  part_1 = ddot(n,v_grad,1,x,1)
+ 
+  !if (debug) then
+    print*,'g.x : ', part_1
+  !endif  
+
+  ! Product H.x
+  call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1)
+
+  ! Product 1/2 . x^T.H.x
+  part_2 = 0.5d0 * ddot(n,x,1,part_2a,1)
+
+  !if (debug) then
+    print*,'1/2*x^T.H.x : ', part_2 
+  !endif
+
+  print*,'prev_energy', prev_energy
+
+  ! Sum
+  e_model = prev_energy + part_1 + part_2
+
+  ! Writing the predicted energy
+  print*, 'Predicted energy after the rotation : ', e_model
+  print*, 'Previous energy - predicted energy:', prev_energy - e_model
+  
+  ! Can be deleted, already in another subroutine
+  if (DABS(prev_energy - e_model) < 1d-12 ) then 
+    print*,'WARNING: ABS(prev_energy - e_model) < 1d-12'
+  endif
+
+  ! Deallocation
+  deallocate(part_2a)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust e model:', t3
+
+  print*,'---End trust_e_model---'
+  print*,''
+ 
+end subroutine 
+#+END_SRC 
--- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f
+++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f
--- a/src/utils_trust_region/trust_region_optimal_lambda.org
+++ b/src/utils_trust_region/trust_region_optimal_lambda.org
--- a/src/utils_trust_region/trust_region_rho.irp.f
+++ b/src/utils_trust_region/trust_region_rho.irp.f
@ -0,0 +1,121 @@
+! Agreement with the model: Rho
+
+! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)*
+
+! Rho represents the agreement between the model (the predicted energy
+! by the Taylor expansion truncated at the 2nd order) and the real
+! energy : 
+
+! \begin{equation}
+! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+! \end{equation}
+! With :
+! $E^{k}$ the energy at the previous iteration
+! $E^{k+1}$ the energy at the actual iteration
+! $m^{k+1}$ the predicted energy for the actual iteration
+! (cf. trust_e_model)
+
+! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$.
+! If $\rho \leq 0$ the previous energy is lower than the actual 
+! energy. We have to cancel the last step and use a smaller trust
+! region.
+! Here we cancel the last step if $\rho < 0.1$, because even if
+! the energy decreases, the agreement is bad, i.e., the Taylor expansion
+! truncated at the second order doesn't represent correctly the energy
+! landscape. So it's better to cancel the step and restart with a
+! smaller trust region.
+
+! Provided in qp_edit:
+! | thresh_rho |
+
+! Input:
+! | prev_energy | double precision | previous energy (energy before the rotation) |
+! | e_model     | double precision | predicted energy after the rotation          |
+
+! Output:
+! | rho         | double precision | the agreement between the model (predicted) and the real energy |
+! | prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy     |
+! |             |                  | else the previous energy doesn't change                         |
+
+! Internal:
+! | energy | double precision | energy (real) after the rotation |
+! | i      | integer          | index                            |
+! | t*     | double precision | time                             |
+
+
+subroutine trust_region_rho(prev_energy, energy,e_model,rho)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute rho, the agreement between the predicted criterion/energy and the real one
+  END_DOC
+
+  implicit none
+   
+  ! Variables
+
+  ! In
+  double precision, intent(inout) :: prev_energy
+  double precision, intent(in)    :: e_model, energy
+  
+  ! Out
+  double precision, intent(out)   :: rho
+
+  ! Internal
+  double precision                :: t1, t2, t3
+  integer                         :: i
+
+  print*,''
+  print*,'---Rho_model---'
+  
+  call wall_time(t1)
+
+! Rho
+! \begin{equation}
+! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+! \end{equation}
+
+! In function of $\rho$ th step can be accepted or cancelled.
+
+! If we cancel the last step (k+1), the previous energy (k) doesn't
+! change!
+! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) 
+
+
+! Already done in an other subroutine
+  !if (ABS(prev_energy - e_model) < 1d-12) then
+  !  print*,'WARNING: prev_energy - e_model < 1d-12'
+  !  print*,'=> rho will tend toward infinity'
+  !  print*,'Check you convergence criterion !'
+  !endif
+
+  rho = (prev_energy - energy) / (prev_energy - e_model)
+
+  print*, 'previous energy, prev_energy :', prev_energy
+  print*, 'predicted energy, e_model :', e_model
+  print*, 'real energy, energy :', energy
+  print*, 'prev_energy - energy :', prev_energy - energy
+  print*, 'prev_energy - e_model :', prev_energy - e_model
+  print*, 'Rho :', rho
+  print*, 'Threshold for rho:', thresh_rho
+
+  ! Modification of prev_energy in function of rho
+  if (rho < thresh_rho) then !0.1) then
+    ! the step is cancelled  
+    print*, 'Rho <', thresh_rho,', the previous energy does not changed'
+    print*, 'prev_energy :', prev_energy  
+  else
+    ! the step is accepted
+    prev_energy = energy
+    print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy
+  endif
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in rho model:', t3
+
+  print*,'---End rho_model---'
+  print*,''
+
+end subroutine
--- a/src/utils_trust_region/trust_region_rho.org
+++ b/src/utils_trust_region/trust_region_rho.org
@ -0,0 +1,123 @@
+* Agreement with the model: Rho
+
+*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)*
+
+Rho represents the agreement between the model (the predicted energy
+by the Taylor expansion truncated at the 2nd order) and the real
+energy : 
+
+\begin{equation}
+\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+\end{equation}
+With :
+$E^{k}$ the energy at the previous iteration
+$E^{k+1}$ the energy at the actual iteration
+$m^{k+1}$ the predicted energy for the actual iteration
+(cf. trust_e_model)
+
+If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$.
+If $\rho \leq 0$ the previous energy is lower than the actual 
+energy. We have to cancel the last step and use a smaller trust
+region.
+Here we cancel the last step if $\rho < 0.1$, because even if
+the energy decreases, the agreement is bad, i.e., the Taylor expansion
+truncated at the second order doesn't represent correctly the energy
+landscape. So it's better to cancel the step and restart with a
+smaller trust region.
+
+Provided in qp_edit:
+| thresh_rho |
+
+Input:
+| prev_energy | double precision | previous energy (energy before the rotation) |
+| e_model     | double precision | predicted energy after the rotation          |
+
+Output:
+| rho         | double precision | the agreement between the model (predicted) and the real energy |
+| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy     |
+|             |                  | else the previous energy doesn't change                         |
+
+Internal:
+| energy | double precision | energy (real) after the rotation |
+| i      | integer          | index                            |
+| t*     | double precision | time                             |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f
+subroutine trust_region_rho(prev_energy, energy,e_model,rho)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute rho, the agreement between the predicted criterion/energy and the real one
+  END_DOC
+
+  implicit none
+   
+  ! Variables
+
+  ! In
+  double precision, intent(inout) :: prev_energy
+  double precision, intent(in)    :: e_model, energy
+  
+  ! Out
+  double precision, intent(out)   :: rho
+
+  ! Internal
+  double precision                :: t1, t2, t3
+  integer                         :: i
+
+  print*,''
+  print*,'---Rho_model---'
+  
+  call wall_time(t1)
+#+END_SRC
+
+** Rho
+\begin{equation}
+\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+\end{equation}
+
+In function of $\rho$ th step can be accepted or cancelled.
+
+If we cancel the last step (k+1), the previous energy (k) doesn't
+change!
+If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) 
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f
+  ! Already done in an other subroutine
+  !if (ABS(prev_energy - e_model) < 1d-12) then
+  !  print*,'WARNING: prev_energy - e_model < 1d-12'
+  !  print*,'=> rho will tend toward infinity'
+  !  print*,'Check you convergence criterion !'
+  !endif
+
+  rho = (prev_energy - energy) / (prev_energy - e_model)
+
+  print*, 'previous energy, prev_energy :', prev_energy
+  print*, 'predicted energy, e_model :', e_model
+  print*, 'real energy, energy :', energy
+  print*, 'prev_energy - energy :', prev_energy - energy
+  print*, 'prev_energy - e_model :', prev_energy - e_model
+  print*, 'Rho :', rho
+  print*, 'Threshold for rho:', thresh_rho
+
+  ! Modification of prev_energy in function of rho
+  if (rho < thresh_rho) then !0.1) then
+    ! the step is cancelled  
+    print*, 'Rho <', thresh_rho,', the previous energy does not changed'
+    print*, 'prev_energy :', prev_energy  
+  else
+    ! the step is accepted
+    prev_energy = energy
+    print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy
+  endif
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in rho model:', t3
+
+  print*,'---End rho_model---'
+  print*,''
+
+end subroutine
+#+END_SRC
--- a/src/utils_trust_region/trust_region_step.irp.f
+++ b/src/utils_trust_region/trust_region_step.irp.f
@ -0,0 +1,716 @@
+! Trust region
+
+! *Compute the next step with the trust region algorithm*
+
+! The Newton method is an iterative method to find a minimum of a given
+! function. It uses a Taylor series truncated at the second order of the
+! targeted function and gives its minimizer. The minimizer is taken as
+! the new position and the same thing is done. And by doing so
+! iteratively the method find a minimum, a local or global one depending
+! of the starting point and the convexity/nonconvexity of the targeted
+! function.  
+
+! The goal of the trust region is to constrain the step size of the
+! Newton method in a certain area around the actual position, where the 
+! Taylor series is a good approximation of the targeted function. This
+! area is called the "trust region".
+
+! In addition, in function of the agreement between the Taylor
+! development of the energy and the real energy, the size of the trust
+! region will be updated at each iteration. By doing so, the step sizes
+! are not too larges. In addition, since we add a criterion to cancel the
+! step if the energy increases (more precisely if rho < 0.1), so it's
+! impossible to diverge. \newline
+
+! References: \newline
+! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline
+! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline
+! ISBN: 978-0-387-40065-5 \newline
+
+! By using the first and the second derivatives, the Newton method gives
+! a step:
+! \begin{align*}
+!   \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot 
+!   \textbf{g}_{(k)}
+! \end{align*}
+! which leads to the minimizer of the Taylor series.
+! !!! Warning: the Newton method gives the minimizer if and only if
+! $\textbf{H}$ is positive definite, else it leads to a saddle point !!!
+! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm:
+! \begin{align*}
+!   ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)}
+! \end{align*}
+! which is equivalent to 
+! \begin{align*}
+!   \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2
+! \end{align*}
+
+! with: \newline
+! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of
+! size n) \newline
+! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n
+! matrix) \newline
+! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of
+! size n) \newline
+! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration
+! \newline
+
+! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a
+! hypersphere of radius $\Delta_{(k+1)}$.\newline
+
+! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and
+! $\textbf{H}$ is positive definite, the
+! solution is the step given by the Newton method
+! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$.
+! Else we have to constrain the step size. For simplicity we will remove
+! the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have
+! to put a constraint on $\textbf{x}$ with a Lagrange multiplier.
+! Starting from the Taylor series of a function E (here, the energy)
+! truncated at the 2nd order, we have:
+! \begin{align*}
+!   E(\textbf{x}) =  E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2}
+!   \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +
+!   \mathcal{O}(\textbf{x}^2)
+! \end{align*} 
+
+! With the constraint on the norm of $\textbf{x}$ we can write the
+! Lagrangian
+! \begin{align*}
+!   \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x}
+!   + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} 
+!   + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2)
+! \end{align*}
+! Where: \newline
+! $\lambda$ is the Lagrange multiplier \newline
+! $E$ is the energy at the k-th iteration $\Leftrightarrow
+! E(\textbf{x} = \textbf{0})$ \newline
+
+! To solve this equation, we search a stationary point where the first
+! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e.
+! \begin{align*}
+!   \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0
+! \end{align*}
+
+! The derivative is:
+! \begin{align*}
+!   \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
+!   = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} 
+! \end{align*}
+
+! So, we search $\textbf{x}$ such as:
+! \begin{align*}
+! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
+! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0
+! \end{align*}
+
+! We can rewrite that as:
+! \begin{align*}
+!   \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} 
+!   = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0
+! \end{align*}
+! with $\textbf{I}$ is the identity matrix. 
+
+! By doing so, the solution is:
+! \begin{align*}
+!   (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g}
+! \end{align*}
+! \begin{align*}
+!   \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
+! \end{align*}
+! with $\textbf{x}^T \textbf{x} = \Delta^2$.
+
+! We have to solve this previous equation to find this $\textbf{x}$ in the
+! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is
+! just a one dimension problem because we can express $\textbf{x}$ as a
+! function of $\lambda$: 
+! \begin{align*}
+!   \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
+! \end{align*}
+
+! We start from the fact that the hessian is diagonalizable. So we have: 
+! \begin{align*}
+!   \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T
+! \end{align*}
+! with: \newline
+! $\textbf{H}$, the hessian matrix \newline
+! $\textbf{W}$, the matrix containing the eigenvectors \newline
+! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline
+! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline
+! $h_i$, the i-th eigenvalue in ascending order \newline
+
+! Now we use the fact that adding a constant on the diagonal just shifts
+! the eigenvalues:
+! \begin{align*}
+!   \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} 
+!   +\textbf{I} \lambda) \cdot \textbf{W}^T
+! \end{align*}
+
+! By doing so we can express $\textbf{x}$ as a function of $\lambda$
+! \begin{align*}
+!   \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot 
+!   \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+! \end{align*}
+! with $\lambda \neq - h_i$.
+
+! An interesting thing in our case is the norm of $\textbf{x}$,
+! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of
+! the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have:
+! \begin{align*}
+!   ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot 
+!   \textbf{g})^2}{(h_i + \lambda)^2}
+! \end{align*}
+
+! So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. 
+! And if we study the properties of this function we see that: 
+! \begin{align*}
+!   \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0
+! \end{align*}
+! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: 
+! \begin{align*}
+!   \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty
+! \end{align*}
+
+! From these limits and knowing that $h_1$ is the lowest eigenvalue, we
+! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and
+! strictly decreasing function on the interval $\lambda \in
+! (-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which
+! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one
+! solution. 
+
+! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot
+! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly,
+! $\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the
+! Newton method is only defined for a positive definite hessian matrix,
+! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive
+! definite. Consequently, in the case where $\textbf{H}$ is not positive
+! definite, to ensure the positive definiteness, $\lambda$ must be
+! greater than $- h_1$.
+! \begin{align*}
+!   \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1
+! \end{align*} 
+
+! From that there are five cases:
+! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$
+! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot
+!   \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I}
+!   \lambda)$ 
+!   must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$
+! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
+!   \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing
+!   $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
+!   positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$)
+! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
+!   \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing
+!   $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
+!   positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is
+!   similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda =
+!   0)|| \leq \Delta$
+!   but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
+!   time a constant to ensure the condition $||\textbf{x}(\lambda =
+!   -h_1)|| = \Delta$ and escape from the saddle point
+
+! Thus to find the solution, we can write: 
+! \begin{align*}
+!   ||\textbf{x}(\lambda)|| = \Delta
+! \end{align*}
+! \begin{align*}
+!   ||\textbf{x}(\lambda)|| - \Delta = 0
+! \end{align*}
+
+! Taking the square of this equation
+! \begin{align*}
+!   (||\textbf{x}(\lambda)|| - \Delta)^2 = 0
+! \end{align*}
+! we have a function with one minimum for the optimal $\lambda$.
+! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve
+! \begin{align*}
+!   (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
+! \end{align*}
+
+! But in practice, it is more effective to solve:
+! \begin{align*}
+!   (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
+! \end{align*}
+
+! To do that, we just use the Newton method with "trust_newton" using
+! first and second derivative of $(||\textbf{x}(\lambda)||^2 -
+! \Delta^2)^2$ with respect to $\textbf{x}$.
+! This will give the optimal $\lambda$ to compute the
+! solution $\textbf{x}$ with the formula seen previously:
+! \begin{align*}
+!   \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
+!   \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+! \end{align*}
+
+! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our
+! step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$.
+
+
+
+
+! Evolution of the trust region
+
+! We initialize the trust region at the first iteration using a radius
+! \begin{align*}
+!   \Delta = ||\textbf{x}(\lambda=0)||
+! \end{align*}
+
+! And for the next iteration the trust region will evolves depending of
+! the agreement of the energy prediction based on the Taylor series
+! truncated at the 2nd order and the real energy. If the Taylor series
+! truncated at the 2nd order represents correctly the energy landscape
+! the trust region will be extent else it will be reduced. In order to
+! mesure this agreement we use the ratio rho cf. "rho_model" and
+! "trust_e_model". From that we use the following values:
+! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$,
+! - if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, 
+! - if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, 
+! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$.
+
+! In addition, if $\rho < 0.1$ the iteration is cancelled, so it
+! restarts with a smaller trust region until the energy decreases.
+
+
+
+
+! Summary
+
+! To summarize, knowing the hessian (eigenvectors and eigenvalues), the
+! gradient and the radius of the trust region we can compute the norm of
+! the Newton step  
+! \begin{align*}
+!   ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n 
+!   \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0
+! \end{align*}
+
+! - if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and
+!   $\textbf{x}(\lambda=0)$ is in the trust region and it is not
+!   necessary to put a constraint on $\textbf{x}$, the solution is the
+!   unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$.
+! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
+!   $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in
+!   the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda =
+!   -h_1)$, similarly to the previous case.
+!   But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
+!   time a constant to ensure the condition $||\textbf{x}(\lambda =
+!   -h_1)|| = \Delta$ and escape from the saddle point
+! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we
+!   have to search $\lambda \in (-h_1, \infty)$ such as
+!   $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method 
+!   \begin{align*}
+!     (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
+!   \end{align*}
+!   or
+!   \begin{align*}
+!     (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
+!   \end{align*}
+!   which is numerically more stable. And finally compute 
+!   \begin{align*}
+!     \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
+!     \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+!   \end{align*}
+! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we
+!   do exactly the same thing that the previous case but we search
+!   $\lambda \in (0, \infty)$ 
+! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
+!   $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the
+!   sum), again we do exactly the same thing that the previous case
+!   searching $\lambda \in (-h_1, \infty)$.
+  
+
+! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not
+! necessary in fact to remove the $j = 1$ in the sum since the term
+! where $h_i - \lambda < 10^{-6}$ are not computed.
+
+! After that, we take this vector $\textbf{x}^*$, called "x", and we do
+! the transformation to an antisymmetric matrix $\textbf{X}$, called
+! m_x. This matrix $\textbf{X}$ will be used to compute a rotation
+! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix".
+
+! NB: 
+! An improvement can be done using a elleptical trust region.
+
+
+
+
+! Code
+
+! Provided:
+! | mo_num | integer | number of MOs |
+
+! Cf. qp_edit in orbital optimization section, for some constants/thresholds
+
+! Input:
+! | m         | integer          | number of MOs                                |
+! | n         | integer          | m*(m-1)/2                                       |
+! | H(n, n)   | double precision | hessian                                         |
+! | v_grad(n) | double precision | gradient                                        |
+! | e_val(n)  | double precision | eigenvalues of the hessian                      |
+! | W(n, n)   | double precision | eigenvectors of the hessian                     |
+! | rho       | double precision | agreement between the model and the reality,    |
+! |           |                  | represents the quality of the energy prediction |
+! | nb_iter   | integer          | number of iteration                             |
+
+! Input/Ouput:
+! | delta | double precision | radius of the trust region |
+
+! Output:
+! | x(n)      | double precision | vector containing the step |
+
+! Internal:
+! | accu          | double precision | temporary variable to compute the step       |
+! | lambda        | double precision | lagrange multiplier                          |
+! | trust_radius2 | double precision | square of the radius of the trust region     |
+! | norm2_x       | double precision | norm^2 of the vector x                       |
+! | norm2_g       | double precision | norm^2 of the vector containing the gradient |
+! | tmp_wtg(n)    | double precision | tmp_wtg(i) = w_i^T . g                       |
+! | i, j, k       | integer          | indexes                                      |
+
+! Function:
+! | dnrm2                   | double precision | Blas function computing the norm       |
+! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) |
+
+
+subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compuet the step in the trust region
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)             :: n
+  double precision, intent(in)    :: v_grad(n), rho
+  integer, intent(inout)          :: nb_iter
+  double precision, intent(in)    :: e_val(n), w(n,n)
+
+  ! inout
+  double precision, intent(inout) :: delta
+
+  ! out
+  double precision, intent(out)   :: x(n)
+
+  ! Internal
+  double precision                :: accu, lambda, trust_radius2
+  double precision                :: norm2_x, norm2_g
+  double precision, allocatable   :: tmp_wtg(:)
+  integer                         :: i,j,k
+  double precision                :: t1,t2,t3
+  integer                         :: n_neg_eval
+
+
+  ! Functions
+  double precision                :: ddot, dnrm2
+  double precision                :: f_norm_trust_region_omp
+
+  print*,''
+  print*,'=================='
+  print*,'---Trust_region---'
+  print*,'=================='
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(tmp_wtg(n))
+
+! Initialization and norm
+
+! The norm of the step size will be useful for the trust region
+! algorithm. We start from a first guess and the radius of the trust
+! region will evolve during the optimization.
+
+! avoid_saddle is actually a test to avoid saddle points
+
+
+! Initialization of the Lagrange multiplier
+lambda = 0d0
+
+! List of w^T.g, to avoid the recomputation
+tmp_wtg = 0d0
+do j = 1, n
+  do i = 1, n
+    tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i)
+  enddo
+enddo
+
+! Replacement of the small tmp_wtg corresponding to a negative eigenvalue
+! in the case of avoid_saddle
+if (avoid_saddle .and. e_val(1) < - thresh_eig) then
+  i = 2
+  ! Number of negative eigenvalues
+  do while (e_val(i) < - thresh_eig)
+    if (tmp_wtg(i) < thresh_wtg2) then
+      if (version_avoid_saddle == 1) then  
+        tmp_wtg(i) = 1d0
+      elseif  (version_avoid_saddle == 2) then  
+        tmp_wtg(i) = DABS(e_val(i))
+      elseif  (version_avoid_saddle == 3) then  
+        tmp_wtg(i) = dsqrt(DABS(e_val(i)))
+      else
+        tmp_wtg(i) = thresh_wtg2
+      endif
+    endif
+    i = i + 1
+  enddo
+
+  ! For the fist one it's a little bit different
+  if (tmp_wtg(1) < thresh_wtg2) then 
+    tmp_wtg(1) = 0d0
+  endif
+
+endif 
+
+! Norm^2 of x, ||x||^2
+norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
+! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta
+! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm
+! Anyway if the step is too big it will be reduced
+print*,'||x||^2 :', norm2_x
+
+! Norm^2 of the gradient, ||v_grad||^2
+norm2_g = (dnrm2(n,v_grad,1))**2
+print*,'||grad||^2 :', norm2_g
+
+! Trust radius initialization
+
+!     At the first iteration (nb_iter = 0) we initialize the trust region
+!     with the norm of the step generate by the Newton's method ($\textbf{x}_1 =
+!     (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$,
+!     we compute this norm using f_norm_trust_region_omp as explain just
+!     below) 
+
+
+! trust radius
+if (nb_iter == 0) then
+   trust_radius2 = norm2_x 
+   ! To avoid infinite loop of cancellation of this first step
+   ! without changing delta
+   nb_iter = 1
+
+   ! Compute delta, delta = sqrt(trust_radius)
+   delta = dsqrt(trust_radius2)
+endif
+
+! Modification of the trust radius
+
+! In function of rho (which represents the agreement between the model
+! and the reality, cf. rho_model) the trust region evolves. We update
+! delta (the radius of the trust region).
+
+! To avoid too big trust region we put a maximum size.
+
+
+! Modification of the trust radius in function of rho
+if (rho >= 0.75d0) then
+   delta = 2d0 * delta
+elseif (rho >= 0.5d0) then
+   delta = delta
+elseif (rho >= 0.25d0) then
+   delta = 0.5d0 * delta
+else
+   delta = 0.25d0 * delta
+endif
+
+! Maximum size of the trust region
+!if (delta > 0.5d0 * n * pi) then
+!  delta = 0.5d0 * n * pi
+!  print*,'Delta > delta_max, delta = 0.5d0 * n * pi'
+!endif
+
+if (delta > 1d10) then
+  delta = 1d10
+endif
+
+print*, 'Delta :', delta
+
+! Calculation of the optimal lambda
+
+! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$
+! - If $||\textbf{x}|| > \Delta$  or $h_1 < 0$ we have to add a constant
+!   $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$
+! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the
+!   unconstrained one, $\lambda = 0$
+
+! You will find more details at the beginning
+
+
+! By giving delta, we search (||x||^2 - delta^2)^2 = 0
+! and not (||x||^2 - delta)^2 = 0
+
+! Research of lambda to solve ||x(lambda)|| = Delta 
+
+! Display
+print*, 'e_val(1) = ', e_val(1)
+print*, 'w_1^T.g =', tmp_wtg(1)
+
+! H positive definite 
+if (e_val(1) > - thresh_eig) then
+  norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
+  print*, '||x(0)||=', dsqrt(norm2_x)
+  print*, 'Delta=', delta
+
+  ! H positive definite, ||x(lambda = 0)|| <= Delta
+  if (dsqrt(norm2_x) <= delta) then 
+    print*, 'H positive definite, ||x(lambda = 0)|| <= Delta'
+    print*, 'lambda = 0, no lambda optimization'
+    lambda = 0d0
+
+  ! H positive definite, ||x(lambda = 0)|| > Delta
+  else
+    ! Constraint solution
+    print*, 'H positive definite, ||x(lambda = 0)|| > Delta' 
+    print*,'Computation of the optimal lambda...'
+    call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+  endif
+
+! H indefinite
+else
+  if (DABS(tmp_wtg(1)) < thresh_wtg) then
+    norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1))
+    print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) 
+  endif
+
+  ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta 
+  if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then
+    ! Add e_val(1) in order to have (H - e_val(1) I) positive definite
+    print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta'
+    print*, 'lambda = -e_val(1), no lambda optimization'
+    lambda = - e_val(1)
+
+  ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta
+  ! and
+  ! H indefinite, w_1^T.g =/= 0
+  else
+    ! Constraint solution/ add lambda
+    if (DABS(tmp_wtg(1)) < thresh_wtg) then
+       print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta'
+    else
+       print*, 'H indefinite, w_1^T.g =/= 0'
+    endif
+    print*, 'Computation of the optimal lambda...'
+    call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+    endif
+
+endif
+
+! Recomputation of the norm^2 of the step x
+norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+print*,''
+print*,'Summary after the trust region:'
+print*,'lambda:', lambda
+print*,'||x||:', dsqrt(norm2_x)
+print*,'delta:', delta
+
+! Calculation of the step x
+
+! x refers to $\textbf{x}^*$
+! We compute x in function of lambda using its formula :
+! \begin{align*}
+! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i 
+! + \lambda} \cdot \textbf{w}_i
+! \end{align*}
+
+
+! Initialisation
+x = 0d0
+
+! Calculation of the step x
+
+! Normal version
+if (.not. absolute_eig) then
+
+  do i = 1, n 
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      do j = 1, n
+        x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda)
+      enddo
+    endif
+  enddo
+
+! Version to use the absolute value of the eigenvalues
+else
+
+  do i = 1, n 
+    if (DABS(e_val(i)) > thresh_eig) then
+      do j = 1, n
+        x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda)
+      enddo
+    endif
+  enddo
+
+endif
+
+double precision :: beta, norm_x
+
+! Test
+! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1)
+! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first 
+! eigenvectors multiply by a constant to ensure the condition
+! ||x(lambda=-e_val(1))|| = delta and escape the saddle point
+if (avoid_saddle .and. e_val(1) < - thresh_eig) then
+  if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then
+
+    ! norm of x
+    norm_x = dnrm2(n,x,1)
+
+    ! Computes the coefficient for the w_1 
+    beta = delta**2 - norm_x**2
+
+    ! Updates the step x
+    x = x + W(:,1) * dsqrt(beta)
+
+    ! Recomputes the norm to check
+    norm_x = dnrm2(n,x,1)
+
+    print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):'
+    print*, '||x||', norm_x 
+  endif
+endif
+
+! Transformation of x
+
+! x is a vector of size n, so it can be write as a m by m
+! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index".
+
+
+!  ! Step transformation vector -> matrix
+!  ! Vector with n element -> mo_num by mo_num matrix
+!  do j = 1, m
+!     do i = 1, m
+!        if (i>j) then
+!           call mat_to_vec_index(i,j,k)
+!           m_x(i,j) = x(k)
+!        else
+!           m_x(i,j) = 0d0
+!        endif
+!     enddo
+!  enddo
+!
+!  ! Antisymmetrization of the previous matrix
+!  do j = 1, m
+!     do i = 1, m
+!        if (i<j) then
+!           m_x(i,j) = - m_x(j,i)
+!        endif
+!     enddo
+!  enddo
+
+! Deallocation, end
+
+
+deallocate(tmp_wtg)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust_region:', t3
+  print*,'======================'
+  print*,'---End trust_region---'
+  print*,'======================'
+  print*,''
+
+end
--- a/src/utils_trust_region/trust_region_step.org
+++ b/src/utils_trust_region/trust_region_step.org
@ -0,0 +1,726 @@
+* Trust region
+
+*Compute the next step with the trust region algorithm*
+
+The Newton method is an iterative method to find a minimum of a given
+function. It uses a Taylor series truncated at the second order of the
+targeted function and gives its minimizer. The minimizer is taken as
+the new position and the same thing is done. And by doing so
+iteratively the method find a minimum, a local or global one depending
+of the starting point and the convexity/nonconvexity of the targeted
+function.  
+
+The goal of the trust region is to constrain the step size of the
+Newton method in a certain area around the actual position, where the 
+Taylor series is a good approximation of the targeted function. This
+area is called the "trust region".
+
+In addition, in function of the agreement between the Taylor
+development of the energy and the real energy, the size of the trust
+region will be updated at each iteration. By doing so, the step sizes
+are not too larges. In addition, since we add a criterion to cancel the
+step if the energy increases (more precisely if rho < 0.1), so it's
+impossible to diverge. \newline
+
+References: \newline
+Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline
+https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline
+ISBN: 978-0-387-40065-5 \newline
+
+By using the first and the second derivatives, the Newton method gives
+a step:
+\begin{align*}
+  \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot 
+  \textbf{g}_{(k)}
+\end{align*}
+which leads to the minimizer of the Taylor series.
+!!! Warning: the Newton method gives the minimizer if and only if
+$\textbf{H}$ is positive definite, else it leads to a saddle point !!!
+But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm:
+\begin{align*}
+  ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)}
+\end{align*}
+which is equivalent to 
+\begin{align*}
+  \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2
+\end{align*}
+
+with: \newline
+$\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of
+size n) \newline
+$\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n
+matrix) \newline
+$\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of
+size n) \newline
+$\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration
+\newline
+
+Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a
+hypersphere of radius $\Delta_{(k+1)}$.\newline
+
+So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and
+$\textbf{H}$ is positive definite, the
+solution is the step given by the Newton method
+$\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$.
+Else we have to constrain the step size. For simplicity we will remove
+the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have
+to put a constraint on $\textbf{x}$ with a Lagrange multiplier.
+Starting from the Taylor series of a function E (here, the energy)
+truncated at the 2nd order, we have:
+\begin{align*}
+  E(\textbf{x}) =  E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2}
+  \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +
+  \mathcal{O}(\textbf{x}^2)
+\end{align*} 
+
+With the constraint on the norm of $\textbf{x}$ we can write the
+Lagrangian
+\begin{align*}
+  \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x}
+  + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} 
+  + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2)
+\end{align*}
+Where: \newline
+$\lambda$ is the Lagrange multiplier \newline
+$E$ is the energy at the k-th iteration $\Leftrightarrow
+E(\textbf{x} = \textbf{0})$ \newline
+
+To solve this equation, we search a stationary point where the first
+derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e.
+\begin{align*}
+  \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0
+\end{align*}
+
+The derivative is:
+\begin{align*}
+  \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
+  = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} 
+\end{align*}
+
+So, we search $\textbf{x}$ such as:
+\begin{align*}
+\frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
+= \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0
+\end{align*}
+
+We can rewrite that as:
+\begin{align*}
+  \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} 
+ = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0
+\end{align*}
+with $\textbf{I}$ is the identity matrix. 
+
+By doing so, the solution is:
+\begin{align*}
+  (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g}
+\end{align*}
+\begin{align*}
+  \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
+\end{align*}
+with $\textbf{x}^T \textbf{x} = \Delta^2$.
+
+We have to solve this previous equation to find this $\textbf{x}$ in the
+trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is
+just a one dimension problem because we can express $\textbf{x}$ as a
+function of $\lambda$: 
+\begin{align*}
+  \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
+\end{align*}
+
+We start from the fact that the hessian is diagonalizable. So we have: 
+\begin{align*}
+  \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T
+\end{align*}
+with: \newline
+$\textbf{H}$, the hessian matrix \newline
+$\textbf{W}$, the matrix containing the eigenvectors \newline
+$\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline
+$\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline
+$h_i$, the i-th eigenvalue in ascending order \newline
+
+Now we use the fact that adding a constant on the diagonal just shifts
+the eigenvalues:
+\begin{align*}
+  \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} 
+  +\textbf{I} \lambda) \cdot \textbf{W}^T
+\end{align*}
+
+By doing so we can express $\textbf{x}$ as a function of $\lambda$
+\begin{align*}
+  \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot 
+  \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+\end{align*}
+with $\lambda \neq - h_i$.
+
+An interesting thing in our case is the norm of $\textbf{x}$,
+because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of
+the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have:
+\begin{align*}
+  ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot 
+  \textbf{g})^2}{(h_i + \lambda)^2}
+\end{align*}
+
+So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. 
+And if we study the properties of this function we see that: 
+\begin{align*}
+  \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0
+\end{align*}
+and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: 
+\begin{align*}
+  \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty
+\end{align*}
+
+From these limits and knowing that $h_1$ is the lowest eigenvalue, we
+can conclude that $||\textbf{x}(\lambda)||$ is a continuous and
+strictly decreasing function on the interval $\lambda \in
+(-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which
+gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one
+solution. 
+
+Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot
+\textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly,
+$\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the
+Newton method is only defined for a positive definite hessian matrix,
+so $(\textbf{H} + \textbf{I} \lambda)$ must be positive
+definite. Consequently, in the case where $\textbf{H}$ is not positive
+definite, to ensure the positive definiteness, $\lambda$ must be
+greater than $- h_1$.
+\begin{align*}
+  \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1
+\end{align*} 
+
+From that there are five cases:
+- if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$
+- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot
+  \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I}
+  \lambda)$ 
+  must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$
+- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
+  \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing
+  $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
+  positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$)
+- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
+  \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing
+  $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
+  positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is
+  similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda =
+  0)|| \leq \Delta$
+  but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
+  time a constant to ensure the condition $||\textbf{x}(\lambda =
+  -h_1)|| = \Delta$ and escape from the saddle point
+
+Thus to find the solution, we can write: 
+\begin{align*}
+  ||\textbf{x}(\lambda)|| = \Delta
+\end{align*}
+\begin{align*}
+  ||\textbf{x}(\lambda)|| - \Delta = 0
+\end{align*}
+
+Taking the square of this equation
+\begin{align*}
+  (||\textbf{x}(\lambda)|| - \Delta)^2 = 0
+\end{align*}
+we have a function with one minimum for the optimal $\lambda$.
+Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve
+\begin{align*}
+  (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
+\end{align*}
+
+But in practice, it is more effective to solve:
+\begin{align*}
+  (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
+\end{align*}
+
+To do that, we just use the Newton method with "trust_newton" using
+first and second derivative of $(||\textbf{x}(\lambda)||^2 -
+\Delta^2)^2$ with respect to $\textbf{x}$.
+This will give the optimal $\lambda$ to compute the
+solution $\textbf{x}$ with the formula seen previously:
+\begin{align*}
+  \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
+  \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+\end{align*}
+
+The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our
+step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$.
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f 
+#+END_SRC
+
+** Evolution of the trust region
+
+We initialize the trust region at the first iteration using a radius
+\begin{align*}
+  \Delta = ||\textbf{x}(\lambda=0)||
+\end{align*}
+
+And for the next iteration the trust region will evolves depending of
+the agreement of the energy prediction based on the Taylor series
+truncated at the 2nd order and the real energy. If the Taylor series
+truncated at the 2nd order represents correctly the energy landscape
+the trust region will be extent else it will be reduced. In order to
+mesure this agreement we use the ratio rho cf. "rho_model" and
+"trust_e_model". From that we use the following values:
+- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$,
+- if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, 
+- if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, 
+- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$.
+
+In addition, if $\rho < 0.1$ the iteration is cancelled, so it
+restarts with a smaller trust region until the energy decreases.
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f 
+#+END_SRC
+
+** Summary
+
+To summarize, knowing the hessian (eigenvectors and eigenvalues), the
+gradient and the radius of the trust region we can compute the norm of
+the Newton step  
+\begin{align*}
+  ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n 
+  \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0
+\end{align*}
+
+- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and
+  $\textbf{x}(\lambda=0)$ is in the trust region and it is not
+  necessary to put a constraint on $\textbf{x}$, the solution is the
+  unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$.
+- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
+  $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in
+  the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda =
+  -h_1)$, similarly to the previous case.
+  But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
+  time a constant to ensure the condition $||\textbf{x}(\lambda =
+  -h_1)|| = \Delta$ and escape from the saddle point
+- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we
+  have to search $\lambda \in (-h_1, \infty)$ such as
+  $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method 
+  \begin{align*}
+    (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
+  \end{align*}
+  or
+  \begin{align*}
+    (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
+  \end{align*}
+  which is numerically more stable. And finally compute 
+  \begin{align*}
+    \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
+    \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+  \end{align*}
+- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we
+  do exactly the same thing that the previous case but we search
+  $\lambda \in (0, \infty)$ 
+- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
+  $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the
+  sum), again we do exactly the same thing that the previous case
+  searching $\lambda \in (-h_1, \infty)$.
+  
+
+For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not
+necessary in fact to remove the $j = 1$ in the sum since the term
+where $h_i - \lambda < 10^{-6}$ are not computed.
+
+After that, we take this vector $\textbf{x}^*$, called "x", and we do
+the transformation to an antisymmetric matrix $\textbf{X}$, called
+m_x. This matrix $\textbf{X}$ will be used to compute a rotation
+matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix".
+
+NB: 
+An improvement can be done using a elleptical trust region.
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f 
+#+END_SRC
+
+** Code
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Cf. qp_edit in orbital optimization section, for some constants/thresholds
+
+Input:
+| m         | integer          | number of MOs                                |
+| n         | integer          | m*(m-1)/2                                       |
+| H(n, n)   | double precision | hessian                                         |
+| v_grad(n) | double precision | gradient                                        |
+| e_val(n)  | double precision | eigenvalues of the hessian                      |
+| W(n, n)   | double precision | eigenvectors of the hessian                     |
+| rho       | double precision | agreement between the model and the reality,    |
+|           |                  | represents the quality of the energy prediction |
+| nb_iter   | integer          | number of iteration                             |
+
+Input/Ouput:
+| delta | double precision | radius of the trust region |
+
+Output:
+| x(n)      | double precision | vector containing the step |
+
+Internal:
+| accu          | double precision | temporary variable to compute the step       |
+| lambda        | double precision | lagrange multiplier                          |
+| trust_radius2 | double precision | square of the radius of the trust region     |
+| norm2_x       | double precision | norm^2 of the vector x                       |
+| norm2_g       | double precision | norm^2 of the vector containing the gradient |
+| tmp_wtg(n)    | double precision | tmp_wtg(i) = w_i^T . g                       |
+| i, j, k       | integer          | indexes                                      |
+
+Function:
+| dnrm2                   | double precision | Blas function computing the norm       |
+| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compuet the step in the trust region
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)             :: n
+  double precision, intent(in)    :: v_grad(n), rho
+  integer, intent(inout)          :: nb_iter
+  double precision, intent(in)    :: e_val(n), w(n,n)
+
+  ! inout
+  double precision, intent(inout) :: delta
+
+  ! out
+  double precision, intent(out)   :: x(n)
+
+  ! Internal
+  double precision                :: accu, lambda, trust_radius2
+  double precision                :: norm2_x, norm2_g
+  double precision, allocatable   :: tmp_wtg(:)
+  integer                         :: i,j,k
+  double precision                :: t1,t2,t3
+  integer                         :: n_neg_eval
+
+
+  ! Functions
+  double precision                :: ddot, dnrm2
+  double precision                :: f_norm_trust_region_omp
+
+  print*,''
+  print*,'=================='
+  print*,'---Trust_region---'
+  print*,'=================='
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(tmp_wtg(n))
+#+END_SRC
+
+
+*** Initialization and norm
+
+The norm of the step size will be useful for the trust region
+algorithm. We start from a first guess and the radius of the trust
+region will evolve during the optimization.
+
+avoid_saddle is actually a test to avoid saddle points
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f                                                                                                                                                                                            
+  ! Initialization of the Lagrange multiplier
+  lambda = 0d0
+
+  ! List of w^T.g, to avoid the recomputation
+  tmp_wtg = 0d0
+  do j = 1, n
+    do i = 1, n
+      tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i)
+    enddo
+  enddo
+
+  ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue
+  ! in the case of avoid_saddle
+  if (avoid_saddle .and. e_val(1) < - thresh_eig) then
+    i = 2
+    ! Number of negative eigenvalues
+    do while (e_val(i) < - thresh_eig)
+      if (tmp_wtg(i) < thresh_wtg2) then
+        if (version_avoid_saddle == 1) then  
+          tmp_wtg(i) = 1d0
+        elseif  (version_avoid_saddle == 2) then  
+          tmp_wtg(i) = DABS(e_val(i))
+        elseif  (version_avoid_saddle == 3) then  
+          tmp_wtg(i) = dsqrt(DABS(e_val(i)))
+        else
+          tmp_wtg(i) = thresh_wtg2
+        endif
+      endif
+      i = i + 1
+    enddo
+
+    ! For the fist one it's a little bit different
+    if (tmp_wtg(1) < thresh_wtg2) then 
+      tmp_wtg(1) = 0d0
+    endif
+
+  endif 
+
+  ! Norm^2 of x, ||x||^2
+  norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
+  ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta
+  ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm
+  ! Anyway if the step is too big it will be reduced
+  print*,'||x||^2 :', norm2_x
+
+  ! Norm^2 of the gradient, ||v_grad||^2
+  norm2_g = (dnrm2(n,v_grad,1))**2
+  print*,'||grad||^2 :', norm2_g
+#+END_SRC
+
+*** Trust radius initialization
+
+    At the first iteration (nb_iter = 0) we initialize the trust region
+    with the norm of the step generate by the Newton's method ($\textbf{x}_1 =
+    (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$,
+    we compute this norm using f_norm_trust_region_omp as explain just
+    below) 
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  ! trust radius
+  if (nb_iter == 0) then
+     trust_radius2 = norm2_x 
+     ! To avoid infinite loop of cancellation of this first step
+     ! without changing delta
+     nb_iter = 1
+
+     ! Compute delta, delta = sqrt(trust_radius)
+     delta = dsqrt(trust_radius2)
+  endif
+#+END_SRC
+
+*** Modification of the trust radius
+
+In function of rho (which represents the agreement between the model
+and the reality, cf. rho_model) the trust region evolves. We update
+delta (the radius of the trust region).
+
+To avoid too big trust region we put a maximum size.
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  ! Modification of the trust radius in function of rho
+  if (rho >= 0.75d0) then
+     delta = 2d0 * delta
+  elseif (rho >= 0.5d0) then
+     delta = delta
+  elseif (rho >= 0.25d0) then
+     delta = 0.5d0 * delta
+  else
+     delta = 0.25d0 * delta
+  endif
+
+  ! Maximum size of the trust region
+  !if (delta > 0.5d0 * n * pi) then
+  !  delta = 0.5d0 * n * pi
+  !  print*,'Delta > delta_max, delta = 0.5d0 * n * pi'
+  !endif
+
+  if (delta > 1d10) then
+    delta = 1d10
+  endif
+
+  print*, 'Delta :', delta
+#+END_SRC 
+  
+*** Calculation of the optimal lambda
+
+We search the solution of $(||x||^2 - \Delta^2)^2 = 0$
+- If $||\textbf{x}|| > \Delta$  or $h_1 < 0$ we have to add a constant
+  $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$
+- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the
+  unconstrained one, $\lambda = 0$
+
+You will find more details at the beginning
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  ! By giving delta, we search (||x||^2 - delta^2)^2 = 0
+  ! and not (||x||^2 - delta)^2 = 0
+  
+  ! Research of lambda to solve ||x(lambda)|| = Delta 
+
+  ! Display
+  print*, 'e_val(1) = ', e_val(1)
+  print*, 'w_1^T.g =', tmp_wtg(1)
+  
+  ! H positive definite 
+  if (e_val(1) > - thresh_eig) then
+    norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
+    print*, '||x(0)||=', dsqrt(norm2_x)
+    print*, 'Delta=', delta
+
+    ! H positive definite, ||x(lambda = 0)|| <= Delta
+    if (dsqrt(norm2_x) <= delta) then 
+      print*, 'H positive definite, ||x(lambda = 0)|| <= Delta'
+      print*, 'lambda = 0, no lambda optimization'
+      lambda = 0d0
+
+    ! H positive definite, ||x(lambda = 0)|| > Delta
+    else
+      ! Constraint solution
+      print*, 'H positive definite, ||x(lambda = 0)|| > Delta' 
+      print*,'Computation of the optimal lambda...'
+      call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+    endif
+
+  ! H indefinite
+  else
+    if (DABS(tmp_wtg(1)) < thresh_wtg) then
+      norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1))
+      print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) 
+    endif
+
+    ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta 
+    if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then
+      ! Add e_val(1) in order to have (H - e_val(1) I) positive definite
+      print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta'
+      print*, 'lambda = -e_val(1), no lambda optimization'
+      lambda = - e_val(1)
+
+    ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta
+    ! and
+    ! H indefinite, w_1^T.g =/= 0
+    else
+      ! Constraint solution/ add lambda
+      if (DABS(tmp_wtg(1)) < thresh_wtg) then
+         print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta'
+      else
+         print*, 'H indefinite, w_1^T.g =/= 0'
+      endif
+      print*, 'Computation of the optimal lambda...'
+      call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+      endif
+     
+  endif
+  
+  ! Recomputation of the norm^2 of the step x
+  norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+  print*,''
+  print*,'Summary after the trust region:'
+  print*,'lambda:', lambda
+  print*,'||x||:', dsqrt(norm2_x)
+  print*,'delta:', delta
+#+END_SRC
+
+*** Calculation of the step x
+
+x refers to $\textbf{x}^*$
+We compute x in function of lambda using its formula :
+\begin{align*}
+\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i 
+ \lambda} \cdot \textbf{w}_i
+\end{align*}
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  ! Initialisation
+  x = 0d0
+
+  ! Calculation of the step x
+
+  ! Normal version
+  if (.not. absolute_eig) then
+
+    do i = 1, n 
+      if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+        do j = 1, n
+          x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda)
+        enddo
+      endif
+    enddo
+
+  ! Version to use the absolute value of the eigenvalues
+  else
+
+    do i = 1, n 
+      if (DABS(e_val(i)) > thresh_eig) then
+        do j = 1, n
+          x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda)
+        enddo
+      endif
+    enddo
+
+  endif
+
+  double precision :: beta, norm_x
+ 
+  ! Test
+  ! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1)
+  ! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first 
+  ! eigenvectors multiply by a constant to ensure the condition
+  ! ||x(lambda=-e_val(1))|| = delta and escape the saddle point
+  if (avoid_saddle .and. e_val(1) < - thresh_eig) then
+    if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then
+
+      ! norm of x
+      norm_x = dnrm2(n,x,1)
+
+      ! Computes the coefficient for the w_1 
+      beta = delta**2 - norm_x**2
+
+      ! Updates the step x
+      x = x + W(:,1) * dsqrt(beta)
+
+      ! Recomputes the norm to check
+      norm_x = dnrm2(n,x,1)
+
+      print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):'
+      print*, '||x||', norm_x 
+    endif
+  endif
+#+END_SRC
+
+*** Transformation of x
+
+x is a vector of size n, so it can be write as a m by m
+antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index".
+
+    #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+!  ! Step transformation vector -> matrix
+!  ! Vector with n element -> mo_num by mo_num matrix
+!  do j = 1, m
+!     do i = 1, m
+!        if (i>j) then
+!           call mat_to_vec_index(i,j,k)
+!           m_x(i,j) = x(k)
+!        else
+!           m_x(i,j) = 0d0
+!        endif
+!     enddo
+!  enddo
+!
+!  ! Antisymmetrization of the previous matrix
+!  do j = 1, m
+!     do i = 1, m
+!        if (i<j) then
+!           m_x(i,j) = - m_x(j,i)
+!        endif
+!     enddo
+!  enddo
+    #+END_SRC
+
+*** Deallocation, end
+
+    #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  deallocate(tmp_wtg)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust_region:', t3
+  print*,'======================'
+  print*,'---End trust_region---'
+  print*,'======================'
+  print*,''
+
+end
+    #+END_SRC
+ 
--- a/src/utils_trust_region/vec_to_mat_index.irp.f
+++ b/src/utils_trust_region/vec_to_mat_index.irp.f
@ -0,0 +1,71 @@
+! Vector to matrix indexes
+  
+! *Compute the indexes p,q of a matrix element with the vector index i*
+
+! Vector (i) -> lower diagonal matrix (p,q), p > q
+
+! If a matrix is antisymmetric it can be reshaped as a vector. And the
+! vector can be reshaped as an antisymmetric matrix
+
+! \begin{align*}
+! \begin{pmatrix}
+! 0 & -1 & -2 & -4 \\
+! 1 & 0  & -3 & -5 \\
+! 2 & 3 & 0  & -6  \\
+! 4 & 5 & 6 & 0
+! \end{pmatrix}
+! \Leftrightarrow
+! \begin{pmatrix}
+! 1 & 2 & 3 & 4 & 5 & 6
+! \end{pmatrix}
+! \end{align*}
+
+! !!! Here the algorithm only work for the lower diagonal !!!
+
+! Input:
+! | i | integer | index in the vector |
+
+! Ouput:
+! | p,q | integer | corresponding indexes in the lower diagonal of a matrix |
+! |     |         | p > q,                                                  |
+! |     |         | p -> row,                                               |
+! |     |         | q -> column                                             |
+
+
+subroutine vec_to_mat_index(i,p,q)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing
+  ! its index i a vector
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer,intent(in)   :: i
+  
+  ! out
+  integer, intent(out) :: p,q
+  
+  ! internal 
+  integer              :: a,b
+  double precision     :: da
+
+  da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i)))
+  a = INT(da) 
+  if ((a*(a-1))/2==i) then
+    p = a-1
+  else
+    p = a
+  endif
+  b = p*(p-1)/2
+ 
+  ! Matrix element indexes
+  p = p + 1
+  q = i - b 
+
+end subroutine
--- a/src/utils_trust_region/vec_to_mat_index.org
+++ b/src/utils_trust_region/vec_to_mat_index.org
@ -0,0 +1,72 @@
+* Vector to matrix indexes
+  
+*Compute the indexes p,q of a matrix element with the vector index i*
+
+Vector (i) -> lower diagonal matrix (p,q), p > q
+
+If a matrix is antisymmetric it can be reshaped as a vector. And the
+vector can be reshaped as an antisymmetric matrix
+
+\begin{align*}
+\begin{pmatrix}
+0 & -1 & -2 & -4 \\
+1 & 0  & -3 & -5 \\
+2 & 3 & 0  & -6  \\
+4 & 5 & 6 & 0
+\end{pmatrix}
+\Leftrightarrow
+\begin{pmatrix}
+1 & 2 & 3 & 4 & 5 & 6
+\end{pmatrix}
+\end{align*}
+
+!!! Here the algorithm only work for the lower diagonal !!!
+
+Input:
+| i | integer | index in the vector |
+
+Ouput:
+| p,q | integer | corresponding indexes in the lower diagonal of a matrix |
+|     |         | p > q,                                                  |
+|     |         | p -> row,                                               |
+|     |         | q -> column                                             |
+
+#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f
+subroutine vec_to_mat_index(i,p,q)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing
+  ! its index i a vector
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer,intent(in)   :: i
+  
+  ! out
+  integer, intent(out) :: p,q
+  
+  ! internal 
+  integer              :: a,b
+  double precision     :: da
+
+  da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i)))
+  a = INT(da) 
+  if ((a*(a-1))/2==i) then
+    p = a-1
+  else
+    p = a
+  endif
+  b = p*(p-1)/2
+ 
+  ! Matrix element indexes
+  p = p + 1
+  q = i - b 
+
+end subroutine
+#+END_SRC
--- a/src/utils_trust_region/vec_to_mat_v2.irp.f
+++ b/src/utils_trust_region/vec_to_mat_v2.irp.f
@ -0,0 +1,39 @@
+! Vect to antisymmetric matrix using mat_to_vec_index
+
+! Vector to antisymmetric matrix transformation using mat_to_vec_index
+! subroutine.
+
+! Can be done in OMP (for the first part and with omp critical for the second)
+
+
+subroutine vec_to_mat_v2(n,m,v_x,m_x)
+
+  BEGIN_DOC
+  ! Vector to antisymmetric matrix
+  END_DOC
+  
+  implicit none
+  
+  integer, intent(in) :: n,m
+  double precision, intent(in) :: v_x(n)
+  double precision, intent(out) :: m_x(m,m)
+
+  integer :: i,j,k
+
+  ! 1D -> 2D lower diagonal
+  m_x = 0d0
+  do j = 1, m - 1
+    do i = j + 1, m
+      call mat_to_vec_index(i,j,k)
+      m_x(i,j) = v_x(k)
+    enddo
+  enddo
+  
+  ! Antisym
+  do i = 1, m - 1
+    do j = i + 1, m
+      m_x(i,j) = - m_x(j,i) 
+    enddo
+  enddo
+
+end
--- a/src/utils_trust_region/vec_to_mat_v2.org
+++ b/src/utils_trust_region/vec_to_mat_v2.org
@ -0,0 +1,40 @@
+* Vect to antisymmetric matrix using mat_to_vec_index
+
+Vector to antisymmetric matrix transformation using mat_to_vec_index
+subroutine.
+
+Can be done in OMP (for the first part and with omp critical for the second)
+
+#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f 
+subroutine vec_to_mat_v2(n,m,v_x,m_x)
+
+  BEGIN_DOC
+  ! Vector to antisymmetric matrix
+  END_DOC
+  
+  implicit none
+  
+  integer, intent(in) :: n,m
+  double precision, intent(in) :: v_x(n)
+  double precision, intent(out) :: m_x(m,m)
+
+  integer :: i,j,k
+
+  ! 1D -> 2D lower diagonal
+  m_x = 0d0
+  do j = 1, m - 1
+    do i = j + 1, m
+      call mat_to_vec_index(i,j,k)
+      m_x(i,j) = v_x(k)
+    enddo
+  enddo
+  
+  ! Antisym
+  do i = 1, m - 1
+    do j = i + 1, m
+      m_x(i,j) = - m_x(j,i) 
+    enddo
+  enddo
+
+end
+#+END_SRC