diff --git a/configure b/configure
index b3ed7758..7d063b1d 100755
--- a/configure
+++ b/configure
@@ -369,7 +369,7 @@ else
       echo ""
       echo "${QP_ROOT}/build.ninja does not exist,"
       echo "you need to specify the COMPILATION configuration file."
-      echo "See  ./configure --help  for more details."
+      echo "See  ./configure -h  for more details."
       echo ""
 fi
 
diff --git a/external/Python/.gitignore b/external/Python/.gitignore
index e69de29b..9d09b447 100644
--- a/external/Python/.gitignore
+++ b/external/Python/.gitignore
@@ -0,0 +1 @@
+docopt.py
diff --git a/external/Python/docopt.py b/external/Python/docopt.py
deleted file mode 100644
index 7b927e2f..00000000
--- a/external/Python/docopt.py
+++ /dev/null
@@ -1,579 +0,0 @@
-"""Pythonic command-line interface parser that will make you smile.
-
- * http://docopt.org
- * Repository and issue-tracker: https://github.com/docopt/docopt
- * Licensed under terms of MIT license (see LICENSE-MIT)
- * Copyright (c) 2013 Vladimir Keleshev, vladimir@keleshev.com
-
-"""
-import sys
-import re
-
-
-__all__ = ['docopt']
-__version__ = '0.6.2'
-
-
-class DocoptLanguageError(Exception):
-
-    """Error in construction of usage-message by developer."""
-
-
-class DocoptExit(SystemExit):
-
-    """Exit in case user invoked program with incorrect arguments."""
-
-    usage = ''
-
-    def __init__(self, message=''):
-        SystemExit.__init__(self, (message + '\n' + self.usage).strip())
-
-
-class Pattern(object):
-
-    def __eq__(self, other):
-        return repr(self) == repr(other)
-
-    def __hash__(self):
-        return hash(repr(self))
-
-    def fix(self):
-        self.fix_identities()
-        self.fix_repeating_arguments()
-        return self
-
-    def fix_identities(self, uniq=None):
-        """Make pattern-tree tips point to same object if they are equal."""
-        if not hasattr(self, 'children'):
-            return self
-        uniq = list(set(self.flat())) if uniq is None else uniq
-        for i, c in enumerate(self.children):
-            if not hasattr(c, 'children'):
-                assert c in uniq
-                self.children[i] = uniq[uniq.index(c)]
-            else:
-                c.fix_identities(uniq)
-
-    def fix_repeating_arguments(self):
-        """Fix elements that should accumulate/increment values."""
-        either = [list(c.children) for c in self.either.children]
-        for case in either:
-            for e in [c for c in case if case.count(c) > 1]:
-                if type(e) is Argument or type(e) is Option and e.argcount:
-                    if e.value is None:
-                        e.value = []
-                    elif type(e.value) is not list:
-                        e.value = e.value.split()
-                if type(e) is Command or type(e) is Option and e.argcount == 0:
-                    e.value = 0
-        return self
-
-    @property
-    def either(self):
-        """Transform pattern into an equivalent, with only top-level Either."""
-        # Currently the pattern will not be equivalent, but more "narrow",
-        # although good enough to reason about list arguments.
-        ret = []
-        groups = [[self]]
-        while groups:
-            children = groups.pop(0)
-            types = [type(c) for c in children]
-            if Either in types:
-                either = [c for c in children if type(c) is Either][0]
-                children.pop(children.index(either))
-                for c in either.children:
-                    groups.append([c] + children)
-            elif Required in types:
-                required = [c for c in children if type(c) is Required][0]
-                children.pop(children.index(required))
-                groups.append(list(required.children) + children)
-            elif Optional in types:
-                optional = [c for c in children if type(c) is Optional][0]
-                children.pop(children.index(optional))
-                groups.append(list(optional.children) + children)
-            elif AnyOptions in types:
-                optional = [c for c in children if type(c) is AnyOptions][0]
-                children.pop(children.index(optional))
-                groups.append(list(optional.children) + children)
-            elif OneOrMore in types:
-                oneormore = [c for c in children if type(c) is OneOrMore][0]
-                children.pop(children.index(oneormore))
-                groups.append(list(oneormore.children) * 2 + children)
-            else:
-                ret.append(children)
-        return Either(*[Required(*e) for e in ret])
-
-
-class ChildPattern(Pattern):
-
-    def __init__(self, name, value=None):
-        self.name = name
-        self.value = value
-
-    def __repr__(self):
-        return '%s(%r, %r)' % (self.__class__.__name__, self.name, self.value)
-
-    def flat(self, *types):
-        return [self] if not types or type(self) in types else []
-
-    def match(self, left, collected=None):
-        collected = [] if collected is None else collected
-        pos, match = self.single_match(left)
-        if match is None:
-            return False, left, collected
-        left_ = left[:pos] + left[pos + 1:]
-        same_name = [a for a in collected if a.name == self.name]
-        if type(self.value) in (int, list):
-            if type(self.value) is int:
-                increment = 1
-            else:
-                increment = ([match.value] if type(match.value) is str
-                             else match.value)
-            if not same_name:
-                match.value = increment
-                return True, left_, collected + [match]
-            same_name[0].value += increment
-            return True, left_, collected
-        return True, left_, collected + [match]
-
-
-class ParentPattern(Pattern):
-
-    def __init__(self, *children):
-        self.children = list(children)
-
-    def __repr__(self):
-        return '%s(%s)' % (self.__class__.__name__,
-                           ', '.join(repr(a) for a in self.children))
-
-    def flat(self, *types):
-        if type(self) in types:
-            return [self]
-        return sum([c.flat(*types) for c in self.children], [])
-
-
-class Argument(ChildPattern):
-
-    def single_match(self, left):
-        for n, p in enumerate(left):
-            if type(p) is Argument:
-                return n, Argument(self.name, p.value)
-        return None, None
-
-    @classmethod
-    def parse(class_, source):
-        name = re.findall('(<\S*?>)', source)[0]
-        value = re.findall('\[default: (.*)\]', source, flags=re.I)
-        return class_(name, value[0] if value else None)
-
-
-class Command(Argument):
-
-    def __init__(self, name, value=False):
-        self.name = name
-        self.value = value
-
-    def single_match(self, left):
-        for n, p in enumerate(left):
-            if type(p) is Argument:
-                if p.value == self.name:
-                    return n, Command(self.name, True)
-                else:
-                    break
-        return None, None
-
-
-class Option(ChildPattern):
-
-    def __init__(self, short=None, long=None, argcount=0, value=False):
-        assert argcount in (0, 1)
-        self.short, self.long = short, long
-        self.argcount, self.value = argcount, value
-        self.value = None if value is False and argcount else value
-
-    @classmethod
-    def parse(class_, option_description):
-        short, long, argcount, value = None, None, 0, False
-        options, _, description = option_description.strip().partition('  ')
-        options = options.replace(',', ' ').replace('=', ' ')
-        for s in options.split():
-            if s.startswith('--'):
-                long = s
-            elif s.startswith('-'):
-                short = s
-            else:
-                argcount = 1
-        if argcount:
-            matched = re.findall('\[default: (.*)\]', description, flags=re.I)
-            value = matched[0] if matched else None
-        return class_(short, long, argcount, value)
-
-    def single_match(self, left):
-        for n, p in enumerate(left):
-            if self.name == p.name:
-                return n, p
-        return None, None
-
-    @property
-    def name(self):
-        return self.long or self.short
-
-    def __repr__(self):
-        return 'Option(%r, %r, %r, %r)' % (self.short, self.long,
-                                           self.argcount, self.value)
-
-
-class Required(ParentPattern):
-
-    def match(self, left, collected=None):
-        collected = [] if collected is None else collected
-        l = left
-        c = collected
-        for p in self.children:
-            matched, l, c = p.match(l, c)
-            if not matched:
-                return False, left, collected
-        return True, l, c
-
-
-class Optional(ParentPattern):
-
-    def match(self, left, collected=None):
-        collected = [] if collected is None else collected
-        for p in self.children:
-            m, left, collected = p.match(left, collected)
-        return True, left, collected
-
-
-class AnyOptions(Optional):
-
-    """Marker/placeholder for [options] shortcut."""
-
-
-class OneOrMore(ParentPattern):
-
-    def match(self, left, collected=None):
-        assert len(self.children) == 1
-        collected = [] if collected is None else collected
-        l = left
-        c = collected
-        l_ = None
-        matched = True
-        times = 0
-        while matched:
-            # could it be that something didn't match but changed l or c?
-            matched, l, c = self.children[0].match(l, c)
-            times += 1 if matched else 0
-            if l_ == l:
-                break
-            l_ = l
-        if times >= 1:
-            return True, l, c
-        return False, left, collected
-
-
-class Either(ParentPattern):
-
-    def match(self, left, collected=None):
-        collected = [] if collected is None else collected
-        outcomes = []
-        for p in self.children:
-            matched, _, _ = outcome = p.match(left, collected)
-            if matched:
-                outcomes.append(outcome)
-        if outcomes:
-            return min(outcomes, key=lambda outcome: len(outcome[1]))
-        return False, left, collected
-
-
-class TokenStream(list):
-
-    def __init__(self, source, error):
-        self += source.split() if hasattr(source, 'split') else source
-        self.error = error
-
-    def move(self):
-        return self.pop(0) if len(self) else None
-
-    def current(self):
-        return self[0] if len(self) else None
-
-
-def parse_long(tokens, options):
-    """long ::= '--' chars [ ( ' ' | '=' ) chars ] ;"""
-    long, eq, value = tokens.move().partition('=')
-    assert long.startswith('--')
-    value = None if eq == value == '' else value
-    similar = [o for o in options if o.long == long]
-    if tokens.error is DocoptExit and similar == []:  # if no exact match
-        similar = [o for o in options if o.long and o.long.startswith(long)]
-    if len(similar) > 1:  # might be simply specified ambiguously 2+ times?
-        raise tokens.error('%s is not a unique prefix: %s?' %
-                           (long, ', '.join(o.long for o in similar)))
-    elif len(similar) < 1:
-        argcount = 1 if eq == '=' else 0
-        o = Option(None, long, argcount)
-        options.append(o)
-        if tokens.error is DocoptExit:
-            o = Option(None, long, argcount, value if argcount else True)
-    else:
-        o = Option(similar[0].short, similar[0].long,
-                   similar[0].argcount, similar[0].value)
-        if o.argcount == 0:
-            if value is not None:
-                raise tokens.error('%s must not have an argument' % o.long)
-        else:
-            if value is None:
-                if tokens.current() is None:
-                    raise tokens.error('%s requires argument' % o.long)
-                value = tokens.move()
-        if tokens.error is DocoptExit:
-            o.value = value if value is not None else True
-    return [o]
-
-
-def parse_shorts(tokens, options):
-    """shorts ::= '-' ( chars )* [ [ ' ' ] chars ] ;"""
-    token = tokens.move()
-    assert token.startswith('-') and not token.startswith('--')
-    left = token.lstrip('-')
-    parsed = []
-    while left != '':
-        short, left = '-' + left[0], left[1:]
-        similar = [o for o in options if o.short == short]
-        if len(similar) > 1:
-            raise tokens.error('%s is specified ambiguously %d times' %
-                               (short, len(similar)))
-        elif len(similar) < 1:
-            o = Option(short, None, 0)
-            options.append(o)
-            if tokens.error is DocoptExit:
-                o = Option(short, None, 0, True)
-        else:  # why copying is necessary here?
-            o = Option(short, similar[0].long,
-                       similar[0].argcount, similar[0].value)
-            value = None
-            if o.argcount != 0:
-                if left == '':
-                    if tokens.current() is None:
-                        raise tokens.error('%s requires argument' % short)
-                    value = tokens.move()
-                else:
-                    value = left
-                    left = ''
-            if tokens.error is DocoptExit:
-                o.value = value if value is not None else True
-        parsed.append(o)
-    return parsed
-
-
-def parse_pattern(source, options):
-    tokens = TokenStream(re.sub(r'([\[\]\(\)\|]|\.\.\.)', r' \1 ', source),
-                         DocoptLanguageError)
-    result = parse_expr(tokens, options)
-    if tokens.current() is not None:
-        raise tokens.error('unexpected ending: %r' % ' '.join(tokens))
-    return Required(*result)
-
-
-def parse_expr(tokens, options):
-    """expr ::= seq ( '|' seq )* ;"""
-    seq = parse_seq(tokens, options)
-    if tokens.current() != '|':
-        return seq
-    result = [Required(*seq)] if len(seq) > 1 else seq
-    while tokens.current() == '|':
-        tokens.move()
-        seq = parse_seq(tokens, options)
-        result += [Required(*seq)] if len(seq) > 1 else seq
-    return [Either(*result)] if len(result) > 1 else result
-
-
-def parse_seq(tokens, options):
-    """seq ::= ( atom [ '...' ] )* ;"""
-    result = []
-    while tokens.current() not in [None, ']', ')', '|']:
-        atom = parse_atom(tokens, options)
-        if tokens.current() == '...':
-            atom = [OneOrMore(*atom)]
-            tokens.move()
-        result += atom
-    return result
-
-
-def parse_atom(tokens, options):
-    """atom ::= '(' expr ')' | '[' expr ']' | 'options'
-             | long | shorts | argument | command ;
-    """
-    token = tokens.current()
-    result = []
-    if token in '([':
-        tokens.move()
-        matching, pattern = {'(': [')', Required], '[': [']', Optional]}[token]
-        result = pattern(*parse_expr(tokens, options))
-        if tokens.move() != matching:
-            raise tokens.error("unmatched '%s'" % token)
-        return [result]
-    elif token == 'options':
-        tokens.move()
-        return [AnyOptions()]
-    elif token.startswith('--') and token != '--':
-        return parse_long(tokens, options)
-    elif token.startswith('-') and token not in ('-', '--'):
-        return parse_shorts(tokens, options)
-    elif token.startswith('<') and token.endswith('>') or token.isupper():
-        return [Argument(tokens.move())]
-    else:
-        return [Command(tokens.move())]
-
-
-def parse_argv(tokens, options, options_first=False):
-    """Parse command-line argument vector.
-
-    If options_first:
-        argv ::= [ long | shorts ]* [ argument ]* [ '--' [ argument ]* ] ;
-    else:
-        argv ::= [ long | shorts | argument ]* [ '--' [ argument ]* ] ;
-
-    """
-    parsed = []
-    while tokens.current() is not None:
-        if tokens.current() == '--':
-            return parsed + [Argument(None, v) for v in tokens]
-        elif tokens.current().startswith('--'):
-            parsed += parse_long(tokens, options)
-        elif tokens.current().startswith('-') and tokens.current() != '-':
-            parsed += parse_shorts(tokens, options)
-        elif options_first:
-            return parsed + [Argument(None, v) for v in tokens]
-        else:
-            parsed.append(Argument(None, tokens.move()))
-    return parsed
-
-
-def parse_defaults(doc):
-    # in python < 2.7 you can't pass flags=re.MULTILINE
-    split = re.split('\n *(<\S+?>|-\S+?)', doc)[1:]
-    split = [s1 + s2 for s1, s2 in zip(split[::2], split[1::2])]
-    options = [Option.parse(s) for s in split if s.startswith('-')]
-    #arguments = [Argument.parse(s) for s in split if s.startswith('<')]
-    #return options, arguments
-    return options
-
-
-def printable_usage(doc):
-    # in python < 2.7 you can't pass flags=re.IGNORECASE
-    usage_split = re.split(r'([Uu][Ss][Aa][Gg][Ee]:)', doc)
-    if len(usage_split) < 3:
-        raise DocoptLanguageError('"usage:" (case-insensitive) not found.')
-    if len(usage_split) > 3:
-        raise DocoptLanguageError('More than one "usage:" (case-insensitive).')
-    return re.split(r'\n\s*\n', ''.join(usage_split[1:]))[0].strip()
-
-
-def formal_usage(printable_usage):
-    pu = printable_usage.split()[1:]  # split and drop "usage:"
-    return '( ' + ' '.join(') | (' if s == pu[0] else s for s in pu[1:]) + ' )'
-
-
-def extras(help, version, options, doc):
-    if help and any((o.name in ('-h', '--help')) and o.value for o in options):
-        print(doc.strip("\n"))
-        sys.exit()
-    if version and any(o.name == '--version' and o.value for o in options):
-        print(version)
-        sys.exit()
-
-
-class Dict(dict):
-    def __repr__(self):
-        return '{%s}' % ',\n '.join('%r: %r' % i for i in sorted(self.items()))
-
-
-def docopt(doc, argv=None, help=True, version=None, options_first=False):
-    """Parse `argv` based on command-line interface described in `doc`.
-
-    `docopt` creates your command-line interface based on its
-    description that you pass as `doc`. Such description can contain
-    --options, <positional-argument>, commands, which could be
-    [optional], (required), (mutually | exclusive) or repeated...
-
-    Parameters
-    ----------
-    doc : str
-        Description of your command-line interface.
-    argv : list of str, optional
-        Argument vector to be parsed. sys.argv[1:] is used if not
-        provided.
-    help : bool (default: True)
-        Set to False to disable automatic help on -h or --help
-        options.
-    version : any object
-        If passed, the object will be printed if --version is in
-        `argv`.
-    options_first : bool (default: False)
-        Set to True to require options preceed positional arguments,
-        i.e. to forbid options and positional arguments intermix.
-
-    Returns
-    -------
-    args : dict
-        A dictionary, where keys are names of command-line elements
-        such as e.g. "--verbose" and "<path>", and values are the
-        parsed values of those elements.
-
-    Example
-    -------
-    >>> from docopt import docopt
-    >>> doc = '''
-    Usage:
-        my_program tcp <host> <port> [--timeout=<seconds>]
-        my_program serial <port> [--baud=<n>] [--timeout=<seconds>]
-        my_program (-h | --help | --version)
-
-    Options:
-        -h, --help  Show this screen and exit.
-        --baud=<n>  Baudrate [default: 9600]
-    '''
-    >>> argv = ['tcp', '127.0.0.1', '80', '--timeout', '30']
-    >>> docopt(doc, argv)
-    {'--baud': '9600',
-     '--help': False,
-     '--timeout': '30',
-     '--version': False,
-     '<host>': '127.0.0.1',
-     '<port>': '80',
-     'serial': False,
-     'tcp': True}
-
-    See also
-    --------
-    * For video introduction see http://docopt.org
-    * Full documentation is available in README.rst as well as online
-      at https://github.com/docopt/docopt#readme
-
-    """
-    if argv is None:
-        argv = sys.argv[1:]
-    DocoptExit.usage = printable_usage(doc)
-    options = parse_defaults(doc)
-    pattern = parse_pattern(formal_usage(DocoptExit.usage), options)
-    # [default] syntax for argument is disabled
-    #for a in pattern.flat(Argument):
-    #    same_name = [d for d in arguments if d.name == a.name]
-    #    if same_name:
-    #        a.value = same_name[0].value
-    argv = parse_argv(TokenStream(argv, DocoptExit), list(options),
-                      options_first)
-    pattern_options = set(pattern.flat(Option))
-    for ao in pattern.flat(AnyOptions):
-        doc_options = parse_defaults(doc)
-        ao.children = list(set(doc_options) - pattern_options)
-        #if any_options:
-        #    ao.children += [Option(o.short, o.long, o.argcount)
-        #                    for o in argv if type(o) is Option]
-    extras(help, version, argv, doc)
-    matched, left, collected = pattern.fix().match(argv)
-    if matched and left == []:  # better error message if left?
-        return Dict((a.name, a.value) for a in (pattern.flat() + collected))
-    raise DocoptExit()
diff --git a/include/.gitignore b/include/.gitignore
index d52be2d2..afcb2de6 100644
--- a/include/.gitignore
+++ b/include/.gitignore
@@ -5,3 +5,4 @@ zconf.h
 zlib.h
 zmq_utils.h
 f77_zmq_free.h
+f77_zmq.h
diff --git a/include/f77_zmq.h b/include/f77_zmq.h
deleted file mode 100644
index b19bb707..00000000
--- a/include/f77_zmq.h
+++ /dev/null
@@ -1,617 +0,0 @@
-      integer EADDRINUSE
-      integer EADDRNOTAVAIL
-      integer EAFNOSUPPORT
-      integer ECONNABORTED
-      integer ECONNREFUSED
-      integer ECONNRESET
-      integer EFSM
-      integer EHOSTUNREACH
-      integer EINPROGRESS
-      integer EMSGSIZE
-      integer EMTHREAD
-      integer ENETDOWN
-      integer ENETRESET
-      integer ENETUNREACH
-      integer ENOBUFS
-      integer ENOCOMPATPROTO
-      integer ENOTCONN
-      integer ENOTSOCK
-      integer ENOTSUP
-      integer EPROTONOSUPPORT
-      integer ETERM
-      integer ETIMEDOUT
-      integer ZMQ_AFFINITY
-      integer ZMQ_BACKLOG
-      integer ZMQ_BINDTODEVICE
-      integer ZMQ_BLOCKY
-      integer ZMQ_CHANNEL
-      integer ZMQ_CLIENT
-      integer ZMQ_CONFLATE
-      integer ZMQ_CONNECT_RID
-      integer ZMQ_CONNECT_ROUTING_ID
-      integer ZMQ_CONNECT_TIMEOUT
-      integer ZMQ_CURRENT_EVENT_VERSION
-      integer ZMQ_CURRENT_EVENT_VERSION_DRAFT
-      integer ZMQ_CURVE
-      integer ZMQ_CURVE_PUBLICKEY
-      integer ZMQ_CURVE_SECRETKEY
-      integer ZMQ_CURVE_SERVER
-      integer ZMQ_CURVE_SERVERKEY
-      integer ZMQ_DEALER
-      integer ZMQ_DEFINED_STDINT
-      integer ZMQ_DELAY_ATTACH_ON_CONNECT
-      integer ZMQ_DGRAM
-      integer ZMQ_DISCONNECT_MSG
-      integer ZMQ_DISH
-      integer ZMQ_DONTWAIT
-      integer ZMQ_EVENTS
-      integer ZMQ_EVENT_ACCEPTED
-      integer ZMQ_EVENT_ACCEPT_FAILED
-      integer ZMQ_EVENT_ALL
-      integer ZMQ_EVENT_ALL_V1
-      integer ZMQ_EVENT_ALL_V2
-      integer ZMQ_EVENT_BIND_FAILED
-      integer ZMQ_EVENT_CLOSED
-      integer ZMQ_EVENT_CLOSE_FAILED
-      integer ZMQ_EVENT_CONNECTED
-      integer ZMQ_EVENT_CONNECT_DELAYED
-      integer ZMQ_EVENT_CONNECT_RETRIED
-      integer ZMQ_EVENT_DISCONNECTED
-      integer ZMQ_EVENT_HANDSHAKE_FAILED_AUTH
-      integer ZMQ_EVENT_HANDSHAKE_FAILED_NO_DETAIL
-      integer ZMQ_EVENT_HANDSHAKE_FAILED_PROTOCOL
-      integer ZMQ_EVENT_HANDSHAKE_SUCCEEDED
-      integer ZMQ_EVENT_LISTENING
-      integer ZMQ_EVENT_MONITOR_STOPPED
-      integer ZMQ_EVENT_PIPES_STATS
-      integer ZMQ_FAIL_UNROUTABLE
-      integer ZMQ_FD
-      integer ZMQ_FORWARDER
-      integer ZMQ_GATHER
-      integer ZMQ_GROUP_MAX_LENGTH
-      integer ZMQ_GSSAPI
-      integer ZMQ_GSSAPI_NT_HOSTBASED
-      integer ZMQ_GSSAPI_NT_KRB5_PRINCIPAL
-      integer ZMQ_GSSAPI_NT_USER_NAME
-      integer ZMQ_GSSAPI_PLAINTEXT
-      integer ZMQ_GSSAPI_PRINCIPAL
-      integer ZMQ_GSSAPI_PRINCIPAL_NAMETYPE
-      integer ZMQ_GSSAPI_SERVER
-      integer ZMQ_GSSAPI_SERVICE_PRINCIPAL
-      integer ZMQ_GSSAPI_SERVICE_PRINCIPAL_NAMETYPE
-      integer ZMQ_HANDSHAKE_IVL
-      integer ZMQ_HAS_CAPABILITIES
-      integer ZMQ_HAUSNUMERO
-      integer ZMQ_HEARTBEAT_IVL
-      integer ZMQ_HEARTBEAT_TIMEOUT
-      integer ZMQ_HEARTBEAT_TTL
-      integer ZMQ_HELLO_MSG
-      integer ZMQ_IDENTITY
-      integer ZMQ_IMMEDIATE
-      integer ZMQ_INVERT_MATCHING
-      integer ZMQ_IN_BATCH_SIZE
-      integer ZMQ_IO_THREADS
-      integer ZMQ_IO_THREADS_DFLT
-      integer ZMQ_IPC_FILTER_GID
-      integer ZMQ_IPC_FILTER_PID
-      integer ZMQ_IPC_FILTER_UID
-      integer ZMQ_IPV4ONLY
-      integer ZMQ_IPV6
-      integer ZMQ_LAST_ENDPOINT
-      integer ZMQ_LINGER
-      integer ZMQ_LOOPBACK_FASTPATH
-      integer ZMQ_MAXMSGSIZE
-      integer ZMQ_MAX_MSGSZ
-      integer ZMQ_MAX_SOCKETS
-      integer ZMQ_MAX_SOCKETS_DFLT
-      integer ZMQ_MECHANISM
-      integer ZMQ_METADATA
-      integer ZMQ_MORE
-      integer ZMQ_MSG_T_SIZE
-      integer ZMQ_MULTICAST_HOPS
-      integer ZMQ_MULTICAST_LOOP
-      integer ZMQ_MULTICAST_MAXTPDU
-      integer ZMQ_NOBLOCK
-      integer ZMQ_NOTIFY_CONNECT
-      integer ZMQ_NOTIFY_DISCONNECT
-      integer ZMQ_NULL
-      integer ZMQ_ONLY_FIRST_SUBSCRIBE
-      integer ZMQ_OUT_BATCH_SIZE
-      integer ZMQ_PAIR
-      integer ZMQ_PEER
-      integer ZMQ_PLAIN
-      integer ZMQ_PLAIN_PASSWORD
-      integer ZMQ_PLAIN_SERVER
-      integer ZMQ_PLAIN_USERNAME
-      integer ZMQ_POLLERR
-      integer ZMQ_POLLIN
-      integer ZMQ_POLLITEMS_DFLT
-      integer ZMQ_POLLOUT
-      integer ZMQ_POLLPRI
-      integer ZMQ_PRIORITY
-      integer ZMQ_PROBE_ROUTER
-      integer ZMQ_PROTOCOL_ERROR_WS_UNSPECIFIED
-      integer ZMQ_PROTOCOL_ERROR_ZAP_BAD_REQUEST_ID
-      integer ZMQ_PROTOCOL_ERROR_ZAP_BAD_VERSION
-      integer ZMQ_PROTOCOL_ERROR_ZAP_INVALID_METADATA
-      integer ZMQ_PROTOCOL_ERROR_ZAP_INVALID_STATUS_CODE
-      integer ZMQ_PROTOCOL_ERROR_ZAP_MALFORMED_REPLY
-      integer ZMQ_PROTOCOL_ERROR_ZAP_UNSPECIFIED
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_CRYPTOGRAPHIC
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_METADATA
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_SEQUENCE
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_KEY_EXCHANGE
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_ERROR
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_HELLO
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_INITIATE
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_MESSAGE
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_READY
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_UNSPECIFIED
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_WELCOME
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_MECHANISM_MISMATCH
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_UNEXPECTED_COMMAND
-      integer ZMQ_PROTOCOL_ERROR_ZMTP_UNSPECIFIED
-      integer ZMQ_PTR
-      integer ZMQ_PUB
-      integer ZMQ_PULL
-      integer ZMQ_PUSH
-      integer ZMQ_QUEUE
-      integer ZMQ_RADIO
-      integer ZMQ_RATE
-      integer ZMQ_RCVBUF
-      integer ZMQ_RCVHWM
-      integer ZMQ_RCVMORE
-      integer ZMQ_RCVTIMEO
-      integer ZMQ_RECONNECT_IVL
-      integer ZMQ_RECONNECT_IVL_MAX
-      integer ZMQ_RECONNECT_STOP
-      integer ZMQ_RECONNECT_STOP_AFTER_DISCONNECT
-      integer ZMQ_RECONNECT_STOP_CONN_REFUSED
-      integer ZMQ_RECONNECT_STOP_HANDSHAKE_FAILED
-      integer ZMQ_RECOVERY_IVL
-      integer ZMQ_REP
-      integer ZMQ_REQ
-      integer ZMQ_REQ_CORRELATE
-      integer ZMQ_REQ_RELAXED
-      integer ZMQ_ROUTER
-      integer ZMQ_ROUTER_BEHAVIOR
-      integer ZMQ_ROUTER_HANDOVER
-      integer ZMQ_ROUTER_MANDATORY
-      integer ZMQ_ROUTER_NOTIFY
-      integer ZMQ_ROUTER_RAW
-      integer ZMQ_ROUTING_ID
-      integer ZMQ_SCATTER
-      integer ZMQ_SERVER
-      integer ZMQ_SHARED
-      integer ZMQ_SNDBUF
-      integer ZMQ_SNDHWM
-      integer ZMQ_SNDMORE
-      integer ZMQ_SNDTIMEO
-      integer ZMQ_SOCKET_LIMIT
-      integer ZMQ_SOCKS_PASSWORD
-      integer ZMQ_SOCKS_PROXY
-      integer ZMQ_SOCKS_USERNAME
-      integer ZMQ_SRCFD
-      integer ZMQ_STREAM
-      integer ZMQ_STREAMER
-      integer ZMQ_STREAM_NOTIFY
-      integer ZMQ_SUB
-      integer ZMQ_SUBSCRIBE
-      integer ZMQ_TCP_ACCEPT_FILTER
-      integer ZMQ_TCP_KEEPALIVE
-      integer ZMQ_TCP_KEEPALIVE_CNT
-      integer ZMQ_TCP_KEEPALIVE_IDLE
-      integer ZMQ_TCP_KEEPALIVE_INTVL
-      integer ZMQ_TCP_MAXRT
-      integer ZMQ_THREAD_AFFINITY_CPU_ADD
-      integer ZMQ_THREAD_AFFINITY_CPU_REMOVE
-      integer ZMQ_THREAD_NAME_PREFIX
-      integer ZMQ_THREAD_PRIORITY
-      integer ZMQ_THREAD_PRIORITY_DFLT
-      integer ZMQ_THREAD_SAFE
-      integer ZMQ_THREAD_SCHED_POLICY
-      integer ZMQ_THREAD_SCHED_POLICY_DFLT
-      integer ZMQ_TOS
-      integer ZMQ_TYPE
-      integer ZMQ_UNSUBSCRIBE
-      integer ZMQ_USE_FD
-      integer ZMQ_VERSION
-      integer ZMQ_VERSION_MAJOR
-      integer ZMQ_VERSION_MINOR
-      integer ZMQ_VERSION_PATCH
-      integer ZMQ_VMCI_BUFFER_MAX_SIZE
-      integer ZMQ_VMCI_BUFFER_MIN_SIZE
-      integer ZMQ_VMCI_BUFFER_SIZE
-      integer ZMQ_VMCI_CONNECT_TIMEOUT
-      integer ZMQ_WSS_CERT_PEM
-      integer ZMQ_WSS_HOSTNAME
-      integer ZMQ_WSS_KEY_PEM
-      integer ZMQ_WSS_TRUST_PEM
-      integer ZMQ_WSS_TRUST_SYSTEM
-      integer ZMQ_XPUB
-      integer ZMQ_XPUB_MANUAL
-      integer ZMQ_XPUB_MANUAL_LAST_VALUE
-      integer ZMQ_XPUB_NODROP
-      integer ZMQ_XPUB_VERBOSE
-      integer ZMQ_XPUB_VERBOSER
-      integer ZMQ_XPUB_WELCOME_MSG
-      integer ZMQ_XREP
-      integer ZMQ_XREQ
-      integer ZMQ_XSUB
-      integer ZMQ_ZAP_DOMAIN
-      integer ZMQ_ZAP_ENFORCE_DOMAIN
-      integer ZMQ_ZERO_COPY_RECV
-      parameter(EADDRINUSE=156384717)
-      parameter(EADDRNOTAVAIL=156384718)
-      parameter(EAFNOSUPPORT=156384723)
-      parameter(ECONNABORTED=156384725)
-      parameter(ECONNREFUSED=156384719)
-      parameter(ECONNRESET=156384726)
-      parameter(EFSM=156384763)
-      parameter(EHOSTUNREACH=156384729)
-      parameter(EINPROGRESS=156384720)
-      parameter(EMSGSIZE=156384722)
-      parameter(EMTHREAD=156384766)
-      parameter(ENETDOWN=156384716)
-      parameter(ENETRESET=156384730)
-      parameter(ENETUNREACH=156384724)
-      parameter(ENOBUFS=156384715)
-      parameter(ENOCOMPATPROTO=156384764)
-      parameter(ENOTCONN=156384727)
-      parameter(ENOTSOCK=156384721)
-      parameter(ENOTSUP=156384713)
-      parameter(EPROTONOSUPPORT=156384714)
-      parameter(ETERM=156384765)
-      parameter(ETIMEDOUT=156384728)
-      parameter(ZMQ_AFFINITY=4)
-      parameter(ZMQ_BACKLOG=19)
-      parameter(ZMQ_BINDTODEVICE=92)
-      parameter(ZMQ_BLOCKY=70)
-      parameter(ZMQ_CHANNEL=20)
-      parameter(ZMQ_CLIENT=13)
-      parameter(ZMQ_CONFLATE=54)
-      parameter(ZMQ_CONNECT_RID=61)
-      parameter(ZMQ_CONNECT_ROUTING_ID=61)
-      parameter(ZMQ_CONNECT_TIMEOUT=79)
-      parameter(ZMQ_CURRENT_EVENT_VERSION=1)
-      parameter(ZMQ_CURRENT_EVENT_VERSION_DRAFT=2)
-      parameter(ZMQ_CURVE=2)
-      parameter(ZMQ_CURVE_PUBLICKEY=48)
-      parameter(ZMQ_CURVE_SECRETKEY=49)
-      parameter(ZMQ_CURVE_SERVER=47)
-      parameter(ZMQ_CURVE_SERVERKEY=50)
-      parameter(ZMQ_DEALER=5)
-      parameter(ZMQ_DEFINED_STDINT=1)
-      parameter(ZMQ_DELAY_ATTACH_ON_CONNECT=39)
-      parameter(ZMQ_DGRAM=18)
-      parameter(ZMQ_DISCONNECT_MSG=111)
-      parameter(ZMQ_DISH=15)
-      parameter(ZMQ_DONTWAIT=1)
-      parameter(ZMQ_EVENTS=15)
-      parameter(ZMQ_EVENT_ACCEPTED=32)
-      parameter(ZMQ_EVENT_ACCEPT_FAILED=64)
-      parameter(ZMQ_EVENT_ALL=65535)
-      parameter(ZMQ_EVENT_ALL_V1=65535)
-      parameter(ZMQ_EVENT_ALL_V2=131071)
-      parameter(ZMQ_EVENT_BIND_FAILED=16)
-      parameter(ZMQ_EVENT_CLOSED=128)
-      parameter(ZMQ_EVENT_CLOSE_FAILED=256)
-      parameter(ZMQ_EVENT_CONNECTED=1)
-      parameter(ZMQ_EVENT_CONNECT_DELAYED=2)
-      parameter(ZMQ_EVENT_CONNECT_RETRIED=4)
-      parameter(ZMQ_EVENT_DISCONNECTED=512)
-      parameter(ZMQ_EVENT_HANDSHAKE_FAILED_AUTH=16384)
-      parameter(ZMQ_EVENT_HANDSHAKE_FAILED_NO_DETAIL=2048)
-      parameter(ZMQ_EVENT_HANDSHAKE_FAILED_PROTOCOL=8192)
-      parameter(ZMQ_EVENT_HANDSHAKE_SUCCEEDED=4096)
-      parameter(ZMQ_EVENT_LISTENING=8)
-      parameter(ZMQ_EVENT_MONITOR_STOPPED=1024)
-      parameter(ZMQ_EVENT_PIPES_STATS=65536)
-      parameter(ZMQ_FAIL_UNROUTABLE=33)
-      parameter(ZMQ_FD=14)
-      parameter(ZMQ_FORWARDER=2)
-      parameter(ZMQ_GATHER=16)
-      parameter(ZMQ_GROUP_MAX_LENGTH=255)
-      parameter(ZMQ_GSSAPI=3)
-      parameter(ZMQ_GSSAPI_NT_HOSTBASED=0)
-      parameter(ZMQ_GSSAPI_NT_KRB5_PRINCIPAL=2)
-      parameter(ZMQ_GSSAPI_NT_USER_NAME=1)
-      parameter(ZMQ_GSSAPI_PLAINTEXT=65)
-      parameter(ZMQ_GSSAPI_PRINCIPAL=63)
-      parameter(ZMQ_GSSAPI_PRINCIPAL_NAMETYPE=90)
-      parameter(ZMQ_GSSAPI_SERVER=62)
-      parameter(ZMQ_GSSAPI_SERVICE_PRINCIPAL=64)
-      parameter(ZMQ_GSSAPI_SERVICE_PRINCIPAL_NAMETYPE=91)
-      parameter(ZMQ_HANDSHAKE_IVL=66)
-      parameter(ZMQ_HAS_CAPABILITIES=1)
-      parameter(ZMQ_HAUSNUMERO=156384712)
-      parameter(ZMQ_HEARTBEAT_IVL=75)
-      parameter(ZMQ_HEARTBEAT_TIMEOUT=77)
-      parameter(ZMQ_HEARTBEAT_TTL=76)
-      parameter(ZMQ_HELLO_MSG=110)
-      parameter(ZMQ_IDENTITY=5)
-      parameter(ZMQ_IMMEDIATE=39)
-      parameter(ZMQ_INVERT_MATCHING=74)
-      parameter(ZMQ_IN_BATCH_SIZE=101)
-      parameter(ZMQ_IO_THREADS=1)
-      parameter(ZMQ_IO_THREADS_DFLT=1)
-      parameter(ZMQ_IPC_FILTER_GID=60)
-      parameter(ZMQ_IPC_FILTER_PID=58)
-      parameter(ZMQ_IPC_FILTER_UID=59)
-      parameter(ZMQ_IPV4ONLY=31)
-      parameter(ZMQ_IPV6=42)
-      parameter(ZMQ_LAST_ENDPOINT=32)
-      parameter(ZMQ_LINGER=17)
-      parameter(ZMQ_LOOPBACK_FASTPATH=94)
-      parameter(ZMQ_MAXMSGSIZE=22)
-      parameter(ZMQ_MAX_MSGSZ=5)
-      parameter(ZMQ_MAX_SOCKETS=2)
-      parameter(ZMQ_MAX_SOCKETS_DFLT=1023)
-      parameter(ZMQ_MECHANISM=43)
-      parameter(ZMQ_METADATA=95)
-      parameter(ZMQ_MORE=1)
-      parameter(ZMQ_MSG_T_SIZE=6)
-      parameter(ZMQ_MULTICAST_HOPS=25)
-      parameter(ZMQ_MULTICAST_LOOP=96)
-      parameter(ZMQ_MULTICAST_MAXTPDU=84)
-      parameter(ZMQ_NOBLOCK=1)
-      parameter(ZMQ_NOTIFY_CONNECT=1)
-      parameter(ZMQ_NOTIFY_DISCONNECT=2)
-      parameter(ZMQ_NULL=0)
-      parameter(ZMQ_ONLY_FIRST_SUBSCRIBE=108)
-      parameter(ZMQ_OUT_BATCH_SIZE=102)
-      parameter(ZMQ_PAIR=0)
-      parameter(ZMQ_PEER=19)
-      parameter(ZMQ_PLAIN=1)
-      parameter(ZMQ_PLAIN_PASSWORD=46)
-      parameter(ZMQ_PLAIN_SERVER=44)
-      parameter(ZMQ_PLAIN_USERNAME=45)
-      parameter(ZMQ_POLLERR=4)
-      parameter(ZMQ_POLLIN=1)
-      parameter(ZMQ_POLLITEMS_DFLT=16)
-      parameter(ZMQ_POLLOUT=2)
-      parameter(ZMQ_POLLPRI=8)
-      parameter(ZMQ_PRIORITY=112)
-      parameter(ZMQ_PROBE_ROUTER=51)
-      parameter(ZMQ_PROTOCOL_ERROR_WS_UNSPECIFIED=805306368)
-      parameter(ZMQ_PROTOCOL_ERROR_ZAP_BAD_REQUEST_ID=536870914)
-      parameter(ZMQ_PROTOCOL_ERROR_ZAP_BAD_VERSION=536870915)
-      parameter(ZMQ_PROTOCOL_ERROR_ZAP_INVALID_METADATA=536870917)
-      parameter(ZMQ_PROTOCOL_ERROR_ZAP_INVALID_STATUS_CODE=536870916)
-      parameter(ZMQ_PROTOCOL_ERROR_ZAP_MALFORMED_REPLY=536870913)
-      parameter(ZMQ_PROTOCOL_ERROR_ZAP_UNSPECIFIED=536870912)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_CRYPTOGRAPHIC=285212673)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_METADATA=268435480)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_SEQUENCE=268435458)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_KEY_EXCHANGE=268435459)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_ERROR=268435477)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_HELLO=268435475)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_INITIATE=268435476)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_MESSAGE=268435474)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_READY=268435478)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_UNSPECIFIED=268435473)
-      parameter(
-     & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_WELCOME=268435479)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_MECHANISM_MISMATCH=285212674)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_UNEXPECTED_COMMAND=268435457)
-      parameter(ZMQ_PROTOCOL_ERROR_ZMTP_UNSPECIFIED=268435456)
-      parameter(ZMQ_PTR=8)
-      parameter(ZMQ_PUB=1)
-      parameter(ZMQ_PULL=7)
-      parameter(ZMQ_PUSH=8)
-      parameter(ZMQ_QUEUE=3)
-      parameter(ZMQ_RADIO=14)
-      parameter(ZMQ_RATE=8)
-      parameter(ZMQ_RCVBUF=12)
-      parameter(ZMQ_RCVHWM=24)
-      parameter(ZMQ_RCVMORE=13)
-      parameter(ZMQ_RCVTIMEO=27)
-      parameter(ZMQ_RECONNECT_IVL=18)
-      parameter(ZMQ_RECONNECT_IVL_MAX=21)
-      parameter(ZMQ_RECONNECT_STOP=109)
-      parameter(ZMQ_RECONNECT_STOP_AFTER_DISCONNECT=3)
-      parameter(ZMQ_RECONNECT_STOP_CONN_REFUSED=1)
-      parameter(ZMQ_RECONNECT_STOP_HANDSHAKE_FAILED=2)
-      parameter(ZMQ_RECOVERY_IVL=9)
-      parameter(ZMQ_REP=4)
-      parameter(ZMQ_REQ=3)
-      parameter(ZMQ_REQ_CORRELATE=52)
-      parameter(ZMQ_REQ_RELAXED=53)
-      parameter(ZMQ_ROUTER=6)
-      parameter(ZMQ_ROUTER_BEHAVIOR=33)
-      parameter(ZMQ_ROUTER_HANDOVER=56)
-      parameter(ZMQ_ROUTER_MANDATORY=33)
-      parameter(ZMQ_ROUTER_NOTIFY=97)
-      parameter(ZMQ_ROUTER_RAW=41)
-      parameter(ZMQ_ROUTING_ID=5)
-      parameter(ZMQ_SCATTER=17)
-      parameter(ZMQ_SERVER=12)
-      parameter(ZMQ_SHARED=3)
-      parameter(ZMQ_SNDBUF=11)
-      parameter(ZMQ_SNDHWM=23)
-      parameter(ZMQ_SNDMORE=2)
-      parameter(ZMQ_SNDTIMEO=28)
-      parameter(ZMQ_SOCKET_LIMIT=3)
-      parameter(ZMQ_SOCKS_PASSWORD=100)
-      parameter(ZMQ_SOCKS_PROXY=68)
-      parameter(ZMQ_SOCKS_USERNAME=99)
-      parameter(ZMQ_SRCFD=2)
-      parameter(ZMQ_STREAM=11)
-      parameter(ZMQ_STREAMER=1)
-      parameter(ZMQ_STREAM_NOTIFY=73)
-      parameter(ZMQ_SUB=2)
-      parameter(ZMQ_SUBSCRIBE=6)
-      parameter(ZMQ_TCP_ACCEPT_FILTER=38)
-      parameter(ZMQ_TCP_KEEPALIVE=34)
-      parameter(ZMQ_TCP_KEEPALIVE_CNT=35)
-      parameter(ZMQ_TCP_KEEPALIVE_IDLE=36)
-      parameter(ZMQ_TCP_KEEPALIVE_INTVL=37)
-      parameter(ZMQ_TCP_MAXRT=80)
-      parameter(ZMQ_THREAD_AFFINITY_CPU_ADD=7)
-      parameter(ZMQ_THREAD_AFFINITY_CPU_REMOVE=8)
-      parameter(ZMQ_THREAD_NAME_PREFIX=9)
-      parameter(ZMQ_THREAD_PRIORITY=3)
-      parameter(ZMQ_THREAD_PRIORITY_DFLT=-1)
-      parameter(ZMQ_THREAD_SAFE=81)
-      parameter(ZMQ_THREAD_SCHED_POLICY=4)
-      parameter(ZMQ_THREAD_SCHED_POLICY_DFLT=-1)
-      parameter(ZMQ_TOS=57)
-      parameter(ZMQ_TYPE=16)
-      parameter(ZMQ_UNSUBSCRIBE=7)
-      parameter(ZMQ_USE_FD=89)
-      parameter(ZMQ_VERSION=40304)
-      parameter(ZMQ_VERSION_MAJOR=4)
-      parameter(ZMQ_VERSION_MINOR=3)
-      parameter(ZMQ_VERSION_PATCH=4)
-      parameter(ZMQ_VMCI_BUFFER_MAX_SIZE=87)
-      parameter(ZMQ_VMCI_BUFFER_MIN_SIZE=86)
-      parameter(ZMQ_VMCI_BUFFER_SIZE=85)
-      parameter(ZMQ_VMCI_CONNECT_TIMEOUT=88)
-      parameter(ZMQ_WSS_CERT_PEM=104)
-      parameter(ZMQ_WSS_HOSTNAME=106)
-      parameter(ZMQ_WSS_KEY_PEM=103)
-      parameter(ZMQ_WSS_TRUST_PEM=105)
-      parameter(ZMQ_WSS_TRUST_SYSTEM=107)
-      parameter(ZMQ_XPUB=9)
-      parameter(ZMQ_XPUB_MANUAL=71)
-      parameter(ZMQ_XPUB_MANUAL_LAST_VALUE=98)
-      parameter(ZMQ_XPUB_NODROP=69)
-      parameter(ZMQ_XPUB_VERBOSE=40)
-      parameter(ZMQ_XPUB_VERBOSER=78)
-      parameter(ZMQ_XPUB_WELCOME_MSG=72)
-      parameter(ZMQ_XREP=6)
-      parameter(ZMQ_XREQ=5)
-      parameter(ZMQ_XSUB=10)
-      parameter(ZMQ_ZAP_DOMAIN=55)
-      parameter(ZMQ_ZAP_ENFORCE_DOMAIN=93)
-      parameter(ZMQ_ZERO_COPY_RECV=10)
-      integer              f77_zmq_bind
-      external             f77_zmq_bind
-      integer              f77_zmq_close
-      external             f77_zmq_close
-      integer              f77_zmq_connect
-      external             f77_zmq_connect
-      integer              f77_zmq_ctx_destroy
-      external             f77_zmq_ctx_destroy
-      integer              f77_zmq_ctx_get
-      external             f77_zmq_ctx_get
-      integer*8            f77_zmq_ctx_new
-      external             f77_zmq_ctx_new
-      integer              f77_zmq_ctx_set
-      external             f77_zmq_ctx_set
-      integer              f77_zmq_ctx_shutdown
-      external             f77_zmq_ctx_shutdown
-      integer              f77_zmq_ctx_term
-      external             f77_zmq_ctx_term
-      integer              f77_zmq_disconnect
-      external             f77_zmq_disconnect
-      integer              f77_zmq_errno
-      external             f77_zmq_errno
-      integer              f77_zmq_getsockopt
-      external             f77_zmq_getsockopt
-      integer              f77_zmq_microsleep
-      external             f77_zmq_microsleep
-      integer              f77_zmq_msg_close
-      external             f77_zmq_msg_close
-      integer              f77_zmq_msg_copy
-      external             f77_zmq_msg_copy
-      integer              f77_zmq_msg_copy_from_data
-      external             f77_zmq_msg_copy_from_data
-      integer              f77_zmq_msg_copy_to_data
-      external             f77_zmq_msg_copy_to_data
-      integer              f77_zmq_msg_copy_to_data8
-      external             f77_zmq_msg_copy_to_data8
-      integer*8            f77_zmq_msg_data
-      external             f77_zmq_msg_data
-      integer*8            f77_zmq_msg_data_new
-      external             f77_zmq_msg_data_new
-      integer              f77_zmq_msg_destroy
-      external             f77_zmq_msg_destroy
-      integer              f77_zmq_msg_destroy_data
-      external             f77_zmq_msg_destroy_data
-      integer              f77_zmq_msg_get
-      external             f77_zmq_msg_get
-      character*(64)       f77_zmq_msg_gets
-      external             f77_zmq_msg_gets
-      integer              f77_zmq_msg_init
-      external             f77_zmq_msg_init
-      integer              f77_zmq_msg_init_data
-      external             f77_zmq_msg_init_data
-      integer              f77_zmq_msg_init_size
-      external             f77_zmq_msg_init_size
-      integer              f77_zmq_msg_more
-      external             f77_zmq_msg_more
-      integer              f77_zmq_msg_move
-      external             f77_zmq_msg_move
-      integer*8            f77_zmq_msg_new
-      external             f77_zmq_msg_new
-      integer              f77_zmq_msg_recv
-      external             f77_zmq_msg_recv
-      integer*8            f77_zmq_msg_recv8
-      external             f77_zmq_msg_recv8
-      integer              f77_zmq_msg_send
-      external             f77_zmq_msg_send
-      integer*8            f77_zmq_msg_send8
-      external             f77_zmq_msg_send8
-      integer              f77_zmq_msg_set
-      external             f77_zmq_msg_set
-      integer              f77_zmq_msg_size
-      external             f77_zmq_msg_size
-      integer*8            f77_zmq_msg_size8
-      external             f77_zmq_msg_size8
-      integer              f77_zmq_poll
-      external             f77_zmq_poll
-      integer              f77_zmq_pollitem_destroy
-      external             f77_zmq_pollitem_destroy
-      integer*8            f77_zmq_pollitem_new
-      external             f77_zmq_pollitem_new
-      integer              f77_zmq_pollitem_revents
-      external             f77_zmq_pollitem_revents
-      integer              f77_zmq_pollitem_set_events
-      external             f77_zmq_pollitem_set_events
-      integer              f77_zmq_pollitem_set_socket
-      external             f77_zmq_pollitem_set_socket
-      integer              f77_zmq_proxy
-      external             f77_zmq_proxy
-      integer              f77_zmq_proxy_steerable
-      external             f77_zmq_proxy_steerable
-      integer              f77_zmq_recv
-      external             f77_zmq_recv
-      integer*8            f77_zmq_recv8
-      external             f77_zmq_recv8
-      integer              f77_zmq_send
-      external             f77_zmq_send
-      integer*8            f77_zmq_send8
-      external             f77_zmq_send8
-      integer              f77_zmq_send_const
-      external             f77_zmq_send_const
-      integer*8            f77_zmq_send_const8
-      external             f77_zmq_send_const8
-      integer              f77_zmq_setsockopt
-      external             f77_zmq_setsockopt
-      integer*8            f77_zmq_socket
-      external             f77_zmq_socket
-      integer              f77_zmq_socket_monitor
-      external             f77_zmq_socket_monitor
-      character*(64)       f77_zmq_strerror
-      external             f77_zmq_strerror
-      integer              f77_zmq_term
-      external             f77_zmq_term
-      integer              f77_zmq_unbind
-      external             f77_zmq_unbind
-      integer              f77_zmq_version
-      external             f77_zmq_version
-      integer              pthread_create
-      external             pthread_create
-      integer              pthread_create_arg
-      external             pthread_create_arg
-      integer              pthread_detach
-      external             pthread_detach
-      integer              pthread_join
-      external             pthread_join
diff --git a/ocaml/Message.ml b/ocaml/Message.ml
index b7d77430..049203d7 100644
--- a/ocaml/Message.ml
+++ b/ocaml/Message.ml
@@ -63,11 +63,11 @@ end
 
 module Connect_msg : sig
   type t = Tcp | Inproc | Ipc
-  val create : typ:string -> t
+  val create : string -> t
   val to_string : t -> string
 end = struct
   type t = Tcp | Inproc | Ipc
-  let create ~typ =
+  let create typ =
     match typ with
     | "tcp" -> Tcp
     | "inproc" -> Inproc
@@ -515,9 +515,9 @@ let of_string s =
     | Connect_ socket ->
         Connect (Connect_msg.create socket)
     | NewJob_ { state ; push_address_tcp ; push_address_inproc } ->
-        Newjob (Newjob_msg.create push_address_tcp push_address_inproc state)
+        Newjob (Newjob_msg.create ~address_tcp:push_address_tcp ~address_inproc:push_address_inproc ~state)
     | EndJob_ state  ->
-        Endjob (Endjob_msg.create state)
+        Endjob (Endjob_msg.create ~state)
     | GetData_ { state ; client_id ; key } ->
         GetData (GetData_msg.create ~client_id ~state ~key)
     | PutData_ { state ; client_id ; key } ->
diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml
index 92a6f5ca..ad827316 100644
--- a/ocaml/TaskServer.ml
+++ b/ocaml/TaskServer.ml
@@ -776,7 +776,7 @@ let run ~port =
       Zmq.Socket.create zmq_context Zmq.Socket.rep
     in
     Zmq.Socket.set_linger_period rep_socket 1_000_000;
-    bind_socket "REP" rep_socket port;
+    bind_socket ~socket_type:"REP" ~socket:rep_socket ~port;
 
     let initial_program_state =
     {   queue = Queuing_system.create () ;
diff --git a/ocaml/qp_run.ml b/ocaml/qp_run.ml
index d096b15b..dfbab167 100644
--- a/ocaml/qp_run.ml
+++ b/ocaml/qp_run.ml
@@ -110,7 +110,7 @@ let run slave ?prefix exe ezfio_file =
   let task_thread =
      let thread =
       Thread.create ( fun () ->
-         TaskServer.run port_number )
+         TaskServer.run ~port:port_number )
      in
      thread ();
   in
diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja
index c0ba8c6a..0f70f4c4 100755
--- a/scripts/compilation/qp_create_ninja
+++ b/scripts/compilation/qp_create_ninja
@@ -121,6 +121,7 @@ def ninja_create_env_variable(pwd_config_file):
 
     l_string.append("LIB = {0} ".format(str_lib))
 
+    l_string.append("CONFIG_FILE = {0}".format(pwd_config_file))
     l_string.append("")
 
     return l_string
diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
index fe25e9c0..4fc9ad6b 100644
--- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
@@ -19,11 +19,11 @@ subroutine phi_j_erf_mu_r_xyz_phi(i,j,mu_in, C_center, xyz_ints)
   return
  endif
  n_pt_in = n_pt_max_integrals
- ! j 
+ ! j
  num_A = ao_nucl(j)
  power_A(1:3)= ao_power(j,1:3)
  A_center(1:3) = nucl_coord(num_A,1:3)
- ! i 
+ ! i
  num_B = ao_nucl(i)
  power_B(1:3)= ao_power(i,1:3)
  B_center(1:3) = nucl_coord(num_B,1:3)
@@ -33,19 +33,19 @@ subroutine phi_j_erf_mu_r_xyz_phi(i,j,mu_in, C_center, xyz_ints)
   do m=1,ao_prim_num(i)
     beta = ao_expo_ordered_transp(m,i)
     do mm = 1, 3
-     ! (x phi_i ) * phi_j 
+     ! (x phi_i ) * phi_j
      ! x * (x - B_x)^b_x = b_x (x - B_x)^b_x + 1 * (x - B_x)^{b_x+1}
      !
      ! first contribution :: B_x (x - B_x)^b_x :: usual integral multiplied by B_x
      power_B_tmp = power_B
-     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)  
+     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)
      xyz_ints(mm) += contrib * B_center(mm) * ao_coef_normalized_ordered_transp(l,j)             &
-                                            * ao_coef_normalized_ordered_transp(m,i) 
-     ! second contribution :: 1 * (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1 
+                                            * ao_coef_normalized_ordered_transp(m,i)
+     ! second contribution :: 1 * (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1
      power_B_tmp(mm) += 1
-     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)  
+     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)
      xyz_ints(mm) += contrib * 1.d0        * ao_coef_normalized_ordered_transp(l,j)             &
-                                           * ao_coef_normalized_ordered_transp(m,i) 
+                                           * ao_coef_normalized_ordered_transp(m,i)
     enddo
   enddo
  enddo
@@ -58,7 +58,7 @@ double precision function phi_j_erf_mu_r_phi(i, j, mu_in, C_center)
   BEGIN_DOC
   ! phi_j_erf_mu_r_phi  = int dr phi_j(r) [erf(mu  |r - C|)/|r-C|]  phi_i(r)
   END_DOC
- 
+
   implicit none
   integer,          intent(in) :: i,j
   double precision, intent(in) :: mu_in, C_center(3)
@@ -77,24 +77,24 @@ double precision function phi_j_erf_mu_r_phi(i, j, mu_in, C_center)
 
   n_pt_in = n_pt_max_integrals
 
-  ! j 
+  ! j
   num_A         = ao_nucl(j)
   power_A(1:3)  = ao_power(j,1:3)
   A_center(1:3) = nucl_coord(num_A,1:3)
 
-  ! i 
+  ! i
   num_B         = ao_nucl(i)
   power_B(1:3)  = ao_power(i,1:3)
   B_center(1:3) = nucl_coord(num_B,1:3)
- 
+
   do l = 1, ao_prim_num(j)
    alpha = ao_expo_ordered_transp(l,j)
    do m = 1, ao_prim_num(i)
      beta = ao_expo_ordered_transp(m,i)
 
-     contrib = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)  
+     contrib = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
 
-     phi_j_erf_mu_r_phi += contrib * ao_coef_normalized_ordered_transp(l,j) *  ao_coef_normalized_ordered_transp(m,i) 
+     phi_j_erf_mu_r_phi += contrib * ao_coef_normalized_ordered_transp(l,j) *  ao_coef_normalized_ordered_transp(m,i)
    enddo
   enddo
 
@@ -124,11 +124,11 @@ subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints)
   return
  endif
  n_pt_in = n_pt_max_integrals
- ! j 
+ ! j
  num_A = ao_nucl(j)
  power_A(1:3)= ao_power(j,1:3)
  A_center(1:3) = nucl_coord(num_A,1:3)
- ! i 
+ ! i
  num_B = ao_nucl(i)
  power_B(1:3)= ao_power(i,1:3)
  B_center(1:3) = nucl_coord(num_B,1:3)
@@ -141,7 +141,7 @@ subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints)
     call erfc_mu_gauss_xyz(C_center,delta,mu,A_center,B_center,power_A,power_B,alpha,beta,n_pt_in,xyz_ints)
     do mm = 1, 4
      gauss_ints(mm) += xyz_ints(mm)  * ao_coef_normalized_ordered_transp(l,j)             &
-                                     * ao_coef_normalized_ordered_transp(m,i) 
+                                     * ao_coef_normalized_ordered_transp(m,i)
     enddo
   enddo
  enddo
@@ -161,7 +161,7 @@ subroutine erf_mu_gauss_ij_ao(i, j, mu, C_center, delta, gauss_ints)
   integer,          intent(in)  :: i, j
   double precision, intent(in)  :: mu, C_center(3), delta
   double precision, intent(out) :: gauss_ints
- 
+
   integer                       :: n_pt_in, l, m
   integer                       :: num_A, power_A(3), num_b, power_B(3)
   double precision              :: alpha, beta, A_center(3), B_center(3), coef
@@ -177,16 +177,16 @@ subroutine erf_mu_gauss_ij_ao(i, j, mu, C_center, delta, gauss_ints)
 
   n_pt_in = n_pt_max_integrals
 
-  ! j 
+  ! j
   num_A         = ao_nucl(j)
   power_A(1:3)  = ao_power(j,1:3)
   A_center(1:3) = nucl_coord(num_A,1:3)
 
-  ! i 
+  ! i
   num_B         = ao_nucl(i)
   power_B(1:3)  = ao_power(i,1:3)
   B_center(1:3) = nucl_coord(num_B,1:3)
- 
+
   do l = 1, ao_prim_num(j)
    alpha = ao_expo_ordered_transp(l,j)
    do m = 1, ao_prim_num(i)
@@ -219,7 +219,7 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints)
   !
   END_DOC
 
-  include 'utils/constants.include.F'                                                                                                                                  
+  include 'utils/constants.include.F'
 
   implicit none
 
@@ -274,7 +274,82 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints)
 end subroutine NAI_pol_x_mult_erf_ao
 
 ! ---
+subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points)
 
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao, n_points
+  double precision, intent(in)  :: mu_in, C_center(n_points,3)
+  double precision, intent(out) :: ints(n_points,3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in
+  integer                       :: power_xA(3), m, ipoint
+  double precision              :: A_center(3), B_center(3), alpha, beta, coef
+  double precision, allocatable :: integral(:)
+  double precision              :: NAI_pol_mult_erf
+
+  ints = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12) then
+    return
+  endif
+
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  allocate(integral(n_points))
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      power_xA = power_A
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        beta = ao_expo_ordered_transp(j,j_ao)
+        coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points)
+        do ipoint=1,n_points
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points)
+        do ipoint=1,n_points
+          ints(ipoint,m) += A_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_v
+
+! ---
 subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints)
 
   BEGIN_DOC
@@ -289,7 +364,7 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen
   !
   END_DOC
 
-  include 'utils/constants.include.F'                                                                                                                                  
+  include 'utils/constants.include.F'
 
   implicit none
 
@@ -333,7 +408,7 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen
 
       do j = 1, ao_prim_num(j_ao)
         alphaj = ao_expo_ordered_transp                   (j,j_ao)
-        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao) 
+        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao)
 
         ! First term = (x-Ax)**(ax+1)
         integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj &
@@ -351,6 +426,91 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen
 
 end subroutine NAI_pol_x_mult_erf_ao_with1s
 
+!--
+
+subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_center, ints, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)   :: i_ao, j_ao, n_points
+  double precision, intent(in)   :: beta, B_center(n_points,3), mu_in, C_center(n_points,3)
+  double precision, intent(out)  :: ints(n_points,3)
+
+  integer                        :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m
+  double precision               :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi
+
+  integer                        :: ipoint
+  double precision, allocatable :: integral(:)
+
+  if(beta .lt. 1d-10) then
+    call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points)
+    return
+  endif
+
+  ints(:,:) = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then
+    return
+  endif
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  allocate(integral(n_points))
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA     = power_Ai
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        alphaj = ao_expo_ordered_transp                   (j,j_ao)
+        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, &
+            alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points)
+        do ipoint = 1, n_points
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, &
+          alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points)
+        do ipoint = 1, n_points
+          ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_with1s
+
+
 ! ---
 
 subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints)
@@ -361,7 +521,7 @@ subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints)
   !
   ! if m == 1 X(m) = x, m == 1 X(m) = y, m == 1 X(m) = z
   END_DOC
- include 'utils/constants.include.F'                                                                                                                                  
+ include 'utils/constants.include.F'
  integer, intent(in) :: i_ao,j_ao,m
  double precision, intent(in) :: mu_in, C_center(3)
  double precision, intent(out):: ints
diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
index d515b0c1..facb6264 100644
--- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
@@ -22,15 +22,15 @@ subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints)
  power_B(1:3)= ao_power(j,1:3)
  B_center(1:3) = nucl_coord(num_B,1:3)
  do l=1,ao_prim_num(i)
-  alpha = ao_expo_ordered_transp(l,i)     
+  alpha = ao_expo_ordered_transp(l,i)
   do k=1,ao_prim_num(j)
-   beta = ao_expo_ordered_transp(k,j)     
+   beta = ao_expo_ordered_transp(k,j)
    call overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints_tmp)
    do m = 1, 3
     gauss_ints(m) += gauss_ints_tmp(m) *  ao_coef_normalized_ordered_transp(l,i)             &
-                                       *  ao_coef_normalized_ordered_transp(k,j) 
+                                       *  ao_coef_normalized_ordered_transp(k,j)
    enddo
-  enddo 
+  enddo
  enddo
 end
 
@@ -61,13 +61,13 @@ double precision function overlap_gauss_xyz_r12_ao_specific(D_center,delta,i,j,m
  power_B(1:3)= ao_power(j,1:3)
  B_center(1:3) = nucl_coord(num_B,1:3)
  do l=1,ao_prim_num(i)
-  alpha = ao_expo_ordered_transp(l,i)     
+  alpha = ao_expo_ordered_transp(l,i)
   do k=1,ao_prim_num(j)
-   beta = ao_expo_ordered_transp(k,j)     
+   beta = ao_expo_ordered_transp(k,j)
    gauss_int = overlap_gauss_xyz_r12_specific(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,mx)
    overlap_gauss_xyz_r12_ao_specific = gauss_int *  ao_coef_normalized_ordered_transp(l,i)             &
-                                                 *  ao_coef_normalized_ordered_transp(k,j) 
-  enddo 
+                                                 *  ao_coef_normalized_ordered_transp(k,j)
+  enddo
  enddo
 end
 
@@ -90,13 +90,13 @@ subroutine overlap_gauss_r12_all_ao(D_center,delta,aos_ints)
    power_B(1:3)= ao_power(j,1:3)
    B_center(1:3) = nucl_coord(num_B,1:3)
    do l=1,ao_prim_num(i)
-    alpha = ao_expo_ordered_transp(l,i)     
+    alpha = ao_expo_ordered_transp(l,i)
     do k=1,ao_prim_num(j)
-     beta = ao_expo_ordered_transp(k,j)     
+     beta = ao_expo_ordered_transp(k,j)
      analytical_j = overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
      aos_ints(j,i) += analytical_j *  ao_coef_normalized_ordered_transp(l,i)             &
-                                   *  ao_coef_normalized_ordered_transp(k,j) 
-    enddo 
+                                   *  ao_coef_normalized_ordered_transp(k,j)
+    enddo
    enddo
   enddo
  enddo
@@ -114,7 +114,7 @@ double precision function overlap_gauss_r12_ao(D_center, delta, i, j)
   implicit none
   integer,          intent(in) :: i, j
   double precision, intent(in) :: D_center(3), delta
- 
+
   integer                      :: power_A(3), power_B(3), l, k
   double precision             :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j
 
@@ -133,23 +133,75 @@ double precision function overlap_gauss_r12_ao(D_center, delta, i, j)
   B_center(1:3) = nucl_coord(ao_nucl(j),1:3)
 
   do l = 1, ao_prim_num(i)
-    alpha = ao_expo_ordered_transp           (l,i)     
-    coef1 = ao_coef_normalized_ordered_transp(l,i) 
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)
 
     do k = 1, ao_prim_num(j)
       beta = ao_expo_ordered_transp(k,j)
-      coef = coef1 * ao_coef_normalized_ordered_transp(k,j) 
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)
 
       if(dabs(coef) .lt. 1d-12) cycle
 
       analytical_j = overlap_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
 
       overlap_gauss_r12_ao += coef * analytical_j
-    enddo 
+    enddo
   enddo
 
 end function overlap_gauss_r12_ao
 
+! --
+
+subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points)
+
+  BEGIN_DOC
+  ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2}
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i, j, n_points
+  double precision, intent(in) :: D_center(n_points,3), delta
+  double precision, intent(out) :: resv(n_points)
+
+  integer                      :: power_A(3), power_B(3), l, k
+  double precision             :: A_center(3), B_center(3), alpha, beta, coef, coef1 
+  double precision, allocatable :: analytical_j(:)
+
+  double precision, external   :: overlap_gauss_r12
+  integer                        :: ipoint
+
+  resv(:) = 0.d0
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  power_A(1:3) = ao_power(i,1:3)
+  power_B(1:3) = ao_power(j,1:3)
+
+  A_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  B_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  allocate(analytical_j(n_points))
+  do l = 1, ao_prim_num(i)
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      beta = ao_expo_ordered_transp(k,j)
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)
+
+      if(dabs(coef) .lt. 1d-12) cycle
+
+      call overlap_gauss_r12_v(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta, analytical_j, n_points)
+      do ipoint=1, n_points
+        resv(ipoint) = resv(ipoint) + coef*analytical_j(ipoint)
+      enddo
+    enddo
+  enddo
+  deallocate(analytical_j)
+
+end
+
 ! ---
 
 double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, delta, i, j)
@@ -163,14 +215,13 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center,
   implicit none
   integer,          intent(in) :: i, j
   double precision, intent(in) :: B_center(3), beta, D_center(3), delta
- 
+
   integer                      :: power_A1(3), power_A2(3), l, k
   double precision             :: A1_center(3), A2_center(3), alpha1, alpha2, coef1, coef12, analytical_j
   double precision             :: G_center(3), gama, fact_g, gama_inv
 
   double precision, external   :: overlap_gauss_r12, overlap_gauss_r12_ao
 
-  ASSERT(beta .gt. 0.d0)
   if(beta .lt. 1d-10) then
     overlap_gauss_r12_ao_with1s = overlap_gauss_r12_ao(D_center, delta, i, j)
     return
@@ -204,22 +255,118 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center,
   A2_center(1:3) = nucl_coord(ao_nucl(j),1:3)
 
   do l = 1, ao_prim_num(i)
-    alpha1 = ao_expo_ordered_transp                    (l,i)     
+    alpha1 = ao_expo_ordered_transp                    (l,i)
     coef1  = fact_g * ao_coef_normalized_ordered_transp(l,i)
     if(dabs(coef1) .lt. 1d-12) cycle
 
     do k = 1, ao_prim_num(j)
       alpha2 = ao_expo_ordered_transp                   (k,j)
-      coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) 
+      coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j)
       if(dabs(coef12) .lt. 1d-12) cycle
 
       analytical_j = overlap_gauss_r12(G_center, gama, A1_center, A2_center, power_A1, power_A2, alpha1, alpha2)
 
       overlap_gauss_r12_ao_with1s += coef12 * analytical_j
-    enddo 
+    enddo
   enddo
 
 end function overlap_gauss_r12_ao_with1s
 
 ! ---
 
+subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, resv, n_points)
+  BEGIN_DOC
+  !
+  ! \int dr AO_i(r) AO_j(r) e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2}
+  ! using an array of D_centers.
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, j, n_points
+  double precision, intent(in)  :: B_center(3), beta, D_center(n_points,3), delta
+  double precision, intent(out) :: resv(n_points)
+
+  integer                      :: power_A1(3), power_A2(3), l, k
+  double precision             :: A1_center(3), A2_center(3), alpha1, alpha2, coef1
+  double precision             :: coef12, coef12f
+  double precision             :: gama, gama_inv
+  double precision             :: bg, dg, bdg
+
+  integer :: ipoint
+
+  double precision, allocatable :: fact_g(:), G_center(:,:), analytical_j(:)
+
+  if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+    return
+  endif
+
+  ASSERT(beta .gt. 0.d0)
+
+  if(beta .lt. 1d-10) then
+    call overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points)
+    return
+  endif
+
+  resv(:) = 0.d0
+
+  ! e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} = fact_g e^{-gama |r - G|^2}
+
+  gama        = beta + delta
+  gama_inv    = 1.d0 / gama
+
+  power_A1(1:3) = ao_power(i,1:3)
+  power_A2(1:3) = ao_power(j,1:3)
+
+  A1_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  A2_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) )
+
+  bg = beta * gama_inv
+  dg = delta * gama_inv
+  bdg = bg * delta 
+  do ipoint=1,n_points
+    G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1)
+    G_center(ipoint,2) = bg * B_center(2) + dg * D_center(ipoint,2)
+    G_center(ipoint,3) = bg * B_center(3) + dg * D_center(ipoint,3)
+    fact_g(ipoint) = bdg * ( &
+          (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1))  &
+        + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2))  &
+        + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
+
+    if(fact_g(ipoint) < 10d0) then
+      fact_g(ipoint) = dexp(-fact_g(ipoint))
+    else
+      fact_g(ipoint) = 0.d0
+    endif
+
+  enddo
+
+    ! ---
+
+  do l = 1, ao_prim_num(i)
+    alpha1 = ao_expo_ordered_transp                    (l,i)
+    coef1  = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      alpha2 = ao_expo_ordered_transp                   (k,j)
+      coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j)
+      if(dabs(coef12) .lt. 1d-12) cycle
+
+      call overlap_gauss_r12_v(G_center, gama, A1_center,&
+            A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points)
+
+      do ipoint=1,n_points
+        coef12f = coef12 * fact_g(ipoint)
+        resv(ipoint) += coef12f * analytical_j(ipoint)
+      end do
+
+    enddo
+  enddo
+  deallocate (fact_g, G_center, analytical_j )
+
+
+end
+
+
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
index 50b4bf96..cdc86456 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@@ -11,60 +11,65 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
 
   implicit none
   integer                       :: i, j, ipoint, i_1s, i_fit
-  double precision              :: r(3), int_fit, expo_fit, coef_fit
+  double precision              :: r(3), expo_fit, coef_fit
   double precision              :: coef, beta, B_center(3)
   double precision              :: tmp
   double precision              :: wall0, wall1
 
+  double precision, allocatable :: int_fit_v(:)
   double precision, external    :: overlap_gauss_r12_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  provide mu_erf final_grid_points_transp j1b_pen
   call wall_time(wall0)
 
-  int2_grad1u2_grad2u2_j1b2 = 0.d0
+ int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0
 
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
- !$OMP          final_grid_points, n_max_fit_slat,                  &
- !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,         &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
- !$OMP          List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2)
- !$OMP DO
-  !do ipoint = 1, 10
-  do ipoint = 1, n_points_final_grid
-    r(1) = final_grid_points(1,ipoint)
-    r(2) = final_grid_points(2,ipoint)
-    r(3) = final_grid_points(3,ipoint)
+ !$OMP PARALLEL DEFAULT (NONE)                                       &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp)                &
+     !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size,&
+     !$OMP          final_grid_points_transp, n_max_fit_slat,               &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
+     !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,    &
+     !$OMP          List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,&
+     !$OMP          ao_overlap_abs)
 
-    do i = 1, ao_num
-      do j = i, ao_num
+ allocate(int_fit_v(n_points_final_grid))
+ !$OMP DO SCHEDULE(dynamic)
+ do i = 1, ao_num
+   do j = i, ao_num
 
-        tmp = 0.d0
-        do i_1s = 1, List_all_comb_b3_size
+     if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+       cycle
+     endif
 
-          coef        = List_all_comb_b3_coef  (i_1s)
-          beta        = List_all_comb_b3_expo  (i_1s)
-          B_center(1) = List_all_comb_b3_cent(1,i_1s)
-          B_center(2) = List_all_comb_b3_cent(2,i_1s)
-          B_center(3) = List_all_comb_b3_cent(3,i_1s)
+     do i_1s = 1, List_all_comb_b3_size
 
-          do i_fit = 1, n_max_fit_slat
+       coef        = List_all_comb_b3_coef  (i_1s)
+       beta        = List_all_comb_b3_expo  (i_1s)
+       B_center(1) = List_all_comb_b3_cent(1,i_1s)
+       B_center(2) = List_all_comb_b3_cent(2,i_1s)
+       B_center(3) = List_all_comb_b3_cent(3,i_1s)
 
-            expo_fit = expo_gauss_1_erf_x_2(i_fit)
-            coef_fit = coef_gauss_1_erf_x_2(i_fit)
-            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+       do i_fit = 1, n_max_fit_slat
 
-            tmp += -0.25d0 * coef * coef_fit * int_fit
-          enddo
-        enddo
+         expo_fit = expo_gauss_1_erf_x_2(i_fit)
+         coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
 
-        int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = tmp
-      enddo
-    enddo
-  enddo
+         call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
+               expo_fit, i, j, int_fit_v, n_points_final_grid)
+
+         do ipoint = 1, n_points_final_grid
+           int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint)
+         enddo
+
+       enddo
+
+     enddo
+   enddo
+ enddo
  !$OMP END DO
+ deallocate(int_fit_v)
  !$OMP END PARALLEL
 
   do ipoint = 1, n_points_final_grid
@@ -78,7 +83,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
   call wall_time(wall1)
   print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0
 
-END_PROVIDER 
+END_PROVIDER
 
 ! ---
 
@@ -91,61 +96,60 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final
   END_DOC
 
   implicit none
-  integer                       :: i, j, ipoint, i_1s, i_fit
-  double precision              :: r(3), int_fit, expo_fit, coef_fit
-  double precision              :: coef, beta, B_center(3), tmp
-  double precision              :: wall0, wall1
+  integer                        :: i, j, ipoint, i_1s, i_fit
+  double precision               :: r(3), expo_fit, coef_fit
+  double precision               :: coef, beta, B_center(3), tmp
+  double precision               :: wall0, wall1
+  double precision, allocatable  :: int_fit_v(:)
 
-  double precision, external    :: overlap_gauss_r12_ao_with1s
+  double precision, external     :: overlap_gauss_r12_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  provide mu_erf final_grid_points_transp j1b_pen
   call wall_time(wall0)
 
-  int2_u2_j1b2 = 0.d0
+  int2_u2_j1b2(:,:,:) = 0.d0
 
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
- !$OMP          final_grid_points, n_max_fit_slat,                  &
- !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
- !$OMP          List_all_comb_b3_cent, int2_u2_j1b2)
- !$OMP DO
-  !do ipoint = 1, 10
-  do ipoint = 1, n_points_final_grid
-    r(1) = final_grid_points(1,ipoint)
-    r(2) = final_grid_points(2,ipoint)
-    r(3) = final_grid_points(3,ipoint)
+  !$OMP PARALLEL DEFAULT (NONE)                                      &
+      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+      !$OMP          coef_fit, expo_fit, int_fit_v)                  &
+      !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size,&
+      !$OMP          final_grid_points_transp, n_max_fit_slat,       &
+      !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,       &
+      !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,   &
+      !$OMP          List_all_comb_b3_cent, int2_u2_j1b2)
+  allocate(int_fit_v(n_points_final_grid))
+  !$OMP DO SCHEDULE(dynamic)
+  do i = 1, ao_num
+    do j = i, ao_num
 
-    do i = 1, ao_num
-      do j = i, ao_num
+      do i_1s = 1, List_all_comb_b3_size
 
-        tmp = 0.d0
-        do i_1s = 1, List_all_comb_b3_size
+        coef        = List_all_comb_b3_coef  (i_1s)
+        beta        = List_all_comb_b3_expo  (i_1s)
+        B_center(1) = List_all_comb_b3_cent(1,i_1s)
+        B_center(2) = List_all_comb_b3_cent(2,i_1s)
+        B_center(3) = List_all_comb_b3_cent(3,i_1s)
 
-          coef        = List_all_comb_b3_coef  (i_1s)
-          beta        = List_all_comb_b3_expo  (i_1s)
-          B_center(1) = List_all_comb_b3_cent(1,i_1s)
-          B_center(2) = List_all_comb_b3_cent(2,i_1s)
-          B_center(3) = List_all_comb_b3_cent(3,i_1s)
+        do i_fit = 1, n_max_fit_slat
 
-          do i_fit = 1, n_max_fit_slat
+          expo_fit = expo_gauss_j_mu_x_2(i_fit)
+          coef_fit = coef_gauss_j_mu_x_2(i_fit) * coef
 
-            expo_fit = expo_gauss_j_mu_x_2(i_fit)
-            coef_fit = coef_gauss_j_mu_x_2(i_fit)
-            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+          call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
+                expo_fit, i, j, int_fit_v, n_points_final_grid)
 
-            tmp += coef * coef_fit * int_fit
+          do ipoint = 1, n_points_final_grid
+            int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint)
           enddo
+
         enddo
 
-        int2_u2_j1b2(j,i,ipoint) = tmp
       enddo
     enddo
   enddo
- !$OMP END DO
- !$OMP END PARALLEL
+  !$OMP END DO
+  deallocate(int_fit_v)
+  !$OMP END PARALLEL
 
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
@@ -158,7 +162,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final
   call wall_time(wall1)
   print*, ' wall time for int2_u2_j1b2', wall1 - wall0
 
-END_PROVIDER 
+END_PROVIDER
 
 ! ---
 
@@ -171,84 +175,95 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
   END_DOC
 
   implicit none
-  integer          :: i, j, ipoint, i_1s, i_fit
-  double precision :: r(3), int_fit(3), expo_fit, coef_fit
-  double precision :: coef, beta, B_center(3), dist
-  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
-  double precision :: tmp_x, tmp_y, tmp_z
-  double precision :: wall0, wall1
+  integer                        :: i, j, ipoint, i_1s, i_fit
+  double precision               :: r(3), expo_fit, coef_fit
+  double precision               :: coef, beta, B_center(3)
+  double precision               :: alpha_1s, alpha_1s_inv, expo_coef_1s, coef_tmp
+  double precision               :: tmp_x, tmp_y, tmp_z
+  double precision               :: wall0, wall1
+  double precision, allocatable  :: int_fit_v(:,:), dist(:), centr_1s(:,:)
 
-  provide mu_erf final_grid_points j1b_pen
+  provide mu_erf final_grid_points_transp j1b_pen
   call wall_time(wall0)
 
-  int2_u_grad1u_x_j1b2 = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
- !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
- !$OMP          tmp_x, tmp_y, tmp_z)                                & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
- !$OMP          final_grid_points, n_max_fit_slat,                  &
- !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
- !$OMP          List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
- !$OMP DO
+  allocate(dist(n_points_final_grid), centr_1s(n_points_final_grid,3))
   do ipoint = 1, n_points_final_grid
-    r(1) = final_grid_points(1,ipoint)
-    r(2) = final_grid_points(2,ipoint)
-    r(3) = final_grid_points(3,ipoint)
+    r(1) = final_grid_points_transp(ipoint,1)
+    r(2) = final_grid_points_transp(ipoint,2)
+    r(3) = final_grid_points_transp(ipoint,3)
 
-    do i = 1, ao_num
-      do j = i, ao_num
+    dist(ipoint) = (B_center(1) - r(1)) * (B_center(1) - r(1))  &
+                 + (B_center(2) - r(2)) * (B_center(2) - r(2))  &
+                 + (B_center(3) - r(3)) * (B_center(3) - r(3))
+  enddo
 
-        tmp_x = 0.d0
-        tmp_y = 0.d0
-        tmp_z = 0.d0
-        do i_1s = 1, List_all_comb_b3_size
+  int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0
 
-          coef        = List_all_comb_b3_coef  (i_1s)
-          beta        = List_all_comb_b3_expo  (i_1s)
-          B_center(1) = List_all_comb_b3_cent(1,i_1s)
-          B_center(2) = List_all_comb_b3_cent(2,i_1s)
-          B_center(3) = List_all_comb_b3_cent(3,i_1s)
-          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
-                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
-                      + (B_center(3) - r(3)) * (B_center(3) - r(3)) 
+  !$OMP PARALLEL DEFAULT (NONE)                                      &
+      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+      !$OMP          coef_fit, expo_fit, int_fit_v, alpha_1s,        &
+      !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, &
+      !$OMP          tmp_x, tmp_y, tmp_z)                            &
+      !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size,&
+      !$OMP          final_grid_points_transp, n_max_fit_slat, dist, &
+      !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,   &
+      !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,   &
+      !$OMP          List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
+  allocate(int_fit_v(n_points_final_grid,3))
 
-          do i_fit = 1, n_max_fit_slat
+  do i_1s = 1, List_all_comb_b3_size
 
-            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
-            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+    coef        = List_all_comb_b3_coef  (i_1s)
+    beta        = List_all_comb_b3_expo  (i_1s)
+    B_center(1) = List_all_comb_b3_cent(1,i_1s)
+    B_center(2) = List_all_comb_b3_cent(2,i_1s)
+    B_center(3) = List_all_comb_b3_cent(3,i_1s)
 
-            alpha_1s     = beta + expo_fit
-            alpha_1s_inv = 1.d0 / alpha_1s 
+    do i_fit = 1, n_max_fit_slat
 
-            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
-            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
-            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+      expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+      coef_fit = coef_gauss_j_mu_1_erf(i_fit) * coef
 
-            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
-            !if(expo_coef_1s .gt. 80.d0) cycle
-            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            !if(dabs(coef_tmp) .lt. 1d-10) cycle
-            
-            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
+      alpha_1s     = beta + expo_fit
+      alpha_1s_inv = 1.d0 / alpha_1s
 
-            tmp_x += coef_tmp * int_fit(1)
-            tmp_y += coef_tmp * int_fit(2)
-            tmp_z += coef_tmp * int_fit(3)
+      do ipoint = 1, n_points_final_grid
+        r(1) = final_grid_points_transp(ipoint,1)
+        r(2) = final_grid_points_transp(ipoint,2)
+        r(3) = final_grid_points_transp(ipoint,3)
+
+        centr_1s(ipoint,1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+        centr_1s(ipoint,2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+        centr_1s(ipoint,3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+      enddo
+
+      expo_coef_1s = beta * expo_fit * alpha_1s_inv 
+      !$OMP BARRIER
+      !$OMP DO SCHEDULE(dynamic)
+      do i = 1, ao_num
+        do j = i, ao_num
+          call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s,&
+              1.d+9, final_grid_points_transp, int_fit_v, n_points_final_grid)
+
+          do ipoint = 1, n_points_final_grid
+            coef_tmp = coef_fit * dexp(-expo_coef_1s* dist(ipoint))
+            int2_u_grad1u_x_j1b2(1,j,i,ipoint) = &
+              int2_u_grad1u_x_j1b2(1,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,1)
+            int2_u_grad1u_x_j1b2(2,j,i,ipoint) = &
+              int2_u_grad1u_x_j1b2(2,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,2)
+            int2_u_grad1u_x_j1b2(3,j,i,ipoint) = &
+              int2_u_grad1u_x_j1b2(3,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,3)
           enddo
         enddo
-
-        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = tmp_x
-        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = tmp_y
-        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = tmp_z
       enddo
+      !$OMP END DO NOWAIT
+
     enddo
   enddo
- !$OMP END DO
- !$OMP END PARALLEL
+  deallocate(int_fit_v)
+  !$OMP END PARALLEL
+
+  deallocate(dist)
 
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
@@ -263,7 +278,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
   call wall_time(wall1)
   print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0
 
-END_PROVIDER 
+END_PROVIDER
 
 ! ---
 
@@ -291,11 +306,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
  !$OMP PARALLEL DEFAULT (NONE)                                      &
  !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
  !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
- !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, &
  !$OMP          final_grid_points, n_max_fit_slat,                  &
  !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
+ !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       &
  !$OMP          List_all_comb_b3_cent, int2_u_grad1u_j1b2)
  !$OMP DO
   do ipoint = 1, n_points_final_grid
@@ -323,7 +338,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
             coef_fit = coef_gauss_j_mu_1_erf(i_fit)
 
             alpha_1s     = beta + expo_fit
-            alpha_1s_inv = 1.d0 / alpha_1s 
+            alpha_1s_inv = 1.d0 / alpha_1s
             centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
             centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
             centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
@@ -332,7 +347,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
             !if(expo_coef_1s .gt. 80.d0) cycle
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
             !if(dabs(coef_tmp) .lt. 1d-10) cycle
-            
+
             int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
 
             tmp += coef_tmp * int_fit
@@ -357,7 +372,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
   call wall_time(wall1)
   print*, ' wall time for int2_u_grad1u_j1b2', wall1 - wall0
 
-END_PROVIDER 
+END_PROVIDER
 
 ! ---
 
diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f
index 42d37069..1178cc31 100644
--- a/src/ao_many_one_e_ints/listj1b.irp.f
+++ b/src/ao_many_one_e_ints/listj1b.irp.f
@@ -63,7 +63,6 @@ END_PROVIDER
       tmp_cent_z               += tmp_alphaj * nucl_coord(j,3)
     enddo
 
-    ASSERT(List_all_comb_b2_expo(i) .gt. 0d0)
     if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle
 
     List_all_comb_b2_cent(1,i) = tmp_cent_x / List_all_comb_b2_expo(i) 
@@ -177,8 +176,8 @@ END_PROVIDER
 
     enddo
 
-    ASSERT(List_all_comb_b3_expo(i) .gt. 0d0)
     if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle
+    ASSERT(List_all_comb_b3_expo(i) .gt. 0d0)
 
     List_all_comb_b3_cent(1,i) = List_all_comb_b3_cent(1,i) / List_all_comb_b3_expo(i) 
     List_all_comb_b3_cent(2,i) = List_all_comb_b3_cent(2,i) / List_all_comb_b3_expo(i)
diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
index 749227ea..1638aa9e 100644
--- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
+++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
@@ -1,67 +1,139 @@
-
 double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
   BEGIN_DOC
   ! Computes the following integral :
   !
-  ! .. math::
-  ! 
+  ! .. math                      ::
+  !
   !   \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
   !
   END_DOC
 
- implicit none
+  implicit none
   include 'constants.include.F'
- double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D" 
- double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
- integer, intent(in)             :: power_A(3),power_B(3)
+  double precision, intent(in)   :: D_center(3), delta  ! pure gaussian "D"
+  double precision, intent(in)   :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)            :: power_A(3),power_B(3)
 
- double precision  :: overlap_x,overlap_y,overlap_z,overlap
- ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
- double precision  :: A_new(0:max_dim,3)! new polynom 
- double precision  :: A_center_new(3)   ! new center
- integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
- double precision  :: alpha_new         ! new exponent
- double precision  :: fact_a_new        ! constant factor
- double precision  :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr
- integer           :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1
- dim1=100
- thr = 1.d-10
- d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  double precision               :: overlap_x,overlap_y,overlap_z,overlap
+  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+  double precision               :: A_new(0:max_dim,3)! new polynom
+  double precision               :: A_center_new(3)   ! new center
+  integer                        :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+  double precision               :: alpha_new         ! new exponent
+  double precision               :: fact_a_new        ! constant factor
+  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr
+  integer                        :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1
+  dim1=100
+  thr = 1.d-10
+  d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
 
- ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order                                
- call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
-                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
- ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
- accu = 0.d0
- do lx = 0, iorder_a_new(1)
-  coefx = A_new(lx,1)
-  if(dabs(coefx).lt.thr)cycle
-  iorder_tmp(1) = lx
-  do ly = 0, iorder_a_new(2)
-   coefy = A_new(ly,2)
-   coefxy = coefx * coefy 
-   if(dabs(coefxy).lt.thr)cycle
-   iorder_tmp(2) = ly
-   do lz = 0, iorder_a_new(3)
-    coefz = A_new(lz,3)
-    coefxyz = coefxy * coefz 
-    if(dabs(coefxyz).lt.thr)cycle
-    iorder_tmp(3) = lz
-    call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
-    accu += coefxyz * overlap
-   enddo
+  ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+  call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
+      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+  accu = 0.d0
+  do lx = 0, iorder_a_new(1)
+    coefx = A_new(lx,1)
+    if(dabs(coefx).lt.thr)cycle
+    iorder_tmp(1) = lx
+    do ly = 0, iorder_a_new(2)
+      coefy = A_new(ly,2)
+      coefxy = coefx * coefy
+      if(dabs(coefxy).lt.thr)cycle
+      iorder_tmp(2) = ly
+      do lz = 0, iorder_a_new(3)
+        coefz = A_new(lz,3)
+        coefxyz = coefxy * coefz
+        if(dabs(coefxyz).lt.thr)cycle
+        iorder_tmp(3) = lz
+        call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
+        accu += coefxyz * overlap
+      enddo
+    enddo
   enddo
- enddo
- overlap_gauss_r12 = fact_a_new * accu 
+  overlap_gauss_r12 = fact_a_new * accu
 end
 
+!---
+
+subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   using an array of D_centers
+  !
+  END_DOC
+
+  implicit none
+  include 'constants.include.F'
+  integer, intent(in)            :: n_points
+  double precision, intent(in)   :: D_center(n_points,3), delta  ! pure gaussian "D"
+  double precision, intent(in)   :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)            :: power_A(3),power_B(3)
+  double precision, intent(out)  :: rvec(n_points)
+
+  double precision, allocatable  :: overlap(:)
+  double precision               :: overlap_x, overlap_y, overlap_z
+
+  integer                        :: maxab
+  integer, allocatable           :: iorder_a_new(:)
+  double precision, allocatable  :: A_new(:,:,:), A_center_new(:,:)
+  double precision, allocatable  :: fact_a_new(:)
+  double precision               :: alpha_new
+  double precision               :: accu,thr, coefxy
+  integer                        :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1, ipoint
+
+  dim1=100
+  thr = 1.d-10
+  d(:) = 0
+
+  maxab = maxval(power_A(1:3))
+
+  allocate (A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), &
+            fact_a_new(n_points), iorder_a_new(3), overlap(n_points) )
+
+  call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, &
+        alpha_new, fact_a_new, iorder_a_new , delta, alpha, d, power_A, &
+        D_center, A_center, n_points)
+
+  do ipoint=1,n_points
+    rvec(ipoint) = 0.d0
+  enddo
+
+  do lx = 0, iorder_a_new(1)
+    iorder_tmp(1) = lx
+    do ly = 0, iorder_a_new(2)
+      iorder_tmp(2) = ly
+      do lz = 0, iorder_a_new(3)
+        iorder_tmp(3) = lz
+        call overlap_gaussian_xyz_v(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap,dim1,n_points)
+        do ipoint=1,n_points
+          rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * &
+                                        A_new(ipoint,ly,2) * &
+                                        A_new(ipoint,lz,3) * overlap(ipoint)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  do ipoint=1,n_points
+    rvec(ipoint) = rvec(ipoint) * fact_a_new(ipoint)
+  enddo
+  deallocate(A_new, A_center_new, fact_a_new, iorder_a_new, overlap)
+end
+
+!---
+!---
 
 subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints)
   BEGIN_DOC
   ! Computes the following integral :
   !
   ! .. math::
-  ! 
+  !
   !   gauss_ints(m) = \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
   !
   ! with m == 1 ==> x, m == 2 ==> y, m == 3 ==> z
@@ -69,14 +141,14 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_
 
  implicit none
   include 'constants.include.F'
- double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D" 
+ double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D"
  double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
  integer, intent(in)             :: power_A(3),power_B(3)
  double precision, intent(out)   :: gauss_ints(3)
 
  double precision  :: overlap_x,overlap_y,overlap_z,overlap
  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
- double precision  :: A_new(0:max_dim,3)! new polynom 
+ double precision  :: A_new(0:max_dim,3)! new polynom
  double precision  :: A_center_new(3)   ! new center
  integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
  integer           :: power_B_new(3)
@@ -88,8 +160,8 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_
  thr = 1.d-10
  d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
 
- ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order                                
- call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
+ ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , &
                                       delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
  ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
  gauss_ints = 0.d0
@@ -99,12 +171,12 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_
   iorder_tmp(1) = lx
   do ly = 0, iorder_a_new(2)
    coefy = A_new(ly,2)
-   coefxy = coefx * coefy 
+   coefxy = coefx * coefy
    if(dabs(coefxy).lt.thr)cycle
    iorder_tmp(2) = ly
    do lz = 0, iorder_a_new(3)
     coefz = A_new(lz,3)
-    coefxyz = coefxy * coefz 
+    coefxyz = coefxy * coefz
     if(dabs(coefxyz).lt.thr)cycle
     iorder_tmp(3) = lz
     do m = 1, 3
@@ -129,7 +201,7 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center
   ! Computes the following integral :
   !
   ! .. math::
-  ! 
+  !
   !    \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
   !
   ! with mx == 1 ==> x, mx == 2 ==> y, mx == 3 ==> z
@@ -137,13 +209,13 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center
 
  implicit none
   include 'constants.include.F'
- double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D" 
+ double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D"
  double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
  integer, intent(in)             :: power_A(3),power_B(3),mx
 
  double precision  :: overlap_x,overlap_y,overlap_z,overlap
  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
- double precision  :: A_new(0:max_dim,3)! new polynom 
+ double precision  :: A_new(0:max_dim,3)! new polynom
  double precision  :: A_center_new(3)   ! new center
  integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
  integer           :: power_B_new(3)
@@ -155,8 +227,8 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center
  thr = 1.d-10
  d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
 
- ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order                                
- call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
+ ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , &
                                       delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
  ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
  overlap_gauss_xyz_r12_specific = 0.d0
@@ -166,12 +238,12 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center
   iorder_tmp(1) = lx
   do ly = 0, iorder_a_new(2)
    coefy = A_new(ly,2)
-   coefxy = coefx * coefy 
+   coefxy = coefx * coefy
    if(dabs(coefxy).lt.thr)cycle
    iorder_tmp(2) = ly
    do lz = 0, iorder_a_new(3)
     coefz = A_new(lz,3)
-    coefxyz = coefxy * coefz 
+    coefxyz = coefxy * coefz
     if(dabs(coefxyz).lt.thr)cycle
     iorder_tmp(3) = lz
     m = mx
diff --git a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
index 263e9845..96625df5 100644
--- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
@@ -124,7 +124,7 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B,
   ! Computes the following integral :
   !
   ! .. math::
-  ! 
+  !
   !   \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
   !   \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$.
   !
@@ -197,6 +197,92 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B,
 
 end function NAI_pol_mult_erf
 
+! ---
+subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, res_v, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !   \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+  integer,          intent(in) :: n_pt_in, n_points
+  integer,          intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: C_center(n_points,3), A_center(3), B_center(3), alpha, beta, mu_in
+  double precision, intent(out) :: res_v(n_points)
+
+  integer                      :: i, n_pt, n_pt_out, ipoint
+  double precision             :: P_center(3)
+  double precision             :: d(0:n_pt_in), coeff, dist, const, factor
+  double precision             :: const_factor, dist_integral
+  double precision             :: accu, p_inv, p, rho, p_inv_2
+  double precision             :: p_new
+
+  double precision             :: rint
+
+  p       = alpha + beta
+  p_inv   = 1.d0 / p
+  p_inv_2 = 0.5d0 * p_inv
+  rho     = alpha * beta * p_inv
+  p_new  = mu_in / dsqrt(p + mu_in * mu_in)
+
+  dist          = 0.d0
+  do i = 1, 3
+    P_center(i)    = (alpha * A_center(i) + beta * B_center(i)) * p_inv
+    dist          += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i))
+  enddo
+
+  do ipoint=1,n_points
+    dist_integral = 0.d0
+    do i = 1, 3
+      dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i))
+    enddo
+    const_factor = dist * rho
+    if(const_factor > 80.d0) then
+      res_V(ipoint) = 0.d0
+      cycle
+    endif
+
+    factor = dexp(-const_factor)
+    coeff  = dtwo_pi * factor * p_inv * p_new
+
+    n_pt  =  2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) )
+    const = p * dist_integral * p_new * p_new
+    if(n_pt == 0) then
+      res_v(ipoint) = coeff * rint(0, const)
+      cycle
+    endif
+
+    do i = 0, n_pt_in
+      d(i) = 0.d0
+    enddo
+    p_new = p_new * p_new
+    call give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center(ipoint,1:3)&
+        , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center)
+
+    if(n_pt_out < 0) then
+      res_v(ipoint) = 0.d0
+      cycle
+    endif
+
+    ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+    accu = 0.d0
+    do i = 0, n_pt_out, 2
+      accu += d(i) * rint(i/2, const)
+    enddo
+    res_v(ipoint) = accu * coeff
+  enddo
+
+end
+
 ! ---
 
 double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 &
@@ -207,7 +293,7 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A
   ! Computes the following integral :
   !
   ! .. math::
-  ! 
+  !
   !   \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2)
   !   \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2)
   !   \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2)
@@ -312,6 +398,131 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A
 
 end function NAI_pol_mult_erf_with1s
 
+!--
+
+subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2&
+      , beta, B_center, C_center, n_pt_in, mu_in, res_v, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2)
+  !   \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2)
+  !   \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2)
+  !   \exp(-\beta (r - B)^2)
+  !   \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+  integer,          intent(in)   :: n_pt_in, n_points
+  integer,          intent(in)   :: power_A1(3), power_A2(3)
+  double precision, intent(in)   :: C_center(n_points,3), A1_center(3), A2_center(3), B_center(n_points,3)
+  double precision, intent(in)   :: alpha1, alpha2, beta, mu_in
+  double precision, intent(out)  :: res_v(n_points)
+
+  integer                        :: i, n_pt, n_pt_out, ipoint
+  double precision               :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12
+  double precision               :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor
+  double precision               :: dist_integral
+  double precision               :: d(0:n_pt_in), coeff, const, factor
+  double precision               :: accu
+  double precision               :: p_new, p_new2
+
+  double precision               :: rint
+
+
+  ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2}
+  alpha12       = alpha1 + alpha2
+  alpha12_inv   = 1.d0 / alpha12
+  alpha12_inv_2 = 0.5d0 * alpha12_inv
+  rho12         = alpha1 * alpha2 * alpha12_inv
+  A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv
+  A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv
+  A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv
+  dist12        = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1))&
+      + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))&
+      + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3))
+
+  const_factor12 = dist12 * rho12
+
+  if(const_factor12 > 80.d0) then
+    res_v(:) = 0.d0
+    return
+  endif
+
+    ! ---
+
+  ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2}
+  p           = alpha12 + beta
+  p_inv       = 1.d0 / p
+  p_inv_2     = 0.5d0 * p_inv
+  rho         = alpha12 * beta * p_inv
+  p_new  = mu_in / dsqrt(p + mu_in * mu_in)
+  p_new2 = p_new * p_new
+  n_pt  =  2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) &
+              + power_A1(3) + power_A2(3) )
+
+  do ipoint=1,n_points
+
+    P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv
+    P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv
+    P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv
+    dist   = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))&
+        + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))&
+        + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3))
+
+    const_factor = const_factor12 + dist * rho
+    if(const_factor > 80.d0) then
+      res_v(ipoint) = 0.d0
+      cycle
+    endif
+
+    dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))&
+        + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))&
+        + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3))
+
+    ! ---
+
+    factor = dexp(-const_factor)
+    coeff  = dtwo_pi * factor * p_inv * p_new
+
+    const = p * dist_integral * p_new2
+    if(n_pt == 0) then
+      res_v(ipoint) = coeff * rint(0, const)
+      cycle
+    endif
+
+    do i = 0, n_pt_in
+      d(i) = 0.d0
+    enddo
+
+    !TODO: VECTORIZE HERE
+    call give_polynomial_mult_center_one_e_erf_opt(                  &
+        A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3)&
+        , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center,1)
+
+    if(n_pt_out < 0) then
+      res_v(ipoint) = 0.d0
+      cycle
+    endif
+
+    ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+    accu = 0.d0
+    do i = 0, n_pt_out, 2
+      accu += d(i) * rint(i/2, const)
+    enddo
+    res_v(ipoint) = accu * coeff
+  end do
+
+end
+
+! ---
 ! ---
 
 subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center &
@@ -432,10 +643,11 @@ end subroutine give_polynomial_mult_center_one_e_erf_opt
 ! ---
 
 
+
 subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,&
       power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in)
   BEGIN_DOC
-  ! Returns the explicit polynomial in terms of the $t$ variable of the 
+  ! Returns the explicit polynomial in terms of the $t$ variable of the
   ! following polynomial:
   !
   ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$.
diff --git a/src/ao_one_e_ints/pseudopot.f90 b/src/ao_one_e_ints/pseudopot.f90
index 141292d8..bad641ab 100644
--- a/src/ao_one_e_ints/pseudopot.f90
+++ b/src/ao_one_e_ints/pseudopot.f90
@@ -1095,9 +1095,9 @@ double precision function overlap_orb_ylm_grid(nptsgrid,r_orb,npower_orb,center_
 implicit none
 !! PSEUDOS
 integer nptsgridmax,nptsgrid
-double precision coefs_pseudo,ptsgrid
 parameter(nptsgridmax=50)
-common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3)
+double precision coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3)
+common/pseudos/coefs_pseudo,ptsgrid
 !!!!!
 integer npower_orb(3),l,m,i
 double precision x,g_orb,two_pi,dx,dphi,term,orb_phi,ylm_real,sintheta,r_orb,phi,center_orb(3)
@@ -1235,10 +1235,10 @@ end
       subroutine initpseudos(nptsgrid)
       implicit none
       integer nptsgridmax,nptsgrid,ik
-      double precision coefs_pseudo,ptsgrid
       double precision p,q,r,s
       parameter(nptsgridmax=50)
-      common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3)
+      double precision :: coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3)
+      common/pseudos/coefs_pseudo,ptsgrid
 
       p=1.d0/dsqrt(2.d0)
       q=1.d0/dsqrt(3.d0)
diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f
index a72200f7..64b522d8 100644
--- a/src/becke_numerical_grid/grid_becke_vector.irp.f
+++ b/src/becke_numerical_grid/grid_becke_vector.irp.f
@@ -58,3 +58,18 @@ END_PROVIDER
   enddo
 
 END_PROVIDER
+
+
+BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)]
+  implicit none
+  BEGIN_DOC
+  !  final_grid_points_transp(j,1:3) = (/ x, y, z /) of the jth grid point
+  END_DOC
+  integer :: i
+  do i=1,n_points_final_grid
+    final_grid_points_transp(i,1) = final_grid_points(1,i)
+    final_grid_points_transp(i,2) = final_grid_points(2,i)
+    final_grid_points_transp(i,3) = final_grid_points(3,i)
+  enddo
+END_PROVIDER
+
diff --git a/src/bitmask/core_inact_act_virt.irp.f b/src/bitmask/core_inact_act_virt.irp.f
index d83d69e9..f3eaff32 100644
--- a/src/bitmask/core_inact_act_virt.irp.f
+++ b/src/bitmask/core_inact_act_virt.irp.f
@@ -173,10 +173,7 @@ BEGIN_PROVIDER [integer, n_core_inact_act_orb ]
   END_DOC
   n_core_inact_act_orb = (n_core_orb + n_inact_orb + n_act_orb)
 END_PROVIDER
- 
 
-
- 
  BEGIN_PROVIDER [ integer(bit_kind), core_bitmask , (N_int,2) ]
    implicit none
    BEGIN_DOC
@@ -443,5 +440,4 @@ BEGIN_PROVIDER [integer, list_all_but_del_orb, (n_all_but_del_orb)]
   endif
  enddo
 
-END_PROVIDER 
-
+END_PROVIDER
diff --git a/src/cis/cis.irp.f b/src/cis/cis.irp.f
index cc047622..ab2294ad 100644
--- a/src/cis/cis.irp.f
+++ b/src/cis/cis.irp.f
@@ -79,6 +79,6 @@ subroutine run
   call ezfio_set_cis_energy(CI_energy)
   psi_coef = ci_eigenvectors
   SOFT_TOUCH psi_coef
-  call save_wavefunction_truncated(thresh_save_wf)
+  call save_wavefunction_truncated(save_threshold)
 
 end
diff --git a/src/csf/cfgCI_interface.f90 b/src/csf/cfgCI_interface.f90
index b701f0ec..73bd600d 100644
--- a/src/csf/cfgCI_interface.f90
+++ b/src/csf/cfgCI_interface.f90
@@ -46,6 +46,24 @@ module cfunctions
            real   (kind=C_DOUBLE ),intent(out) :: csftodetmatrix(rowsmax,colsmax)
          end subroutine getCSFtoDETTransformationMatrix
       end interface
+       interface
+         subroutine gramSchmidt(A, m, n, B) bind(C, name='gramSchmidt')
+           import C_INT32_T, C_INT64_T, C_DOUBLE
+           integer(kind=C_INT32_T),value,intent(in) :: m
+           integer(kind=C_INT32_T),value,intent(in) :: n
+           real (kind=C_DOUBLE ),intent(in) :: A(m,n)
+           real (kind=C_DOUBLE ),intent(out) :: B(m,n)
+         end subroutine gramSchmidt
+      end interface
+       interface
+         subroutine gramSchmidt_qp(A, m, n, B) bind(C, name='gramSchmidt_qp')
+           import C_INT32_T, C_INT64_T, C_DOUBLE
+           integer(kind=C_INT32_T),value,intent(in) :: m
+           integer(kind=C_INT32_T),value,intent(in) :: n
+           real (kind=C_DOUBLE ),intent(in) :: A(m,n)
+           real (kind=C_DOUBLE ),intent(out) :: B(m,n)
+         end subroutine gramSchmidt_qp
+      end interface
 end module cfunctions
 
 subroutine f_dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC) &
diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c
index 746de04e..3510db37 100644
--- a/src/csf/cfgCI_utils.c
+++ b/src/csf/cfgCI_utils.c
@@ -1,5 +1,6 @@
 #include <stdint.h>
 #include <stdio.h>
+#include <assert.h>
 #include "tree_utils.h"
 
 void int_to_bin_digit(int64_t in, int count, int* out)
@@ -28,19 +29,19 @@ void getncsfs1(int *inpnsomo, int *inpms, int *outncsfs){
     int nsomo = *inpnsomo;
     int ms = *inpms;
     int nparcoupl = (nsomo + ms)/2;
-    *outncsfs = binom(nsomo, nparcoupl);
+    *outncsfs = binom((double)nsomo, (double)nparcoupl);
 }
 
 void getncsfs(int NSOMO, int MS, int *outncsfs){
-    int nparcoupl = (NSOMO + MS)/2;
-    int nparcouplp1 = ((NSOMO + MS)/2)+1;
+    int nparcoupl = (NSOMO + MS)/2; // n_alpha
+    int nparcouplp1 = ((NSOMO + MS)/2)+1; // n_alpha + 1
     double tmpndets=0.0;
     if(NSOMO == 0){
         (*outncsfs) = 1;
         return;
     }
-    tmpndets = binom(NSOMO, nparcoupl);
-    (*outncsfs) = round(tmpndets - binom(NSOMO, nparcouplp1));
+    tmpndets = binom((double)NSOMO, (double)nparcoupl);
+    (*outncsfs) = round(tmpndets - binom((double)NSOMO, (double)nparcouplp1));
 }
 
 #include <stdint.h>
@@ -252,6 +253,27 @@ void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOM
     buildTreeDriver(bftree, *NSOMO, MS, NBF);
 }
 
+void ortho_qr_csf(double *overlapMatrix, int lda, double *orthoMatrix, int rows, int cols);
+
+// QR to orthogonalize CSFs does not work
+//void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix){
+//  int i,j;
+//  //for(j=0;j<cols;++j){
+//  //  for(i=0;i<rows;++i){
+//  //    printf(" %3.2f ",overlapMatrix[j*rows + i]);
+//  //  }
+//  //  printf("\n");
+//  //}
+//  // Call the function ortho_qr from qp
+//  ortho_qr_csf(overlapMatrix, rows, orthoMatrix, rows, cols);
+//  //for(j=0;j<cols;++j){
+//  //  for(i=0;i<rows;++i){
+//  //    printf(" %3.2f ",orthoMatrix[j*rows + i]);
+//  //  }
+//  //  printf("\n");
+//  //}
+//}
+
 void gramSchmidt(double *overlapMatrix, int rows, int cols, double *orthoMatrix){
 
     // vector
@@ -341,8 +363,12 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl
          Get BFtoDeterminant Matrix
     ************************************/
 
-    printf("In convertcsftodet\n");
+
+    //printf(" --- In convet ----\n");
     convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI);
+    //printf(" --- done bf det basis ---- row=%d col=%d\n",rowsbftodetI,colsbftodetI);
+
+    //printRealMatrix(bftodetmatrixI,rowsbftodetI,colsbftodetI);
 
     int rowsI = 0;
     int colsI = 0;
@@ -350,6 +376,8 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl
     //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
     getOverlapMatrix_withDet(bftodetmatrixI, rowsbftodetI, colsbftodetI, Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
 
+    //printf("Overlap matrix\n");
+    //printRealMatrix(overlapMatrixI,rowsI,colsI);
 
     /***********************************
          Get Orthonormalization Matrix
@@ -359,6 +387,9 @@ void convertCSFtoDetBasis(int64_t Isomo, int MS, int rowsmax, int colsmax, doubl
 
     gramSchmidt(overlapMatrixI, rowsI, colsI, orthoMatrixI);
 
+    //printf("Ortho matrix\n");
+    //printRealMatrix(orthoMatrixI,rowsI,colsI);
+
     /***********************************
          Get Final CSF to Det Matrix
     ************************************/
@@ -1340,11 +1371,11 @@ void getbftodetfunction(Tree *dettree, int NSOMO, int MS, int *BF1, double *rowv
     for(int i = 0; i < npairs; i++){
         for(int j = 0; j < NSOMO; j++) {
             inpdet[j] = detslist[i*NSOMO + j];
-            printf(" %d ",inpdet[j]);
+            //printf(" %d ",inpdet[j]);
         }
-        printf("\n");
+        //printf("\n");
         findAddofDetDriver(dettree, NSOMO, inpdet, &addr);
-        printf("(%d) - addr  = %d\n",i,addr);
+        //printf("(%d) - addr  = %d\n",i,addr);
         // Calculate the phase for cfg to QP2 conversion
         //get_phase_cfg_to_qp_inpList(inpdet, NSOMO, &phase_cfg_to_qp);
         //rowvec[addr] = 1.0 * phaselist[i]*phase_cfg_to_qp/sqrt(fac);
@@ -1363,12 +1394,23 @@ void getbftodetfunction(Tree *dettree, int NSOMO, int MS, int *BF1, double *rowv
 void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int *rows, int *cols){
 
     int NSOMO=0;
+    //printf("before getSetBits Isomo=%ld, NSOMO=%ld\n",Isomo,NSOMO);
     getSetBits(Isomo, &NSOMO);
+    //printf("Isomo=%ld, NSOMO=%ld\n",Isomo,NSOMO);
     int ndets = 0;
     int NBF = 0;
-    double dNSOMO = NSOMO*1.0;
-    double nalpha = (NSOMO + MS)/2.0;
-    ndets = (int)binom(dNSOMO, nalpha);
+    //double dNSOMO = NSOMO*1.0;
+    // MS = alpha_num - beta_num
+    int nalpha = (NSOMO + MS)/2;
+    //printf(" in convertbftodet : MS=%d nalpha=%3.2f\n",MS,nalpha);
+    //ndets = (int)binom(dNSOMO, nalpha);
+    if(NSOMO > 0){
+      ndets = (int)binom((double)NSOMO, (double)nalpha);
+    }
+    else if(NSOMO == 0){
+      ndets = 1;
+    }
+    else printf("Something is wrong in calcMEdetpair\n");
 
     Tree dettree = (Tree){  .rootNode = NULL, .NBF = -1 };
     dettree.rootNode = malloc(sizeof(Node));
@@ -1389,16 +1431,6 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int *
     }
     else{
 
-    //int addr = -1;
-    //int inpdet[NSOMO];
-    //inpdet[0] = 1;
-    //inpdet[1] = 1;
-    //inpdet[2] = 1;
-    //inpdet[3] = 0;
-    //inpdet[4] = 0;
-    //inpdet[5] = 0;
-
-    //findAddofDetDriver(&dettree, NSOMO, inpdet, &addr);
 
     int detlist[ndets];
     getDetlistDriver(&dettree, NSOMO, detlist);
@@ -1411,6 +1443,9 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int *
     generateAllBFs(Isomo, MS, &bftree, &NBF, &NSOMO);
 
     // Initialize transformation matrix
+    //printf("MS=%d NBF=%d ndets=%d NSOMO=%d\n",MS,NBF,ndets,NSOMO);
+    assert( NBF > 0);
+    assert( ndets > 0);
     (*bftodetmatrixptr) = malloc(NBF*ndets*sizeof(double));
     (*rows) = NBF;
     (*cols) = ndets;
@@ -1465,9 +1500,10 @@ void convertBFtoDetBasisWithArrayDims(int64_t Isomo, int MS, int rowsmax, int co
     getSetBits(Isomo, &NSOMO);
     int ndets = 0;
     int NBF = 0;
-    double dNSOMO = NSOMO*1.0;
-    double nalpha = (NSOMO + MS)/2.0;
-    ndets = (int)binom(dNSOMO, nalpha);
+    //double dNSOMO = NSOMO*1.0;
+    //double nalpha = (NSOMO + MS)/2.0;
+    int nalpha = (NSOMO + MS)/2;
+    ndets = (int)binom((double)NSOMO, (double)nalpha);
 
     Tree dettree = (Tree){  .rootNode = NULL, .NBF = -1 };
     dettree.rootNode = malloc(sizeof(Node));
@@ -1551,6 +1587,7 @@ void getApqIJMatrixDims(int64_t Isomo, int64_t Jsomo, int64_t MS, int32_t *rowso
     getncsfs(NSOMOJ, MS, &NBFJ);
     (*rowsout) = NBFI;
     (*colsout) = NBFJ;
+  //exit(0);
 }
 
 void getApqIJMatrixDriver(int64_t Isomo, int64_t Jsomo, int orbp, int orbq, int64_t MS, int64_t NMO, double **CSFICSFJApqIJptr, int *rowsout, int *colsout){
@@ -1669,6 +1706,7 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
 
     int rowsbftodetI, colsbftodetI;
 
+    //printf(" 1Calling convertBFtoDetBasis Isomo=%ld MS=%ld\n",Isomo,MS);
     convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI);
 
     // Fill matrix
@@ -1676,8 +1714,14 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
     int colsI = 0;
 
     //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
-    //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
+    //printf("Isomo=%ld\n",Isomo);
     getOverlapMatrix_withDet(bftodetmatrixI, rowsbftodetI, colsbftodetI, Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO);
+    if(Isomo == 0){
+      rowsI = 1;
+      colsI = 1;
+    }
+
+    //printf("Isomo=%ld\n",Isomo);
 
     orthoMatrixI = malloc(rowsI*colsI*sizeof(double));
 
@@ -1689,6 +1733,7 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
 
     int rowsbftodetJ, colsbftodetJ;
 
+    //printf(" 2Calling convertBFtoDetBasis Jsomo=%ld MS=%ld\n",Jsomo,MS);
     convertBFtoDetBasis(Jsomo, MS, &bftodetmatrixJ, &rowsbftodetJ, &colsbftodetJ);
 
     int rowsJ = 0;
@@ -1696,6 +1741,10 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
     // Fill matrix
     //getOverlapMatrix(Jsomo, MS, &overlapMatrixJ, &rowsJ, &colsJ, &NSOMO);
     getOverlapMatrix_withDet(bftodetmatrixJ, rowsbftodetJ, colsbftodetJ, Jsomo, MS, &overlapMatrixJ, &rowsJ, &colsJ, &NSOMO);
+    if(Jsomo == 0){
+      rowsJ = 1;
+      colsJ = 1;
+    }
 
     orthoMatrixJ = malloc(rowsJ*colsJ*sizeof(double));
 
@@ -1713,18 +1762,25 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
 
     int transA=false;
     int transB=false;
+    //printf("1Calling blas\n");
+    //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsbftodetI,colsbftodetI,rowsA,colsA);
     callBlasMatxMat(bftodetmatrixI, rowsbftodetI, colsbftodetI, ApqIJ, rowsA, colsA, bfIApqIJ, transA, transB);
+    //printf("done\n");
 
     // now transform I in csf basis
     double *CSFIApqIJ = malloc(rowsI*colsA*sizeof(double));
     transA = false;
     transB = false;
+    //printf("2Calling blas\n");
+    //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,colsI,colsI,colsA);
     callBlasMatxMat(orthoMatrixI, rowsI, colsI, bfIApqIJ, colsI, colsA, CSFIApqIJ, transA, transB);
 
     // now transform J in BF basis
     double *CSFIbfJApqIJ = malloc(rowsI*rowsbftodetJ*sizeof(double));
     transA = false;
     transB = true;
+    //printf("3Calling blas\n");
+    //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,colsA,rowsbftodetJ,colsbftodetJ);
     callBlasMatxMat(CSFIApqIJ, rowsI, colsA, bftodetmatrixJ, rowsbftodetJ, colsbftodetJ, CSFIbfJApqIJ, transA, transB);
 
     // now transform J in CSF basis
@@ -1735,13 +1791,14 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in
     double *tmpCSFICSFJApqIJ = malloc(rowsI*rowsJ*sizeof(double));
     transA = false;
     transB = true;
+    //printf("4Calling blas\n");
+    //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,rowsbftodetJ,rowsJ,colsJ);
     callBlasMatxMat(CSFIbfJApqIJ, rowsI, rowsbftodetJ, orthoMatrixJ, rowsJ, colsJ, tmpCSFICSFJApqIJ, transA, transB);
     // Transfer to actual buffer in Fortran order
     for(int i = 0; i < rowsI; i++)
         for(int j = 0; j < rowsJ; j++)
             CSFICSFJApqIJ[j*rowsI + i] = tmpCSFICSFJApqIJ[i*rowsJ + j];
 
-
     // Garbage collection
     free(overlapMatrixI);
     free(overlapMatrixJ);
diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index f73362eb..cea7640c 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -1,3 +1,592 @@
+use bitmasks
+
+ BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*(mo_num))]
+&BEGIN_PROVIDER [ integer, NalphaIcfg_list, (N_configuration) ]
+  implicit none
+  !use bitmasks
+  BEGIN_DOC
+  ! Documentation for alphasI
+  ! Returns the associated alpha's for
+  ! the input configuration Icfg.
+  END_DOC
+
+  integer                        :: idxI ! The id of the Ith CFG
+  integer(bit_kind)              :: Icfg(N_int,2)
+  integer                        :: NalphaIcfg
+  logical,dimension(:,:),allocatable :: tableUniqueAlphas
+  integer                        :: listholes(mo_num)
+  integer                        :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+  integer                        :: nholes
+  integer                        :: nvmos
+  integer                        :: listvmos(mo_num)
+  integer                        :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+  integer*8                      :: Idomo
+  integer*8                      :: Isomo
+  integer*8                      :: Jdomo
+  integer*8                      :: Jsomo
+  integer*8                      :: diffSOMO
+  integer*8                      :: diffDOMO
+  integer*8                      :: xordiffSOMODOMO
+  integer                        :: ndiffSOMO
+  integer                        :: ndiffDOMO
+  integer                        :: nxordiffSOMODOMO
+  integer                        :: ndiffAll
+  integer                        :: i,ii
+  integer                        :: j,jj
+  integer                        :: k,kk
+  integer                        :: kstart
+  integer                        :: kend
+  integer                        :: Nsomo_I
+  integer                        :: hole
+  integer                        :: p
+  integer                        :: q
+  integer                        :: countalphas
+  logical                        :: pqAlreadyGenQ
+  logical                        :: pqExistsQ
+  logical                        :: ppExistsQ
+  integer*8                      :: MS
+
+  double precision               :: t0, t1
+  call wall_time(t0)
+
+  MS = elec_alpha_num-elec_beta_num
+
+  allocate(tableUniqueAlphas(mo_num,mo_num))
+  NalphaIcfg_list = 0
+
+  do idxI = 1, N_configuration
+
+    Icfg  = psi_configuration(:,:,idxI)
+
+    Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+    Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+
+    ! find out all pq holes possible
+    nholes = 0
+    ! holes in SOMO
+    do ii = 1,n_act_orb
+      i = list_act(ii)
+      if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then
+        nholes += 1
+        listholes(nholes) = i
+        holetype(nholes) = 1
+      endif
+    end do
+    ! holes in DOMO
+    do ii = 1,n_act_orb
+      i = list_act(ii)
+      if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then
+        nholes += 1
+        listholes(nholes) = i
+        holetype(nholes) = 2
+      endif
+    end do
+
+    ! find vmos
+    listvmos = -1
+    vmotype = -1
+    nvmos = 0
+    do ii = 1,n_act_orb
+      i = list_act(ii)
+      if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then
+        if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then
+          nvmos += 1
+          listvmos(nvmos) = i
+          vmotype(nvmos) = 1
+        else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then
+          nvmos += 1
+          listvmos(nvmos) = i
+          vmotype(nvmos) = 2
+        end if
+      end if
+    end do
+
+    tableUniqueAlphas = .FALSE.
+
+    ! Now find the allowed (p,q) excitations
+    Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+    Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+    Nsomo_I = POPCNT(Isomo)
+    if(Nsomo_I .EQ. 0) then
+      kstart = 1
+    else
+      kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))
+    endif
+    kend = idxI-1
+
+    do i = 1,nholes
+      p = listholes(i)
+      do j = 1,nvmos
+        q = listvmos(j)
+        if(p .EQ. q) cycle
+        if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+          ! SOMO -> VMO
+          Jsomo = IBCLR(Isomo,p-1)
+          Jsomo = IBSET(Jsomo,q-1)
+          Jdomo = Idomo
+          kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+          kend = idxI-1
+        else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+          ! SOMO -> SOMO
+          Jsomo = IBCLR(Isomo,p-1)
+          Jsomo = IBCLR(Jsomo,q-1)
+          Jdomo = IBSET(Idomo,q-1)
+          ! Check for Minimal alpha electrons (MS)
+          if(POPCNT(Jsomo).ge.MS)then
+            kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
+            kend = idxI-1
+          else
+            cycle
+          endif
+        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+          ! DOMO -> VMO
+          Jsomo = IBSET(Isomo,p-1)
+          Jsomo = IBSET(Jsomo,q-1)
+          Jdomo = IBCLR(Idomo,p-1)
+          kstart = cfg_seniority_index(Nsomo_I)
+          kend = idxI-1
+        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+          ! DOMO -> SOMO
+          Jsomo = IBSET(Isomo,p-1)
+          Jsomo = IBCLR(Jsomo,q-1)
+          Jdomo = IBCLR(Idomo,p-1)
+          Jdomo = IBSET(Jdomo,q-1)
+          kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+          kend = idxI-1
+        else
+          print*,"Something went wrong in obtain_associated_alphaI"
+        endif
+        ! Check for Minimal alpha electrons (MS)
+        if(POPCNT(Jsomo).lt.MS)then
+          cycle
+        endif
+
+        ! Again, we don't have to search from 1
+        ! we just use seniority to find the
+        ! first index with NSOMO - 2 to NSOMO + 2
+        ! this is what is done in kstart, kend
+
+        pqAlreadyGenQ = .FALSE.
+        ! First check if it can be generated before
+        do k = kstart, kend
+          diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)))
+          ndiffSOMO = POPCNT(diffSOMO)
+          if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle
+          diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)))
+          xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+          ndiffDOMO = POPCNT(diffDOMO)
+          nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+          nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+          !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then
+          if((ndiffSOMO+ndiffDOMO) .EQ. 0) then
+            pqAlreadyGenQ = .TRUE.
+            ppExistsQ = .TRUE.
+            EXIT
+          endif
+          if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+            pqAlreadyGenQ = .TRUE.
+            EXIT
+          endif
+        end do
+
+        if(pqAlreadyGenQ) cycle
+
+        pqExistsQ = .FALSE.
+
+        if(.NOT. pqExistsQ) then
+          tableUniqueAlphas(p,q) = .TRUE.
+        endif
+      end do
+    end do
+
+    !print *,tableUniqueAlphas(:,:)
+
+    ! prune list of alphas
+    Isomo = Icfg(1,1)
+    Idomo = Icfg(1,2)
+    Jsomo = Icfg(1,1)
+    Jdomo = Icfg(1,2)
+    NalphaIcfg = 0
+    do i = 1, nholes
+      p = listholes(i)
+      do j = 1, nvmos
+        q = listvmos(j)
+        if(p .EQ. q) cycle
+        if(tableUniqueAlphas(p,q)) then
+          if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+            ! SOMO -> VMO
+            Jsomo = IBCLR(Isomo,p-1)
+            Jsomo = IBSET(Jsomo,q-1)
+            Jdomo = Idomo
+          else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+            ! SOMO -> SOMO
+            Jsomo = IBCLR(Isomo,p-1)
+            Jsomo = IBCLR(Jsomo,q-1)
+            Jdomo = IBSET(Idomo,q-1)
+            if(POPCNT(Jsomo).ge.MS)then
+              kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
+              kend = idxI-1
+            else
+              cycle
+            endif
+          else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+            ! DOMO -> VMO
+            Jsomo = IBSET(Isomo,p-1)
+            Jsomo = IBSET(Jsomo,q-1)
+            Jdomo = IBCLR(Idomo,p-1)
+          else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+            ! DOMO -> SOMO
+            Jsomo = IBSET(Isomo,p-1)
+            Jsomo = IBCLR(Jsomo,q-1)
+            Jdomo = IBCLR(Idomo,p-1)
+            Jdomo = IBSET(Jdomo,q-1)
+          else
+            print*,"Something went wrong in obtain_associated_alphaI"
+          endif
+
+          ! SOMO
+          !print *,i,j,"|",NalphaIcfg, Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+          if(POPCNT(Jsomo) .ge. NSOMOMin) then
+            NalphaIcfg += 1
+            alphasIcfg_list(1,1,idxI,NalphaIcfg) = Jsomo
+            alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+            NalphaIcfg_list(idxI) = NalphaIcfg
+          endif
+        endif
+      end do
+    end do
+
+    ! Check if this Icfg has been previously generated as a mono
+    ppExistsQ = .False.
+    Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1))
+    Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2))
+    kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+    do k = kstart, idxI-1
+      diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k)))
+      ndiffSOMO = POPCNT(diffSOMO)
+      if (ndiffSOMO /= 2) cycle
+      diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k)))
+      xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+      ndiffDOMO = POPCNT(diffDOMO)
+      nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+      if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4)) then
+        ppExistsQ = .TRUE.
+        EXIT
+      endif
+    end do
+    ! Diagonal part (pp,qq)
+    if(nholes > 0 .AND. (.NOT. ppExistsQ))then
+      ! SOMO
+      if(POPCNT(Jsomo) .ge. NSOMOMin) then
+        NalphaIcfg += 1
+        alphasIcfg_list(1,1,idxI,NalphaIcfg) = Icfg(1,1)
+        alphasIcfg_list(1,2,idxI,NalphaIcfg) = Icfg(1,2)
+        NalphaIcfg_list(idxI) = NalphaIcfg
+      endif
+    endif
+
+    NalphaIcfg = 0
+  enddo ! end loop idxI
+  call wall_time(t1)
+  print *, 'Preparation : ', t1 - t0
+
+END_PROVIDER
+
+  subroutine obtain_associated_alphaI(idxI, Icfg, alphasIcfg, NalphaIcfg)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for alphasI
+  ! Returns the associated alpha's for
+  ! the input configuration Icfg.
+  END_DOC
+
+  integer,intent(in)                 :: idxI ! The id of the Ith CFG
+  integer(bit_kind),intent(in)       :: Icfg(N_int,2)
+  integer,intent(out)                :: NalphaIcfg
+  integer(bit_kind),intent(out)      :: alphasIcfg(N_int,2,*)
+  logical,dimension(:,:),allocatable :: tableUniqueAlphas
+  integer                            :: listholes(mo_num)
+  integer                            :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+  integer                            :: nholes
+  integer                            :: nvmos
+  integer                            :: listvmos(mo_num)
+  integer                            :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+  integer*8                          :: Idomo
+  integer*8                          :: Isomo
+  integer*8                          :: Jdomo
+  integer*8                          :: Jsomo
+  integer*8                          :: diffSOMO
+  integer*8                          :: diffDOMO
+  integer*8                          :: xordiffSOMODOMO
+  integer                            :: ndiffSOMO
+  integer                            :: ndiffDOMO
+  integer                            :: nxordiffSOMODOMO
+  integer                            :: ndiffAll
+  integer                            :: i, ii
+  integer                            :: j, jj
+  integer                            :: k, kk
+  integer                            :: kstart
+  integer                            :: kend
+  integer                            :: Nsomo_I
+  integer                            :: hole
+  integer                            :: p
+  integer                            :: q
+  integer                            :: countalphas
+  logical                            :: pqAlreadyGenQ
+  logical                            :: pqExistsQ
+  logical                            :: ppExistsQ
+  Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+  Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+  !print*,"Input cfg"
+  !call debug_spindet(Isomo,1)
+  !call debug_spindet(Idomo,1)
+
+  ! find out all pq holes possible
+  nholes = 0
+  ! holes in SOMO
+  do ii = 1,n_act_orb
+    i = list_act(ii)
+     if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then
+        nholes += 1
+        listholes(nholes) = i
+        holetype(nholes) = 1
+     endif
+  end do
+  ! holes in DOMO
+  do ii = 1,n_act_orb
+    i = list_act(ii)
+     if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then
+        nholes += 1
+        listholes(nholes) = i
+        holetype(nholes) = 2
+     endif
+  end do
+
+  ! find vmos
+  listvmos = -1
+  vmotype = -1
+  nvmos = 0
+  do ii = 1,n_act_orb
+    i = list_act(ii)
+     !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
+     if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0) then
+        nvmos += 1
+        listvmos(nvmos) = i
+        vmotype(nvmos) = 1
+     else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0 ) then
+        nvmos += 1
+        listvmos(nvmos) = i
+        vmotype(nvmos) = 2
+     end if
+  end do
+
+  !print *,"Nvmo=",nvmos
+  !print *,listvmos
+  !print *,vmotype
+
+  allocate(tableUniqueAlphas(mo_num,mo_num))
+  tableUniqueAlphas = .FALSE.
+
+  ! Now find the allowed (p,q) excitations
+  Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+  Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+  Nsomo_I = POPCNT(Isomo)
+  if(Nsomo_I .EQ. 0) then
+    kstart = 1
+  else
+    kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))
+  endif
+  kend = idxI-1
+  !print *,"Isomo"
+  !call debug_spindet(Isomo,1)
+  !call debug_spindet(Idomo,1)
+
+  !print *,"Nholes=",nholes," Nvmos=",nvmos, " idxi=",idxI
+  !do i = 1,nholes
+  !   print *,i,"->",listholes(i)
+  !enddo
+  !do i = 1,nvmos
+  !   print *,i,"->",listvmos(i)
+  !enddo
+
+  do i = 1,nholes
+     p = listholes(i)
+     do j = 1,nvmos
+        q = listvmos(j)
+        if(p .EQ. q) cycle
+        if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+           ! SOMO -> VMO
+           Jsomo = IBCLR(Isomo,p-1)
+           Jsomo = IBSET(Jsomo,q-1)
+           Jdomo = Idomo
+           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+           kend = idxI-1
+        else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+           ! SOMO -> SOMO
+           Jsomo = IBCLR(Isomo,p-1)
+           Jsomo = IBCLR(Jsomo,q-1)
+           Jdomo = IBSET(Idomo,q-1)
+           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4)))
+           kend = idxI-1
+        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+           ! DOMO -> VMO
+           Jsomo = IBSET(Isomo,p-1)
+           Jsomo = IBSET(Jsomo,q-1)
+           Jdomo = IBCLR(Idomo,p-1)
+           kstart = cfg_seniority_index(Nsomo_I)
+           kend = idxI-1
+        else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+           ! DOMO -> SOMO
+           Jsomo = IBSET(Isomo,p-1)
+           Jsomo = IBCLR(Jsomo,q-1)
+           Jdomo = IBCLR(Idomo,p-1)
+           Jdomo = IBSET(Jdomo,q-1)
+           kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)))
+           kend = idxI-1
+        else
+           print*,"Something went wrong in obtain_associated_alphaI"
+        endif
+
+        ! Again, we don't have to search from 1
+        ! we just use seniortiy to find the
+        ! first index with NSOMO - 2 to NSOMO + 2
+        ! this is what is done in kstart, kend
+
+        pqAlreadyGenQ = .FALSE.
+        ! First check if it can be generated before
+        do k = kstart, kend
+           diffSOMO = IEOR(Jsomo,iand(act_bitmask(1,1),psi_configuration(1,1,k)))
+           ndiffSOMO = POPCNT(diffSOMO)
+           if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle
+           diffDOMO = IEOR(Jdomo,iand(act_bitmask(1,1),psi_configuration(1,2,k)))
+           xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+           ndiffDOMO = POPCNT(diffDOMO)
+           nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+           nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO
+           !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then
+           if((ndiffSOMO+ndiffDOMO) .EQ. 0) then
+              pqAlreadyGenQ = .TRUE.
+              ppExistsQ = .TRUE.
+              EXIT
+           endif
+           if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+              pqAlreadyGenQ = .TRUE.
+              !EXIT
+              !ppExistsQ = .TRUE.
+              !print *,i,k,ndiffSOMO,ndiffDOMO
+              !call debug_spindet(Jsomo,1)
+              !call debug_spindet(Jdomo,1)
+              !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)),1)
+              !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)),1)
+              EXIT
+           endif
+        end do
+
+        !print *,"(,",p,",",q,")",pqAlreadyGenQ
+
+        if(pqAlreadyGenQ) cycle
+
+        pqExistsQ = .FALSE.
+        ! now check if this exists in the selected list
+        !do k = idxI+1, N_configuration
+        !   diffSOMO = IEOR(OR(reunion_of_act_virt_bitmask(1,1),Jsomo),psi_configuration(1,1,k))
+        !   diffDOMO = IEOR(OR(reunion_of_act_virt_bitmask(1,1),Jdomo),psi_configuration(1,2,k))
+        !   ndiffSOMO = POPCNT(diffSOMO)
+        !   ndiffDOMO = POPCNT(diffDOMO)
+        !   if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
+        !      pqExistsQ = .TRUE.
+        !      EXIT
+        !   endif
+        !end do
+
+        if(.NOT. pqExistsQ) then
+           tableUniqueAlphas(p,q) = .TRUE.
+           !print *,p,q
+           !call debug_spindet(Jsomo,1)
+           !call debug_spindet(Jdomo,1)
+        endif
+     end do
+  end do
+
+  !print *,tableUniqueAlphas(:,:)
+
+  ! prune list of alphas
+  Isomo = Icfg(1,1)
+  Idomo = Icfg(1,2)
+  Jsomo = Icfg(1,1)
+  Jdomo = Icfg(1,2)
+  NalphaIcfg = 0
+  do i = 1, nholes
+     p = listholes(i)
+     do j = 1, nvmos
+        q = listvmos(j)
+        if(p .EQ. q) cycle
+        if(tableUniqueAlphas(p,q)) then
+           if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then
+              ! SOMO -> VMO
+              Jsomo = IBCLR(Isomo,p-1)
+              Jsomo = IBSET(Jsomo,q-1)
+              Jdomo = Idomo
+           else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then
+              ! SOMO -> SOMO
+              Jsomo = IBCLR(Isomo,p-1)
+              Jsomo = IBCLR(Jsomo,q-1)
+              Jdomo = IBSET(Idomo,q-1)
+           else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then
+              ! DOMO -> VMO
+              Jsomo = IBSET(Isomo,p-1)
+              Jsomo = IBSET(Jsomo,q-1)
+              Jdomo = IBCLR(Idomo,p-1)
+           else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then
+              ! DOMO -> SOMO
+              Jsomo = IBSET(Isomo,p-1)
+              Jsomo = IBCLR(Jsomo,q-1)
+              Jdomo = IBCLR(Idomo,p-1)
+              Jdomo = IBSET(Jdomo,q-1)
+           else
+              print*,"Something went wrong in obtain_associated_alphaI"
+           endif
+
+           ! SOMO
+           NalphaIcfg += 1
+           !print *,i,j,"|",NalphaIcfg
+           alphasIcfg(1,1,NalphaIcfg) = Jsomo
+           alphasIcfg(1,2,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+           !print *,"I = ",idxI, " Na=",NalphaIcfg," - ",Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1)
+        endif
+     end do
+  end do
+
+  ! Check if this Icfg has been previously generated as a mono
+  ppExistsQ = .False.
+  Isomo = iand(act_bitmask(1,1),Icfg(1,1))
+  Idomo = iand(act_bitmask(1,1),Icfg(1,2))
+  do k = 1, idxI-1
+     diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k)))
+     diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k)))
+     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+     ndiffSOMO = POPCNT(diffSOMO)
+     ndiffDOMO = POPCNT(diffDOMO)
+     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+     if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+        ppExistsQ = .TRUE.
+        EXIT
+     endif
+  end do
+  ! Diagonal part (pp,qq)
+  if(nholes > 0 .AND. (.NOT. ppExistsQ))then
+     ! SOMO
+     NalphaIcfg += 1
+     !print *,p,q,"|",holetype(i),vmotype(j),NalphaIcfg
+     !call debug_spindet(Idomo,1)
+     !call debug_spindet(Jdomo,1)
+     alphasIcfg(1,1,NalphaIcfg) = Icfg(1,1)
+     alphasIcfg(1,2,NalphaIcfg) = Icfg(1,2)
+  endif
+
+  end subroutine
+
   function getNSOMO(Icfg) result(NSOMO)
     implicit none
     integer(bit_kind),intent(in)   :: Icfg(N_int,2)
@@ -8,98 +597,3 @@
        NSOMO += POPCNT(Icfg(i,1))
     enddo
   end function getNSOMO
-
-subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmodel)
-  implicit none
-  BEGIN_DOC
-  ! This function converts the orbital ids
-  ! in real space to those used in model space
-  ! in order to identify the matrices required
-  ! for the calculation of MEs.
-  !
-  ! The type of excitations are ordered as follows:
-  ! Type 1 - SOMO -> SOMO
-  ! Type 2 - DOMO -> VMO
-  ! Type 3 - SOMO -> VMO
-  ! Type 4 - DOMO -> SOMO
-  END_DOC
-  integer(bit_kind),intent(in)         :: Ialpha(N_int,2)
-  integer(bit_kind),intent(in)         :: Jcfg(N_int,2)
-  integer,intent(in)                   :: p,q
-  integer,intent(in)                   :: extype
-  integer,intent(out)                  :: pmodel,qmodel
-  integer*8                            :: Isomo
-  integer*8                            :: Idomo
-  integer*8                            :: Jsomo
-  integer*8                            :: Jdomo
-  integer*8                            :: mask
-  integer*8                            :: Isomotmp
-  integer*8                            :: Jsomotmp
-  integer                              :: pos0,pos0prev
-
-  ! TODO Flag (print) when model space indices is > 64
-  Isomo = Ialpha(1,1)
-  Idomo = Ialpha(1,2)
-  Jsomo = Jcfg(1,1)
-  Jdomo = Jcfg(1,2)
-  pos0prev = 0
-  pmodel = p
-  qmodel = q
-
-  if(p .EQ. q) then
-     pmodel = 1
-     qmodel = 1
-  else
-     !print *,"input pq=",p,q,"extype=",extype
-     !call debug_spindet(Isomo,1)
-     !call debug_spindet(Idomo,1)
-     !call debug_spindet(Jsomo,1)
-     !call debug_spindet(Jdomo,1)
-     select case(extype)
-       case (1)
-          ! SOMO -> SOMO
-          ! remove all domos
-          !print *,"type -> SOMO -> SOMO"
-          mask = ISHFT(1_8,p) - 1
-          Isomotmp = IAND(Isomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
-          mask = ISHFT(1_8,q) - 1
-          Isomotmp = IAND(Isomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
-       case (2)
-          ! DOMO -> VMO
-          ! remove all domos except one at p
-          !print *,"type -> DOMO -> VMO"
-          mask = ISHFT(1_8,p) - 1
-          Jsomotmp = IAND(Jsomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
-          mask = ISHFT(1_8,q) - 1
-          Jsomotmp = IAND(Jsomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
-       case (3)
-          ! SOMO -> VMO
-          !print *,"type -> SOMO -> VMO"
-          !Isomo = IEOR(Isomo,Jsomo)
-          mask = ISHFT(1_8,p) - 1
-          Isomo = IAND(Isomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
-          mask = ISHFT(1_8,q) - 1
-          Jsomo = IAND(Jsomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
-       case (4)
-          ! DOMO -> SOMO
-          ! remove all domos except one at p
-          !print *,"type -> DOMO -> SOMO"
-          !Isomo = IEOR(Isomo,Jsomo)
-          mask = ISHFT(1_8,p) - 1
-          Jsomo = IAND(Jsomo,mask)
-          pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
-          mask = ISHFT(1_8,q) - 1
-          Isomo = IAND(Isomo,mask)
-          qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
-       case default
-          print *,"something is wrong in convertOrbIdsToModelSpaceIds"
-     end select
-  endif
-     !print *,p,q,"model ids=",pmodel,qmodel
-end subroutine convertOrbIdsToModelSpaceIds
diff --git a/src/csf/configurations.irp.f b/src/csf/configurations.irp.f
index 8e2a513c..c11a49a4 100644
--- a/src/csf/configurations.irp.f
+++ b/src/csf/configurations.irp.f
@@ -458,8 +458,9 @@ end
 
 END_PROVIDER
 
- BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num) ]
+ BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num+2) ]
 &BEGIN_PROVIDER [ integer, cfg_nsomo_max ]
+&BEGIN_PROVIDER [ integer, cfg_nsomo_min ]
   implicit none
   BEGIN_DOC
  ! Returns the index in psi_configuration of the first cfg with
@@ -467,9 +468,10 @@ END_PROVIDER
  !
  ! cfg_nsomo_max : Max number of SOMO in the current wave function
  END_DOC
- integer :: i, k, s, sold
+ integer :: i, k, s, sold, soldmin
  cfg_seniority_index(:) = -1
  sold = -1
+ soldmin = 2000
  cfg_nsomo_max = 0
  do i=1,N_configuration
    s = 0
@@ -482,6 +484,10 @@ END_PROVIDER
      cfg_seniority_index(s) = i
      cfg_nsomo_max = s
    endif
+   if (soldmin .GT. s ) then
+     soldmin = s
+     cfg_nsomo_min = s
+   endif
  enddo
 END_PROVIDER
 
@@ -743,41 +749,112 @@ BEGIN_PROVIDER [ integer(bit_kind), dominant_dets_of_cfgs, (N_int,2,N_dominant_d
  enddo
 END_PROVIDER
 
-subroutine binary_search_cfg(cfgInp,addcfg)
+subroutine binary_search_cfg(cfgInp,addcfg,bit_tmp)
   use bitmasks
   implicit none
   BEGIN_DOC
   ! Documentation for binary_search
-  ! 
-  ! Does a binary search to find 
+  !
+  ! Does a binary search to find
   ! the address of a configuration in a list of
   ! configurations.
   END_DOC
   integer(bit_kind), intent(in)  :: cfgInp(N_int,2)
   integer          , intent(out) :: addcfg
-  integer :: i,j,k,r,l
-  integer*8 :: key, key2
-  logical :: found
-  !integer*8, allocatable :: bit_tmp(:)
-  !integer*8, external            :: configuration_search_key
+  integer*8,          intent(in) :: bit_tmp(0:N_configuration+1)
 
-  !allocate(bit_tmp(0:N_configuration))
-  !bit_tmp(0) = 0
-  do i=1,N_configuration
-    !bit_tmp(i) = configuration_search_key(psi_configuration(1,1,i),N_int)
-    found = .True.
-    do k=1,N_int
-      found = found .and. (psi_configuration(k,1,i) == cfgInp(k,1)) &
-                    .and. (psi_configuration(k,2,i) == cfgInp(k,2))
-    enddo
-    if (found) then
-      addcfg = i
-      exit
+  logical                        :: found
+  integer                        :: l, r, j, k
+  integer*8                      :: key
+
+  integer*8, external            :: configuration_search_key
+
+  key = configuration_search_key(cfgInp,N_int)
+
+  ! Binary search
+  l = 0
+  r = N_configuration+1
+IRP_IF WITHOUT_SHIFTRL
+  j = ishft(r-l,-1)
+IRP_ELSE
+  j = shiftr(r-l,1)
+IRP_ENDIF
+  do while (j>=1)
+    j = j+l
+    if (bit_tmp(j) == key) then
+      ! Find 1st element which matches the key
+      if (j > 1) then
+        do while (j>1 .and. bit_tmp(j-1) == key)
+          j = j-1
+        enddo
+      endif
+      ! Find correct element matching the key
+      do while (bit_tmp(j) == key)
+        found = .True.
+        do k=1,N_int
+          found = found .and. (psi_configuration(k,1,j) == cfgInp(k,1))&
+                        .and. (psi_configuration(k,2,j) == cfgInp(k,2))
+        enddo
+        if (found) then
+          addcfg = j
+          return
+        endif
+        j = j+1
+      enddo
+      addcfg = -1
+      return
+    else if (bit_tmp(j) > key) then
+      r = j
+    else
+      l = j
     endif
+IRP_IF WITHOUT_SHIFTRL
+    j = ishft(r-l,-1)
+IRP_ELSE
+    j = shiftr(r-l,1)
+IRP_ENDIF
   enddo
 
+  addcfg = -1
+  return
+
 end subroutine
 
+!subroutine binary_search_cfg(cfgInp,addcfg)
+!  use bitmasks
+!  implicit none
+!  BEGIN_DOC
+!  ! Documentation for binary_search
+!  ! 
+!  ! Does a binary search to find 
+!  ! the address of a configuration in a list of
+!  ! configurations.
+!  END_DOC
+!  integer(bit_kind), intent(in)  :: cfgInp(N_int,2)
+!  integer          , intent(out) :: addcfg
+!  integer :: i,j,k,r,l
+!  integer*8 :: key, key2
+!  logical :: found
+!  !integer*8, allocatable :: bit_tmp(:)
+!  !integer*8, external            :: configuration_search_key
+!
+!  !allocate(bit_tmp(0:N_configuration))
+!  !bit_tmp(0) = 0
+!  do i=1,N_configuration
+!    !bit_tmp(i) = configuration_search_key(psi_configuration(1,1,i),N_int)
+!    found = .True.
+!    do k=1,N_int
+!      found = found .and. (psi_configuration(k,1,i) == cfgInp(k,1)) &
+!                    .and. (psi_configuration(k,2,i) == cfgInp(k,2))
+!    enddo
+!    if (found) then
+!      addcfg = i
+!      exit
+!    endif
+!  enddo
+!
+!end subroutine
+!
  BEGIN_PROVIDER [ integer, psi_configuration_to_psi_det, (2,N_configuration) ]
 &BEGIN_PROVIDER [ integer, psi_configuration_to_psi_det_data, (N_det) ]
 
diff --git a/src/csf/conversion.irp.f b/src/csf/conversion.irp.f
index fecc6123..bdd8b327 100644
--- a/src/csf/conversion.irp.f
+++ b/src/csf/conversion.irp.f
@@ -1,3 +1,16 @@
+BEGIN_PROVIDER [ double precision, psi_csf_coef, (N_csf, N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Wafe function in CSF basis
+ END_DOC
+
+ double precision, allocatable :: buffer(:,:)
+ allocate ( buffer(N_det, N_states) )
+ buffer(1:N_det, 1:N_states) = psi_coef(1:N_det, 1:N_states)
+ call convertWFfromDETtoCSF(N_states, buffer, psi_csf_coef)
+END_PROVIDER
+
+
 subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out)
   use cfunctions
   use bitmasks
@@ -12,7 +25,7 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out)
   double precision, intent(out)  :: psi_coef_cfg_out(n_CSF,N_st)
   integer*8                      :: Isomo, Idomo, mask
   integer(bit_kind)              :: Ialpha(N_int) ,Ibeta(N_int)
-  integer                        :: rows, cols, i, j, k
+  integer                        :: rows, cols, i, j, k, salpha
   integer                        :: startdet, enddet
   integer                        :: ndetI
   integer                        :: getNSOMO
@@ -26,6 +39,8 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out)
 
   integer s, bfIcfg
   integer countcsf
+  integer MS
+  MS = elec_alpha_num-elec_beta_num
   countcsf = 0
   phasedet = 1.0d0
   do i = 1,N_configuration
@@ -44,12 +59,19 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out)
       enddo
     enddo
 
-    s = 0
+    s = 0 ! s == total number of SOMOs
     do k=1,N_int
       if (psi_configuration(k,1,i) == 0_bit_kind) cycle
       s = s + popcnt(psi_configuration(k,1,i))
     enddo
-    bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1))))
+
+    if(iand(s,1) .EQ. 0) then
+      salpha = (s + MS)/2
+      bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1))))
+    else
+      salpha = (s + MS)/2
+      bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1))))
+    endif
 
     ! perhaps blocking with CFGs of same seniority
     ! can be more efficient
@@ -80,7 +102,7 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det)
   double precision,intent(in)    :: psi_coef_cfg_in(n_CSF,N_st)
   double precision,intent(out)   :: psi_coef_det(N_det,N_st)
   double precision               :: tmp_psi_coef_det(maxDetDimPerBF,N_st)
-  integer                        :: s, bfIcfg
+  integer                        :: s, bfIcfg, salpha
   integer                        :: countcsf
   integer(bit_kind)              :: Ialpha(N_int), Ibeta(N_int)
   integer                        :: rows, cols, i, j, k
@@ -91,6 +113,8 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det)
   double precision,allocatable   :: tempCoeff (:,:)
   double precision               :: phasedet
   integer                        :: idx
+  integer MS
+  MS = elec_alpha_num-elec_beta_num
 
   countcsf = 0
 
@@ -104,7 +128,8 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det)
       if (psi_configuration(k,1,i) == 0_bit_kind) cycle
       s = s + popcnt(psi_configuration(k,1,i))
     enddo
-    bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1))))
+    salpha = (s + MS)/2
+    bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1))))
 
     allocate(tempCoeff(bfIcfg,N_st))
 
diff --git a/src/csf/create_excitations.irp.f b/src/csf/create_excitations.irp.f
index c1560148..1c59d579 100644
--- a/src/csf/create_excitations.irp.f
+++ b/src/csf/create_excitations.irp.f
@@ -226,7 +226,7 @@ subroutine generate_all_singles_cfg(cfg,singles,n_singles,Nint)
   enddo
 end
 
-subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint)
+subroutine generate_all_singles_cfg_with_type(bit_tmp,cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint)
   implicit none
   use bitmasks
   BEGIN_DOC
@@ -238,6 +238,7 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin
   ! ex_type_singles : on output contains type of excitations  :
   !
   END_DOC
+  integer*8, intent(in)          :: bit_tmp(0:N_configuration+1)
   integer, intent(in)            :: Nint
   integer, intent(inout)         :: n_singles
   integer, intent(out)           :: idxs_singles(*)
@@ -248,20 +249,26 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin
   integer(bit_kind)              :: Jdet(Nint,2)
 
   integer           :: i,k, n_singles_ma, i_hole, i_particle, ex_type, addcfg
+  integer           :: ii,kk
   integer(bit_kind) :: single(Nint,2)
   logical           :: i_ok
 
+
   n_singles = 0
   !TODO
   !Make list of Somo  and Domo for holes
   !Make list of Unocc and Somo for particles
-  do i_hole = 1+n_core_orb, n_core_orb + n_act_orb
-    do i_particle = 1+n_core_orb, n_core_orb + n_act_orb
+  !do i_hole = 1+n_core_orb, n_core_orb + n_act_orb
+  do ii = 1, n_act_orb
+    i_hole = list_act(ii)
+    !do i_particle = 1+n_core_orb, n_core_orb + n_act_orb
+    do kk = 1, n_act_orb
+      i_particle = list_act(kk)
       if(i_hole .EQ. i_particle) cycle
       addcfg = -1
       call do_single_excitation_cfg_with_type(cfgInp,single,i_hole,i_particle,ex_type,i_ok)
       if (i_ok) then
-        call binary_search_cfg(single,addcfg)
+        call binary_search_cfg(single,addcfg,bit_tmp)
         if(addcfg .EQ. -1) cycle
         n_singles = n_singles + 1
         do k=1,Nint
diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f
new file mode 100644
index 00000000..7d7ae09b
--- /dev/null
+++ b/src/csf/obtain_I_foralpha.irp.f
@@ -0,0 +1,397 @@
+subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, nconnectedI,ntotalconnectedI)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for obtain_connected_I_foralpha
+  ! This function returns all those selected configurations
+  ! which are connected to the input configuration
+  ! givenI by a single excitation.
+  !
+  ! The type of excitations are ordered as follows:
+  ! Type 1 - SOMO -> SOMO
+  ! Type 2 - DOMO -> VMO
+  ! Type 3 - SOMO -> VMO
+  ! Type 4 - DOMO -> SOMO
+  !
+  ! Order of operators
+  ! \alpha> = a^\dag_p a_q |I> = E_pq |I>
+  END_DOC
+  integer          ,intent(in)             :: idxI
+  integer(bit_kind),intent(in)             :: givenI(N_int,2)
+  integer(bit_kind),intent(out)            :: connectedI(N_int,2,*)
+  integer          ,intent(out)            :: idxs_connectedI(*)
+  integer,intent(out)                      :: nconnectedI
+  integer,intent(out)                      :: ntotalconnectedI
+  integer*8                                :: Idomo
+  integer*8                                :: Isomo
+  integer*8                                :: Jdomo
+  integer*8                                :: Jsomo
+  integer*8                                :: IJsomo
+  integer*8                                :: diffSOMO
+  integer*8                                :: diffDOMO
+  integer*8                                :: xordiffSOMODOMO
+  integer                                  :: ndiffSOMO
+  integer                                  :: ndiffDOMO
+  integer                                  :: nxordiffSOMODOMO
+  integer :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes
+  integer :: listholes(mo_num)
+  integer :: holetype(mo_num)
+  integer :: end_index
+  integer :: Nsomo_I
+
+  ! 
+  ! 2 2 1 1 0 0 : 1 1 0 0 0 0
+  !               0 0 1 1 0 0
+  !
+  ! 2 1 1 1 1 0 : 1 0 0 0 0 0
+  !               0 1 1 1 1 0
+  !xorS           0 1 0 0 1 0 : 2
+  !xorD           0 1 0 0 0 0 : 1 
+  !xorSD          0 0 0 0 1 0 : 1
+  !                           -----
+  !                             4
+  ! 1 1 1 1 1 1 : 0 0 0 0 0 0
+  !               1 1 1 1 1 1
+  !               1 1 0 0 1 1 : 4
+  !               1 1 0 0 0 0 : 2 
+  !               0 0 0 0 1 1 : 2
+  !                           -----
+  !                             8
+  !
+
+  nconnectedI = 0
+  ntotalconnectedI = 0
+  end_index = N_configuration
+
+  ! Since CFGs are sorted wrt to seniority
+  ! we don't have to search the full CFG list
+  Isomo = givenI(1,1)
+  Idomo = givenI(1,2)
+  Nsomo_I = POPCNT(Isomo)
+  end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_I+6,elec_num))-1)
+  if(end_index .LT. 0) end_index= N_configuration
+  !end_index = N_configuration
+  !print *,"Start and End = ",idxI, end_index
+
+
+  p = 0
+  q = 0
+  do i=idxI,end_index
+     !if(.True.) then
+     !  nconnectedI += 1
+     !  connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+     !  idxs_connectedI(nconnectedI)=i
+     !  cycle
+     !endif
+     Isomo = givenI(1,1)
+     Idomo = givenI(1,2)
+     Jsomo = psi_configuration(1,1,i)
+     Jdomo = psi_configuration(1,2,i)
+     diffSOMO = IEOR(Isomo,Jsomo)
+     ndiffSOMO = POPCNT(diffSOMO)
+     diffDOMO = IEOR(Idomo,Jdomo)
+     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+     ndiffDOMO = POPCNT(diffDOMO)
+     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+     nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+     if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+       !-------
+       ! MONO |
+       !-------
+        nconnectedI += 1
+        connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+        idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((nxordiffSOMODOMO .EQ. 8) .AND. ndiffSOMO .EQ. 4) then
+       !----------------------------
+       ! DOMO -> VMO + DOMO -> VMO |
+       !----------------------------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((nxordiffSOMODOMO .EQ. 6) .AND. ndiffSOMO .EQ. 2) then
+       !----------------------------
+       ! DOUBLE
+       !----------------------------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((nxordiffSOMODOMO .EQ. 2) .AND. ndiffSOMO .EQ. 3) then
+       !-----------------
+       ! DOUBLE
+       !-----------------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 0) then
+       !-----------------
+       ! DOUBLE
+       !-----------------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)=i
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1))
+     else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
+       !--------
+       ! I = I |
+       !--------
+       nconnectedI += 1
+       connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+       idxs_connectedI(nconnectedI)= i
+        ! find out all pq holes possible
+        nholes = 0
+        ! holes in SOMO
+        Isomo = psi_configuration(1,1,i)
+        Idomo = psi_configuration(1,2,i)
+        do iii = 1,n_act_orb
+          ii = list_act(iii)
+           if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 1
+           endif
+        end do
+        ! holes in DOMO
+        do iii = 1,n_act_orb
+          ii = list_act(iii)
+           if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 2
+           endif
+        end do
+        ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)*nholes)
+     endif
+  end do
+
+end subroutine obtain_connected_J_givenI
+
+subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for obtain_connected_I_foralpha
+  ! This function returns all those selected configurations
+  ! which are connected to the input configuration
+  ! Ialpha by a single excitation.
+  !
+  ! The type of excitations are ordered as follows:
+  ! Type 1 - SOMO -> SOMO
+  ! Type 2 - DOMO -> VMO
+  ! Type 3 - SOMO -> VMO
+  ! Type 4 - DOMO -> SOMO
+  !
+  ! Order of operators
+  ! \alpha> = a^\dag_p a_q |I> = E_pq |I>
+  END_DOC
+  integer          ,intent(in)             :: idxI
+  integer(bit_kind),intent(in)             :: Ialpha(N_int,2)
+  integer(bit_kind),intent(out)            :: connectedI(N_int,2,*)
+  integer          ,intent(out)            :: idxs_connectedI(*)
+  integer,intent(out)                      :: nconnectedI
+  integer,intent(out)                      :: excitationIds(2,*)
+  integer,intent(out)                      :: excitationTypes(*)
+  real*8 ,intent(out)                      :: diagfactors(*)
+  integer*8                                :: Idomo
+  integer*8                                :: Isomo
+  integer*8                                :: Jdomo
+  integer*8                                :: Jsomo
+  integer*8                                :: IJsomo
+  integer*8                                :: diffSOMO
+  integer*8                                :: diffDOMO
+  integer*8                                :: xordiffSOMODOMO
+  integer                                  :: ndiffSOMO
+  integer                                  :: ndiffDOMO
+  integer                        :: nxordiffSOMODOMO
+  integer                        :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes
+  integer                        :: listholes(mo_num)
+  integer                        :: holetype(mo_num)
+  integer                        :: end_index
+  integer                        :: Nsomo_alpha
+  integer*8                      :: MS
+  MS = elec_alpha_num-elec_beta_num
+
+  nconnectedI = 0
+  end_index = N_configuration
+
+  ! Since CFGs are sorted wrt to seniority
+  ! we don't have to search the full CFG list
+  Isomo = Ialpha(1,1)
+  Idomo = Ialpha(1,2)
+  Nsomo_alpha = POPCNT(Isomo)
+  end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1)
+  if(end_index .LT. 0) end_index= N_configuration
+  end_index = N_configuration
+
+
+  p = 0
+  q = 0
+  if (N_int > 1) stop 'obtain_connected_i_foralpha : N_int > 1'
+  do i=idxI,end_index
+     Isomo = Ialpha(1,1)
+     Idomo = Ialpha(1,2)
+     Jsomo = psi_configuration(1,1,i)
+     Jdomo = psi_configuration(1,2,i)
+     ! Check for Minimal alpha electrons (MS)
+     if(POPCNT(Isomo).lt.MS)then
+       cycle
+     endif
+     diffSOMO = IEOR(Isomo,Jsomo)
+     ndiffSOMO = POPCNT(diffSOMO)
+     !if(idxI.eq.1)then
+     !  print *," \t idxI=",i," diffS=",ndiffSOMO," popJs=", POPCNT(Jsomo)," popIs=",POPCNT(Isomo)
+     !endif
+     diffDOMO = IEOR(Idomo,Jdomo)
+     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+     ndiffDOMO = POPCNT(diffDOMO)
+     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+     nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+     if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+        select case(ndiffDOMO)
+        case (0)
+           ! SOMO -> VMO
+           !print *,"obt SOMO -> VMO"
+           extyp = 3
+           IJsomo = IEOR(Isomo, Jsomo)
+!IRP_IF WITHOUT_TRAILZ
+!           p = (popcnt(ieor( IAND(Isomo,IJsomo) , IAND(Isomo,IJsomo) -1))-1) + 1
+!IRP_ELSE
+           p = TRAILZ(IAND(Isomo,IJsomo)) + 1
+!IRP_ENDIF
+           IJsomo = IBCLR(IJsomo,p-1)
+!IRP_IF WITHOUT_TRAILZ
+!           q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1
+!IRP_ELSE
+           q = TRAILZ(IJsomo) + 1
+!IRP_ENDIF
+        case (1)
+           ! DOMO -> VMO
+           ! or
+           ! SOMO -> SOMO
+           nsomoJ = POPCNT(Jsomo)
+           nsomoalpha = POPCNT(Isomo)
+           if(nsomoJ .GT. nsomoalpha) then
+              ! DOMO -> VMO
+              !print *,"obt DOMO -> VMO"
+              extyp = 2
+!IRP_IF WITHOUT_TRAILZ
+!              p = (popcnt(ieor( IEOR(Idomo,Jdomo),IEOR(Idomo,Jdomo) -1))-1) + 1
+!IRP_ELSE
+              p = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+!IRP_ENDIF
+              Isomo = IEOR(Isomo, Jsomo)
+              Isomo = IBCLR(Isomo,p-1)
+!IRP_IF WITHOUT_TRAILZ
+!              q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1
+!IRP_ELSE
+              q = TRAILZ(Isomo) + 1
+!IRP_ENDIF
+           else
+              ! SOMO -> SOMO
+              !print *,"obt SOMO -> SOMO"
+              extyp = 1
+!IRP_IF WITHOUT_TRAILZ
+!              q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1
+!IRP_ELSE
+              q = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+!IRP_ENDIF
+              Isomo = IEOR(Isomo, Jsomo)
+              Isomo = IBCLR(Isomo,q-1)
+!IRP_IF WITHOUT_TRAILZ
+!              p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1
+!IRP_ELSE
+              p = TRAILZ(Isomo) + 1
+!IRP_ENDIF
+              ! Check for Minimal alpha electrons (MS)
+              !if(POPCNT(Isomo).lt.MS)then
+              !  cycle
+              !endif
+           end if
+        case (2)
+           ! DOMO -> SOMO
+           !print *,"obt DOMO -> SOMO"
+           extyp = 4
+           IJsomo = IEOR(Isomo, Jsomo)
+!IRP_IF WITHOUT_TRAILZ
+!           p = (popcnt(ieor( IAND(Jsomo,IJsomo), IAND(Jsomo,IJsomo)-1))-1) + 1
+!IRP_ELSE
+           p = TRAILZ(IAND(Jsomo,IJsomo)) + 1
+!IRP_ENDIF
+           IJsomo = IBCLR(IJsomo,p-1)
+!IRP_IF WITHOUT_TRAILZ
+!           q = (popcnt(ieor( IJsomo , IJsomo -1))-1) + 1
+!IRP_ELSE
+           q = TRAILZ(IJsomo) + 1
+!IRP_ENDIF
+        case default
+           print *,"something went wront in get connectedI"
+        end select
+        starti = psi_config_data(i,1)
+        endi   = psi_config_data(i,2)
+        nconnectedI += 1
+        do k=1,N_int
+          connectedI(k,1,nconnectedI) = psi_configuration(k,1,i)
+          connectedI(k,2,nconnectedI) = psi_configuration(k,2,i)
+        enddo
+        idxs_connectedI(nconnectedI)=starti
+        excitationIds(1,nconnectedI)=p
+        excitationIds(2,nconnectedI)=q
+        excitationTypes(nconnectedI) = extyp
+        diagfactors(nconnectedI) = 1.0d0
+     else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
+        ! find out all pq holes possible
+        nholes = 0
+        ! holes in SOMO
+        Isomo = psi_configuration(1,1,i)
+        Idomo = psi_configuration(1,2,i)
+        do iii = 1,n_act_orb
+          ii = list_act(iii)
+           if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 1
+           endif
+        end do
+        ! holes in DOMO
+        do iii = 1,n_act_orb
+          ii = list_act(iii)
+           if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 2
+           endif
+        end do
+
+        do k=1,nholes
+           p = listholes(k)
+           q = p
+           extyp = 1
+           if(holetype(k) .EQ. 1) then
+              starti = psi_config_data(i,1)
+              endi   = psi_config_data(i,2)
+              nconnectedI += 1
+              connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+              idxs_connectedI(nconnectedI)=starti
+              excitationIds(1,nconnectedI)=p
+              excitationIds(2,nconnectedI)=q
+              excitationTypes(nconnectedI) = extyp
+              diagfactors(nconnectedI) = 1.0d0
+           else
+              starti = psi_config_data(i,1)
+              endi   = psi_config_data(i,2)
+              nconnectedI += 1
+              connectedI(:,:,nconnectedI) = psi_configuration(:,:,i)
+              idxs_connectedI(nconnectedI)=starti
+              excitationIds(1,nconnectedI)=p
+              excitationIds(2,nconnectedI)=q
+              excitationTypes(nconnectedI) = extyp
+              diagfactors(nconnectedI) = 2.0d0
+           endif
+        enddo
+     endif
+  end do
+
+end subroutine obtain_connected_I_foralpha
diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f
index 85ed5f84..833fa7b0 100644
--- a/src/csf/sigma_vector.irp.f
+++ b/src/csf/sigma_vector.irp.f
@@ -1,10 +1,13 @@
  real*8 function logabsgamma(x)
  implicit none
  real*8, intent(in) :: x
- logabsgamma = log(abs(gamma(x)))
+ logabsgamma = 1.d32  ! Avoid floating point exception
+ if (x>0.d0) then
+   logabsgamma = log(abs(gamma(x)))
+ endif
  end function logabsgamma
-  
   BEGIN_PROVIDER [ integer, NSOMOMax]
+ &BEGIN_PROVIDER [ integer, NSOMOMin]
  &BEGIN_PROVIDER [ integer, NCSFMax]
  &BEGIN_PROVIDER [ integer*8, NMO]
  &BEGIN_PROVIDER [ integer, NBFMax]
@@ -16,11 +19,19 @@
   ! The maximum number of SOMOs for the current calculation.
   ! required for the calculation of prototype arrays.
   END_DOC
+  integer MS, ialpha
+  MS = elec_alpha_num-elec_beta_num
   NSOMOMax = min(elec_num, cfg_nsomo_max + 2)
+  if(AND(cfg_nsomo_min , 1) .eq. 0)then
+    NSOMOMin = max(0,cfg_nsomo_min-2)
+  else
+    NSOMOMin = max(1,cfg_nsomo_min-2)
+  endif
   ! Note that here we need NSOMOMax + 2 sizes
-  NCSFMax = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)-binom(NSOMOMax,((NSOMOMax+1)/2)+1)))) ! TODO: NCSFs for MS=0
+  ialpha = (NSOMOMax + MS)/2
+  NCSFMax = max(1,nint((binom(NSOMOMax,ialpha)-binom(NSOMOMax,ialpha+1)))) ! TODO: NCSFs for MS=0 (CHECK)
   NBFMax = NCSFMax
-  maxDetDimPerBF = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2))))
+  maxDetDimPerBF = max(1,nint((binom(NSOMOMax,ialpha))))
   NMO = n_act_orb
   integer i,j,k,l
   integer startdet,enddet
@@ -29,63 +40,113 @@
   integer dimcsfpercfg
   integer detDimperBF
   real*8 :: coeff, binom1, binom2
-  integer MS
   integer ncfgpersomo
   real*8, external :: logabsgamma
   detDimperBF = 0
-  MS = elec_alpha_num-elec_beta_num
   ! number of cfgs = number of dets for 0 somos
+  n_CSF = cfg_seniority_index(NSOMOMin)-1
+  ncfgprev = cfg_seniority_index(NSOMOMin)
+  !do i = 0-iand(MS,1)+2, NSOMOMax,2
+  !!print *," i=",0," dimcsf=",1," ncfg=",ncfgprev, " senor=",cfg_seniority_index(0)
+  !!do i = NSOMOMin+2, NSOMOMax,2
+  !!   if(cfg_seniority_index(i) .EQ. -1)then
+  !!      ncfgpersomo = N_configuration + 1
+  !!   else
+  !!      ncfgpersomo = cfg_seniority_index(i)
+  !!   endif
+  !!ncfg = ncfgpersomo - ncfgprev
+  !!!detDimperBF = max(1,nint((binom(i,(i+1)/2))))
+  !!!dimcsfpercfg = max(1,nint((binom(i-2,(i-2+1)/2)-binom(i-2,((i-2+1)/2)+1))))
+  !!n_CSF += ncfg * dimcsfpercfg
+  !!!if(cfg_seniority_index(i+2) == -1) EXIT
+  !!!if(detDimperBF > maxDetDimPerBF) maxDetDimPerBF = detDimperBF
+  !!ncfgprev = cfg_seniority_index(i)
+  !!print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " senor=",cfg_seniority_index(i)
+  !!enddo
+  !!print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration
   n_CSF = 0
-  ncfgprev = cfg_seniority_index(0)
-  ncfgpersomo = ncfgprev
-  do i = iand(MS,1), NSOMOMax-2,2
-    if(cfg_seniority_index(i) .EQ. -1) then
-      cycle
-    endif
-    if(cfg_seniority_index(i+2) .EQ. -1) then
-      ncfgpersomo = N_configuration + 1
+  !ncfgprev = cfg_seniority_index(0)
+  !ncfgpersomo = ncfgprev
+  !do i = iand(MS,1), NSOMOMax-2,2
+  !  if(cfg_seniority_index(i) .EQ. -1) then
+  !    cycle
+  !  endif
+  !  if(cfg_seniority_index(i+2) .EQ. -1) then
+  !    ncfgpersomo = N_configuration + 1
+  !  else
+  !    if(cfg_seniority_index(i+2) > ncfgpersomo) then
+  !        ncfgpersomo = cfg_seniority_index(i+2)
+  !    else
+  !      k = 0
+  !      do while(cfg_seniority_index(i+2+k) < ncfgpersomo)
+  !        k = k + 2
+  !        ncfgpersomo = cfg_seniority_index(i+2+k)
+  !      enddo
+  !    endif
+  !  endif
+  !  ncfg = ncfgpersomo - ncfgprev
+  !  if(i .EQ. 0 .OR. i .EQ. 1) then
+  !    dimcsfpercfg = 1
+  !  elseif( i .EQ. 3) then
+  !    dimcsfpercfg = 2
+  !  else
+  !    if(iand(MS,1) .EQ. 0) then
+  !      dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1))))
+  !    else
+  !      dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2))))
+  !    endif
+  !  endif
+  !  n_CSF += ncfg * dimcsfpercfg
+  !  print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " senor=",cfg_seniority_index(i)
+  !  if(cfg_seniority_index(i+2) > ncfgprev) then
+  !    ncfgprev = cfg_seniority_index(i+2)
+  !  else
+  !    k = 0
+  !    do while(cfg_seniority_index(i+2+k) < ncfgprev)
+  !      k = k + 2
+  !      ncfgprev = cfg_seniority_index(i+2+k)
+  !    enddo
+  !  endif
+  !enddo
+  n_CSF = 0
+  !print *," -9(((((((((((((( NSOMOMin=",NSOMOMin
+  ncfgprev = cfg_seniority_index(NSOMOMin) ! can be -1 
+  if(ncfgprev.eq.-1)then
+    ncfgprev=1
+  endif
+  do i=NSOMOMin,NSOMOMax+2,2
+    !k=0
+    !do while((cfg_seniority_index(i+2+k) .eq. -1) .and. (k.le.NSOMOMax))
+    !  k=k+2
+    !end do
+    if(cfg_seniority_index(i).eq.-1)cycle
+    if(cfg_seniority_index(i+2).eq.-1)then
+      ncfg = N_configuration - ncfgprev + 1
+      if(ncfg .eq. 0)then
+        ncfg=1
+      endif
     else
-      if(cfg_seniority_index(i+2) > ncfgpersomo) then
-          ncfgpersomo = cfg_seniority_index(i+2)
+      ncfg = cfg_seniority_index(i+2) - ncfgprev
+    endif
+    if(i .EQ. 0 .OR. i .EQ. 1) then
+      dimcsfpercfg = 1
+    elseif( i .EQ. 3) then
+      dimcsfpercfg = 2
+    else
+      if(iand(MS,1) .EQ. 0) then
+        ialpha = (i + MS)/2
+        dimcsfpercfg = max(1,nint((binom(i,ialpha)-binom(i,ialpha+1))))
       else
-        k = 0
-        do while(cfg_seniority_index(i+2+k) < ncfgpersomo)
-          k = k + 2
-          ncfgpersomo = cfg_seniority_index(i+2+k)
-        enddo
+        ialpha = (i + MS)/2
+        dimcsfpercfg = max(1,nint((binom(i,ialpha)-binom(i,ialpha+1))))
       endif
     endif
-    ncfg = ncfgpersomo - ncfgprev
-    if(iand(MS,1) .EQ. 0) then
-      !dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1))))
-      binom1 = dexp(logabsgamma(1.0d0*(i+1))                            &
-                  - logabsgamma(1.0d0*((i/2)+1))                        &
-                  - logabsgamma(1.0d0*(i-((i/2))+1)));
-      binom2 = dexp(logabsgamma(1.0d0*(i+1))                            &
-                  - logabsgamma(1.0d0*(((i/2)+1)+1))                    &
-                  - logabsgamma(1.0d0*(i-((i/2)+1)+1)));
-      dimcsfpercfg = max(1,nint(binom1 - binom2))
-    else
-      !dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2))))
-      binom1 = dexp(logabsgamma(1.0d0*(i+1))                            &
-                  - logabsgamma(1.0d0*(((i+1)/2)+1))                    &
-                  - logabsgamma(1.0d0*(i-(((i+1)/2))+1)));
-      binom2 = dexp(logabsgamma(1.0d0*(i+1))                            &
-                  - logabsgamma(1.0d0*((((i+3)/2)+1)+1))                &
-                  - logabsgamma(1.0d0*(i-(((i+3)/2)+1)+1)));
-      dimcsfpercfg = max(1,nint(binom1 - binom2))
-    endif
-    n_CSF += ncfg * dimcsfpercfg
-    if(cfg_seniority_index(i+2) > ncfgprev) then
-      ncfgprev = cfg_seniority_index(i+2)
-    else
-      k = 0
-      do while(cfg_seniority_index(i+2+k) < ncfgprev)
-        k = k + 2
-        ncfgprev = cfg_seniority_index(i+2+k)
-      enddo
-    endif
-  enddo
+    n_CSF += ncfg*dimcsfpercfg
+    !print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " ncfgprev=",ncfgprev, " senor=",cfg_seniority_index(i)
+    ncfgprev = cfg_seniority_index(i+2)
+  end do
+  !print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration
+
 END_PROVIDER
 
 
@@ -106,7 +167,7 @@ subroutine get_phase_qp_to_cfg(Ialpha, Ibeta, phaseout)
   real*8,intent(out)             :: phaseout
   integer(bit_kind)              :: mask, deta(N_int), detb(N_int)
   integer                        :: nbetas
-  integer                        :: k
+  integer                        :: count, k
 
   ! Initialize deta and detb
   deta = Ialpha
@@ -146,7 +207,117 @@ end subroutine get_phase_qp_to_cfg
 
 
 
-  BEGIN_PROVIDER [ integer, AIJpqMatrixDimsList, (0:NSOMOMax,0:NSOMOMax,4,NSOMOMax,NSOMOMax,2)]
+  BEGIN_PROVIDER [ real*8, DetToCSFTransformationMatrix, (0:NSOMOMax,NBFMax,maxDetDimPerBF)]
+ &BEGIN_PROVIDER [ real*8, psi_coef_config,  (n_CSF,1)]
+ &BEGIN_PROVIDER [ integer, psi_config_data, (N_configuration,2)]
+ &BEGIN_PROVIDER [ integer, psi_csf_to_config_data, (n_CSF)]
+  use cfunctions
+  implicit none
+  BEGIN_DOC
+  ! Documentation for DetToCSFTransformationMatrix
+  ! Provides the matrix of transformatons for the
+  ! conversion between determinant to CSF basis (in BFs)
+  END_DOC
+  integer*8 :: Isomo, Idomo
+  integer(bit_kind) :: Ialpha(N_int),Ibeta(N_int)
+  integer   :: rows, cols, i, j, k
+  integer   :: startdet, enddet, idx
+  integer*8 MS, salpha
+  integer ndetI
+  integer :: getNSOMO
+  real*8,dimension(:,:),allocatable    :: tempBuffer
+  real*8,dimension(:),allocatable    :: tempCoeff
+  real*8  :: norm_det1, phasedet
+
+  integer :: nt
+
+
+  norm_det1 = 0.d0
+  MS = elec_alpha_num - elec_beta_num
+  ! initialization
+  psi_coef_config = 0.d0
+  DetToCSFTransformationMatrix(0,:,:) = 1.d0
+  do i = 2-iand(MS,1_8), NSOMOMax,2
+    Isomo = IBSET(0_8, i) - 1_8
+    ! rows = Ncsfs
+    ! cols = Ndets
+    salpha = (i+MS)/2
+    bfIcfg = max(1,nint((binom(i,salpha)-binom(i,salpha+1))))
+    ndetI =  max(1,nint((binom(i,salpha))))
+    !bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1))))
+    !ndetI =  max(1,nint((binom(i,(i+1)/2))))
+
+    allocate(tempBuffer(bfIcfg,ndetI))
+    call getCSFtoDETTransformationMatrix(Isomo, MS, NBFMax, maxDetDimPerBF, tempBuffer)
+    DetToCSFTransformationMatrix(i,1:bfIcfg,1:ndetI) =  tempBuffer(1:bfIcfg,1:ndetI)
+    deallocate(tempBuffer)
+  enddo
+
+  integer s, bfIcfg
+  integer countcsf
+  countcsf = 0
+  integer countdet
+  countdet = 0
+  integer istate
+  istate = 1
+  psi_csf_to_config_data(1) = 1
+  phasedet = 1.0d0
+  call omp_set_max_active_levels(1)
+  !$OMP PARALLEL
+  !$OMP MASTER
+  do i = 1,N_configuration
+      startdet = psi_configuration_to_psi_det(1,i)
+      enddet = psi_configuration_to_psi_det(2,i)
+      ndetI = enddet-startdet+1
+
+      allocate(tempCoeff(ndetI))
+      countdet = 1
+      do j = startdet, enddet
+         idx = psi_configuration_to_psi_det_data(j)
+         Ialpha(:) = psi_det(:,1,idx)
+         Ibeta(:)  = psi_det(:,2,idx)
+         call get_phase_qp_to_cfg(Ialpha, Ibeta, phasedet)
+         tempCoeff(countdet) = psi_coef(idx, istate)*phasedet
+         norm_det1 += tempCoeff(countdet)*tempCoeff(countdet)
+         countdet += 1
+      enddo
+
+       !print *,"dimcoef=",bfIcfg,norm_det1
+       !call printMatrix(tempCoeff,ndetI,1)
+
+      s = 0
+      do k=1,N_int
+        if (psi_configuration(k,1,i) == 0_bit_kind) cycle
+        s = s + popcnt(psi_configuration(k,1,i))
+      enddo
+      salpha = (s+MS)/2
+      bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1))))
+      !bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1))))
+
+      ! perhaps blocking with CFGs of same seniority
+      ! can be more efficient
+      allocate(tempBuffer(bfIcfg,ndetI))
+      tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI)
+
+       call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1,1), size(psi_coef_config,1))
+       !call dgemv('N', NBFMax, maxDetDimPerBF, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, 1, 0.d0, psi_coef_config(countcsf), 1)
+
+      deallocate(tempCoeff)
+      deallocate(tempBuffer)
+      psi_config_data(i,1) = countcsf + 1
+      do k=1,bfIcfg
+        psi_csf_to_config_data(countcsf+k) = i
+      enddo
+      countcsf += bfIcfg
+      psi_config_data(i,2) = countcsf
+  enddo
+  !$OMP END MASTER
+  !$OMP END PARALLEL
+  call omp_set_max_active_levels(4)
+
+  END_PROVIDER
+
+  BEGIN_PROVIDER [ integer, AIJpqMatrixDimsList, (NSOMOMin:NSOMOMax,4,NSOMOMax+1,NSOMOMax+1,2)]
  &BEGIN_PROVIDER [ integer, rowsmax]
  &BEGIN_PROVIDER [ integer, colsmax]
   use cfunctions
@@ -165,14 +336,18 @@ end subroutine get_phase_qp_to_cfg
   cols = -1
   integer*8 MS
   MS = elec_alpha_num-elec_beta_num
-  integer nsomomin
   nsomomin = elec_alpha_num-elec_beta_num
   rowsmax = 0
   colsmax = 0
+  !print *,"NSOMOMax = ",NSOMOMax
+  !print *,"NSOMOMin = ",NSOMOMin
   !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2))
   ! Type
   ! 1. SOMO -> SOMO
-  do i = 2-iand(nsomomin,1), NSOMOMax, 2
+  !print *,"Doing SOMO->SOMO"
+  AIJpqMatrixDimsList(NSOMOMin,1,1,1,1) = 1
+  AIJpqMatrixDimsList(NSOMOMin,1,1,1,2) = 1
+  do i = NSOMOMin, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
      do j = i-2,i-2, 2
         Jsomo = ISHFT(1_8,j)-1
@@ -199,6 +374,7 @@ end subroutine get_phase_qp_to_cfg
                    MS,                       &
                    rows,                     &
                    cols)
+              !print *, "SOMO->SOMO \t",i,j,k,l,">",Isomo,Jsomo,">",rows, cols
               if(rowsmax .LT. rows) then
                  rowsmax = rows
               end if
@@ -206,15 +382,18 @@ end subroutine get_phase_qp_to_cfg
                  colsmax = cols
               end if
               ! i -> j
-              AIJpqMatrixDimsList(nsomoi,nsomoj,1,k,l,1) = rows
-              AIJpqMatrixDimsList(nsomoi,nsomoj,1,k,l,2) = cols
+              AIJpqMatrixDimsList(nsomoi,1,k,l,1) = rows
+              AIJpqMatrixDimsList(nsomoi,1,k,l,2) = cols
            end do
         end do
      end do
   end do
   ! Type
   ! 2. DOMO -> VMO
-  do i = 0+iand(nsomomin,1), NSOMOMax, 2
+  !print *,"Doing DOMO->VMO"
+  AIJpqMatrixDimsList(NSOMOMin,2,1,1,1) = 1
+  AIJpqMatrixDimsList(NSOMOMin,2,1,1,2) = 1
+  do i = NSOMOMin, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
      tmpsomo = ISHFT(1_8,i+2)-1
      do j = i+2,i+2, 2
@@ -247,6 +426,7 @@ end subroutine get_phase_qp_to_cfg
                    MS,                       &
                    rows,                     &
                    cols)
+              !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols
               if(rowsmax .LT. rows) then
                  rowsmax = rows
               end if
@@ -254,8 +434,8 @@ end subroutine get_phase_qp_to_cfg
                  colsmax = cols
               end if
               ! i -> j
-              AIJpqMatrixDimsList(nsomoi,nsomoj,2,k,l,1) = rows
-              AIJpqMatrixDimsList(nsomoi,nsomoj,2,k,l,2) = cols
+              AIJpqMatrixDimsList(nsomoi,2,k,l,1) = rows
+              AIJpqMatrixDimsList(nsomoi,2,k,l,2) = cols
            end do
         end do
      end do
@@ -263,15 +443,17 @@ end subroutine get_phase_qp_to_cfg
   ! Type
   ! 3. SOMO -> VMO
   !print *,"Doing SOMO->VMO"
-  do i = 2-iand(nsomomin,1), NSOMOMax, 2
+  AIJpqMatrixDimsList(NSOMOMin,3,1,1,1) = 1
+  AIJpqMatrixDimsList(NSOMOMin,3,1,1,2) = 1
+  do i = NSOMOMin, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
      do j = i,i, 2
         Jsomo = ISHFT(1_8,j)-1
         if(j .GT. NSOMOMax .OR. j .LE. 0) then
            cycle
         end if
-        do k = 1,i
-           do l = 1,i
+        do k = 1,i+1
+           do l = 1,i+1
               if(k .NE. l) then
                  Isomo = ISHFT(1_8,i+1)-1
                  Isomo = IBCLR(Isomo,l-1)
@@ -286,6 +468,7 @@ end subroutine get_phase_qp_to_cfg
                    MS,                       &
                    rows,                     &
                    cols)
+              !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols
               if(rowsmax .LT. rows) then
                  rowsmax = rows
               end if
@@ -293,25 +476,27 @@ end subroutine get_phase_qp_to_cfg
                  colsmax = cols
               end if
               ! i -> j
-              AIJpqMatrixDimsList(i,j,3,k,l,1) = rows
-              AIJpqMatrixDimsList(i,j,3,k,l,2) = cols
+              AIJpqMatrixDimsList(i,3,k,l,1) = rows
+              AIJpqMatrixDimsList(i,3,k,l,2) = cols
            end do
         end do
      end do
   end do
   ! Type
-  ! 4. DOMO -> VMO
+  ! 4. DOMO -> SOMO
   !print *,"Doing DOMO->SOMO"
-  do i = 2-iand(nsomomin,1), NSOMOMax, 2
+  AIJpqMatrixDimsList(NSOMOMin,4,1,1,1) = 1
+  AIJpqMatrixDimsList(NSOMOMin,4,1,1,2) = 1
+  do i = NSOMOMin, NSOMOMax, 2
      do j = i,i, 2
         if(j .GT. NSOMOMax .OR. j .LE. 0) then
            cycle
         end if
-        do k = 1,i
-           do l = 1,i
+        do k = 1,i+1
+           do l = 1,i+1
               if(k .NE. l) then
                  Isomo = ISHFT(1_8,i+1)-1
-                 Isomo = IBCLR(Isomo,k+1-1)
+                 Isomo = IBCLR(Isomo,k-1)
                  Jsomo = ISHFT(1_8,j+1)-1
                  Jsomo = IBCLR(Jsomo,l-1)
               else
@@ -323,6 +508,7 @@ end subroutine get_phase_qp_to_cfg
                    MS,                       &
                    rows,                     &
                    cols)
+              !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols
               if(rowsmax .LT. rows) then
                  rowsmax = rows
               end if
@@ -330,15 +516,16 @@ end subroutine get_phase_qp_to_cfg
                  colsmax = cols
               end if
               ! i -> j
-              AIJpqMatrixDimsList(i,j,4,k,l,1) = rows
-              AIJpqMatrixDimsList(i,j,4,k,l,2) = cols
+              AIJpqMatrixDimsList(i,4,k,l,1) = rows
+              AIJpqMatrixDimsList(i,4,k,l,2) = cols
            end do
         end do
      end do
   end do
+  !print *,"Rowsmax=",rowsmax," Colsmax=",colsmax
   END_PROVIDER
 
-  BEGIN_PROVIDER [ real*8, AIJpqContainer, (0:NSOMOMax,0:NSOMOMax,4,NSOMOMax,NSOMOMax,NBFMax,NBFMax)]
+  BEGIN_PROVIDER [ real*8, AIJpqContainer, (NBFMax,NBFmax,NSOMOMax+1,NSOMOMax+1,4,NSOMOMin:NSOMOMax)]
   use cfunctions
   implicit none
   BEGIN_DOC
@@ -368,70 +555,79 @@ end subroutine get_phase_qp_to_cfg
   rows = -1
   cols = -1
   integer*8 MS
-  MS = 0
-  touch AIJpqMatrixDimsList
+  MS = elec_alpha_num-elec_beta_num
   real*8,dimension(:,:),allocatable :: meMatrix
   integer maxdim
-  !maxdim = max(rowsmax,colsmax)
-  ! allocate matrix
-  !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2))
+
   ! Type
   ! 1. SOMO -> SOMO
-  do i = 2, NSOMOMax, 2
+  AIJpqContainer = 0.d0
+  AIJpqContainer(1,1,1,1,1,NSOMOMin) = 1.0d0
+  integer :: rows_old, cols_old
+  rows_old = -1
+  cols_old = -1
+  allocate(meMatrix(1,1))
+  do i = NSOMOMin+2, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
-     do j = i-2,i-2, 2
-        if(j .GT. NSOMOMax .OR. j .LT. 0) cycle
-        do k = 1,i
-           do l = 1,i
+     j=i-2
+     if(j .GT. NSOMOMax .OR. j .LT. 0) cycle
+     nsomoi = i
+     do k = 1,i
+        orbp = k
+        do l = 1,i
 
-              ! Define Jsomo
-              if(k .NE. l) then
-                 Jsomo = IBCLR(Isomo, k-1)
-                 Jsomo = IBCLR(Jsomo, l-1)
-                 nsomoi = i
-                 nsomoj = j
-              else
-                 Isomo = ISHFT(1_8,i)-1
-                 Jsomo = ISHFT(1_8,i)-1
-                 nsomoi = i
-                 nsomoj = i
-              endif
+           ! Define Jsomo
+           if(k .NE. l) then
+              Jsomo = IBCLR(Isomo, k-1)
+              Jsomo = IBCLR(Jsomo, l-1)
+              nsomoj = j
+           else
+              Isomo = ISHFT(1_8,i)-1
+              Jsomo = ISHFT(1_8,i)-1
+              nsomoj = i
+           endif
 
-              AIJpqContainer(nsomoi,nsomoj,1,k,l,:,:) = 0.0d0
-              call getApqIJMatrixDims(Isomo,           &
-                   Jsomo, &
-                   MS,                       &
-                   rows,                     &
-                   cols)
+           call getApqIJMatrixDims(Isomo,           &
+                Jsomo, &
+                MS,                       &
+                rows,                     &
+                cols)
 
-              orbp = k
-              orbq = l
-              allocate(meMatrix(rows,cols))
-              meMatrix = 0.0d0
-              ! fill matrix
-              call getApqIJMatrixDriver(Isomo,           &
-                   Jsomo, &
-                   orbp,                     &
-                   orbq,                     &
-                   MS,                       &
-                   NMO,                      &
-                   meMatrix,                 &
-                   rows,                     &
-                   cols)
-              ! i -> j
-             do ri = 1,rows
-                 do ci = 1,cols
-                    AIJpqContainer(nsomoi,nsomoj,1,k,l,ri,ci) = meMatrix(ri, ci)
-                 end do
+           orbq = l
+           if ((rows /= rows_old).or.(cols /= cols_old)) then
+             deallocate(meMatrix)
+             allocate(meMatrix(rows,cols))
+             rows_old = rows
+             cols_old = cols
+           endif
+           meMatrix = 0.0d0
+           ! fill matrix
+           call getApqIJMatrixDriver(Isomo,           &
+                Jsomo, &
+                orbp,                     &
+                orbq,                     &
+                MS,                       &
+                NMO,                      &
+                meMatrix,                 &
+                rows,                     &
+                cols)
+           ! i -> j
+          do ri = 1,rows
+              do ci = 1,cols
+                 AIJpqContainer(ri,ci,k,l,1,nsomoi) = meMatrix(ri, ci)
               end do
-              deallocate(meMatrix)
            end do
         end do
      end do
   end do
+  deallocate(meMatrix)
+
   ! Type
   ! 2. DOMO -> VMO
-  do i = 0, NSOMOMax, 2
+  !print *,"Doing DOMO -> VMO"
+  !AIJpqContainer(NSOMOMin,2,1,1,1,1) = 1.0d0
+  AIJpqContainer(1,1,1,1,2,NSOMOMin) = 1.0d0
+  do i = NSOMOMin, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
      tmpsomo = ISHFT(1_8,i+2)-1
      do j = i+2,i+2, 2
@@ -456,7 +652,12 @@ end subroutine get_phase_qp_to_cfg
                  nsomoj = j
               endif
 
-              AIJpqContainer(nsomoi,nsomoj,2,k,l,:,:) = 0.0d0
+              !print *,"k,l=",k,l
+              !call debug_spindet(Jsomo,1)
+              !call debug_spindet(Isomo,1)
+
+              !AIJpqContainer(nsomoi,2,k,l,:,:) = 0.0d0
+              AIJpqContainer(:,:,k,l,2,nsomoi) = 0.0d0
               call getApqIJMatrixDims(Isomo,           &
                    Jsomo, &
                    MS,                       &
@@ -477,10 +678,13 @@ end subroutine get_phase_qp_to_cfg
                    meMatrix,                 &
                    rows,                     &
                    cols)
+             !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax
+             !call printMatrix(meMatrix,rows,cols)
               ! i -> j
              do ri = 1,rows
                  do ci = 1,cols
-                    AIJpqContainer(nsomoi,nsomoj,2,k,l,ri,ci) = meMatrix(ri, ci)
+                    !AIJpqContainer(nsomoi,2,k,l,ri,ci) = meMatrix(ri, ci)
+                    AIJpqContainer(ri,ci,k,l,2,nsomoi) = meMatrix(ri, ci)
                  end do
               end do
               deallocate(meMatrix)
@@ -490,13 +694,16 @@ end subroutine get_phase_qp_to_cfg
   end do
   ! Type
   ! 3. SOMO -> VMO
-  do i = 2, NSOMOMax, 2
+  !print *,"Doing SOMO -> VMO"
+  !AIJpqContainer(NSOMOMin,3,1,1,1,1) = 1.0d0
+  AIJpqContainer(1,1,1:2,1:2,3,NSOMOMin) = 1.0d0
+  do i = NSOMOMin, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
      do j = i,i, 2
         Jsomo = ISHFT(1_8,j)-1
         if(j .GT. NSOMOMax .OR. j .LE. 0) cycle
-        do k = 1,i
-           do l = 1,i
+        do k = 1,i+1
+           do l = 1,i+1
               if(k .NE. l) then
                  Isomo = ISHFT(1_8,i+1)-1
                  Isomo = IBCLR(Isomo,l-1)
@@ -507,7 +714,12 @@ end subroutine get_phase_qp_to_cfg
                  Jsomo = ISHFT(1_8,j)-1
               endif
 
-              AIJpqContainer(i,j,3,k,l,:,:) = 0.0d0
+              !print *,"k,l=",k,l
+              !call debug_spindet(Jsomo,1)
+              !call debug_spindet(Isomo,1)
+
+              !AIJpqContainer(i,3,k,l,:,:) = 0.0d0
+              AIJpqContainer(:,:,k,l,3,i) = 0.0d0
               call getApqIJMatrixDims(Isomo,           &
                    Jsomo, &
                    MS,                       &
@@ -528,10 +740,13 @@ end subroutine get_phase_qp_to_cfg
                    meMatrix,                 &
                    rows,                     &
                    cols)
+             !call printMatrix(meMatrix,rows,cols)
+             !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax
               ! i -> j
              do ri = 1,rows
                  do ci = 1,cols
-                    AIJpqContainer(i,j,3,k,l,ri,ci) = meMatrix(ri, ci)
+                    !AIJpqContainer(i,3,k,l,ri,ci) = meMatrix(ri, ci)
+                    AIJpqContainer(ri,ci,k,l,3,i) = meMatrix(ri, ci)
                  end do
               end do
               deallocate(meMatrix)
@@ -541,24 +756,35 @@ end subroutine get_phase_qp_to_cfg
   end do
   ! Type
   ! 4. DOMO -> SOMO
-  do i = 2, NSOMOMax, 2
+  !print *,"Doing DOMO -> SOMO"
+  !AIJpqContainer(NSOMOMin,4,1,1,1,1) = 1.0d0
+  AIJpqContainer(1,1,1,1,4,NSOMOMin) = 1.0d0
+  AIJpqContainer(1,1,2,2,4,NSOMOMin) = 1.0d0
+  AIJpqContainer(1,1,2,1,4,NSOMOMin) =-1.0d0
+  AIJpqContainer(1,1,1,2,4,NSOMOMin) =-1.0d0
+  do i = NSOMOMin+2, NSOMOMax, 2
      Isomo = ISHFT(1_8,i)-1
      do j = i,i, 2
         Jsomo = ISHFT(1_8,i)-1
         if(j .GT. NSOMOMax .OR. j .LE. 0) cycle
-        do k = 1,i
-           do l = 1,i
+        do k = 1,i+1
+           do l = 1,i+1
               if(k .NE. l) then
                  Isomo = ISHFT(1_8,i+1)-1
                  Isomo = IBCLR(Isomo,k-1)
                  Jsomo = ISHFT(1_8,j+1)-1
-                 Jsomo = IBCLR(Jsomo,l+1-1)
+                 Jsomo = IBCLR(Jsomo,l-1)
               else
                  Isomo = ISHFT(1_8,i)-1
                  Jsomo = ISHFT(1_8,j)-1
               endif
 
-              AIJpqContainer(i,j,4,k,l,:,:) = 0.0d0
+              !print *,"k,l=",k,l
+              !call debug_spindet(Jsomo,1)
+              !call debug_spindet(Isomo,1)
+
+              !AIJpqContainer(i,4,k,l,:,:) = 0.0d0
+              AIJpqContainer(:,:,k,l,4,i) = 0.0d0
               call getApqIJMatrixDims(Isomo,           &
                    Jsomo, &
                    MS,                       &
@@ -580,10 +806,13 @@ end subroutine get_phase_qp_to_cfg
                    meMatrix,                 &
                    rows,                     &
                    cols)
+             !call printMatrix(meMatrix,rows,cols)
+             !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax
               ! i -> j
              do ri = 1,rows
                  do ci = 1,cols
-                    AIJpqContainer(i,j,4,k,l,ri,ci) = meMatrix(ri, ci)
+                    !AIJpqContainer(i,4,k,l,ri,ci) = meMatrix(ri, ci)
+                    AIJpqContainer(ri,ci,k,l,4,i) = meMatrix(ri, ci)
                  end do
               end do
               deallocate(meMatrix)
@@ -593,93 +822,1215 @@ end subroutine get_phase_qp_to_cfg
   end do
   END_PROVIDER
 
-
-!!!!!!
-
-  BEGIN_PROVIDER [ real*8, DetToCSFTransformationMatrix, (0:NSOMOMax,NBFMax,maxDetDimPerBF)]
- &BEGIN_PROVIDER [ real*8, psi_coef_config,  (n_CSF)]
- &BEGIN_PROVIDER [ integer, psi_config_data, (N_configuration,2)]
-  use cfunctions
+subroutine calculate_preconditioner_cfg(diag_energies)
+  implicit none
   use bitmasks
+  BEGIN_DOC
+  ! Documentation for calculate_preconditioner
+  !
+  ! Calculates the diagonal energies of
+  ! the configurations in psi_configuration
+  ! returns : diag_energies :
+  END_DOC
+  integer :: i,j,k,kk,l,p,q,noccp,noccq, ii, jj
+  real*8,intent(out) :: diag_energies(n_CSF)
+  integer                            :: nholes
+  integer                            :: nvmos
+  integer                            :: listvmos(mo_num)
+  integer                            :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+  integer                            :: listholes(mo_num)
+  integer                            :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+  integer*8                          :: Idomo
+  integer*8                          :: Isomo
+  integer*8                          :: Jdomo
+  integer*8                          :: Jsomo
+  integer*8                          :: diffSOMO
+  integer*8                          :: diffDOMO
+  integer                            :: NSOMOI
+  integer                            :: NSOMOJ
+  integer                            :: ndiffSOMO
+  integer                            :: ndiffDOMO
+  integer                            :: starti, endi, cnti, cntj, rows,cols
+  integer                            :: extype,pmodel,qmodel
+  integer(bit_kind) :: Icfg(N_INT,2)
+  integer(bit_kind) :: Jcfg(N_INT,2)
+  integer,external  :: getNSOMO
+  real*8, external  :: mo_two_e_integral
+  real*8            :: hpp
+  real*8            :: meCC
+  real*8            :: ecore
+  real*8            :: core_act_contrib
+
+  !PROVIDE h_core_ri
+  PROVIDE core_fock_operator
+  PROVIDE h_act_ri
+  ! initialize energies
+  diag_energies = 0.d0
+  !print *,"Core energy=",core_energy," nucler rep=",nuclear_repulsion, " n_core_orb=",n_core_orb," n_act_orb=",n_act_orb," mo_num=",mo_num
+
+  ! calculate core energy
+  !call get_core_energy(ecore)
+  diag_energies = core_energy - nuclear_repulsion
+
+  ! calculate the core energy
+  !print *,"Core 2energy=",ref_bitmask_energy
+
+  do i=1,N_configuration
+
+     Isomo = psi_configuration(1,1,i)
+     Idomo = psi_configuration(1,2,i)
+     Icfg(1,1) = psi_configuration(1,1,i)
+     Icfg(1,2) = psi_configuration(1,2,i)
+     NSOMOI = getNSOMO(psi_configuration(:,:,i))
+
+     starti = psi_config_data(i,1)
+     endi   = psi_config_data(i,2)
+
+     core_act_contrib = 0.0d0
+
+     ! find out all pq holes possible
+     nholes = 0
+     ! holes in SOMO
+     !do k = 1,mo_num
+     do kk = 1,n_act_orb
+       k = list_act(kk)
+        if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then
+           nholes += 1
+           listholes(nholes) = k
+           holetype(nholes) = 1
+        endif
+     enddo
+     ! holes in DOMO
+     !do k = n_core_orb+1,n_core_orb + n_act_orb
+     !do k = 1+n_core_inact_orb,n_core_orb+n_core_inact_act_orb
+     !do k = 1,mo_num
+     do kk = 1,n_act_orb
+       k = list_act(kk)
+        if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then
+           nholes += 1
+           listholes(nholes) = k
+           holetype(nholes) = 2
+        endif
+     enddo
+
+     ! find vmos
+     listvmos = -1
+     vmotype = -1
+     nvmos = 0
+     !do k = n_core_orb+1,n_core_orb + n_act_orb
+     !do k = 1,mo_num
+     do kk = 1,n_act_orb
+       k = list_act(kk)
+        !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
+        if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then
+           nvmos += 1
+           listvmos(nvmos) = k
+           vmotype(nvmos) = 0
+        else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then
+           nvmos += 1
+           listvmos(nvmos) = k
+           vmotype(nvmos) = 1
+        end if
+     enddo
+     !print *,"I=",i
+     !call debug_spindet(psi_configuration(1,1,i),N_int)
+     !call debug_spindet(psi_configuration(1,2,i),N_int)
+
+     do k=1,nholes
+        p = listholes(k)
+        noccp = holetype(k)
+
+
+        ! core-virtual
+        do l = 1, n_core_orb
+         jj = list_core(l)
+         core_act_contrib += noccp * (2.d0 * mo_two_e_integrals_jj(jj,p) - mo_two_e_integrals_jj_exchange(jj,p))
+        enddo
+
+        ! Calculate one-electron
+        ! and two-electron coulomb terms
+        do l=1,nholes
+           q = listholes(l)
+           noccq = holetype(l)
+           !print *,"--------------- K=",p," L=",q
+
+           ! one-electron term
+           if(p.EQ.q) then
+              hpp = noccq * h_act_ri(p,q)!mo_one_e_integrals(q,q)
+           else
+              hpp = 0.d0
+           endif
+
+
+           do j=starti,endi
+              ! coulomb term
+              ! (pp,qq) = <pq|pq>
+              if(p.EQ.q) then
+                 diag_energies(j) += hpp !+ 0.5d0 * (noccp * noccq * mo_two_e_integral(p,q,p,q))
+                 !print *,"hpp=",hpp,"diga= ",diag_energies(j)
+!             else
+!                diag_energies(j) +=     !  0.5d0 * noccp * noccq * mo_two_e_integral(p,q,p,q)
+!                print *,"diga= ",diag_energies(j)
+              endif
+           enddo
+        enddo
+
+     enddo
+     !print *,"I=",i," core_act=",core_act_contrib
+     do j=starti,endi
+       diag_energies(j) += core_act_contrib
+     end do
+  enddo
+
+end subroutine calculate_preconditioner_cfg
+
+subroutine obtain_connected_I_foralpha_fromfilterdlist(idxI, nconnectedJ, idslistconnectedJ, listconnectedJ, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for obtain_connected_I_foralpha
+  ! This function returns all those selected configurations
+  ! which are connected to the input configuration
+  ! Ialpha by a single excitation.
+  !
+  ! The type of excitations are ordered as follows:
+  ! Type 1 - SOMO -> SOMO
+  ! Type 2 - DOMO -> VMO
+  ! Type 3 - SOMO -> VMO
+  ! Type 4 - DOMO -> SOMO
+  !
+  ! Order of operators
+  ! \alpha> = a^\dag_p a_q |I> = E_pq |I>
+  END_DOC
+  integer          ,intent(in)             :: idxI
+  integer          ,intent(in)             :: nconnectedJ
+  integer(bit_kind),intent(in)             :: listconnectedJ(N_int,2,*)
+  integer(bit_kind),intent(in)             :: Ialpha(N_int,2)
+  integer(bit_kind),intent(out)            :: connectedI(N_int,2,*)
+  integer          ,intent(in)             :: idslistconnectedJ(*)
+  integer          ,intent(out)            :: idxs_connectedI(*)
+  integer,intent(out)                      :: nconnectedI
+  integer,intent(out)                      :: excitationIds(2,*)
+  integer,intent(out)                      :: excitationTypes(*)
+  real*8 ,intent(out)                      :: diagfactors(*)
+  integer*8                                :: Idomo
+  integer*8                                :: Isomo
+  integer*8                                :: Jdomo
+  integer*8                                :: Jsomo
+  integer*8                                :: IJsomo
+  integer*8                                :: diffSOMO
+  integer*8                                :: diffDOMO
+  integer*8                                :: xordiffSOMODOMO
+  integer                                  :: ndiffSOMO
+  integer                                  :: ndiffDOMO
+  integer                                  :: nxordiffSOMODOMO
+  integer :: ii,i,j,k,kk,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes, idxJ
+  integer :: listholes(mo_num)
+  integer :: holetype(mo_num)
+  integer :: end_index
+  integer :: Nsomo_alpha
+  logical :: isOKlistJ
+
+  PROVIDE DetToCSFTransformationMatrix
+
+  isOKlistJ = .False.
+
+  nconnectedI = 0
+  end_index = N_configuration
+
+  ! Since CFGs are sorted wrt to seniority
+  ! we don't have to search the full CFG list
+  Isomo = Ialpha(1,1)
+  Idomo = Ialpha(1,2)
+  Nsomo_alpha = POPCNT(Isomo)
+  end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1)
+  if(end_index .LT. 0) end_index= N_configuration
+  !end_index = N_configuration
+
+
+  p = 0
+  q = 0
+  do i=1,nconnectedJ
+     idxJ = idslistconnectedJ(i)
+     Isomo = Ialpha(1,1)
+     Idomo = Ialpha(1,2)
+     Jsomo = listconnectedJ(1,1,i)
+     Jdomo = listconnectedJ(1,2,i)
+     diffSOMO = IEOR(Isomo,Jsomo)
+     ndiffSOMO = POPCNT(diffSOMO)
+     diffDOMO = IEOR(Idomo,Jdomo)
+     xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO)
+     ndiffDOMO = POPCNT(diffDOMO)
+     nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO)
+     nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO 
+     if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then
+        select case(ndiffDOMO)
+        case (0)
+           ! SOMO -> VMO
+           !print *,"obt SOMO -> VMO"
+           extyp = 3
+           IJsomo = IEOR(Isomo, Jsomo)
+IRP_IF WITHOUT_TRAILZ
+           p = (popcnt(ieor( IAND(Isomo,IJsomo), IAND(Isomo,IJsomo)-1)) -1) + 1
+IRP_ELSE
+           p = TRAILZ(IAND(Isomo,IJsomo)) + 1
+IRP_ENDIF
+           IJsomo = IBCLR(IJsomo,p-1)
+IRP_IF WITHOUT_TRAILZ
+           q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1
+IRP_ELSE
+           q = TRAILZ(IJsomo) + 1
+IRP_ENDIF
+        case (1)
+           ! DOMO -> VMO
+           ! or
+           ! SOMO -> SOMO
+           nsomoJ = POPCNT(Jsomo)
+           nsomoalpha = POPCNT(Isomo)
+           if(nsomoJ .GT. nsomoalpha) then
+              ! DOMO -> VMO
+              !print *,"obt DOMO -> VMO"
+              extyp = 2
+IRP_IF WITHOUT_TRAILZ
+              p = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1
+IRP_ELSE
+              p = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+IRP_ENDIF
+              Isomo = IEOR(Isomo, Jsomo)
+              Isomo = IBCLR(Isomo,p-1)
+IRP_IF WITHOUT_TRAILZ
+              q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1
+IRP_ELSE
+              q = TRAILZ(Isomo) + 1
+IRP_ENDIF
+           else
+              ! SOMO -> SOMO
+              !print *,"obt SOMO -> SOMO"
+              extyp = 1
+IRP_IF WITHOUT_TRAILZ
+              q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1
+IRP_ELSE
+              q = TRAILZ(IEOR(Idomo,Jdomo)) + 1
+IRP_ENDIF
+              Isomo = IEOR(Isomo, Jsomo)
+              Isomo = IBCLR(Isomo,q-1)
+IRP_IF WITHOUT_TRAILZ
+              p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1
+IRP_ELSE
+              p = TRAILZ(Isomo) + 1
+IRP_ENDIF
+           end if
+        case (2)
+           ! DOMO -> SOMO
+           !print *,"obt DOMO -> SOMO"
+           extyp = 4
+           IJsomo = IEOR(Isomo, Jsomo)
+IRP_IF WITHOUT_TRAILZ
+           p = (popcnt(ieor(IAND(Jsomo,IJsomo) ,IAND(Jsomo,IJsomo) -1))-1) + 1
+IRP_ELSE
+           p = TRAILZ(IAND(Jsomo,IJsomo)) + 1
+IRP_ENDIF
+           IJsomo = IBCLR(IJsomo,p-1)
+IRP_IF WITHOUT_TRAILZ
+           q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1
+IRP_ELSE
+           q = TRAILZ(IJsomo) + 1
+IRP_ENDIF
+        case default
+           print *,"something went wront in get connectedI"
+        end select
+        starti = psi_config_data(idxJ,1)
+        endi   = psi_config_data(idxJ,2)
+        nconnectedI += 1
+        connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i)
+        idxs_connectedI(nconnectedI)=starti
+        excitationIds(1,nconnectedI)=p
+        excitationIds(2,nconnectedI)=q
+        excitationTypes(nconnectedI) = extyp
+        diagfactors(nconnectedI) = 1.0d0
+     else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then
+        ! find out all pq holes possible
+        nholes = 0
+        ! holes in SOMO
+        Isomo = listconnectedJ(1,1,i)
+        Idomo = listconnectedJ(1,2,i)
+        do ii = 1,mo_num
+           if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 1
+           endif
+        end do
+        ! holes in DOMO
+        do ii = 1,mo_num
+           if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then
+              nholes += 1
+              listholes(nholes) = ii
+              holetype(nholes) = 2
+           endif
+        end do
+
+        do k=1,nholes
+           p = listholes(k)
+           q = p
+           extyp = 1
+           if(holetype(k) .EQ. 1) then
+              starti = psi_config_data(idxJ,1)
+              endi   = psi_config_data(idxJ,2)
+              nconnectedI += 1
+              connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i)
+              idxs_connectedI(nconnectedI)=starti
+              excitationIds(1,nconnectedI)=p
+              excitationIds(2,nconnectedI)=q
+              excitationTypes(nconnectedI) = extyp
+              diagfactors(nconnectedI) = 1.0d0
+           else
+              starti = psi_config_data(idxJ,1)
+              endi   = psi_config_data(idxJ,2)
+              nconnectedI += 1
+              connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i)
+              idxs_connectedI(nconnectedI)=starti
+              excitationIds(1,nconnectedI)=p
+              excitationIds(2,nconnectedI)=q
+              excitationTypes(nconnectedI) = extyp
+              diagfactors(nconnectedI) = 2.0d0
+           endif
+        enddo
+     endif
+  end do
+
+end subroutine obtain_connected_I_foralpha_fromfilterdlist
+
+
+subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmodel)
   implicit none
   BEGIN_DOC
-  ! Documentation for DetToCSFTransformationMatrix
-  ! Provides the matrix of transformatons for the
-  ! conversion between determinant to CSF basis (in BFs)
+  ! This function converts the orbital ids
+  ! in real space to those used in model space
+  ! in order to identify the matrices required
+  ! for the calculation of MEs.
+  !
+  ! The type of excitations are ordered as follows:
+  ! Type 1 - SOMO -> SOMO
+  ! Type 2 - DOMO -> VMO
+  ! Type 3 - SOMO -> VMO
+  ! Type 4 - DOMO -> SOMO
   END_DOC
-  integer(bit_kind) :: mask(N_int), Ialpha(N_int),Ibeta(N_int)
-  integer   :: rows, cols, i, j, k
-  integer   :: startdet, enddet
-  integer*8 MS, Isomo, Idomo
-  integer ndetI
-  integer :: getNSOMO
-  real*8,dimension(:,:),allocatable    :: tempBuffer
-  real*8,dimension(:),allocatable    :: tempCoeff
-  real*8  :: norm_det1, phasedet
-  norm_det1 = 0.d0
-  MS = elec_alpha_num - elec_beta_num
-  ! initialization
-  psi_coef_config = 0.d0
-  DetToCSFTransformationMatrix(0,:,:) = 1.d0
-  do i = 2-iand(elec_alpha_num-elec_beta_num,1), NSOMOMax,2
-    Isomo = IBSET(0_8, i) - 1_8
-    ! rows = Ncsfs
-    ! cols = Ndets
-    bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1))))
-    ndetI = max(1,nint((binom(i,(i+1)/2))))
+  integer(bit_kind),intent(in)   :: Ialpha(N_int,2)
+  integer(bit_kind),intent(in)   :: Jcfg(N_int,2)
+  integer,intent(in)             :: p,q
+  integer,intent(in)             :: extype
+  integer,intent(out)            :: pmodel,qmodel
+  !integer(bit_kind)              :: Isomo(N_int)
+  !integer(bit_kind)              :: Idomo(N_int)
+  !integer(bit_kind)              :: Jsomo(N_int)
+  !integer(bit_kind)              :: Jdomo(N_int)
+  integer*8                       :: Isomo
+  integer*8                       :: Idomo
+  integer*8                       :: Jsomo
+  integer*8                       :: Jdomo
+  integer*8                      :: mask
+  integer                        :: iint, ipos
+  !integer(bit_kind)              :: Isomotmp(N_int)
+  !integer(bit_kind)              :: Jsomotmp(N_int)
+  integer*8             :: Isomotmp
+  integer*8             :: Jsomotmp
+  integer                        :: pos0,pos0prev
 
-    allocate(tempBuffer(bfIcfg,ndetI))
-    call getCSFtoDETTransformationMatrix(Isomo, MS, NBFMax, maxDetDimPerBF, tempBuffer)
-    DetToCSFTransformationMatrix(i,1:bfIcfg,1:ndetI) =  tempBuffer(1:bfIcfg,1:ndetI)
-    deallocate(tempBuffer)
+  ! TODO Flag (print) when model space indices is > 64
+  Isomo = Ialpha(1,1)
+  Idomo = Ialpha(1,2)
+  Jsomo = Jcfg(1,1)
+  Jdomo = Jcfg(1,2)
+  pos0prev = 0
+  pmodel = p
+  qmodel = q
+
+  if(p .EQ. q) then
+     pmodel = 1
+     qmodel = 1
+  else
+     select case(extype)
+       case (1)
+          ! SOMO -> SOMO
+          ! remove all domos
+          !print *,"type -> SOMO -> SOMO"
+          mask = ISHFT(1_8,p) - 1
+          Isomotmp = IAND(Isomo,mask)
+          pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+          mask = ISHFT(1_8,q) - 1
+          Isomotmp = IAND(Isomo,mask)
+          qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask))
+       case (2)
+          ! DOMO -> VMO
+          ! remove all domos except one at p
+          !print *,"type -> DOMO -> VMO"
+          mask = ISHFT(1_8,p) - 1
+          Jsomotmp = IAND(Jsomo,mask)
+          pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+          mask = ISHFT(1_8,q) - 1
+          Jsomotmp = IAND(Jsomo,mask)
+          qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask))
+       case (3)
+          ! SOMO -> VMO
+          !print *,"type -> SOMO -> VMO"
+          !Isomo = IEOR(Isomo,Jsomo)
+          if(p.LT.q) then
+             mask = ISHFT(1_8,p) - 1
+             Isomo = IAND(Isomo,mask)
+             pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
+             mask = ISHFT(1_8,q) - 1
+             Jsomo = IAND(Jsomo,mask)
+             qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1
+          else
+             mask = ISHFT(1_8,p) - 1
+             Isomo = IAND(Isomo,mask)
+             pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1
+             mask = ISHFT(1_8,q) - 1
+             Jsomo = IAND(Jsomo,mask)
+             qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
+          endif
+       case (4)
+          ! DOMO -> SOMO
+          ! remove all domos except one at p
+          !print *,"type -> DOMO -> SOMO"
+          !Isomo = IEOR(Isomo,Jsomo)
+          if(p.LT.q) then
+             mask = ISHFT(1_8,p) - 1
+             Jsomo = IAND(Jsomo,mask)
+             pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask))
+             mask = ISHFT(1_8,q) - 1
+             Isomo = IAND(Isomo,mask)
+             qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1
+          else
+             mask = ISHFT(1_8,p) - 1
+             Jsomo = IAND(Jsomo,mask)
+             pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1
+             mask = ISHFT(1_8,q) - 1
+             Isomo = IAND(Isomo,mask)
+             qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask))
+          endif
+       case default
+          print *,"something is wrong in convertOrbIdsToModelSpaceIds"
+     end select
+  endif
+  !print *,p,q,"model ids=",pmodel,qmodel
+end subroutine convertOrbIdsToModelSpaceIds
+
+subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep)
+  implicit none
+  use bitmasks
+  use omp_lib
+  BEGIN_DOC
+  ! Documentation for sigma-vector calculation
+  !
+  ! Calculates the result of the
+  ! application of the hamiltonian to the
+  ! wavefunction in CFG basis once
+  ! TODO : Things prepare outside this routine
+  !  1. Touch the providers for
+  !     a. ApqIJ containers
+  !     b. DET to CSF transformation matrices
+  !  2. DET to CSF transcormation
+  !  2. CSF to DET back transcormation
+  ! returns : psi_coef_out_det :
+  END_DOC
+  integer,intent(in)             :: sze, istart,iend, istep, ishift, n_st
+  real*8,intent(in)              :: psi_in(n_st,sze)
+  real*8,intent(out)             :: psi_out(n_st,sze)
+  integer(bit_kind)              :: Icfg(N_INT,2)
+  integer                        :: i,j,k,l,p,q,noccp,noccq, m, n, idxI, nocck,orbk
+  integer                        :: ii,jj,kk,ll,pp,qq
+  integer(bit_kind),dimension(:,:,:),allocatable :: listconnectedJ
+  integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg
+  integer(bit_kind),dimension(:,:,:),allocatable :: singlesI
+  integer(bit_kind),dimension(:,:,:),allocatable :: connectedI_alpha
+  integer,dimension(:),allocatable :: idxs_singlesI
+  integer,dimension(:),allocatable :: idxs_connectedI_alpha
+  integer,dimension(:,:),allocatable :: excitationIds_single
+  integer,dimension(:),allocatable :: excitationTypes_single
+  integer,dimension(:,:),allocatable :: excitationIds
+  integer,dimension(:),allocatable :: excitationTypes
+  integer,dimension(:),allocatable :: idslistconnectedJ
+  real*8,dimension(:),allocatable :: diagfactors
+  integer                        :: nholes
+  integer                        :: nvmos
+  integer                        :: listvmos(mo_num)
+  integer                        :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+  integer                        :: listholes(mo_num)
+  integer                        :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+  integer                        :: Nalphas_Icfg, nconnectedI, rowsikpq, colsikpq, nsinglesI
+  integer                        :: extype,NSOMOalpha,NSOMOI,NSOMOJ,pmodel,qmodel
+  integer                        :: getNSOMO
+  integer                        :: totcolsTKI
+  integer                        :: rowsTKI
+  integer                        :: noccpp
+  integer                        :: istart_cfg, iend_cfg, num_threads_max
+  integer                        :: nconnectedJ,nconnectedtotalmax,nconnectedmaxJ,maxnalphas,ntotJ
+  integer*8                      :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta
+  integer                        :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk
+  real*8                         :: norm_coef_cfg, fac2eints
+  real*8                         :: norm_coef_det
+  real*8                         :: meCC1, meCC2, diagfac
+  real*8,dimension(:,:,:),allocatable :: TKI
+  real*8,dimension(:,:),allocatable :: GIJpqrs
+  real*8,dimension(:,:,:),allocatable :: TKIGIJ
+  real*8,dimension(:),allocatable :: psi_out_tmp
+  real*8,dimension(:,:),allocatable :: CCmattmp
+  real*8, external               :: mo_two_e_integral
+  real*8, external               :: get_two_e_integral
+  real*8,dimension(:),allocatable:: diag_energies
+  real*8                         :: tmpvar, tmptot
+  real*8                         :: core_act_contrib
+
+  integer(omp_lock_kind), allocatable :: lock(:)
+  call omp_set_max_active_levels(1)
+
+  !print *," sze = ",sze
+  allocate(lock(sze))
+  do i=1,sze
+    call omp_init_lock(lock(i))
+  enddo
+  !do i=1,size(psi_config_data,1)
+  !  print *,"i=",i," psi_cfg_data_1=",psi_config_data(i,1)," psi_cfg_data_2=",psi_config_data(i,2)
+  !end do
+
+  allocate(diag_energies(n_CSF))
+  call calculate_preconditioner_cfg(diag_energies)
+  !print *," diag energy =",diag_energies(1)
+
+  MS = 0
+  norm_coef_cfg=0.d0
+
+  psi_out=0.d0
+
+  istart_cfg = psi_csf_to_config_data(istart)
+  iend_cfg   = psi_csf_to_config_data(iend)
+
+  !nconnectedtotalmax = 1000
+  !nconnectedmaxJ = 1000
+  maxnalphas = elec_num*mo_num
+  Icfg(1,1) = psi_configuration(1,1,1)
+  Icfg(1,2) = psi_configuration(1,2,1)
+  allocate(listconnectedJ(N_INT,2,max(sze,10000)))
+  allocate(idslistconnectedJ(max(sze,10000)))
+  call obtain_connected_J_givenI(1, Icfg, listconnectedJ, idslistconnectedJ, nconnectedmaxJ, nconnectedtotalmax)
+  deallocate(listconnectedJ)
+  deallocate(idslistconnectedJ)
+
+  integer*8, allocatable :: bit_tmp(:)
+  integer*8, external :: configuration_search_key
+  double precision :: diagfactors_0
+  allocate( bit_tmp(0:N_configuration+1))
+  do j=1,N_configuration
+    bit_tmp(j) = configuration_search_key(psi_configuration(1,1,j),N_int)
   enddo
 
-  integer s, bfIcfg
-  integer countcsf
-  countcsf = 0
-  integer countdet
-  countdet = 0
-  integer idx
-  integer istate
-  istate = 1
-  phasedet = 1.0d0
-  do i = 1,N_configuration
-      startdet = psi_configuration_to_psi_det(1,i)
-      enddet = psi_configuration_to_psi_det(2,i)
-      ndetI = enddet-startdet+1
+  call omp_set_max_active_levels(1)
+  !$OMP PARALLEL                                              & 
+      !$OMP DEFAULT(NONE)                               &
+      !$OMP private(i,icfg, isomo, idomo, NSOMOI, NSOMOJ, nholes, k, listholes,&
+      !$OMP    holetype, vmotype, nvmos, listvmos, starti, endi,     &
+      !$OMP    nsinglesI, singlesI,idxs_singlesI,excitationIds_single,&
+      !$OMP    excitationTypes_single, idxI, p, q, extype, pmodel, qmodel,&
+      !$OMP    Jsomo, Jdomo, startj, endj, kk, jj, ii, cnti, cntj, meCC1,&
+      !$OMP   nconnectedJ,listconnectedJ,idslistconnectedJ,ntotJ,       &
+      !$OMP   Nalphas_Icfg,alphas_Icfg,connectedI_alpha,             &
+      !$OMP   idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors,&
+      !$OMP   totcolsTKI,rowsTKI,NSOMOalpha,rowsikpq,                &
+      !$OMP   colsikpq, GIJpqrs,TKIGIJ,j,l,m,TKI,CCmattmp, moi, moj, mok, mol,&
+      !$OMP   diagfac, tmpvar, diagfactors_0)                                            &
+      !$OMP shared(istart_cfg, iend_cfg, psi_configuration, mo_num, psi_config_data,&
+      !$OMP    N_int, N_st, psi_out, psi_in, h_core_ri, core_energy, h_act_ri, AIJpqContainer,&
+      !$OMP     pp, sze, NalphaIcfg_list,alphasIcfg_list, bit_tmp,       &
+      !$OMP     AIJpqMatrixDimsList, diag_energies, n_CSF, lock, NBFmax,nconnectedtotalmax, nconnectedmaxJ,maxnalphas,&
+      !$OMP     n_core_orb, n_act_orb, list_act, n, list_core,  list_core_is_built,core_act_contrib, num_threads_max,&
+      !$OMP     n_core_orb_is_built, mo_integrals_map, mo_integrals_map_is_built)
 
-      allocate(tempCoeff(ndetI))
-      countdet = 1
-      do j = startdet, enddet
-         idx = psi_configuration_to_psi_det_data(j)
-         Ialpha(:) = psi_det(:,1,idx)
-         Ibeta(:)  = psi_det(:,2,idx)
-         call get_phase_qp_to_cfg(Ialpha, Ibeta, phasedet)
-         tempCoeff(countdet) = psi_coef(idx, istate)*phasedet
-         norm_det1 += tempCoeff(countdet)*tempCoeff(countdet)
-         countdet += 1
+  allocate(singlesI(N_INT,2,max(sze,10000)))
+  allocate(idxs_singlesI(max(sze,10000)))
+  allocate(excitationIds_single(2,max(sze,10000)))
+  allocate(excitationTypes_single(max(sze,10000)))
+!
+
+  !!!====================!!!
+  !!! Single Excitations !!!
+  !!!====================!!!
+
+  !$OMP DO SCHEDULE(dynamic,16)
+  do i=istart_cfg,iend_cfg
+
+    ! if Seniority_range > 8 then
+    ! continue
+    ! else
+    ! cycle
+
+     Icfg(1,1) = psi_configuration(1,1,i)
+     Icfg(1,2) = psi_configuration(1,2,i)
+     Isomo = Icfg(1,1)
+     Idomo = Icfg(1,2)
+     NSOMOI = getNSOMO(Icfg)
+
+     ! find out all pq holes possible
+     nholes = 0
+     ! holes in SOMO
+     ! list_act
+     ! list_core
+     ! list_core_inact
+     ! bitmasks
+     !do k = 1,mo_num
+     do kk = 1,n_act_orb
+       k = list_act(kk)
+       if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then
+         nholes += 1
+         listholes(nholes) = k
+         holetype(nholes) = 1
+       endif
+     enddo
+     ! holes in DOMO
+     !do k = 1,mo_num
+     do kk = 1,n_act_orb
+       k = list_act(kk)
+       if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then
+         nholes += 1
+         listholes(nholes) = k
+         holetype(nholes) = 2
+       endif
+     enddo
+
+     ! find vmos
+     listvmos = -1
+     vmotype = -1
+     nvmos = 0
+     do kk = 1,n_act_orb
+       k = list_act(kk)
+       !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1))))
+       if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then
+         nvmos += 1
+         listvmos(nvmos) = k
+         vmotype(nvmos) = 0
+       else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then
+         nvmos += 1
+         listvmos(nvmos) = k
+         vmotype(nvmos) = 1
+       end if
+     enddo
+
+
+     ! Icsf ids
+     starti = psi_config_data(i,1)
+     endi   = psi_config_data(i,2)
+     NSOMOI = getNSOMO(Icfg)
+
+     call generate_all_singles_cfg_with_type(bit_tmp,Icfg,singlesI,idxs_singlesI,excitationIds_single,&
+         excitationTypes_single,nsinglesI,N_int)
+
+     do j = 1,nsinglesI
+       idxI = idxs_singlesI(j)
+       NSOMOJ = getNSOMO(singlesI(1,1,j))
+      p = excitationIds_single(1,j)
+      q = excitationIds_single(2,j)
+      extype = excitationTypes_single(j)
+      ! Off diagonal terms
+      call convertOrbIdsToModelSpaceIds(Icfg, singlesI(1,1,j), p, q, extype, pmodel, qmodel)
+      Jsomo = singlesI(1,1,j)
+      Jdomo = singlesI(1,2,j)
+
+      ! Add the hole on J
+      if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1  .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+        nholes += 1
+        listholes(nholes) = q
+        holetype(nholes) = 1
+      endif
+      if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+        nholes += 1
+        listholes(nholes) = q
+        holetype(nholes) = 2
+      endif
+
+      startj = psi_config_data(idxI,1)
+      endj   = psi_config_data(idxI,2)
+      !print *,"i=",i," idxI=",idxI," startj=",startj," endj=",endj," sze=",sze
+
+      !!! One-electron contribution !!!
+      do ii = starti, endi
+        cnti = ii-starti+1
+        do jj = startj, endj
+          cntj = jj-startj+1
+          !meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_core_ri(p,q)
+          core_act_contrib = 0.0d0
+          if(p.ne.q)then
+            do pp=1,n_core_orb
+              n=list_core(pp)
+              core_act_contrib += 2.d0 * get_two_e_integral(p,n,q,n,mo_integrals_map) - get_two_e_integral(p,n,n,q,mo_integrals_map)
+            end do
+          endif
+          meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* (h_act_ri(p,q) + core_act_contrib)
+          !if(jj.eq.1.and.ii.eq.1)then
+          !  print *,"CC=",AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI), " p=",p," q=",q
+          !endif
+          call omp_set_lock(lock(jj))
+          do kk = 1,n_st
+            psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii)
+          enddo
+          call omp_unset_lock(lock(jj))
+        enddo
       enddo
 
-      s = 0
-      do k=1,N_int
-        if (psi_configuration(k,1,i) == 0_bit_kind) cycle
-        s = s + popcnt(psi_configuration(k,1,i))
-      enddo
-      bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1))))
-
-      ! perhaps blocking with CFGs of same seniority
-      ! can be more efficient
-      allocate(tempBuffer(bfIcfg,ndetI))
-      tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI)
-
-       call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1), size(psi_coef_config,1))
-       !call dgemv('N', NBFMax, maxDetDimPerBF, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, 1, 0.d0, psi_coef_config(countcsf), 1)
-
-      deallocate(tempCoeff)
-      deallocate(tempBuffer)
-      psi_config_data(i,1) = countcsf + 1
-      countcsf += bfIcfg
-      psi_config_data(i,2) = countcsf
+      ! Undo setting in listholes
+      if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1  .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+        nholes -= 1
+      endif
+      if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then
+        nholes -= 1
+      endif
+    enddo
   enddo
+  !$OMP END DO
+  deallocate(singlesI)
+  deallocate(idxs_singlesI)
+  deallocate(excitationIds_single)
+  deallocate(excitationTypes_single)
 
-  END_PROVIDER
+  !print *," singles part psi(1,1)=",psi_out(1,1)
+  
+  allocate(listconnectedJ(N_INT,2,max(sze,10000)))
+  allocate(alphas_Icfg(N_INT,2,max(sze,10000)))
+  allocate(connectedI_alpha(N_INT,2,max(sze,10000)))
+  allocate(idxs_connectedI_alpha(max(sze,10000)))
+  allocate(excitationIds(2,max(sze,10000)))
+  allocate(excitationTypes(max(sze,10000)))
+  allocate(diagfactors(max(sze,10000)))
+  allocate(idslistconnectedJ(max(sze,10000)))
+  allocate(CCmattmp(n_st,NBFmax))
+
+  !!!====================!!!
+  !!! Double Excitations !!!
+  !!!====================!!!
+
+  ! Loop over all selected configurations
+  !$OMP DO SCHEDULE(static)
+  do i = istart_cfg,iend_cfg
+
+     ! if Seniority_range > 8 then
+     ! continue
+     ! else
+     ! cycle
+
+     Icfg(1,1) = psi_configuration(1,1,i)
+     Icfg(1,2) = psi_configuration(1,2,i)
+     starti = psi_config_data(i,1)
+     endi   = psi_config_data(i,2)
+
+     ! Returns all unique (checking the past) singly excited cfgs connected to I
+     Nalphas_Icfg = 0
+     ! TODO:
+     ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate
+
+     Nalphas_Icfg = NalphaIcfg_list(i)
+     alphas_Icfg(1:n_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg)
+     if(Nalphas_Icfg .GT. maxnalphas) then
+       print *,"Nalpha > maxnalpha"
+     endif
+
+     call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ)
+
+     ! TODO : remove doubly excited for return
+     !print *,"I=",i," isomo=",psi_configuration(1,1,i)," idomo=",psi_configuration(1,2,i), " psiout=",psi_out(1,5)
+     do k = 1,Nalphas_Icfg
+        ! Now generate all singly excited with respect to a given alpha CFG
+
+        !call obtain_connected_I_foralpha_fromfilterdlist(i,nconnectedJ, idslistconnectedJ, &
+        !  listconnectedJ, alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI, &
+        !  excitationIds,excitationTypes,diagfactors)
+
+        call obtain_connected_I_foralpha(i, alphas_Icfg(1,1,k), connectedI_alpha, idxs_connectedI_alpha, &
+                                         nconnectedI, excitationIds, excitationTypes, diagfactors)
+
+        
+        if(nconnectedI .EQ. 0) then
+           cycle
+        endif
+
+        !if(i .EQ. 1) then
+        !   print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k))
+        !endif
+
+        ! Here we do 2x the loop. One to count for the size of the matrix, then we compute.
+        totcolsTKI = 0
+        rowsTKI = -1
+        NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k))
+        do j = 1,nconnectedI
+           NSOMOI = getNSOMO(connectedI_alpha(:,:,j))
+           p = excitationIds(1,j)
+           q = excitationIds(2,j)
+           extype = excitationTypes(j)
+           call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel)
+           ! for E_pp E_rs and E_ppE_rr case
+           rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
+           colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+           !print *,"j=",j," Nsomo=",NSOMOalpha," rowsikpq=",rowsikpq," colsikpq=",colsikpq, " p=",pmodel," q=",qmodel, " extyp=",extype
+           totcolsTKI += colsikpq
+           rowsTKI = rowsikpq
+        enddo
+
+        allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF
+        ! Initialize the integral container
+        ! dims : (totcolsTKI, nconnectedI)
+        allocate(GIJpqrs(totcolsTKI,nconnectedI))  ! gpqrs
+        allocate(TKIGIJ(n_st,rowsTKI,nconnectedI))  ! TKI * gpqrs
+        !print *,"\t---rowsTKI=",rowsTKI," totCols=",totcolsTKI
+        TKI = 0.d0
+        GIJpqrs = 0.d0
+        TKIGIJ = 0.d0
+
+        totcolsTKI = 0
+        do j = 1,nconnectedI
+           NSOMOI = getNSOMO(connectedI_alpha(:,:,j))
+           p = excitationIds(1,j)
+           q = excitationIds(2,j)
+           extype = excitationTypes(j)
+           call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel)
+           rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
+           colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+           rowsTKI = rowsikpq
+           !if(i.eq.1) then
+           !  print *,rowsTKI,colsikpq," | ",pmodel,qmodel,extype,NSOMOalpha
+           !endif
+           do m = 1,colsikpq
+              do l = 1,rowsTKI
+              do kk = 1,n_st
+                 TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) &
+                    * psi_in(kk,idxs_connectedI_alpha(j)+m-1)
+              enddo
+           !if(i.eq.1) then
+           !      print *,AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha)
+           !endif
+           enddo
+           enddo
+
+           diagfactors_0 = diagfactors(j)*0.5d0
+           moi = excitationIds(1,j) ! p
+           mok = excitationIds(2,j) ! q
+           do l=1,nconnectedI
+            moj = excitationIds(2,l) ! s
+            mol = excitationIds(1,l) ! r
+            diagfac =  diagfactors_0 * diagfactors(l)* mo_two_e_integral(mok,mol,moi,moj)! g(pq,sr) = <ps,qr>
+            !print *,"p=",mok,"q=",mol,"r=",moi,"s=",moj
+            do m = 1,colsikpq
+               ! <ij|kl> = (ik|jl)
+               GIJpqrs(totcolsTKI+m,l) = diagfac
+            enddo
+           enddo
+           totcolsTKI += colsikpq
+        enddo
+
+
+        ! Do big BLAS
+        call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0,  &
+             TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0, &
+             TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) )
+
+
+        ! Collect the result
+        totcolsTKI = 0
+        do j = 1,nconnectedI
+           NSOMOI     = getNSOMO(connectedI_alpha(1,1,j))
+           p = excitationIds(1,j)
+           q = excitationIds(2,j)
+           extype = excitationTypes(j)
+           call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel)
+           rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
+           colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+           rowsTKI = rowsikpq
+           CCmattmp = 0.d0
+
+           call dgemm('N','N', n_st, colsikpq, rowsTKI, 1.d0,        &
+               TKIGIJ(1,1,j), size(TKIGIJ,1),                        &
+               AIJpqContainer(1,1,pmodel,qmodel,extype,NSOMOalpha),  &
+               size(AIJpqContainer,1), 0.d0,                         &
+               CCmattmp, size(CCmattmp,1) )
+
+           do m = 1,colsikpq
+              call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1))
+              do kk = 1,n_st
+                 psi_out(kk,idxs_connectedI_alpha(j)+m-1) += CCmattmp(kk,m)
+              enddo
+              call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1))
+           enddo
+           totcolsTKI += colsikpq
+        enddo
+
+        deallocate(TKI) ! coefficients of CSF
+        deallocate(GIJpqrs)  ! gpqrs
+        deallocate(TKIGIJ)  ! gpqrs
+
+     enddo ! loop over alphas
+  enddo ! loop over I
+  !$OMP END DO
+  call omp_set_max_active_levels(4)
+  deallocate(CCmattmp)
+  deallocate(connectedI_alpha)
+  deallocate(idxs_connectedI_alpha)
+  deallocate(excitationIds)
+  deallocate(excitationTypes)
+  deallocate(diagfactors)
+
+  !print *," psi(1,823)=",psi_out(1,823), " g(1 8, 3 15)=",mo_two_e_integral(1,8,3,15), " ncore=",n_core_orb
+  !print *," psi(1,1)=",psi_out(1,1)
+
+  ! Add the diagonal contribution
+  !$OMP DO
+  do i = 1,n_CSF
+    do kk=1,n_st
+     psi_out(kk,i) += diag_energies(i)*psi_in(kk,i)
+    enddo
+  enddo
+  !$OMP END DO
+
+  !$OMP END PARALLEL
+  call omp_set_max_active_levels(4)
+
+  deallocate(diag_energies)
+  deallocate(bit_tmp)
+
+end subroutine calculate_sigma_vector_cfg_nst_naive_store
+
+
+
+
+subroutine calculate_sigma_vector_cfg_nst(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep)
+  implicit none
+  use bitmasks
+  BEGIN_DOC
+  ! Documentation for sigma-vector calculation
+  !
+  ! Calculates the result of the
+  ! application of the hamiltonian to the
+  ! wavefunction in CFG basis once
+  ! TODO : Things prepare outside this routine
+  !  1. Touch the providers for
+  !     a. ApqIJ containers
+  !     b. DET to CSF transformation matrices
+  !  2. DET to CSF transcormation
+  !  2. CSF to DET back transcormation
+  ! returns : psi_coef_out_det :
+  END_DOC
+  integer,intent(in)             :: sze, istart,iend, istep, ishift, n_st
+  real*8,intent(in)              :: psi_in(sze,n_st)
+  real*8,intent(out)             :: psi_out(sze,n_st)
+  integer(bit_kind)              :: Icfg(N_INT,2)
+  integer                        :: i,j,k,l,p,q,noccp,noccq, ii, jj, m, n, idxI, kk, nocck,orbk
+  integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg
+  integer(bit_kind),dimension(:,:,:),allocatable :: singlesI
+  integer(bit_kind),dimension(:,:,:),allocatable :: connectedI_alpha
+  integer,dimension(:),allocatable :: idxs_singlesI
+  integer,dimension(:),allocatable :: idxs_connectedI_alpha
+  integer,dimension(:,:),allocatable :: excitationIds_single
+  integer,dimension(:),allocatable :: excitationTypes_single
+  integer,dimension(:,:),allocatable :: excitationIds
+  integer,dimension(:),allocatable :: excitationTypes
+  real*8,dimension(:),allocatable :: diagfactors
+  integer                        :: nholes
+  integer                        :: nvmos
+  integer                        :: listvmos(mo_num)
+  integer                        :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO
+  integer                        :: listholes(mo_num)
+  integer                        :: holetype(mo_num) ! 1-> SOMO 2->DOMO
+  integer                        :: Nalphas_Icfg, nconnectedI, rowsikpq, colsikpq, nsinglesI
+  integer                        :: extype,NSOMOalpha,NSOMOI,NSOMOJ,pmodel,qmodel
+  integer                        :: getNSOMO
+  integer                        :: totcolsTKI
+  integer                        :: rowsTKI
+  integer                        :: noccpp
+  integer                        :: istart_cfg, iend_cfg
+  integer*8                      :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta
+  integer                        :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk
+  real*8                         :: norm_coef_cfg, fac2eints
+  real*8                         :: norm_coef_det
+  real*8                         :: meCC1, meCC2, diagfac
+  real*8,dimension(:,:,:),allocatable :: TKI
+  real*8,dimension(:,:),allocatable :: GIJpqrs
+  real*8,dimension(:,:,:),allocatable :: TKIGIJ
+  real*8, external               :: mo_two_e_integral
+  real*8, external               :: get_two_e_integral
+  real*8                         :: diag_energies(n_CSF)
+
+  ! allocate
+  allocate(alphas_Icfg(N_INT,2,max(sze/2,100)))
+  allocate(singlesI(N_INT,2,max(sze/2,100)))
+  allocate(connectedI_alpha(N_INT,2,max(sze/2,100)))
+  allocate(idxs_singlesI(max(sze/2,100)))
+  allocate(idxs_connectedI_alpha(max(sze/2,100)))
+  allocate(excitationIds_single(2,max(sze/2,100)))
+  allocate(excitationTypes_single(max(sze/2,100)))
+  allocate(excitationIds(2,max(sze/2,100)))
+  allocate(excitationTypes(max(sze/2,100)))
+  allocate(diagfactors(max(sze/2,100)))
+
+
+  !print *," sze = ",sze
+  call calculate_preconditioner_cfg(diag_energies)
+
+  MS = 0
+  norm_coef_cfg=0.d0
+
+  psi_out=0.d0
+
+  istart_cfg = psi_csf_to_config_data(istart)
+  iend_cfg   = psi_csf_to_config_data(iend)
+
+
+  !!! Single Excitations !!!
+  do i=istart_cfg,iend_cfg
+    print *,"I=",i
+
+    ! if Seniority_range > 8 then
+    ! continue
+    ! else
+    ! cycle
+
+    Icfg(1,1) = psi_configuration(1,1,i)
+    Icfg(1,2) = psi_configuration(1,2,i)
+    starti = psi_config_data(i,1)
+    endi   = psi_config_data(i,2)
+
+    ! Returns all unique (checking the past) singly excited cfgs connected to I
+    Nalphas_Icfg = 0
+    ! TODO:
+    ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate
+    !call obtain_associated_alphaI(i, Icfg, alphas_Icfg, Nalphas_Icfg)
+    Nalphas_Icfg = NalphaIcfg_list(i)
+    alphas_Icfg(1:N_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg)
+
+    ! TODO : remove doubly excited for return
+    ! Here we do 2x the loop. One to count for the size of the matrix, then we compute.
+    do k = 1,Nalphas_Icfg
+      ! Now generate all singly excited with respect to a given alpha CFG
+      call obtain_connected_I_foralpha(i,alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors)
+
+      totcolsTKI = 0
+      rowsTKI = -1
+      do j = 1,nconnectedI
+        NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k))
+        NSOMOI = getNSOMO(connectedI_alpha(1,1,j))
+        p = excitationIds(1,j)
+        q = excitationIds(2,j)
+        extype = excitationTypes(j)
+        call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel)
+        ! for E_pp E_rs and E_ppE_rr case
+        if(p.EQ.q) then
+          NSOMOalpha = NSOMOI
+        endif
+        rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
+        colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+        totcolsTKI += colsikpq
+!        if(rowsTKI .LT. rowsikpq .AND. rowsTKI .NE. -1) then
+!          print *,">",j,"Something is wrong in sigma-vector", rowsTKI, rowsikpq, "(p,q)=",pmodel,qmodel,"ex=",extype,"na=",NSOMOalpha," nI=",NSOMOI
+!          !rowsTKI = rowsikpq
+!        else
+          rowsTKI = rowsikpq
+!        endif
+      enddo
+
+      allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF
+      ! Initialize the inegral container
+      ! dims : (totcolsTKI, nconnectedI)
+      allocate(GIJpqrs(totcolsTKI,nconnectedI))  ! gpqrs
+      allocate(TKIGIJ(n_st,rowsTKI,nconnectedI))  ! TKI * gpqrs
+
+      totcolsTKI = 0
+      do j = 1,nconnectedI
+        NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k))
+        NSOMOI = getNSOMO(connectedI_alpha(1,1,j))
+        p = excitationIds(1,j)
+        q = excitationIds(2,j)
+        extype = excitationTypes(j)
+        call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel)
+        rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
+        colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+        do m = 1,colsikpq
+          do l = 1,rowsTKI
+            do kk = 1,n_st
+              TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * psi_in(kk,idxs_connectedI_alpha(j)+m-1)
+            enddo
+          enddo
+        enddo
+        do m = 1,colsikpq
+          do l = 1,nconnectedI
+            ! <ij|kl> = (ik|jl)
+            moi = excitationIds(1,j) ! p
+            mok = excitationIds(2,j) ! q
+            moj = excitationIds(2,l) ! s
+            mol = excitationIds(1,l) ! r
+            if(moi.EQ.mok .AND. moj.EQ.mol)then
+              diagfac = diagfactors(j)
+              diagfac *= diagfactors(l)
+              !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac
+              GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = <ps,qr>
+            else
+              diagfac = diagfactors(j)*diagfactors(l)
+              !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac
+              GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = <ps,qr>
+              !endif
+            endif
+          enddo
+        enddo
+        totcolsTKI += colsikpq
+      enddo
+
+
+
+      ! Do big BLAS
+      ! TODO TKI, size(TKI,1)*size(TKI,2)
+      call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0,&
+          TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0,&
+          TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) )
+
+
+      ! Collect the result
+      totcolsTKI = 0
+      do j = 1,nconnectedI
+        NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k))
+        NSOMOI     = getNSOMO(connectedI_alpha(1,1,j))
+        p = excitationIds(1,j)
+        q = excitationIds(2,j)
+        extype = excitationTypes(j)
+        call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel)
+        rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1)
+        colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2)
+        do m = 1,colsikpq
+          do l = 1,rowsTKI
+            do kk = 1,n_st
+              psi_out(kk,idxs_connectedI_alpha(j)+m-1) = psi_out(kk,idxs_connectedI_alpha(j)+m-1) + &
+                AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * TKIGIJ(kk,l,j)
+            enddo
+          enddo
+        enddo
+        totcolsTKI += colsikpq
+      enddo
+
+      deallocate(TKI) ! coefficients of CSF
+      ! Initialize the inegral container
+      ! dims : (totcolsTKI, nconnectedI)
+      deallocate(GIJpqrs)  ! gpqrs
+      deallocate(TKIGIJ)  ! gpqrs
+
+    enddo ! loop over alphas
+  enddo ! loop over I
+  deallocate(connectedI_alpha)
+  deallocate(idxs_connectedI_alpha)
+  deallocate(excitationIds)
+  deallocate(excitationTypes)
+  deallocate(diagfactors)
+
+
+  ! Add the diagonal contribution
+  do i = 1,n_CSF
+    do kk=1,n_st
+      psi_out(kk,i) += diag_energies(i)*psi_in(kk,i)
+    enddo
+  enddo
+  call omp_set_max_active_levels(4)
+
+end subroutine calculate_sigma_vector_cfg_nst_naive_store
diff --git a/src/csf/tree_utils.c b/src/csf/tree_utils.c
index 1266b890..91bae8b2 100644
--- a/src/csf/tree_utils.c
+++ b/src/csf/tree_utils.c
@@ -1,3 +1,4 @@
+#include <assert.h>
 #include "tree_utils.h"
 
 void buildTree(Tree *bftree,
@@ -52,6 +53,7 @@ void buildTreeDriver(Tree *bftree, int NSOMO, int MS, int *NBF){
     int icpl  = 0; // keep track of the ith ms (cannot be -ve)
     int addr  = 0; // Counts the total BF's
 
+    assert(bftree->rootNode->addr == 0);
     buildTree(bftree, &(bftree->rootNode), isomo, izeros, icpl, NSOMO, MS);
 
     *NBF = bftree->rootNode->addr;
@@ -264,6 +266,8 @@ void genDetBasis(Tree *dettree, int Isomo, int MS, int *ndets){
     int NSOMO=0;
     getSetBits(Isomo, &NSOMO);
     genDetsDriver(dettree, NSOMO, MS, ndets);
+    // Closed shell case
+    if(NSOMO==0) (*ndets) = 1;
 
 }
 
@@ -311,3 +315,13 @@ void callBlasMatxMat(double *A, int rowA, int colA, double *B, int rowB, int col
             break;
     }
 }
+
+void printRealMatrix(double *orthoMatrix, int rows, int cols){
+  int i,j;
+  for(i=0;i<rows;++i){
+    for(j=0;j<cols;++j){
+      printf(" %3.5f ",orthoMatrix[i*cols + j]);
+    }
+    printf("\n");
+  }
+}
diff --git a/src/csf/tree_utils.h b/src/csf/tree_utils.h
index 5a788149..42fce59c 100644
--- a/src/csf/tree_utils.h
+++ b/src/csf/tree_utils.h
@@ -47,6 +47,7 @@ void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOM
 void getSetBits(int64_t n, int *nsetbits);
 void getOverlapMatrix(int64_t Isomo, int64_t MS, double **overlapMatrixptr, int *rows, int *cols, int *NSOMOout);
 void getOverlapMatrix_withDet(double *bftodetmatrixI, int rowsbftodetI, int colsbftodetI, int64_t Isomo, int64_t MS, double **overlapMatrixI, int *rowsI, int *colsI, int *NSOMO);
+void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix);
 void gramSchmidt(double *overlapMatrix, int rows, int cols, double *orthoMatrix);
 
 
diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f
index 2f3d7f80..4331b0e0 100644
--- a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f
+++ b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f
@@ -27,7 +27,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_d
   double precision,  intent(in)   :: H_jj(sze),Dress_jj(sze)
   double precision, intent(inout) :: u_in(sze,N_st_diag_in)
   double precision, intent(out)   :: energies(N_st)
-  external hcalc
+  external                        :: hcalc
 
   integer                        :: iter, N_st_diag
   integer                        :: i,j,k,l,m
@@ -207,7 +207,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_d
   enddo
   ! Normalize all states 
   do k=1,N_st_diag
-    call normalize(u_in(1,k),sze)
+    call normalize(u_in(:,k),sze)
   enddo
 
   ! Copy from the guess input "u_in" to the working vectors "U"
@@ -238,10 +238,10 @@ subroutine davidson_general_ext_rout(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_d
           call ortho_qr(U,size(U,1),sze,shift2)
           !    it does W = H U with W(sze,N_st_diag),U(sze,N_st_diag)
           !    where sze is the size of the vector, N_st_diag is the number of states 
-          call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag,sze)
+          call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag,sze)
           ! Compute then the DIAGONAL PART OF THE DRESSING 
           ! <i|W_k> += Dress_jj(i) * <i|U>
-          call dressing_diag_uv(W(1,shift+1),U(1,shift+1),Dress_jj,N_st_diag_in,sze)
+          call dressing_diag_uv(W(:,shift+1),U(:,shift+1),Dress_jj,N_st_diag_in,sze)
       else
          ! Already computed in update below
          continue
@@ -303,9 +303,9 @@ subroutine davidson_general_ext_rout(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_d
       ! --------------------------------------------------
 
       call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
+          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1))
       call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
+          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1))
 
       ! Compute residual vector and davidson step
       ! -----------------------------------------
@@ -319,7 +319,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_d
         enddo
 
         if (k <= N_st) then
-          residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
+          residual_norm(k) = u_dot_u(U(:,shift2+k),sze)
           to_print(1,k) = lambda(k) 
           to_print(2,k) = residual_norm(k)
         endif
diff --git a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f
index 884fd672..e59d21d1 100644
--- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f
+++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f
@@ -31,7 +31,8 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
   double precision, intent(inout) :: u_in(sze,N_st_diag)
   double precision, intent(out)   :: energies(N_st_diag)
   logical, intent(out)            :: converged
-  external hcalc
+
+  external                        :: hcalc
 
   double precision, allocatable   :: H_jj_tmp(:)
   ASSERT (N_st > 0)
@@ -224,7 +225,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
     u_in(k,k) = u_in(k,k) + 10.d0
   enddo
   do k=1,N_st_diag_in
-    call normalize(u_in(1,k),sze)
+    call normalize(u_in(:,k),sze)
   enddo
 
   do k=1,N_st_diag_in
@@ -248,10 +249,10 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
       if ((iter > 1).or.(itertot == 1)) then
         ! Compute |W_k> = \sum_i |i><i|H|u_k>
         ! -----------------------------------
-        call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag_in,sze)
+        call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag_in,sze)
         ! Compute then the DIAGONAL PART OF THE DRESSING 
         ! <i|W_k> += Dress_jj(i) * <i|U>
-        call dressing_diag_uv(W(1,shift+1),U(1,shift+1),Dress_jj,N_st_diag_in,sze)
+        call dressing_diag_uv(W(:,shift+1),U(:,shift+1),Dress_jj,N_st_diag_in,sze)
       else
          ! Already computed in update below
          continue
@@ -275,20 +276,20 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
 !
 !          call dgemm('T','N', N_st, N_st_diag_in, sze, 1.d0, &
 !            psi_coef, size(psi_coef,1), &
-!            U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+!            U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
 !
 !          call dgemm('N','N', sze, N_st_diag_in, N_st, 1.0d0, &
 !            Dressing_vec, size(Dressing_vec,1), s_tmp, size(s_tmp,1), &
-!            1.d0, W(1,shift+1), size(W,1))
+!            1.d0, W(:,shift+1), size(W,1))
 !
 !
 !          call dgemm('T','N', N_st, N_st_diag_in, sze, 1.d0, &
 !            Dressing_vec, size(Dressing_vec,1), &
-!            U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+!            U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
 !
 !          call dgemm('N','N', sze, N_st_diag_in, N_st, 1.0d0, &
 !            psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), &
-!            1.d0, W(1,shift+1), size(W,1))
+!            1.d0, W(:,shift+1), size(W,1))
 !
         endif
 
@@ -376,9 +377,9 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
       ! --------------------------------------------------
 
       call dgemm('N','N', sze, N_st_diag_in, shift2,                    &
-          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
+          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1))
       call dgemm('N','N', sze, N_st_diag_in, shift2,                    &
-          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
+          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1))
 
       ! Compute residual vector and davidson step
       ! -----------------------------------------
@@ -392,7 +393,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies
         enddo
 
         if (k <= N_st) then
-          residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
+          residual_norm(k) = u_dot_u(U(:,shift2+k),sze)
           to_print(1,k) = lambda(k) 
           to_print(2,k) = residual_norm(k)
         endif
diff --git a/src/dav_general_mat/dav_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_dressed_ext_rout.irp.f
index c3bfe91a..c045aa1a 100644
--- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f
+++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f
@@ -214,7 +214,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di
   enddo
   ! Normalize all states 
   do k=1,N_st_diag
-    call normalize(u_in(1,k),sze)
+    call normalize(u_in(:,k),sze)
   enddo
   ! Copy from the guess input "u_in" to the working vectors "U"
 
@@ -244,7 +244,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di
           call ortho_qr(U,size(U,1),sze,shift2)
           !    it does W = H U with W(sze,N_st_diag),U(sze,N_st_diag)
           !    where sze is the size of the vector, N_st_diag is the number of states 
-          call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag,sze)
+          call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag,sze)
       else
          ! Already computed in update below
          continue
@@ -268,20 +268,20 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di
           stop
 !          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
 !            psi_coef, size(psi_coef,1), &
-!            U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+!            U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
 !
 !          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
 !            dressing_vec, size(dressing_vec,1), s_tmp, size(s_tmp,1), &
-!            1.d0, W(1,shift+1), size(W,1))
+!            1.d0, W(:,shift+1), size(W,1))
 !
 !
 !          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
 !            dressing_vec, size(dressing_vec,1), &
-!            U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+!            U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
 !
 !          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
 !            psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), &
-!            1.d0, W(1,shift+1), size(W,1))
+!            1.d0, W(:,shift+1), size(W,1))
 
         endif
       endif
@@ -370,9 +370,9 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di
       ! --------------------------------------------------
 
       call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
+          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1))
       call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
+          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1))
 
       ! Compute residual vector and davidson step
       ! -----------------------------------------
@@ -386,7 +386,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di
         enddo
 
         if (k <= N_st) then
-          residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
+          residual_norm(k) = u_dot_u(U(:,shift2+k),sze)
           to_print(1,k) = lambda(k) 
           to_print(2,k) = residual_norm(k)
         endif
diff --git a/src/dav_general_mat/dav_ext_rout.irp.f b/src/dav_general_mat/dav_ext_rout.irp.f
index aee4ba09..a4c47c27 100644
--- a/src/dav_general_mat/dav_ext_rout.irp.f
+++ b/src/dav_general_mat/dav_ext_rout.irp.f
@@ -196,7 +196,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co
   enddo
   ! Normalize all states 
   do k=1,N_st_diag
-    call normalize(u_in(1,k),sze)
+    call normalize(u_in(:,k),sze)
   enddo
 
   ! Copy from the guess input "u_in" to the working vectors "U"
@@ -226,7 +226,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co
           call ortho_qr(U,size(U,1),sze,shift2)
           !    it does W = H U with W(sze,N_st_diag),U(sze,N_st_diag)
           !    where sze is the size of the vector, N_st_diag is the number of states 
-          call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag,sze)
+          call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag,sze)
       else
          ! Already computed in update below
          continue
@@ -288,9 +288,9 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co
       ! --------------------------------------------------
 
       call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
+          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1))
       call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
+          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1))
 
       ! Compute residual vector and davidson step
       ! -----------------------------------------
@@ -304,7 +304,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co
         enddo
 
         if (k <= N_st) then
-          residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
+          residual_norm(k) = u_dot_u(U(:,shift2+k),sze)
           to_print(1,k) = lambda(k) 
           to_print(2,k) = residual_norm(k)
         endif
diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f
index 39cb68bb..96775c50 100644
--- a/src/dav_general_mat/dav_general.irp.f
+++ b/src/dav_general_mat/dav_general.irp.f
@@ -206,7 +206,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
   enddo
   ! Normalize all states 
   do k=1,N_st_diag
-    call normalize(u_in(1,k),sze)
+    call normalize(u_in(:,k),sze)
   enddo
 
   ! Copy from the guess input "u_in" to the working vectors "U"
@@ -236,8 +236,8 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
           call ortho_qr(U,size(U,1),sze,shift2)
           call ortho_qr(U,size(U,1),sze,shift2)
 
-!            call H_S2_u_0_nstates_openmp(W(1,shift+1),U(1,shift+1),N_st_diag,sze)
-            call hpsi(W(1,shift+1),U(1,shift+1),N_st_diag,sze,h_mat)
+!            call H_S2_u_0_nstates_openmp(W(:,shift+1),U(:,shift+1),N_st_diag,sze)
+            call hpsi(W(:,shift+1),U(:,shift+1),N_st_diag,sze,h_mat)
       else
          ! Already computed in update below
          continue
@@ -299,9 +299,9 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
       ! --------------------------------------------------
 
       call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
+          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1))
       call dgemm('N','N', sze, N_st_diag, shift2,                    &
-          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
+          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1))
 
       ! Compute residual vector and davidson step
       ! -----------------------------------------
@@ -315,7 +315,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv
         enddo
 
         if (k <= N_st) then
-          residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
+          residual_norm(k) = u_dot_u(U(:,shift2+k),sze)
           to_print(1,k) = lambda(k) 
           to_print(2,k) = residual_norm(k)
         endif
diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f
new file mode 100644
index 00000000..659602a1
--- /dev/null
+++ b/src/davidson/diagonalization_hcfg.irp.f
@@ -0,0 +1,624 @@
+subroutine davidson_diag_h_cfg(dets_in,u_in,dim_in,energies,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Davidson diagonalization.
+  !
+  ! dets_in : bitmasks corresponding to determinants
+  !
+  ! u_in : guess coefficients on the various states. Overwritten
+  !   on exit
+  !
+  ! dim_in : leftmost dimension of u_in
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  END_DOC
+  integer, intent(in)            :: dim_in, sze, sze_csf, N_st, N_st_diag, Nint
+  integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
+  double precision, intent(inout) :: u_in(dim_in,N_st_diag)
+  double precision, intent(out)  :: energies(N_st_diag)
+  integer, intent(in)            :: dressing_state
+  logical, intent(out)           :: converged
+  double precision, allocatable  :: H_jj(:)
+
+  double precision, external     :: diag_H_mat_elem, diag_S_mat_elem
+  integer                        :: i,k
+  ASSERT (N_st > 0)
+  ASSERT (sze > 0)
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+  PROVIDE mo_two_e_integrals_in_map
+  allocate(H_jj(sze))
+
+  H_jj(1) = diag_h_mat_elem(dets_in(1,1,1),Nint)
+  !$OMP PARALLEL DEFAULT(NONE)                                       &
+      !$OMP  SHARED(sze,H_jj, dets_in,Nint)                    &
+      !$OMP  PRIVATE(i)
+  !$OMP DO SCHEDULE(static)
+  do i=2,sze
+    H_jj(i)  = diag_H_mat_elem(dets_in(1,1,i),Nint)
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  if (dressing_state > 0) then
+    do k=1,N_st
+      do i=1,sze
+        H_jj(i)  += u_in(i,k) * dressing_column_h(i,k)
+      enddo
+    enddo
+  endif
+
+  call davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged)
+  deallocate(H_jj)
+end
+
+
+subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag_in,Nint,dressing_state,converged)
+  use bitmasks
+  use mmap_module
+  implicit none
+  BEGIN_DOC
+  ! Davidson diagonalization with specific diagonal elements of the H matrix
+  !
+  ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson
+  !
+  ! dets_in : bitmasks corresponding to determinants
+  !
+  ! u_in : guess coefficients on the various states. Overwritten
+  !   on exit
+  !
+  ! dim_in : leftmost dimension of u_in
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > sze
+  !
+  END_DOC
+  integer, intent(in)            :: dim_in, sze, sze_csf, N_st, N_st_diag_in, Nint
+  integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
+  double precision,  intent(in)  :: H_jj(sze)
+  integer, intent(in)            :: dressing_state
+  double precision, intent(inout) :: u_in(dim_in,N_st_diag_in)
+  double precision, intent(out)  :: energies(N_st_diag_in)
+
+  integer                        :: iter, N_st_diag
+  integer                        :: i,j,k,l,m,kk,ii,ll
+  logical, intent(inout)         :: converged
+
+  double precision, external     :: u_dot_v, u_dot_u
+
+  integer                        :: k_pairs, kl
+
+  integer                        :: iter2, itertot
+  double precision, allocatable  :: y(:,:), h(:,:), lambda(:)
+  double precision, allocatable  :: s_tmp(:,:)
+  double precision               :: diag_h_mat_elem
+  double precision, allocatable  :: residual_norm(:)
+  character*(16384)              :: write_buffer
+  double precision               :: to_print(2,N_st)
+  double precision               :: cpu, wall
+  integer                        :: shift, shift2, itermax, istate
+  double precision               :: r1, r2, alpha
+  logical                        :: state_ok(N_st_diag_in*davidson_sze_max)
+  integer                        :: nproc_target
+  integer                        :: order(N_st_diag_in)
+  double precision               :: cmax
+  double precision, allocatable  :: U(:,:), U_csf(:,:), overlap(:,:)
+  double precision, allocatable  :: tmpU(:,:), tmpW(:,:)
+  double precision, pointer      :: W(:,:), W_csf(:,:)
+  logical                        :: disk_based
+  double precision               :: energy_shift(N_st_diag_in*davidson_sze_max)
+
+  include 'constants.include.F'
+
+  N_st_diag = N_st_diag_in
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda
+  if (N_st_diag*3 > sze) then
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_full to ', N_st_diag*3
+    stop -1
+  endif
+
+  itermax = max(2,min(davidson_sze_max, sze/N_st_diag))+1
+  itertot = 0
+
+  if (state_following) then
+    allocate(overlap(N_st_diag*itermax, N_st_diag*itermax))
+  else
+    allocate(overlap(1,1))  ! avoid 'if' for deallocate
+  endif
+  overlap = 0.d0
+
+  PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2 threshold_davidson_from_pt2
+
+  call write_time(6)
+  write(6,'(A)') ''
+  write(6,'(A)') 'Davidson Diagonalization'
+  write(6,'(A)') '------------------------'
+  write(6,'(A)') ''
+
+  ! Find max number of cores to fit in memory
+  ! -----------------------------------------
+
+  nproc_target = nproc
+  double precision :: rss
+  integer :: maxab
+  maxab = max(N_det_alpha_unique, N_det_beta_unique)+1
+
+  m=1
+  disk_based = .False.
+  call resident_memory(rss)
+  do
+    r1 = 8.d0 *                                   &! bytes
+         ( dble(sze)*(N_st_diag)                  &! U
+         + dble(sze_csf)*(N_st_diag*itermax)      &! U_csf
+         + dble(sze)*(N_st_diag)                  &! W
+         + dble(sze_csf)*(N_st_diag*itermax)      &! W_csf
+         + 3.0d0*(N_st_diag*itermax)**2           &! h,y,s_tmp
+         + 1.d0*(N_st_diag*itermax)               &! lambda
+         + 1.d0*(N_st_diag)                       &! residual_norm
+                                                   ! In H_u_0_nstates_zmq
+         + 2.d0*(N_st_diag*N_det)                 &! u_t, v_t, on collector
+         + 2.d0*(N_st_diag*N_det)                 &! u_t, v_t, on slave
+         + 0.5d0*maxab                            &! idx0 in H_u_0_nstates_openmp_work_*
+         + nproc_target *                         &! In OMP section
+           ( 1.d0*(N_int*maxab)                   &! buffer
+           + 3.5d0*(maxab) )                      &! singles_a, singles_b, doubles, idx
+         ) / 1024.d0**3
+
+    if (nproc_target == 0) then
+      call check_mem(r1,irp_here)
+      nproc_target = 1
+      exit
+    endif
+
+    if (r1+rss < qp_max_mem) then
+      exit
+    endif
+
+    if (itermax > 4) then
+      itermax = itermax - 1
+    else if (m==1.and.disk_based_davidson) then
+      m=0
+      disk_based = .True.
+      itermax = 6
+    else
+      nproc_target = nproc_target - 1
+    endif
+
+  enddo
+  nthreads_davidson = nproc_target
+  TOUCH nthreads_davidson
+  call write_int(6,N_st,'Number of states')
+  call write_int(6,N_st_diag,'Number of states in diagonalization')
+  call write_int(6,sze,'Number of determinants')
+  call write_int(6,sze_csf,'Number of CSFs')
+  call write_int(6,nproc_target,'Number of threads for diagonalization')
+  call write_double(6, r1, 'Memory(Gb)')
+  if (disk_based) then
+    print *, 'Using swap space to reduce RAM'
+  endif
+
+  !---------------
+
+  write(6,'(A)') ''
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = 'Iter'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//'       Energy        Residual '
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+
+
+  if (disk_based) then
+    ! Create memory-mapped files for W and S
+    type(c_ptr) :: ptr_w, ptr_s
+    integer :: fd_s, fd_w
+    call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),&
+        8, fd_w, .False., ptr_w)
+    call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/))
+  else
+    allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax))
+  endif
+
+  allocate(                                                          &
+      ! Large
+      U(sze,N_st_diag),                                              &
+      U_csf(sze_csf,N_st_diag*itermax),                              &
+
+      ! Small
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      s_tmp(N_st_diag*itermax,N_st_diag*itermax),                    &
+      residual_norm(N_st_diag),                                      &
+      lambda(N_st_diag*itermax))
+
+
+  h = 0.d0
+  U = 0.d0
+  y = 0.d0
+  s_tmp = 0.d0
+
+
+  ASSERT (N_st > 0)
+  ASSERT (N_st_diag >= N_st)
+  ASSERT (sze > 0)
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+
+  ! Davidson iterations
+  ! ===================
+
+  converged = .False.
+  call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),U_csf(1,1))
+  do k=N_st+1,N_st_diag
+    do i=1,sze_csf
+        call random_number(r1)
+        call random_number(r2)
+        r1 = dsqrt(-2.d0*dlog(r1))
+        r2 = dtwo_pi*r2
+        U_csf(i,k) = r1*dcos(r2) * u_csf(i,k-N_st)
+    enddo
+    U_csf(k,k) = u_csf(k,k) + 10.d0
+  enddo
+  do k=1,N_st_diag
+    call normalize(U_csf(1,k),sze_csf)
+  enddo
+  call convertWFfromCSFtoDET(N_st_diag,U_csf(1,1),U(1,1))
+
+  do while (.not.converged)
+    itertot = itertot+1
+    if (itertot == 8) then
+      exit
+    endif
+
+    do iter=1,itermax-1
+
+      shift  = N_st_diag*(iter-1)
+      shift2 = N_st_diag*iter
+
+!      if ((iter > 1).or.(itertot == 1)) then
+        ! Compute |W_k> = \sum_i |i><i|H|u_k>
+        ! -----------------------------------
+
+        !call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift+1),U)
+        PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals
+        if ((sze > 100000).and.distributed_davidson) then
+            !call H_u_0_nstates_zmq   (W,U,N_st_diag,sze)
+            allocate(tmpW(N_st_diag,sze_csf))
+            allocate(tmpU(N_st_diag,sze_csf))
+            do kk=1,N_st_diag
+              do ii=1,sze_csf
+                tmpU(kk,ii) = U_csf(ii,shift+kk)
+              enddo
+            enddo
+            call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1)
+            do kk=1,N_st_diag
+              do ii=1,sze_csf
+                W_csf(ii,shift+kk)=tmpW(kk,ii)
+              enddo
+            enddo
+            deallocate(tmpW)
+            deallocate(tmpU)
+        else
+            !call H_u_0_nstates_openmp(W,U,N_st_diag,sze)
+            allocate(tmpW(N_st_diag,sze_csf))
+            allocate(tmpU(N_st_diag,sze_csf))
+            do kk=1,N_st_diag
+              do ii=1,sze_csf
+                tmpU(kk,ii) = U_csf(ii,shift+kk)
+              enddo
+            enddo
+            !tmpU     =0.0d0
+            !tmpU(1,2)=1.0d0
+            double precision               :: irp_rdtsc
+            double precision               :: ticks_0, ticks_1
+            integer*8                      :: irp_imax
+            irp_imax = 1
+            !ticks_0 = irp_rdtsc()
+            call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1)
+            !ticks_1 = irp_rdtsc()
+            !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----"
+            do kk=1,N_st_diag
+              do ii=1,sze_csf
+                W_csf(ii,shift+kk)=tmpW(kk,ii)
+              enddo
+            enddo
+
+            !U_csf = 0.0d0
+            !U_csf(1,1) = 1.0d0
+            !u_in = 0.0d0
+            !call convertWFfromCSFtoDET(N_st_diag,tmpU,U2)
+            !call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze)
+            !call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1))
+            !do i=1,sze_csf
+            !  print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+            !  if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then
+            !    print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1))
+            !  endif
+            !end do
+            !stop
+            deallocate(tmpW)
+            deallocate(tmpU)
+        endif
+!      else
+!         ! Already computed in update below
+!         continue
+!      endif
+
+      if (dressing_state > 0) then
+
+        if (N_st == 1) then
+
+          l = dressed_column_idx(1)
+          double precision :: f
+          f = 1.0d0/psi_coef(l,1)
+          do istate=1,N_st_diag
+            do i=1,sze
+              W(i,istate) += dressing_column_h(i,1) *f * U(l,istate)
+              W(l,istate) += dressing_column_h(i,1) *f * U(i,istate)
+            enddo
+
+          enddo
+
+        else
+
+          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
+            psi_coef, size(psi_coef,1), &
+            U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+
+          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
+            dressing_column_h, size(dressing_column_h,1), s_tmp, size(s_tmp,1), &
+            1.d0, W(1,1), size(W,1))
+
+
+          call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, &
+            dressing_column_h, size(dressing_column_h,1), &
+            U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1))
+
+          call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, &
+            psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), &
+            1.d0, W(1,1), size(W,1))
+
+        endif
+      endif
+
+      !call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1))
+
+      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
+      ! -------------------------------------------
+
+      call dgemm('T','N', shift2, shift2, sze_csf,                       &
+          1.d0, U_csf, size(U_csf,1), W_csf, size(W_csf,1),                          &
+          0.d0, h, size(h,1))
+      call dgemm('T','N', shift2, shift2, sze_csf,                       &
+          1.d0, U_csf, size(U_csf,1), U_csf, size(U_csf,1),                          &
+          0.d0, s_tmp, size(s_tmp,1))
+
+      ! Diagonalize h
+      ! ---------------
+
+       integer :: lwork, info
+       double precision, allocatable :: work(:)
+
+       y = h
+       lwork = -1
+       allocate(work(1))
+       call dsygv(1,'V','U',shift2,y,size(y,1), &
+          s_tmp,size(s_tmp,1), lambda, work,lwork,info)
+       lwork = int(work(1))
+       deallocate(work)
+       allocate(work(lwork))
+       call dsygv(1,'V','U',shift2,y,size(y,1), &
+          s_tmp,size(s_tmp,1), lambda, work,lwork,info)
+       deallocate(work)
+       if (info /= 0) then
+         stop 'DSYGV Diagonalization failed'
+       endif
+
+      ! Compute Energy for each eigenvector
+      ! -----------------------------------
+
+      call dgemm('N','N',shift2,shift2,shift2,                       &
+          1.d0, h, size(h,1), y, size(y,1),                          &
+          0.d0, s_tmp, size(s_tmp,1))
+
+      call dgemm('T','N',shift2,shift2,shift2,                       &
+          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+          0.d0, h, size(h,1))
+
+      do k=1,shift2
+        lambda(k) = h(k,k)
+      enddo
+
+      if (state_following) then
+
+        overlap = -1.d0
+        do i=1,shift2
+          do k=1,shift2
+            overlap(k,i) = dabs(y(k,i))
+          enddo
+        enddo
+        do k=1,N_st
+          cmax = -1.d0
+          do i=1,N_st
+            if (overlap(i,k) > cmax) then
+              cmax = overlap(i,k)
+              order(k) = i
+            endif
+          enddo
+          do i=1,N_st_diag
+            overlap(order(k),i) = -1.d0
+          enddo
+        enddo
+        overlap = y
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            y(1:shift2,k) = overlap(1:shift2,l)
+          endif
+        enddo
+        do k=1,N_st
+          overlap(k,1) = lambda(k)
+        enddo
+
+      endif
+
+
+      ! Express eigenvectors of h in the csf basis
+      ! ------------------------------------------
+
+      call dgemm('N','N', sze_csf, N_st_diag, shift2,                    &
+          1.d0, U_csf, size(U_csf,1), y, size(y,1), 0.d0, U_csf(1,shift2+1), size(U_csf,1))
+      call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift2+1),U)
+
+      call dgemm('N','N', sze_csf, N_st_diag, shift2,                    &
+          1.d0, W_csf, size(W_csf,1), y, size(y,1), 0.d0, W_csf(1,shift2+1), size(W_csf,1))
+      call convertWFfromCSFtoDET(N_st_diag,W_csf(1,shift2+1),W)
+
+      ! Compute residual vector and davidson step
+      ! -----------------------------------------
+
+      !if (without_diagonal) then
+      !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
+      do k=1,N_st_diag
+        do i=1,sze
+           U(i,k) =  (lambda(k) * U(i,k) - W(i,k) )      &
+              /max(H_jj(i) - lambda (k),1.d-2)
+        enddo
+      enddo
+      !$OMP END PARALLEL DO
+      !else
+      !  !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
+      !  do k=1,N_st_diag
+      !    do i=1,sze
+      !       U(i,k) =  (lambda(k) * U(i,k) - W(i,k) )  
+      !    enddo
+      !  enddo
+      !  !$OMP END PARALLEL DO
+      !endif
+
+      do k=1,N_st
+        residual_norm(k) = u_dot_u(U(1,k),sze)
+        to_print(1,k) = lambda(k) + nuclear_repulsion
+        to_print(2,k) = residual_norm(k)
+      enddo
+      call convertWFfromDETtoCSF(N_st_diag,U,U_csf(1,shift2+1))
+
+      if ((itertot>1).and.(iter == 1)) then
+        !don't print
+        continue
+      else
+        write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st)
+      endif
+
+      ! Check convergence
+      if (iter > 1) then
+        if (threshold_davidson_from_pt2) then
+          converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2
+        else
+          converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson
+        endif
+      endif
+
+      do k=1,N_st
+        if (residual_norm(k) > 1.d8) then
+          print *, 'Davidson failed'
+          stop -1
+        endif
+      enddo
+      if (converged) then
+        exit
+      endif
+
+      logical, external :: qp_stop
+      if (qp_stop()) then
+        converged = .True.
+        exit
+      endif
+
+
+    enddo
+
+    ! Re-contract U
+    ! -------------
+
+    call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0,      &
+        W_csf, size(W_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
+    do k=1,N_st_diag
+      do i=1,sze_csf
+        W_csf(i,k) = u_in(i,k)
+      enddo
+    enddo
+    call convertWFfromCSFtoDET(N_st_diag,W_csf,W)
+
+    call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0,      &
+        U_csf, size(U_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
+    do k=1,N_st_diag
+      do i=1,sze_csf
+        U_csf(i,k) = u_in(i,k)
+      enddo
+    enddo
+    call convertWFfromCSFtoDET(N_st_diag,U_csf,U)
+
+  enddo
+
+
+  call nullify_small_elements(sze,N_st_diag,U,size(U,1),threshold_davidson_pt2)
+  do k=1,N_st_diag
+    do i=1,sze
+      u_in(i,k) = U(i,k)
+    enddo
+  enddo
+
+  do k=1,N_st_diag
+    energies(k) = lambda(k)
+  enddo
+  write_buffer = '======'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ ==========='
+  enddo
+  write(6,'(A)') trim(write_buffer)
+  write(6,'(A)') ''
+  call write_time(6)
+
+  if (disk_based)then
+    ! Remove temp files
+    integer, external :: getUnitAndOpen
+    call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 8, fd_w, ptr_w )
+    fd_w = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_w','r')
+    close(fd_w,status='delete')
+  else
+    deallocate(W, W_csf)
+  endif
+
+  deallocate (                                                       &
+      residual_norm,                                                 &
+      U, U_csf, overlap,                                             &
+      h, y, s_tmp,                                                   &
+      lambda                                                         &
+      )
+  FREE nthreads_davidson
+end
+
+
+
+
+
+
+
diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f
index 89e18d65..3020ecd8 100644
--- a/src/davidson/diagonalization_hcsf_dressed.irp.f
+++ b/src/davidson/diagonalization_hcsf_dressed.irp.f
@@ -89,7 +89,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
   double precision, intent(out)  :: energies(N_st_diag_in)
 
   integer                        :: iter, N_st_diag
-  integer                        :: i,j,k,l,m
+  integer                        :: i,j,k,l,m,kk
   logical, intent(inout)         :: converged
 
   double precision, external     :: u_dot_v, u_dot_u
diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f
index 68f3420d..0fd9f091 100644
--- a/src/davidson/diagonalization_hs2_dressed.irp.f
+++ b/src/davidson/diagonalization_hs2_dressed.irp.f
@@ -154,7 +154,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
   character*(16384)              :: write_buffer
   double precision               :: to_print(3,N_st)
   double precision               :: cpu, wall
-  integer                        :: shift, shift2, itermax, istate
+  integer                        :: shift, shift2, itermax, istate, ii
   double precision               :: r1, r2, alpha
   logical                        :: state_ok(N_st_diag_in*davidson_sze_max)
   integer                        :: nproc_target
@@ -361,7 +361,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
         if ((sze > 100000).and.distributed_davidson) then
             call H_S2_u_0_nstates_zmq   (W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze)
         else
+            double precision               :: irp_rdtsc
+            double precision               :: ticks_0, ticks_1
+            integer*8                      :: irp_imax
+            irp_imax = 1
+            !ticks_0 = irp_rdtsc()
             call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze)
+            !ticks_1 = irp_rdtsc()
+            !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----"
         endif
         S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag))
       else
diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f
index 42f4fcdb..a34637a0 100644
--- a/src/davidson/diagonalize_ci.irp.f
+++ b/src/davidson/diagonalize_ci.irp.f
@@ -1,9 +1,20 @@
+BEGIN_PROVIDER [ character*(3), sigma_vector_algorithm ]
+ implicit none
+ BEGIN_DOC
+ ! If 'det', use <Psi_det|H|Psi_det> in Davidson
+ !
+ ! If 'cfg', use <Psi_csf|H|Psi_csf> in Davidson
+ END_DOC
+ sigma_vector_algorithm = 'det'
+ !sigma_vector_algorithm = 'cfg'
+END_PROVIDER
 
 BEGIN_PROVIDER [ double precision, CI_energy, (N_states_diag) ]
   implicit none
   BEGIN_DOC
   ! :c:data:`n_states` lowest eigenvalues of the |CI| matrix
   END_DOC
+  PROVIDE distributed_davidson
 
   integer                        :: j
   character*(8)                  :: st
@@ -61,7 +72,7 @@ END_PROVIDER
    if (diag_algorithm == 'Davidson') then
 
      if (do_csf) then
-!      if (sigma_vector_algorithm == 'det') then
+       if (sigma_vector_algorithm == 'det') then
          call davidson_diag_H_csf (psi_det,                  &
                                    CI_eigenvectors,          &
                                    size(CI_eigenvectors,1),  &
@@ -73,14 +84,14 @@ END_PROVIDER
                                    N_int,                    &
                                    0,                        &
                                    converged)
-!       else if (sigma_vector_algorithm == 'cfg') then
-!       call davidson_diag_H_csf(psi_det,CI_eigenvectors, &
-!         size(CI_eigenvectors,1),CI_electronic_energy,               &
-!         N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged)
-!       else
-!         print *, irp_here
-!         stop 'bug'
-!      endif
+       else if (sigma_vector_algorithm == 'cfg') then
+          call davidson_diag_H_cfg(psi_det,CI_eigenvectors, &
+          size(CI_eigenvectors,1),CI_electronic_energy,               &
+          N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged)
+       else
+          print *, irp_here
+          stop 'bug'
+       endif
      else
        call davidson_diag_HS2(psi_det,                  &
                               CI_eigenvectors,          &
diff --git a/src/determinants/EZFIO.cfg b/src/determinants/EZFIO.cfg
index b9c736a9..c6323cd0 100644
--- a/src/determinants/EZFIO.cfg
+++ b/src/determinants/EZFIO.cfg
@@ -136,9 +136,8 @@ doc: If |true|, discard any Slater determinants with an interaction smaller than
 interface: ezfio,provider,ocaml
 default: False
 
-
-[thresh_save_wf]
+[save_threshold]
 type: Threshold
-doc: Thresholds to save wave function
+doc: Cut-off to apply to the CI coefficients when the wave function is stored
 interface: ezfio,provider,ocaml
-default: 1.e-15
+default: 1.e-14
diff --git a/src/mo_localization/EZFIO.cfg b/src/mo_localization/EZFIO.cfg
new file mode 100644
index 00000000..129fb2ea
--- /dev/null
+++ b/src/mo_localization/EZFIO.cfg
@@ -0,0 +1,54 @@
+[localization_method]
+type: character*(32)
+doc: Method for the orbital localization. boys : Foster-Boys, pipek : Pipek-Mezey.
+interface: ezfio,provider,ocaml
+default: boys
+
+[localization_max_nb_iter]
+type: integer
+doc: Maximal number of iterations for the orbital localization.
+interface: ezfio,provider,ocaml
+default: 1000
+
+[localization_use_hessian]
+type: logical
+doc: If true, it uses the trust region algorithm with the gradient and the diagonal of the hessian. Else it computes the rotation between each pair of MOs that should be applied to maximize/minimize the localization criterion. The last option requieres a way smaller amount of memory but is not easy to converge. 
+interface: ezfio,provider,ocaml
+default: true
+
+[security_mo_class]
+type: logical
+doc: If true, call abort if the number of active orbital or the number of core + active orbitals is equal to the number of molecular orbitals, else uses the actual mo_class. It is a security if you forget to set the mo_class before the localization.
+interface: ezfio,provider,ocaml
+default: true
+
+[thresh_loc_max_elem_grad]
+type: double precision
+doc: Threshold for the convergence, the localization exits when the largest element in the gradient is smaller than thresh_localization_max_elem_grad.
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[kick_in_mos]
+type: logical
+doc: If True, it applies a rotation of an angle angle_pre_rot between the MOs of a same mo_class before the localization.
+interface: ezfio,provider,ocaml
+default: true
+
+[angle_pre_rot]
+type: double precision
+doc: To define the angle for the rotation of the MOs before the localization (in rad).
+interface: ezfio,provider,ocaml
+default: 0.1
+
+[sort_mos_by_e]
+type: logical
+doc: If True, the MOs are sorted using the diagonal elements of the Fock matrix.
+interface: ezfio,provider,ocaml
+default: false
+
+[debug_hf]
+type: logical
+doc: If True, prints the HF energy before/after the different steps of the localization. Only for debugging.
+interface: ezfio,provider,ocaml
+default: false
+
diff --git a/src/mo_localization/NEED b/src/mo_localization/NEED
new file mode 100644
index 00000000..e9d51654
--- /dev/null
+++ b/src/mo_localization/NEED
@@ -0,0 +1,2 @@
+hartree_fock
+utils_trust_region
diff --git a/src/mo_localization/README.md b/src/mo_localization/README.md
new file mode 100644
index 00000000..a142ec20
--- /dev/null
+++ b/src/mo_localization/README.md
@@ -0,0 +1,108 @@
+# mo_localization
+
+Some parameters can be changed with qp edit in the mo_localization section
+(cf below). Similarly for the trust region parameters in the 
+utils_trust_region section. The localization without the trust region 
+is not available for the moment.  
+
+The irf.f files can be generated from the org ones using emacs. 
+If you modify the .org files, don't forget to do (you need emacs):  
+``` 
+./TANGLE_org_mode.sh  
+ninja  
+```  
+
+# Orbital localisation
+To localize the MOs:  
+```
+qp run localization  
+```
+After that the ezfio directory contains the localized MOs  
+ 
+But to do so the mo_class must be defined before, run 
+```
+qp set_mo_class -q
+```
+for more information or  
+```
+qp set_mo_class -c [] -a [] -v [] -i [] -d [] 
+```
+to set the mo classes. We don't care about the name of the   
+mo classes. The algorithm just localizes all the MOs of  
+a given class between them, for all the classes, except the deleted MOs.  
+
+If you just on kind of mo class to localize all the MOs between them  
+you have to put:
+```
+qp set mo_localization security_mo_class false
+```
+
+Before the localization, a kick is done for each mo class  
+(except the deleted ones) to break the MOs. This is done by   
+doing a rotation between the MOs.
+This feature can be removed by setting:
+```
+qp set mo_localization kick_in_mos false
+```
+and the default angle for the rotation can be changed with:
+```
+qp set mo_localization angle_pre_rot 1e-3 # or something else
+```
+
+After the localization, the MOs of each class (except the deleted ones)  
+can be sorted between them using the diagonal elements of  
+the fock matrix with:
+```
+qp set mo_localization sort_mos_by_e true
+```
+
+You can check the Hartree-Fock energy before/during/after the localization  
+by putting (only for debugging):
+```
+qp set mo_localization debug_hf true 
+```
+
+## Foster-Boys & Pipek-Mezey
+Foster-Boys:  
+``` 
+qp set mo_localization localization_method boys 
+``` 
+ 
+Pipek-Mezey:  
+``` 
+qp set mo_localization localization_method pipek 
+``` 
+
+# Break the spatial symmetry of the MOs
+To break the spatial symmetry of the MOs:   
+```
+qp run break_spatial_sym
+```
+The default angle for the rotations is too big for this kind of
+application, a value between 1e-3 and 1e-6 should break the spatial
+symmetry with just a small change in the energy:
+```
+qp set mo_localization angle_pre_rot 1e-3
+``` 
+
+# With or without hessian + trust region
+With hessian +  trust region
+```
+qp set mo_localization localisation_use_hessian true
+```
+It uses the trust region algorithm with the diagonal of the hessian of the
+localization criterion with respect to the MO rotations.
+
+Without the hessian and the trust region
+```
+qp set mo_localization localisation_use_hessian false
+```
+By doing so it does not require to store the hessian but the
+convergence is not easy, in particular for virtual MOs.
+It seems that it not possible to converge with Pipek-Mezey
+localization with this approach.
+
+# Further improvements: 
+- Cleaner repo 
+- Correction of the errors in the documentations 
+- option with/without trust region 
diff --git a/src/mo_localization/TANGLE_org_mode.sh b/src/mo_localization/TANGLE_org_mode.sh
new file mode 100755
index 00000000..059cbe7d
--- /dev/null
+++ b/src/mo_localization/TANGLE_org_mode.sh
@@ -0,0 +1,7 @@
+#!/bin/sh   
+
+list='ls *.org'
+for element in $list    
+do   
+		emacs --batch $element -f org-babel-tangle
+done
diff --git a/src/mo_localization/break_spatial_sym.irp.f b/src/mo_localization/break_spatial_sym.irp.f
new file mode 100644
index 00000000..6a1003df
--- /dev/null
+++ b/src/mo_localization/break_spatial_sym.irp.f
@@ -0,0 +1,42 @@
+! ! A small program to break the spatial symmetry of the MOs.
+
+! ! You have to defined your MO classes or set security_mo_class to false
+! ! with:
+! ! qp set orbital_optimization security_mo_class false
+
+! ! The default angle for the rotations is too big for this kind of
+! ! application, a value between 1e-3 and 1e-6 should break the spatial
+! ! symmetry with just a small change in the energy. 
+
+
+program break_spatial_sym
+
+  !BEGIN_DOC
+  ! Break the symmetry of the MOs with a rotation
+  !END_DOC
+
+  implicit none
+
+  kick_in_mos = .True.
+  TOUCH kick_in_mos
+
+  print*, 'Security mo_class:', security_mo_class
+
+  ! The default mo_classes are setted only if the MOs to localize are not specified
+  if (security_mo_class .and. (dim_list_act_orb == mo_num .or. &
+      dim_list_core_orb + dim_list_act_orb == mo_num)) then
+
+    print*, 'WARNING'
+    print*, 'You must set different mo_class with qp set_mo_class'
+    print*, 'If you want to kick all the orbitals:'
+    print*, 'qp set orbital_optimization security_mo_class false'
+    print*, ''
+    print*, 'abort'
+
+    call abort
+  
+  endif
+  
+  call apply_pre_rotation
+  
+end
diff --git a/src/mo_localization/break_spatial_sym.org b/src/mo_localization/break_spatial_sym.org
new file mode 100644
index 00000000..5995b138
--- /dev/null
+++ b/src/mo_localization/break_spatial_sym.org
@@ -0,0 +1,43 @@
+! A small program to break the spatial symmetry of the MOs.
+
+! You have to defined your MO classes or set security_mo_class to false
+! with:
+! qp set orbital_optimization security_mo_class false
+
+! The default angle for the rotations is too big for this kind of
+! application, a value between 1e-3 and 1e-6 should break the spatial
+! symmetry with just a small change in the energy. 
+
+#+BEGIN_SRC f90 :comments org :tangle break_spatial_sym.irp.f
+program break_spatial_sym
+
+  !BEGIN_DOC
+  ! Break the symmetry of the MOs with a rotation
+  !END_DOC
+
+  implicit none
+
+  kick_in_mos = .True.
+  TOUCH kick_in_mos
+
+  print*, 'Security mo_class:', security_mo_class
+
+  ! The default mo_classes are setted only if the MOs to localize are not specified
+  if (security_mo_class .and. (dim_list_act_orb == mo_num .or. &
+      dim_list_core_orb + dim_list_act_orb == mo_num)) then
+
+    print*, 'WARNING'
+    print*, 'You must set different mo_class with qp set_mo_class'
+    print*, 'If you want to kick all the orbitals:'
+    print*, 'qp set orbital_optimization security_mo_class false'
+    print*, ''
+    print*, 'abort'
+
+    call abort
+  
+  endif
+  
+  call apply_pre_rotation
+  
+end
+#+END_SRC
diff --git a/src/mo_localization/debug_gradient_loc.irp.f b/src/mo_localization/debug_gradient_loc.irp.f
new file mode 100644
index 00000000..a0e5432d
--- /dev/null
+++ b/src/mo_localization/debug_gradient_loc.irp.f
@@ -0,0 +1,62 @@
+program debug_gradient_loc
+
+  !BEGIN_DOC
+  ! Check if the gradient is correct
+  !END_DOC
+
+  implicit none
+
+  integer :: list_size, n
+  integer, allocatable :: list(:)
+  double precision, allocatable :: v_grad(:), v_grad2(:)
+  double precision :: norm, max_elem, threshold, max_error
+  integer :: i, nb_error
+
+  threshold = 1d-12
+
+  list = list_act
+  list_size = dim_list_act_orb
+
+  n = list_size*(list_size-1)/2
+  
+  allocate(v_grad(n),v_grad2(n))
+
+  if (localization_method == 'boys') then
+    print*,'Foster-Boys'
+    call gradient_FB(n,list_size,list,v_grad,max_elem,norm)
+    call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm)
+  elseif (localization_method == 'pipek') then
+    print*,'Pipek-Mezey'
+    call gradient_PM(n,list_size,list,v_grad,max_elem,norm)
+    call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) 
+  else
+    print*,'Unknown localization_method, please select boys or pipek'
+    call abort
+  endif
+ 
+  do i = 1, n
+    print*,i,v_grad(i)
+  enddo
+
+  v_grad = v_grad - v_grad2
+
+  nb_error = 0
+  max_elem = 0d0
+
+  do i = 1, n
+    if (dabs(v_grad(i)) > threshold) then
+      print*,v_grad(i)
+      nb_error = nb_error + 1
+      if (dabs(v_grad(i)) > max_elem) then
+        max_elem = v_grad(i)
+      endif
+    endif
+  enddo
+
+  print*,'Threshold error', threshold
+  print*, 'Nb error', nb_error
+  print*,'Max error', max_elem
+
+  deallocate(v_grad,v_grad2)
+ 
+end
diff --git a/src/mo_localization/debug_gradient_loc.org b/src/mo_localization/debug_gradient_loc.org
new file mode 100644
index 00000000..1fcb7fca
--- /dev/null
+++ b/src/mo_localization/debug_gradient_loc.org
@@ -0,0 +1,64 @@
+#+BEGIN_SRC f90 :comments org :tangle debug_gradient_loc.irp.f
+program debug_gradient_loc
+
+  !BEGIN_DOC
+  ! Check if the gradient is correct
+  !END_DOC
+
+  implicit none
+
+  integer :: list_size, n
+  integer, allocatable :: list(:)
+  double precision, allocatable :: v_grad(:), v_grad2(:)
+  double precision :: norm, max_elem, threshold, max_error
+  integer :: i, nb_error
+
+  threshold = 1d-12
+
+  list = list_act
+  list_size = dim_list_act_orb
+
+  n = list_size*(list_size-1)/2
+  
+  allocate(v_grad(n),v_grad2(n))
+
+  if (localization_method == 'boys') then
+    print*,'Foster-Boys'
+    call gradient_FB(n,list_size,list,v_grad,max_elem,norm)
+    call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm)
+  elseif (localization_method == 'pipek') then
+    print*,'Pipek-Mezey'
+    call gradient_PM(n,list_size,list,v_grad,max_elem,norm)
+    call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) 
+  else
+    print*,'Unknown localization_method, please select boys or pipek'
+    call abort
+  endif
+ 
+  do i = 1, n
+    print*,i,v_grad(i)
+  enddo
+
+  v_grad = v_grad - v_grad2
+
+  nb_error = 0
+  max_elem = 0d0
+
+  do i = 1, n
+    if (dabs(v_grad(i)) > threshold) then
+      print*,v_grad(i)
+      nb_error = nb_error + 1
+      if (dabs(v_grad(i)) > max_elem) then
+        max_elem = v_grad(i)
+      endif
+    endif
+  enddo
+
+  print*,'Threshold error', threshold
+  print*, 'Nb error', nb_error
+  print*,'Max error', max_elem
+
+  deallocate(v_grad,v_grad2)
+ 
+end
+#+END_SRC
diff --git a/src/mo_localization/debug_hessian_loc.irp.f b/src/mo_localization/debug_hessian_loc.irp.f
new file mode 100644
index 00000000..bfb1cbbc
--- /dev/null
+++ b/src/mo_localization/debug_hessian_loc.irp.f
@@ -0,0 +1,62 @@
+program debug_hessian_loc
+
+  !BEGIN_DOC
+  ! Check if the hessian is correct
+  !END_DOC
+  
+  implicit none
+
+  integer :: list_size, n
+  integer, allocatable :: list(:)
+  double precision, allocatable :: H(:,:), H2(:,:)
+  double precision :: threshold, max_error, max_elem
+  integer :: i, nb_error
+
+  threshold = 1d-12
+
+  list = list_act
+  list_size = dim_list_act_orb
+
+  n = list_size*(list_size-1)/2
+  
+  allocate(H(n,n),H2(n,n))
+
+  if (localization_method == 'boys') then
+    print*,'Foster-Boys'
+    call hessian_FB(n,list_size,list,H)
+    call hessian_FB_omp(n,list_size,list,H2)
+  elseif(localization_method == 'pipek') then
+    print*,'Pipek-Mezey'
+    call hessian_PM(n,list_size,list,H)
+    call hessian_PM(n,list_size,list,H2)
+  else
+    print*,'Unknown localization_method, please select boys or pipek'
+    call abort
+  endif
+ 
+  do i = 1, n
+    print*,i,H(i,i)
+  enddo
+
+  H = H - H2
+
+  nb_error = 0
+  max_elem = 0d0
+
+  do i = 1, n
+    if (dabs(H(i,i)) > threshold) then
+      print*,H(i,i)
+      nb_error = nb_error + 1
+      if (dabs(H(i,i)) > max_elem) then
+        max_elem = H(i,i)
+      endif
+    endif
+  enddo
+
+  print*,'Threshold error', threshold
+  print*, 'Nb error', nb_error
+  print*,'Max error', max_elem
+
+  deallocate(H,H2)
+  
+end
diff --git a/src/mo_localization/debug_hessian_loc.org b/src/mo_localization/debug_hessian_loc.org
new file mode 100644
index 00000000..bc23818c
--- /dev/null
+++ b/src/mo_localization/debug_hessian_loc.org
@@ -0,0 +1,64 @@
+#+BEGIN_SRC f90 :comments org :tangle debug_hessian_loc.irp.f
+program debug_hessian_loc
+
+  !BEGIN_DOC
+  ! Check if the hessian is correct
+  !END_DOC
+  
+  implicit none
+
+  integer :: list_size, n
+  integer, allocatable :: list(:)
+  double precision, allocatable :: H(:,:), H2(:,:)
+  double precision :: threshold, max_error, max_elem
+  integer :: i, nb_error
+
+  threshold = 1d-12
+
+  list = list_act
+  list_size = dim_list_act_orb
+
+  n = list_size*(list_size-1)/2
+  
+  allocate(H(n,n),H2(n,n))
+
+  if (localization_method == 'boys') then
+    print*,'Foster-Boys'
+    call hessian_FB(n,list_size,list,H)
+    call hessian_FB_omp(n,list_size,list,H2)
+  elseif(localization_method == 'pipek') then
+    print*,'Pipek-Mezey'
+    call hessian_PM(n,list_size,list,H)
+    call hessian_PM(n,list_size,list,H2)
+  else
+    print*,'Unknown localization_method, please select boys or pipek'
+    call abort
+  endif
+ 
+  do i = 1, n
+    print*,i,H(i,i)
+  enddo
+
+  H = H - H2
+
+  nb_error = 0
+  max_elem = 0d0
+
+  do i = 1, n
+    if (dabs(H(i,i)) > threshold) then
+      print*,H(i,i)
+      nb_error = nb_error + 1
+      if (dabs(H(i,i)) > max_elem) then
+        max_elem = H(i,i)
+      endif
+    endif
+  enddo
+
+  print*,'Threshold error', threshold
+  print*, 'Nb error', nb_error
+  print*,'Max error', max_elem
+
+  deallocate(H,H2)
+  
+end
+#+END_SRC
diff --git a/src/mo_localization/kick_the_mos.irp.f b/src/mo_localization/kick_the_mos.irp.f
new file mode 100644
index 00000000..3ca90fb5
--- /dev/null
+++ b/src/mo_localization/kick_the_mos.irp.f
@@ -0,0 +1,31 @@
+program kick_the_mos
+
+  !BEGIN_DOC
+  ! To do a small rotation of the MOs
+  !END_DOC
+
+  implicit none
+
+  kick_in_mos = .True.
+  TOUCH kick_in_mos
+
+  print*, 'Security mo_class:', security_mo_class
+
+  ! The default mo_classes are setted only if the MOs to localize are not specified
+  if (security_mo_class .and. (dim_list_act_orb == mo_num .or. &
+      dim_list_core_orb + dim_list_act_orb == mo_num)) then
+
+    print*, 'WARNING'
+    print*, 'You must set different mo_class with qp set_mo_class'
+    print*, 'If you want to kick all the orbital:'
+    print*, 'qp set Orbital_optimization security_mo_class false'
+    print*, ''
+    print*, 'abort'
+
+    call abort
+  
+  endif
+  
+  call apply_pre_rotation
+  
+end
diff --git a/src/mo_localization/kick_the_mos.org b/src/mo_localization/kick_the_mos.org
new file mode 100644
index 00000000..3d57da37
--- /dev/null
+++ b/src/mo_localization/kick_the_mos.org
@@ -0,0 +1,33 @@
+#+BEGIN_SRC f90 :comments org :tangle kick_the_mos.irp.f
+program kick_the_mos
+
+  !BEGIN_DOC
+  ! To do a small rotation of the MOs
+  !END_DOC
+
+  implicit none
+
+  kick_in_mos = .True.
+  TOUCH kick_in_mos
+
+  print*, 'Security mo_class:', security_mo_class
+
+  ! The default mo_classes are setted only if the MOs to localize are not specified
+  if (security_mo_class .and. (dim_list_act_orb == mo_num .or. &
+      dim_list_core_orb + dim_list_act_orb == mo_num)) then
+
+    print*, 'WARNING'
+    print*, 'You must set different mo_class with qp set_mo_class'
+    print*, 'If you want to kick all the orbital:'
+    print*, 'qp set Orbital_optimization security_mo_class false'
+    print*, ''
+    print*, 'abort'
+
+    call abort
+  
+  endif
+  
+  call apply_pre_rotation
+  
+end
+#+END_SRC
diff --git a/src/mo_localization/localization.irp.f b/src/mo_localization/localization.irp.f
new file mode 100644
index 00000000..6d5cc876
--- /dev/null
+++ b/src/mo_localization/localization.irp.f
@@ -0,0 +1,531 @@
+program localization
+ call run_localization
+end
+
+
+
+
+! Variables:
+! | pre_rot(mo_num, mo_num)   | double precision | Matrix for the pre rotation                       |
+! | R(mo_num,mo_num)          | double precision | Rotation matrix                                   |
+! | tmp_R(:,:)                | double precision | Rottation matrix in a subsapce                    |
+! | prev_mos(ao_num, mo_num)  | double precision | Previous mo_coef                                  |
+! | spatial_extent(mo_num)    | double precision | Spatial extent of the orbitals                    |
+! | criterion                 | double precision | Localization criterion                            |
+! | prev_criterion            | double precision | Previous criterion                                |
+! | criterion_model           | double precision | Estimated next criterion                          |
+! | rho                       | double precision | Ratio to measure the agreement between the model  |
+! |                           |                  | and the reality                                   |
+! | delta                     | double precision | Radisu of the trust region                        |
+! | norm_grad                 | double precision | Norm of the gradient                              |
+! | info                      | integer          | for dsyev from Lapack                             |
+! | max_elem                  | double precision | maximal element in the gradient                   |
+! | v_grad(:)                 | double precision | Gradient                                          |
+! | H(:,:)                    | double precision | Hessian (diagonal)                                |
+! | e_val(:)                  | double precision | Eigenvalues of the hessian                        |
+! | W(:,:)                    | double precision | Eigenvectors of the hessian                       |
+! | tmp_x(:)                  | double precision | Step in 1D (in a subaspace)                       |
+! | tmp_m_x(:,:)              | double precision | Step in 2D (in a subaspace)                       |
+! | tmp_list(:)               | double precision | List of MOs in a mo_class                         |
+! | i,j,k                     | integer          | Indexes in the full MO space                      |
+! | tmp_i, tmp_j, tmp_k       | integer          | Indexes in a subspace                             |
+! | l                         | integer          | Index for the mo_class                            |
+! | key(:)                    | integer          | Key to sort the eigenvalues of the hessian        |
+! | nb_iter                   | integer          | Number of iterations                              |
+! | must_exit                 | logical          | To exit the trust region loop                     |
+! | cancel_step               | logical          | To cancel a step                                  |
+! | not_*converged            | logical          | To localize the different mo classes              |
+! | t*                        | double precision | To measure the time                               |
+! | n                         | integer          | mo_num*(mo_num-1)/2, number of orbital parameters |
+! | tmp_n                     | integer          | dim_subspace*(dim_subspace-1)/2                   |
+! |                           |                  | Number of dimension in the subspace               |
+
+! Variables in qp_edit for the localization:
+! | localization_method      |
+! | localization_max_nb_iter |
+! | default_mo_class         |
+! | thresh_loc_max_elem_grad |
+! | kick_in_mos              |
+! | angle_pre_rot            |
+
+! + all the variables for the trust region
+
+! Cf. qp_edit orbital optimization
+
+
+subroutine run_localization
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Orbital localization
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  double precision, allocatable :: pre_rot(:,:), R(:,:)
+  double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:)
+  double precision              :: criterion, norm_grad
+  integer                       :: i,j,k,l,p, tmp_i, tmp_j, tmp_k
+  integer                       :: info
+  integer                       :: n, tmp_n, tmp_list_size
+  double precision, allocatable :: v_grad(:), H(:,:), tmp_m_x(:,:), tmp_x(:),W(:,:),e_val(:)
+  double precision              :: max_elem, t1, t2, t3, t4, t5, t6
+  integer, allocatable          :: tmp_list(:), key(:)
+  double precision              :: prev_criterion, rho, delta, criterion_model
+  integer                       :: nb_iter, nb_sub_iter
+  logical                       :: not_converged, not_core_converged
+  logical                       :: not_act_converged, not_inact_converged, not_virt_converged
+  logical                       :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation
+
+  n = mo_num*(mo_num-1)/2
+
+  ! Allocation
+  allocate(spatial_extent(mo_num))
+  allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num))
+  allocate(prev_mos(ao_num, mo_num))
+
+  ! Locality before the localization
+  call compute_spatial_extent(spatial_extent)
+
+  ! Choice of the method (with qp_edit)
+  print*,''
+  print*,'Localization method:',localization_method
+  if (localization_method == 'boys') then
+    print*,'Foster-Boys localization'
+  elseif (localization_method == 'pipek') then
+    print*,'Pipek-Mezey localization'
+  else
+    print*,'Unknown localization_method, please select boys or pipek'
+    call abort
+  endif
+  print*,''
+
+  ! Localization criterion (FB, PM, ...) for each mo_class
+  print*,'### Before the pre rotation'
+  
+  ! Debug
+  if (debug_hf) then
+    print*,'HF energy:', HF_energy
+  endif
+  
+  do l = 1, 4
+    if (l==1) then ! core
+      tmp_list_size = dim_list_core_orb
+    elseif (l==2) then ! act
+      tmp_list_size = dim_list_act_orb
+    elseif (l==3) then ! inact
+      tmp_list_size = dim_list_inact_orb
+    else ! virt
+      tmp_list_size = dim_list_virt_orb
+    endif
+
+     ! Allocation tmp array  
+    allocate(tmp_list(tmp_list_size))
+
+    ! To give the list of MOs in a mo_class
+    if (l==1) then ! core
+      tmp_list = list_core
+    elseif (l==2) then
+      tmp_list = list_act
+    elseif (l==3) then
+      tmp_list = list_inact
+    else
+      tmp_list = list_virt
+    endif
+
+    if (tmp_list_size >= 2) then
+      call criterion_localization(tmp_list_size, tmp_list,criterion)
+      print*,'Criterion:', criterion, mo_class(tmp_list(1))
+    endif
+
+    deallocate(tmp_list)    
+
+  enddo
+
+  ! Debug
+  !print*,'HF', HF_energy
+
+  print*, 'Security mo_class:', security_mo_class
+
+  ! The default mo_classes are setted only if the MOs to localize are not specified
+  if (security_mo_class .and. (n_act_orb == mo_num .or. &
+      n_core_orb + n_act_orb == mo_num)) then
+
+    print*, 'WARNING'
+    print*, 'You must set different mo_class with qp set_mo_class'
+    print*, 'If you want to localize all the orbitals:'
+    print*, 'qp set Orbital_optimization security_mo_class false'
+    print*, ''
+    print*, 'abort'
+
+    call abort
+  
+  endif
+
+! Loc
+
+  ! Pre rotation, to give a little kick in the MOs
+  call apply_pre_rotation()
+
+  ! Criterion after the pre rotation
+  ! Localization criterion (FB, PM, ...) for each mo_class
+  print*,'### After the pre rotation'
+  
+  ! Debug
+  if (debug_hf) then
+    touch mo_coef
+    print*,'HF energy:', HF_energy
+  endif
+  
+  do l = 1, 4
+    if (l==1) then ! core
+      tmp_list_size = dim_list_core_orb
+    elseif (l==2) then ! act
+      tmp_list_size = dim_list_act_orb
+    elseif (l==3) then ! inact
+      tmp_list_size = dim_list_inact_orb
+    else ! virt
+      tmp_list_size = dim_list_virt_orb
+    endif
+    
+    if (tmp_list_size >= 2) then
+      ! Allocation tmp array  
+      allocate(tmp_list(tmp_list_size))
+
+      ! To give the list of MOs in a mo_class
+      if (l==1) then ! core
+        tmp_list = list_core
+      elseif (l==2) then
+        tmp_list = list_act
+      elseif (l==3) then
+        tmp_list = list_inact
+      else
+        tmp_list = list_virt
+      endif
+
+      call criterion_localization(tmp_list_size, tmp_list,criterion)
+      print*,'Criterion:', criterion, trim(mo_class(tmp_list(1)))
+      
+      deallocate(tmp_list)    
+    endif
+    
+  enddo
+
+  ! Debug
+  !print*,'HF', HF_energy
+
+  print*,''
+  print*,'========================'
+  print*,'  Orbital localization'
+  print*,'========================'
+  print*,'' 
+
+  !Initialization
+  not_converged = .TRUE.
+
+  ! To do the localization only if there is at least 2 MOs
+  if (dim_list_core_orb >= 2) then
+    not_core_converged = .TRUE.
+  else
+    not_core_converged = .FALSE. 
+  endif
+  
+  if (dim_list_act_orb >= 2) then
+    not_act_converged = .TRUE.
+  else
+    not_act_converged = .FALSE.
+  endif
+
+  if (dim_list_inact_orb >= 2) then
+    not_inact_converged = .TRUE.
+  else
+    not_inact_converged = .FALSE.
+  endif
+
+  if (dim_list_virt_orb >= 2) then
+    not_virt_converged = .TRUE.
+  else
+    not_virt_converged = .FALSE.
+  endif
+ 
+  ! Loop over the mo_classes
+  do l = 1, 4
+
+    if (l==1) then ! core
+      not_converged = not_core_converged
+      tmp_list_size = dim_list_core_orb
+    elseif (l==2) then ! act
+      not_converged = not_act_converged
+      tmp_list_size = dim_list_act_orb
+    elseif (l==3) then ! inact
+      not_converged = not_inact_converged
+      tmp_list_size = dim_list_inact_orb
+    else ! virt
+      not_converged = not_virt_converged
+      tmp_list_size = dim_list_virt_orb
+    endif
+
+    ! Next iteration if converged = true 
+    if (.not. not_converged) then
+      cycle
+    endif
+ 
+    ! Allocation tmp array  
+    allocate(tmp_list(tmp_list_size))
+
+    ! To give the list of MOs in a mo_class
+    if (l==1) then ! core
+      tmp_list = list_core
+    elseif (l==2) then
+      tmp_list = list_act
+    elseif (l==3) then
+      tmp_list = list_inact
+    else
+      tmp_list = list_virt
+    endif
+
+    ! Display
+    if (not_converged) then
+      print*,''
+      print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###'
+      print*,''
+    endif
+
+    ! Size for the 2D -> 1D transformation 
+    tmp_n = tmp_list_size * (tmp_list_size - 1)/2
+
+    ! Without hessian + trust region 
+    if (.not. localization_use_hessian) then
+       
+      ! Allocation of temporary arrays
+      allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size))
+      allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n))
+
+      ! Criterion
+      call criterion_localization(tmp_list_size, tmp_list, prev_criterion)
+
+      ! Init
+      nb_iter = 0
+      delta = 1d0
+
+      !Loop
+      do while (not_converged)
+         
+        print*,''
+        print*,'***********************'
+        print*,'Iteration', nb_iter
+        print*,'***********************'
+        print*,''
+        
+        ! Angles of rotation
+        call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem)
+        tmp_m_x = - tmp_m_x * delta
+
+        ! Rotation submatrix
+        call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, &
+             info, enforce_step_cancellation)
+
+        ! To ensure that the rotation matrix is unitary
+        if (enforce_step_cancellation) then
+          print*, 'Step cancellation, too large error in the rotation matrix'
+          delta = delta * 0.5d0
+          cycle
+        else
+          delta = min(delta * 2d0, 1d0)
+        endif
+
+        ! Full rotation matrix and application of the rotation
+        call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R)
+        call apply_mo_rotation(R, prev_mos)
+
+        ! Update the needed data
+        call update_data_localization()
+
+        ! New criterion
+        call criterion_localization(tmp_list_size, tmp_list, criterion)
+        print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion
+        print*,'Max elem :', max_elem
+        print*,'Delta    :', delta
+        
+        nb_iter = nb_iter + 1
+
+        ! Exit
+        if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then
+           not_converged = .False.
+        endif
+      enddo
+
+      ! Save the changes
+      call update_data_localization()
+      call save_mos()
+      TOUCH mo_coef
+
+      ! Deallocate
+      deallocate(v_grad, tmp_m_x, tmp_list)
+      deallocate(tmp_R, tmp_x)
+
+    ! Trust region
+    else
+    
+      ! Allocation of temporary arrays
+      allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size))
+      allocate(tmp_R(tmp_list_size, tmp_list_size))
+      allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n))
+
+      ! ### Initialization ###
+      delta = 0d0 ! can be deleted (normally)
+      nb_iter = 0 ! Must start at 0 !!!
+      rho = 0.5d0 ! Must be 0.5
+
+      ! Compute the criterion before the loop
+      call criterion_localization(tmp_list_size, tmp_list, prev_criterion)
+
+      ! Loop until the convergence
+      do while (not_converged)
+
+        print*,''
+        print*,'***********************'
+        print*,'Iteration', nb_iter
+        print*,'***********************'
+        print*,''
+  
+        ! Gradient
+        call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+        ! Diagonal hessian
+        call hessian_localization(tmp_n, tmp_list_size, tmp_list, H)
+        
+        ! Diagonalization of the diagonal hessian by hands
+        !call diagonalization_hessian(tmp_n,H,e_val,w)
+        do i = 1, tmp_n
+          e_val(i) = H(i,i)
+        enddo
+
+        ! Key list for dsort
+        do i = 1, tmp_n 
+          key(i) = i
+        enddo
+
+        ! Sort of the eigenvalues
+        call dsort(e_val, key, tmp_n)
+
+        ! Eigenvectors
+        W = 0d0
+        do i = 1, tmp_n
+          j = key(i)
+          W(j,i) = 1d0
+        enddo
+
+        ! To enter in the loop just after
+        cancel_step = .True.
+        nb_sub_iter = 0 
+
+        ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho
+        do while (cancel_step)
+          print*,'-----------------------------'
+          print*, mo_class(tmp_list(1))
+          print*,'Iteration:', nb_iter
+          print*,'Sub iteration:', nb_sub_iter
+          print*,'-----------------------------'
+
+          ! Hessian,gradient,Criterion -> x 
+          call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, &
+               rho, nb_iter, delta, criterion_model, tmp_x, must_exit)
+
+          ! Internal loop exit condition
+          if (must_exit) then
+            print*,'trust_region_step_w_expected_e sent: Exit'
+            exit 
+          endif
+
+          ! 1D tmp -> 2D tmp 
+          call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x)
+
+          ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size)
+          call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, &
+               info, enforce_step_cancellation)
+
+          if (enforce_step_cancellation) then
+            print*, 'Step cancellation, too large error in the rotation matrix'
+            rho = 0d0
+            cycle
+          endif
+
+          ! tmp_R to R, subspace to full space
+          call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R)
+        
+          ! Rotation of the MOs
+          call apply_mo_rotation(R, prev_mos)   
+
+          ! Update the things related to mo_coef
+          call update_data_localization()
+
+          ! Update the criterion
+          call criterion_localization(tmp_list_size, tmp_list, criterion)
+          print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion
+
+          ! Criterion -> step accepted or rejected 
+          call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, &
+               criterion_model, rho, cancel_step)
+
+          ! Cancellation of the step, previous MOs
+          if (cancel_step) then
+            mo_coef = prev_mos
+          endif
+
+          nb_sub_iter = nb_sub_iter + 1
+        enddo
+        !call save_mos() !### depend of the time for 1 iteration
+
+        ! To exit the external loop if must_exti = .True.
+        if (must_exit) then
+          exit
+        endif 
+
+        ! Step accepted, nb iteration + 1
+        nb_iter = nb_iter + 1
+
+        ! External loop exit conditions
+        if (DABS(max_elem) < thresh_loc_max_elem_grad) then
+          not_converged = .False.
+        endif
+        if (nb_iter > localization_max_nb_iter) then
+          not_converged = .False.
+        endif
+      enddo
+
+      ! Deallocation of temporary arrays
+      deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key)
+      
+      ! Save the MOs
+      call save_mos()
+      TOUCH mo_coef
+ 
+      ! Debug
+      if (debug_hf) then
+        touch mo_coef
+        print*,'HF energy:', HF_energy
+      endif
+      
+    endif
+  enddo
+
+
+  TOUCH mo_coef 
+
+  ! To sort the MOs using the diagonal elements of the Fock matrix
+  if (sort_mos_by_e) then
+    call run_sort_by_fock_energies()
+  endif
+
+  ! Debug
+  if (debug_hf) then
+    touch mo_coef
+    print*,'HF energy:', HF_energy
+  endif
+
+  ! Locality after the localization
+  call compute_spatial_extent(spatial_extent)
+
+end
diff --git a/src/mo_localization/localization.org b/src/mo_localization/localization.org
new file mode 100644
index 00000000..ad42ef74
--- /dev/null
+++ b/src/mo_localization/localization.org
@@ -0,0 +1,2860 @@
+* Orbital localization
+
+Molecular orbitals localization
+
+** Doc
+
+The program localizes the orbitals in function of their mo_class:
+- core MOs
+- inactive MOs
+- active MOs
+- virtual MOs
+- deleted MOs -> no orbital localization
+
+Core MOs are localized with core MOs, inactives MOs are localized with
+inactives MOs and so on. But deleted orbitals are not localized.
+
+WARNING:
+- The user MUST SPECIFY THE MO CLASSES, otherwise if default mo class
+  is false the localization will be done for all the orbitals between
+  them, so the occupied and virtual MOs will be combined together
+  which is clearly not what we want to do. If default lpmo class is true
+  the localization will be done for the core, occupied and virtual
+  orbitals, but pay attention the mo_class are not deleted after...
+- The mo class is not important (except "deleted") because it is not
+  link to the kind of MOs for CASSCF or CIPSI. It is just a way to
+  separate the MOs in order to localize them separetely, for example
+  to separate the core MOs, the occupied MOs and the virtuals MOs.
+- The user MUST CHANGE THE MO CLASSES AFTER THE LOCALIZATION in order
+  to have the right mo class for his next calculation...
+
+For more information on the mo_class:
+qp set_mo_class -h
+
+*** Foster-Boys localization
+Boys, S. F., 1960, Rev. Mod. Phys. 32, 296.
+DOI:https://doi.org/10.1103/RevModPhys.32.300
+Boys, S. F., 1966, in Quantum Theory of Atoms, Molecules,
+and the Solid State, edited by P.-O. Löwdin (Academic
+Press, New York), p. 253.
+Daniel A. Kleier, Thomas A. Halgren, John H. Hall Jr., and William
+N. Lipscomb, J. Chem. Phys. 61, 3905 (1974)
+doi: 10.1063/1.1681683
+Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Comput. Chem. 2013, 34,
+1456– 1462. DOI: 10.1002/jcc.23281
+Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Theory
+Comput. 2012, 8, 9, 3137–3146
+DOI: https://doi.org/10.1021/ct300473g
+Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Phys. 137, 224114
+(2012)
+DOI: https://doi.org/10.1063/1.4769866
+Nicola Marzari, Arash A. Mostofi, Jonathan R. Yates, Ivo Souza, and David Vanderbilt
+Rev. Mod. Phys. 84, 1419
+https://doi.org/10.1103/RevModPhys.84.1419
+
+The Foster-Boys localization is a method to generate localized MOs
+(LMOs) by minimizing the Foster-Boys criterion:
+$$ C_{FB} = \sum_{i=1}^N \left[ < \phi_i | r^2 | \phi_i > - < \phi_i | r |
+\phi_i >^2 \right] $$.
+In fact it is equivalent to maximise
+$$ C_2 = \sum_{i>j, \ i=1}^N \left[  < \phi_i | r | \phi_i > -  <
+\phi_j | r | \phi_j > \left]^2$$
+or
+$$ C_3 = \sum_{i=1}^N \left[ < \phi_i | r | \phi_i > \right]^2.$$
+
+Locality of the orbitals:
+\begin{align*}
+\sigma_i &= \sqrt{ <i|r^2|i> - <i|r|i>^2} \\
+&= \sqrt{ <i|x^2|i> - <i|x|i>^2 + <i|y^2|i> - <i|y|i>^2 + <i|z^2|i> - <i|z|i>^2}
+\end{align*}
+
+
+*** Pipek-Mezey localization
+J. Pipek, P. G. Mezey, J. Chem. Phys. 90, 4916 (1989)
+DOI: 10.1063/1.456588
+
+Foster-Boys localization does not preserve the $\sigma - \pi$ separation of the
+MOs, it leads to "banana" orbitals. The Pipek-Mezey localization
+normally preserves this separation.
+
+** Localization procedure
+
+Localization procedure:
+
+To do the localization we compute the gradient and the
+diagonal hessian of the Foster-Boys criterion with respect to the MO
+rotations and we minimize it with the Newton method.
+
+In order to avoid the problem of starting on a saddle point, the
+localization procedure starts by giving a little kick in the MOs, by
+putting "kick in mos" true, in order to break the symmetry and escape
+from a possible saddle point.
+
+In order to speed up the iteration we compute the gradient, the
+diagonal hessian and the step in temporary matrices of the size
+(number MOs in mo class by number MOs in mo class)
+
+** Remarks
+
+Variables:
+
+The indexes i and j refere to the positions of the elements in
+the "full space", i.e., the arrays containing elements for all the MOs,
+but the indexes tmp_i and tmp_j to the positions of the elements in
+the "reduced space/subspace", i.e., the arrays containing elements for
+a restricted number of MOs.
+Example:
+The gradient for the localization of the core MOs can be expressed
+as a vector of length mo_num*(mo_num-1)/2 with only
+n_core_orb*(n_core_orb-1)/2 non zero elements, so it is more relevant
+to use a vector of size n_act_orb*(n_core_orb-1)/2.
+So here the gradient is a vector of size
+tmp_list_size*(tmp_list_size)/2 where tmp_list_size is the number of
+MOs is the corresponding mo class.
+The same thing happened for the hessian, the matrix containing the
+step and the rotation matrix, which are tmp_list_size by tmp_list_size
+matrices.
+
+Ex gradient for 4 core orbitales:
+\begin{align*}
+\begin{pmatrix}
+0 & -a & -b & -d & \hdots & 0 \\
+a &  0 & -c & -e & \hdots & 0 \\
+b &  c &  0 & -f & \hdots & 0 \\
+d &  e &  f &  0 & \hdots & 0 \\
+\vdots &  \vdots &  \vdots & \vdots  & \ddots & \vdots \\
+0 &  0 &  0 &  0 & \hdots & 0 \\
+\end{pmatrix}
+\Rightarrow
+\begin{pmatrix}
+a \\
+b \\
+c \\
+e \\
+f \\
+0 \\
+\vdots \\
+0 \\
+\end{pmatrix}
+\end{align*}
+
+\begin{align*}
+\begin{pmatrix}
+0 & -a & -b & -d & \hdots & 0 \\
+a &  0 & -c & -e & \hdots & 0 \\
+b &  c &  0 & -f & \hdots & 0 \\
+d &  e &  f &  0 & \hdots & 0 \\
+\vdots &  \vdots &  \vdots & \vdots  & \ddots & \vdots \\
+0 &  0 &  0 &  0 & \hdots & 0 \\
+\end{pmatrix}
+\Rightarrow
+\begin{pmatrix}
+0 & -a & -b & -d \\
+a &  0 & -c & -e \\
+b &  c &  0 & -f \\
+d &  e &  f &  0 \\
+\end{pmatrix}
+\Rightarrow
+\begin{pmatrix}
+a \\
+b \\
+c \\
+e \\
+f \\
+\end{pmatrix}
+\end{align*}
+
+The same thing can be done if indexes of the orbitales are not
+consecutives since it's done with lists of MOs:
+
+\begin{align*}
+\begin{pmatrix}
+0 & -a &  0 & -b & -d & \hdots & 0 \\
+a &  0 &  0 & -c & -e & \hdots & 0 \\
+0 &  0 &  0 &  0 &  0 & \hdots & 0 \\
+b &  c &  0 &  0 & -f & \hdots & 0 \\
+d &  e &  0 &  f &  0 & \hdots & 0 \\
+\vdots &  \vdots & \vdots & \vdots & \vdots  & \ddots & \vdots \\
+0 &  0 &  0 &  0 &  0 & \hdots & 0 \\
+\end{pmatrix}
+\Rightarrow
+\begin{pmatrix}
+0 & -a & -b & -d \\
+a &  0 & -c & -e \\
+b &  c &  0 & -f \\
+d &  e &  f &  0 \\
+\end{pmatrix}
+\Rightarrow
+\begin{pmatrix}
+a \\
+b \\
+c \\
+e \\
+f \\
+\end{pmatrix}
+\end{align*}
+
+The dipoles are updated using the "ao to mo" subroutine without the
+"restore symmetry" which is actually in N^4 but can be rewrite in N^2
+log(N^2).
+The bottleneck of the program is normally N^3 with the matrix
+multiplications/diagonalizations. The use of the full hessian can be
+an improvement but it will scale in N^4...
+
+** Program
+
+#+BEGIN_SRC f90 org :tangle localization.irp.f
+program localization
+ call run_localization
+end
+#+END_SRC
+
+
+Variables:
+| pre_rot(mo_num, mo_num)   | double precision | Matrix for the pre rotation                       |
+| R(mo_num,mo_num)          | double precision | Rotation matrix                                   |
+| tmp_R(:,:)                | double precision | Rottation matrix in a subsapce                    |
+| prev_mos(ao_num, mo_num)  | double precision | Previous mo_coef                                  |
+| spatial_extent(mo_num)    | double precision | Spatial extent of the orbitals                    |
+| criterion                 | double precision | Localization criterion                            |
+| prev_criterion            | double precision | Previous criterion                                |
+| criterion_model           | double precision | Estimated next criterion                          |
+| rho                       | double precision | Ratio to measure the agreement between the model  |
+|                           |                  | and the reality                                   |
+| delta                     | double precision | Radisu of the trust region                        |
+| norm_grad                 | double precision | Norm of the gradient                              |
+| info                      | integer          | for dsyev from Lapack                             |
+| max_elem                  | double precision | maximal element in the gradient                   |
+| v_grad(:)                 | double precision | Gradient                                          |
+| H(:,:)                    | double precision | Hessian (diagonal)                                |
+| e_val(:)                  | double precision | Eigenvalues of the hessian                        |
+| W(:,:)                    | double precision | Eigenvectors of the hessian                       |
+| tmp_x(:)                  | double precision | Step in 1D (in a subaspace)                       |
+| tmp_m_x(:,:)              | double precision | Step in 2D (in a subaspace)                       |
+| tmp_list(:)               | integer          | List of MOs in a mo_class                         |
+| i,j,k                     | integer          | Indexes in the full MO space                      |
+| tmp_i, tmp_j, tmp_k       | integer          | Indexes in a subspace                             |
+| l                         | integer          | Index for the mo_class                            |
+| key(:)                    | integer          | Key to sort the eigenvalues of the hessian        |
+| nb_iter                   | integer          | Number of iterations                              |
+| must_exit                 | logical          | To exit the trust region loop                     |
+| cancel_step               | logical          | To cancel a step                                  |
+| not_*converged            | logical          | To localize the different mo classes              |
+| t*                        | double precision | To measure the time                               |
+| n                         | integer          | mo_num*(mo_num-1)/2, number of orbital parameters |
+| tmp_n                     | integer          | dim_subspace*(dim_subspace-1)/2                   |
+|                           |                  | Number of dimension in the subspace               |
+
+Variables in qp_edit for the localization:
+| localization_method      |
+| localization_max_nb_iter |
+| default_mo_class         |
+| thresh_loc_max_elem_grad |
+| kick_in_mos              |
+| angle_pre_rot            |
+
++ all the variables for the trust region
+
+Cf. qp_edit orbital optimization
+
+#+BEGIN_SRC f90 :comments org :tangle localization.irp.f
+subroutine run_localization
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Orbital localization
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  double precision, allocatable :: pre_rot(:,:), R(:,:)
+  double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:)
+  double precision              :: criterion, norm_grad
+  integer                       :: i,j,k,l,p, tmp_i, tmp_j, tmp_k
+  integer                       :: info
+  integer                       :: n, tmp_n, tmp_list_size
+  double precision, allocatable :: v_grad(:), H(:,:), tmp_m_x(:,:), tmp_x(:),W(:,:),e_val(:)
+  double precision              :: max_elem, t1, t2, t3, t4, t5, t6
+  integer, allocatable          :: tmp_list(:), key(:)
+  double precision              :: prev_criterion, rho, delta, criterion_model
+  integer                       :: nb_iter, nb_sub_iter
+  logical                       :: not_converged, not_core_converged
+  logical                       :: not_act_converged, not_inact_converged, not_virt_converged
+  logical                       :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation
+
+  n = mo_num*(mo_num-1)/2
+
+  ! Allocation
+  allocate(spatial_extent(mo_num))
+  allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num))
+  allocate(prev_mos(ao_num, mo_num))
+
+  ! Locality before the localization
+  call compute_spatial_extent(spatial_extent)
+
+  ! Choice of the method (with qp_edit)
+  print*,''
+  print*,'Localization method:',localization_method
+  if (localization_method == 'boys') then
+    print*,'Foster-Boys localization'
+  elseif (localization_method == 'pipek') then
+    print*,'Pipek-Mezey localization'
+  else
+    print*,'Unknown localization_method, please select boys or pipek'
+    call abort
+  endif
+  print*,''
+
+  ! Localization criterion (FB, PM, ...) for each mo_class
+  print*,'### Before the pre rotation'
+
+  ! Debug
+  if (debug_hf) then
+    print*,'HF energy:', HF_energy
+  endif
+
+  do l = 1, 4
+    if (l==1) then ! core
+      tmp_list_size = dim_list_core_orb
+    elseif (l==2) then ! act
+      tmp_list_size = dim_list_act_orb
+    elseif (l==3) then ! inact
+      tmp_list_size = dim_list_inact_orb
+    else ! virt
+      tmp_list_size = dim_list_virt_orb
+    endif
+
+     ! Allocation tmp array
+    allocate(tmp_list(tmp_list_size))
+
+    ! To give the list of MOs in a mo_class
+    if (l==1) then ! core
+      tmp_list = list_core
+    elseif (l==2) then
+      tmp_list = list_act
+    elseif (l==3) then
+      tmp_list = list_inact
+    else
+      tmp_list = list_virt
+    endif
+
+    if (tmp_list_size >= 2) then
+      call criterion_localization(tmp_list_size, tmp_list,criterion)
+      print*,'Criterion:', criterion, mo_class(tmp_list(1))
+    endif
+
+    deallocate(tmp_list)
+
+  enddo
+
+  ! Debug
+  !print*,'HF', HF_energy
+
+  print*, 'Security mo_class:', security_mo_class
+
+  ! The default mo_classes are setted only if the MOs to localize are not specified
+  if (security_mo_class .and. (n_act_orb == mo_num .or. &
+      n_core_orb + n_act_orb == mo_num)) then
+
+    print*, 'WARNING'
+    print*, 'You must set different mo_class with qp set_mo_class'
+    print*, 'If you want to localize all the orbitals:'
+    print*, 'qp set Orbital_optimization security_mo_class false'
+    print*, ''
+    print*, 'abort'
+
+    call abort
+
+  endif
+#+END_SRC
+
+** Loc
+#+BEGIN_SRC f90 :comments org :tangle localization.irp.f
+  ! Pre rotation, to give a little kick in the MOs
+  call apply_pre_rotation()
+
+  ! Criterion after the pre rotation
+  ! Localization criterion (FB, PM, ...) for each mo_class
+  print*,'### After the pre rotation'
+
+  ! Debug
+  if (debug_hf) then
+    touch mo_coef
+    print*,'HF energy:', HF_energy
+  endif
+
+  do l = 1, 4
+    if (l==1) then ! core
+      tmp_list_size = dim_list_core_orb
+    elseif (l==2) then ! act
+      tmp_list_size = dim_list_act_orb
+    elseif (l==3) then ! inact
+      tmp_list_size = dim_list_inact_orb
+    else ! virt
+      tmp_list_size = dim_list_virt_orb
+    endif
+
+    if (tmp_list_size >= 2) then
+      ! Allocation tmp array
+      allocate(tmp_list(tmp_list_size))
+
+      ! To give the list of MOs in a mo_class
+      if (l==1) then ! core
+        tmp_list = list_core
+      elseif (l==2) then
+        tmp_list = list_act
+      elseif (l==3) then
+        tmp_list = list_inact
+      else
+        tmp_list = list_virt
+      endif
+
+      call criterion_localization(tmp_list_size, tmp_list,criterion)
+      print*,'Criterion:', criterion, trim(mo_class(tmp_list(1)))
+
+      deallocate(tmp_list)
+    endif
+
+  enddo
+
+  ! Debug
+  !print*,'HF', HF_energy
+
+  print*,''
+  print*,'========================'
+  print*,'  Orbital localization'
+  print*,'========================'
+  print*,''
+
+  !Initialization
+  not_converged = .TRUE.
+
+  ! To do the localization only if there is at least 2 MOs
+  if (dim_list_core_orb >= 2) then
+    not_core_converged = .TRUE.
+  else
+    not_core_converged = .FALSE.
+  endif
+
+  if (dim_list_act_orb >= 2) then
+    not_act_converged = .TRUE.
+  else
+    not_act_converged = .FALSE.
+  endif
+
+  if (dim_list_inact_orb >= 2) then
+    not_inact_converged = .TRUE.
+  else
+    not_inact_converged = .FALSE.
+  endif
+
+  if (dim_list_virt_orb >= 2) then
+    not_virt_converged = .TRUE.
+  else
+    not_virt_converged = .FALSE.
+  endif
+
+  ! Loop over the mo_classes
+  do l = 1, 4
+
+    if (l==1) then ! core
+      not_converged = not_core_converged
+      tmp_list_size = dim_list_core_orb
+    elseif (l==2) then ! act
+      not_converged = not_act_converged
+      tmp_list_size = dim_list_act_orb
+    elseif (l==3) then ! inact
+      not_converged = not_inact_converged
+      tmp_list_size = dim_list_inact_orb
+    else ! virt
+      not_converged = not_virt_converged
+      tmp_list_size = dim_list_virt_orb
+    endif
+
+    ! Next iteration if converged = true
+    if (.not. not_converged) then
+      cycle
+    endif
+
+    ! Allocation tmp array
+    allocate(tmp_list(tmp_list_size))
+
+    ! To give the list of MOs in a mo_class
+    if (l==1) then ! core
+      tmp_list = list_core
+    elseif (l==2) then
+      tmp_list = list_act
+    elseif (l==3) then
+      tmp_list = list_inact
+    else
+      tmp_list = list_virt
+    endif
+
+    ! Display
+    if (not_converged) then
+      print*,''
+      print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###'
+      print*,''
+    endif
+
+    ! Size for the 2D -> 1D transformation
+    tmp_n = tmp_list_size * (tmp_list_size - 1)/2
+
+    ! Without hessian + trust region
+    if (.not. localization_use_hessian) then
+
+      ! Allocation of temporary arrays
+      allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size))
+      allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n))
+
+      ! Criterion
+      call criterion_localization(tmp_list_size, tmp_list, prev_criterion)
+
+      ! Init
+      nb_iter = 0
+      delta = 1d0
+
+      !Loop
+      do while (not_converged)
+
+        print*,''
+        print*,'***********************'
+        print*,'Iteration', nb_iter
+        print*,'***********************'
+        print*,''
+
+        ! Angles of rotation
+        call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem)
+        tmp_m_x = - tmp_m_x * delta
+
+        ! Rotation submatrix
+        call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, &
+             info, enforce_step_cancellation)
+
+        ! To ensure that the rotation matrix is unitary
+        if (enforce_step_cancellation) then
+          print*, 'Step cancellation, too large error in the rotation matrix'
+          delta = delta * 0.5d0
+          cycle
+        else
+          delta = min(delta * 2d0, 1d0)
+        endif
+
+        ! Full rotation matrix and application of the rotation
+        call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R)
+        call apply_mo_rotation(R, prev_mos)
+
+        ! Update the needed data
+        call update_data_localization()
+
+        ! New criterion
+        call criterion_localization(tmp_list_size, tmp_list, criterion)
+        print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion
+        print*,'Max elem :', max_elem
+        print*,'Delta    :', delta
+
+        nb_iter = nb_iter + 1
+
+        ! Exit
+        if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then
+           not_converged = .False.
+        endif
+      enddo
+
+      ! Save the changes
+      call update_data_localization()
+      call save_mos()
+      TOUCH mo_coef
+
+      ! Deallocate
+      deallocate(v_grad, tmp_m_x, tmp_list)
+      deallocate(tmp_R, tmp_x)
+
+    ! Trust region
+    else
+
+      ! Allocation of temporary arrays
+      allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size))
+      allocate(tmp_R(tmp_list_size, tmp_list_size))
+      allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n))
+
+      ! ### Initialization ###
+      delta = 0d0 ! can be deleted (normally)
+      nb_iter = 0 ! Must start at 0 !!!
+      rho = 0.5d0 ! Must be 0.5
+
+      ! Compute the criterion before the loop
+      call criterion_localization(tmp_list_size, tmp_list, prev_criterion)
+
+      ! Loop until the convergence
+      do while (not_converged)
+
+        print*,''
+        print*,'***********************'
+        print*,'Iteration', nb_iter
+        print*,'***********************'
+        print*,''
+
+        ! Gradient
+        call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+        ! Diagonal hessian
+        call hessian_localization(tmp_n, tmp_list_size, tmp_list, H)
+
+        ! Diagonalization of the diagonal hessian by hands
+        !call diagonalization_hessian(tmp_n,H,e_val,w)
+        do i = 1, tmp_n
+          e_val(i) = H(i,i)
+        enddo
+
+        ! Key list for dsort
+        do i = 1, tmp_n
+          key(i) = i
+        enddo
+
+        ! Sort of the eigenvalues
+        call dsort(e_val, key, tmp_n)
+
+        ! Eigenvectors
+        W = 0d0
+        do i = 1, tmp_n
+          j = key(i)
+          W(j,i) = 1d0
+        enddo
+
+        ! To enter in the loop just after
+        cancel_step = .True.
+        nb_sub_iter = 0
+
+        ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho
+        do while (cancel_step)
+          print*,'-----------------------------'
+          print*, mo_class(tmp_list(1))
+          print*,'Iteration:', nb_iter
+          print*,'Sub iteration:', nb_sub_iter
+          print*,'-----------------------------'
+
+          ! Hessian,gradient,Criterion -> x
+          call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, &
+               rho, nb_iter, delta, criterion_model, tmp_x, must_exit)
+
+          ! Internal loop exit condition
+          if (must_exit) then
+            print*,'trust_region_step_w_expected_e sent: Exit'
+            exit
+          endif
+
+          ! 1D tmp -> 2D tmp
+          call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x)
+
+          ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size)
+          call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, &
+               info, enforce_step_cancellation)
+
+          if (enforce_step_cancellation) then
+            print*, 'Step cancellation, too large error in the rotation matrix'
+            rho = 0d0
+            cycle
+          endif
+
+          ! tmp_R to R, subspace to full space
+          call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R)
+
+          ! Rotation of the MOs
+          call apply_mo_rotation(R, prev_mos)
+
+          ! Update the things related to mo_coef
+          call update_data_localization()
+
+          ! Update the criterion
+          call criterion_localization(tmp_list_size, tmp_list, criterion)
+          print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion
+
+          ! Criterion -> step accepted or rejected
+          call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, &
+               criterion_model, rho, cancel_step)
+
+          ! Cancellation of the step, previous MOs
+          if (cancel_step) then
+            mo_coef = prev_mos
+          endif
+
+          nb_sub_iter = nb_sub_iter + 1
+        enddo
+        !call save_mos() !### depend of the time for 1 iteration
+
+        ! To exit the external loop if must_exti = .True.
+        if (must_exit) then
+          exit
+        endif
+
+        ! Step accepted, nb iteration + 1
+        nb_iter = nb_iter + 1
+
+        ! External loop exit conditions
+        if (DABS(max_elem) < thresh_loc_max_elem_grad) then
+          not_converged = .False.
+        endif
+        if (nb_iter > localization_max_nb_iter) then
+          not_converged = .False.
+        endif
+      enddo
+
+      ! Deallocation of temporary arrays
+      deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key)
+
+      ! Save the MOs
+      call save_mos()
+      TOUCH mo_coef
+
+      ! Debug
+      if (debug_hf) then
+        touch mo_coef
+        print*,'HF energy:', HF_energy
+      endif
+
+    endif
+  enddo
+
+
+  TOUCH mo_coef
+
+  ! To sort the MOs using the diagonal elements of the Fock matrix
+  if (sort_mos_by_e) then
+    call run_sort_by_fock_energies()
+  endif
+
+  ! Debug
+  if (debug_hf) then
+    touch mo_coef
+    print*,'HF energy:', HF_energy
+  endif
+
+  ! Locality after the localization
+  call compute_spatial_extent(spatial_extent)
+
+end
+#+END_SRC
+
+** Gathering
+Gradient/hessian/criterion for the localization:
+They are chosen in function of the localization method
+
+Gradient:
+
+qp_edit :
+| localization_method | method for the localization |
+
+Input:
+| tmp_n                   | integer          | Number of parameters in the MO subspace           |
+| tmp_list_size           | integer          | Number of MOs in the mo_class we want to localize |
+| tmp_list(tmp_list_size) | integer          | MOs in the mo_class                               |
+
+Output:
+| v_grad(tmp_n)           | double precision | Gradient in the subspace                          |
+| max_elem                | double precision | Maximal element in the gradient                   |
+| norm_grad               | double precision | Norm of the gradient                              |
+
+
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+
+  include 'pi.h'
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the gradient of the chosen localization method
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad
+
+  if (localization_method == 'boys') then
+    call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+    !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+  elseif (localization_method== 'pipek') then
+    call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+  else
+    print*,'Unkown method:'//localization_method
+    call abort
+  endif
+
+end
+#+END_SRC
+
+Hessian:
+
+Output:
+| H(tmp_n,tmp_n) | double precision | Gradient in the subspace        |
+| max_elem       | double precision | Maximal element in the gradient |
+| norm_grad      | double precision | Norm of the gradient            |
+
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H)
+
+  include 'pi.h'
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the diagonal hessian of the chosen localization method
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: H(tmp_n, tmp_n)
+
+  if (localization_method == 'boys') then
+    call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H)
+    !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging
+  elseif (localization_method == 'pipek') then
+    call hessian_PM(tmp_n, tmp_list_size, tmp_list, H)
+  else
+    print*,'Unkown method: '//localization_method
+    call abort
+  endif
+
+end
+#+END_SRC
+
+Criterion:
+
+Output:
+| criterion | double precision | Criterion for the orbital localization |
+
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine criterion_localization(tmp_list_size, tmp_list,criterion)
+
+  include 'pi.h'
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the localization criterion of the chosen localization method
+  END_DOC
+
+  integer, intent(in)           :: tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: criterion
+
+  if (localization_method == 'boys') then
+    call criterion_FB(tmp_list_size, tmp_list, criterion)
+  elseif (localization_method == 'pipek') then
+    !call criterion_PM(tmp_list_size, tmp_list,criterion)
+    call criterion_PM_v3(tmp_list_size, tmp_list, criterion)
+  else
+    print*,'Unkown method: '//localization_method
+    call abort
+  endif
+
+end
+#+END_SRC
+
+Subroutine to update the datas needed for the localization
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine update_data_localization()
+
+  include 'pi.h'
+
+  implicit none
+
+  if (localization_method == 'boys') then
+    ! Update the dipoles
+    call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num)
+    call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num)
+    call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num)
+  elseif (localization_method == 'pipek') then
+    ! Nothing required
+  else
+    print*,'Unkown method: '//localization_method
+    call abort
+  endif
+end
+#+END_SRC
+
+Angles:
+
+Output:
+| tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace |
+| max_elem                              | double precision | Maximal angle                            |
+
+
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem)
+
+  include 'pi.h'
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the rotation angles between the MOs for the chosen localization method
+  END_DOC
+
+  integer, intent(in)           :: tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem
+
+  if (localization_method == 'boys') then
+    call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem)
+  elseif (localization_method== 'pipek') then
+    call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem)
+  else
+    print*,'Unkown method: '//localization_method
+    call abort
+  endif
+
+end
+#+END_SRC
+
+** Foster-Boys
+*** Gradient
+Input:
+| tmp_n                   | integer          | Number of parameters in the MO subspace           |
+| tmp_list_size           | integer          | Number of MOs in the mo_class we want to localize |
+| tmp_list(tmp_list_size) | integer          | MOs in the mo_class                               |
+
+Output:
+| v_grad(tmp_n)           | double precision | Gradient in the subspace                          |
+| max_elem                | double precision | Maximal element in the gradient                   |
+| norm_grad               | double precision | Norm of the gradient                              |
+
+Internal:
+| m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form |
+| i,j,k               | integer          | indexes in the full space   |
+| tmp_i,tmp_j,tmp_k   | integer          | indexes in the subspace     |
+| t*                  | double precision | to compute the time         |
+
+#+BEGIN_SRC  f90 :comments org :tangle localization_sub.irp.f
+subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the gradient for the Foster-Boys localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad
+  double precision, allocatable :: m_grad(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k
+  double precision              :: t1, t2, t3
+
+  print*,''
+  print*,'---gradient_FB---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(m_grad(tmp_list_size, tmp_list_size))
+
+  ! Calculation
+  do tmp_j = 1, tmp_list_size
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, tmp_list_size
+      i = tmp_list(tmp_i)
+      m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) &
+                           +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) &
+                           +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))
+    enddo
+  enddo
+
+  ! 2D -> 1D
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    v_grad(tmp_k) = m_grad(tmp_i,tmp_j)
+  enddo
+
+  ! Maximum element in the gradient
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (ABS(v_grad(tmp_k)) > max_elem) then
+      max_elem = ABS(v_grad(tmp_k))
+    endif
+  enddo
+
+  ! Norm of the gradient
+  norm_grad = 0d0
+  do tmp_k = 1, tmp_n
+    norm_grad = norm_grad + v_grad(tmp_k)**2
+  enddo
+  norm_grad = dsqrt(norm_grad)
+
+  print*, 'Maximal element in the gradient:', max_elem
+  print*, 'Norm of the gradient:', norm_grad
+
+  ! Deallocation
+  deallocate(m_grad)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in gradient_FB:', t3
+
+  print*,''
+  print*,'---End gradient_FB---'
+  print*,''
+
+end subroutine
+#+END_SRC
+
+*** Gradient (OMP)
+#+BEGIN_SRC  f90 :comments org :tangle localization_sub.irp.f
+subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+
+  use omp_lib
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the gradient for the Foster-Boys localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad
+  double precision, allocatable :: m_grad(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k
+  double precision              :: t1, t2, t3
+
+  print*,''
+  print*,'---gradient_FB_omp---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(m_grad(tmp_list_size, tmp_list_size))
+
+  ! Initialization omp
+  call omp_set_max_active_levels(1)
+
+  !$OMP PARALLEL                                                             &
+      !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k)                                         &
+      !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) &
+      !$OMP DEFAULT(NONE)
+
+  ! Calculation
+  !$OMP DO
+  do tmp_j = 1, tmp_list_size
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, tmp_list_size
+      i = tmp_list(tmp_i)
+      m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) &
+                           +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) &
+                           +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))
+    enddo
+  enddo
+  !$OMP END DO
+
+  ! 2D -> 1D
+  !$OMP DO
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    v_grad(tmp_k) = m_grad(tmp_i,tmp_j)
+  enddo
+  !$OMP END DO
+
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  ! Maximum element in the gradient
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (ABS(v_grad(tmp_k)) > max_elem) then
+      max_elem = ABS(v_grad(tmp_k))
+    endif
+  enddo
+
+  ! Norm of the gradient
+  norm_grad = 0d0
+  do tmp_k = 1, tmp_n
+    norm_grad = norm_grad + v_grad(tmp_k)**2
+  enddo
+  norm_grad = dsqrt(norm_grad)
+
+  print*, 'Maximal element in the gradient:', max_elem
+  print*, 'Norm of the gradient:', norm_grad
+
+  ! Deallocation
+  deallocate(m_grad)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in gradient_FB_omp:', t3
+
+  print*,''
+  print*,'---End gradient_FB_omp---'
+  print*,''
+
+end subroutine
+#+END_SRC
+
+*** Hessian
+
+Output:
+| H(tmp_n,tmp_n) | double precision | Gradient in the subspace        |
+| max_elem       | double precision | Maximal element in the gradient |
+| norm_grad      | double precision | Norm of the gradient            |
+
+Internal:
+Internal:
+| beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian |
+| i,j,k             | integer          | indexes in the full space                             |
+| tmp_i,tmp_j,tmp_k | integer          | indexes in the subspace                               |
+| t*                | double precision | to compute the time                                   |
+
+#+BEGIN_SRC  f90 :comments org :tangle localization_sub.irp.f
+subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the diagonal hessian for the Foster-Boys localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: H(tmp_n, tmp_n)
+  double precision, allocatable :: beta(:,:)
+  integer                       :: i,j,tmp_k,tmp_i, tmp_j
+  double precision              :: max_elem, t1,t2,t3
+
+  print*,''
+  print*,'---hessian_FB---'
+  print*,''
+
+  call wall_time(t1)
+
+
+  ! Allocation
+  allocate(beta(tmp_list_size,tmp_list_size))
+
+  ! Calculation
+  do tmp_j = 1, tmp_list_size
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, tmp_list_size
+      i = tmp_list(tmp_i)
+      beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 &
+                         +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 &
+                         +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2
+    enddo
+  enddo
+
+  ! Diagonal of the hessian
+  H = 0d0
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j)
+  enddo
+
+  ! Min elem
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) < max_elem) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Min elem H:', max_elem
+
+  ! Max elem
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) > max_elem) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Max elem H:', max_elem
+
+  ! Near 0
+  max_elem = 1d10
+  do tmp_k = 1, tmp_n
+    if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Near 0 elem H:', max_elem
+
+  ! Deallocation
+  deallocate(beta)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in hessian_FB:', t3
+
+  print*,''
+  print*,'---End hessian_FB---'
+  print*,''
+
+end subroutine
+#+END_SRC
+
+*** Hessian (OMP)
+#+BEGIN_SRC  f90 :comments org :tangle localization_sub.irp.f
+subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the diagonal hessian for the Foster-Boys localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: H(tmp_n, tmp_n)
+  double precision, allocatable :: beta(:,:)
+  integer                       :: i,j,tmp_k,tmp_i,tmp_j
+  double precision              :: max_elem, t1,t2,t3
+
+  print*,''
+  print*,'---hessian_FB_omp---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(beta(tmp_list_size,tmp_list_size))
+
+  ! Initialization omp
+  call omp_set_max_active_levels(1)
+
+  !$OMP PARALLEL                                                             &
+      !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k)                                         &
+      !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) &
+      !$OMP DEFAULT(NONE)
+
+
+  ! Calculation
+  !$OMP DO
+  do tmp_j = 1, tmp_list_size
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, tmp_list_size
+      i = tmp_list(tmp_i)
+      beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 &
+                         +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 &
+                         +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2
+    enddo
+  enddo
+  !$OMP END DO
+
+  ! Initialization
+  !$OMP DO
+  do j = 1, tmp_n
+    do i = 1, tmp_n
+      H(i,j) = 0d0
+    enddo
+  enddo
+  !$OMP END DO
+
+  ! Diagonalm of the hessian
+  !$OMP DO
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j)
+  enddo
+  !$OMP END DO
+
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  ! Min elem
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) < max_elem) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Min elem H:', max_elem
+
+  ! Max elem
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) > max_elem) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Max elem H:', max_elem
+
+  ! Near 0
+  max_elem = 1d10
+  do tmp_k = 1, tmp_n
+    if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Near 0 elem H:', max_elem
+
+  ! Deallocation
+  deallocate(beta)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in hessian_FB_omp:', t3
+
+  print*,''
+  print*,'---End hessian_FB_omp---'
+  print*,''
+
+end subroutine
+#+END_SRC
+
+** Pipek-Mezey
+*** Gradient v1
+#+BEGIN_SRC  f90 :comments org :tangle localization_sub.irp.f
+subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute gradient for the Pipek-Mezey localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad
+  double precision, allocatable :: m_grad(:,:), tmp_int(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho
+
+  ! Allocation
+  allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size))
+
+  ! Initialization
+  m_grad = 0d0
+
+  do a = 1, nucl_num ! loop over the nuclei
+     tmp_int = 0d0 ! Initialization for each nuclei
+
+     ! Loop over the MOs of the a given mo_class to compute <i|P_a|j>
+     do tmp_j = 1, tmp_list_size
+        j = tmp_list(tmp_j)
+        do tmp_i = 1, tmp_list_size
+           i = tmp_list(tmp_i)
+           do rho = 1, ao_num ! loop over all the AOs
+              do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+                 mu = nucl_aos(a,b) ! AO centered on atom a
+
+                 tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+                      + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+              enddo
+           enddo
+        enddo
+     enddo
+
+     ! Gradient
+     do tmp_j = 1, tmp_list_size
+        do tmp_i = 1, tmp_list_size
+
+           m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) +  4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))
+
+        enddo
+     enddo
+
+  enddo
+
+  ! 2D -> 1D
+  do tmp_k = 1, tmp_n
+     call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+     v_grad(tmp_k) = m_grad(tmp_i,tmp_j)
+  enddo
+
+  ! Maximum element in the gradient
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+     if (ABS(v_grad(tmp_k)) > max_elem) then
+        max_elem = ABS(v_grad(tmp_k))
+     endif
+  enddo
+
+  ! Norm of the gradient
+  norm_grad = 0d0
+  do tmp_k = 1, tmp_n
+     norm_grad = norm_grad + v_grad(tmp_k)**2
+  enddo
+  norm_grad = dsqrt(norm_grad)
+
+  print*, 'Maximal element in the gradient:', max_elem
+  print*, 'Norm of the gradient:', norm_grad
+
+  ! Deallocation
+  deallocate(m_grad,tmp_int)
+
+end subroutine grad_pipek
+#+END_SRC
+
+*** Gradient
+
+The gradient is
+
+\begin{align*}
+\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM}
+\end{align*}
+with
+\begin{align*}
+\gamma_{st}^{PM} = \sum_{A=1}^N <s|P_A|t> \left[ <s| P_A |s> - <t|P_A|t> \right]
+\end{align*}
+
+\begin{align*}
+<s|P_A|t> = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right]
+\end{align*}
+$\sum_{\rho}$ -> sum over all the AOs
+$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A
+$c^t$ -> expansion coefficient of orbital |t>
+
+Input:
+| tmp_n                   | integer          | Number of parameters in the MO subspace           |
+| tmp_list_size           | integer          | Number of MOs in the mo_class we want to localize |
+| tmp_list(tmp_list_size) | integer          | MOs in the mo_class                               |
+
+Output:
+| v_grad(tmp_n)           | double precision | Gradient in the subspace                          |
+| max_elem                | double precision | Maximal element in the gradient                   |
+| norm_grad               | double precision | Norm of the gradient                              |
+
+Internal:
+| m_grad(tmp_list_size,tmp_list_size)       | double precision | Gradient in a 2D array                                   |
+| tmp_int(tmp_list_size,tmp_list_size)      |                  | Temporary array to store the integrals                   |
+| tmp_accu(tmp_list_size,tmp_list_size)     |                  | Temporary array to store a matrix                        |
+|                                           |                  | product and compute tmp_int                              |
+| CS(tmp_list_size,ao_num)                  |                  | Array to store the result of mo_coef * ao_overlap        |
+| tmp_mo_coef(ao_num,tmp_list_size)         |                  | Array to store just the useful MO coefficients           |
+|                                           |                  | depending of the mo_class                                |
+| tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) |                  | Array to store just the useful MO coefficients           |
+|                                           |                  | depending of the nuclei                                  |
+| tmp_CS(tmp_list_size,nucl_n_aos(a))       |                  | Array to store just the useful mo_coef * ao_overlap      |
+|                                           |                  | values depending of the nuclei                           |
+| a                                         |                  | index to loop over the nuclei                            |
+| b                                         |                  | index to loop over the AOs which belongs to the nuclei a |
+| mu                                        |                  | index to refer to an AO which belongs to the nuclei a    |
+| rho                                       |                  | index to loop over all the AOs                           |
+
+#+BEGIN_SRC  f90 :comments org :tangle localization_sub.irp.f
+subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute gradient for the Pipek-Mezey localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad
+  double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho
+  double precision              :: t1,t2,t3
+
+  print*,''
+  print*,'---gradient_PM---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size))
+  allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size))
+
+
+  ! submatrix of the mo_coef
+  do tmp_i = 1, tmp_list_size
+    i = tmp_list(tmp_i)
+    do j = 1, ao_num
+
+      tmp_mo_coef(j,tmp_i) = mo_coef(j,i)
+
+    enddo
+  enddo
+
+  call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1))
+
+  m_grad = 0d0
+
+  do a = 1, nucl_num ! loop over the nuclei
+    tmp_int = 0d0
+
+    !do tmp_j = 1, tmp_list_size
+    !  do tmp_i = 1, tmp_list_size
+    !    do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+    !      mu = nucl_aos(a,b)
+
+    !      tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu))
+
+    !                             !  (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+    !                             !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+    !    enddo
+    !  enddo
+    !enddo
+
+    allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a)))
+
+    do tmp_i = 1, tmp_list_size
+      do b = 1, nucl_n_aos(a)
+        mu = nucl_aos(a,b)
+
+        tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i)
+
+      enddo
+    enddo
+
+    do b = 1, nucl_n_aos(a)
+      mu = nucl_aos(a,b)
+      do tmp_i = 1, tmp_list_size
+
+        tmp_CS(tmp_i,b) = CS(tmp_i,mu)
+
+      enddo
+    enddo
+
+    call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1))
+
+    do tmp_j = 1, tmp_list_size
+      do tmp_i = 1, tmp_list_size
+
+        tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i))
+
+      enddo
+    enddo
+
+    deallocate(tmp_mo_coef2,tmp_CS)
+
+    do tmp_j = 1, tmp_list_size
+      do tmp_i = 1, tmp_list_size
+
+        m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) +  4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))
+
+      enddo
+    enddo
+
+  enddo
+
+  ! 2D -> 1D
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    v_grad(tmp_k) = m_grad(tmp_i,tmp_j)
+  enddo
+
+  ! Maximum element in the gradient
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (ABS(v_grad(tmp_k)) > max_elem) then
+      max_elem = ABS(v_grad(tmp_k))
+    endif
+  enddo
+
+  ! Norm of the gradient
+  norm_grad = 0d0
+  do tmp_k = 1, tmp_n
+    norm_grad = norm_grad + v_grad(tmp_k)**2
+  enddo
+  norm_grad = dsqrt(norm_grad)
+
+  print*, 'Maximal element in the gradient:', max_elem
+  print*, 'Norm of the gradient:', norm_grad
+
+  ! Deallocation
+  deallocate(m_grad,tmp_int,CS,tmp_mo_coef)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in gradient_PM:', t3
+
+  print*,''
+  print*,'---End gradient_PM---'
+  print*,''
+
+end
+#+END_SRC
+
+*** Hessian v1
+#+BEGIN_SRC  f90 :comments org :tangle localization_sub.irp.f
+subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute diagonal hessian for the Pipek-Mezey localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: H(tmp_n, tmp_n)
+  double precision, allocatable :: beta(:,:),tmp_int(:,:)
+  integer                       :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu
+  double precision              :: max_elem
+
+  ! Allocation
+  allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size))
+
+  beta = 0d0
+
+  do a = 1, nucl_num
+    tmp_int = 0d0
+
+    do tmp_j = 1, tmp_list_size
+      j = tmp_list(tmp_j)
+      do tmp_i = 1, tmp_list_size
+        i = tmp_list(tmp_i)
+        do rho = 1, ao_num
+          do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+            mu = nucl_aos(a,b)
+
+            tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+                                   + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+          enddo
+        enddo
+      enddo
+    enddo
+
+    ! Calculation
+    do tmp_j = 1, tmp_list_size
+      do tmp_i = 1, tmp_list_size
+
+        beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) +  (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2
+
+      enddo
+    enddo
+
+  enddo
+
+  H = 0d0
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j)
+  enddo
+
+!  max_elem = 0d0
+!  do tmp_k = 1, tmp_n
+!    if (H(tmp_k,tmp_k) < max_elem) then
+!      max_elem = H(tmp_k,tmp_k)
+!    endif
+!  enddo
+!  print*, 'Min elem H:', max_elem
+!
+!  max_elem = 0d0
+!  do tmp_k = 1, tmp_n
+!    if (H(tmp_k,tmp_k) > max_elem) then
+!      max_elem = H(tmp_k,tmp_k)
+!    endif
+!  enddo
+!  print*, 'Max elem H:', max_elem
+!
+!  max_elem = 1d10
+!  do tmp_k = 1, tmp_n
+!    if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then
+!      max_elem = H(tmp_k,tmp_k)
+!    endif
+!  enddo
+!  print*, 'Near 0 elem H:', max_elem
+
+  ! Deallocation
+  deallocate(beta,tmp_int)
+
+end
+#+END_SRC
+
+*** Hessian
+
+The hessian is
+\begin{align*}
+\left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM}
+\end{align*}
+\begin{align*}
+\beta_{st}^{PM} = \sum_{A=1}^N \left( <s|P_A|t>^2 - \frac{1}{4} \left[<s|P_A|s> - <t|P_A|t> \right]^2 \right)
+\end{align*}
+
+with
+\begin{align*}
+<s|P_A|t> = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right]
+\end{align*}
+$\sum_{\rho}$ -> sum over all the AOs
+$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A
+$c^t$ -> expansion coefficient of orbital |t>
+
+#+BEGIN_SRC  f90 :comments org :tangle localization_sub.irp.f
+subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute diagonal hessian for the Pipek-Mezey localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: H(tmp_n, tmp_n)
+  double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:)
+  integer                       :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu
+  double precision              :: max_elem, t1,t2,t3
+
+  print*,''
+  print*,'---hessian_PM---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size))
+  allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size))
+
+  beta = 0d0
+
+  do tmp_i = 1, tmp_list_size
+    i = tmp_list(tmp_i)
+    do j = 1, ao_num
+
+      tmp_mo_coef(j,tmp_i) = mo_coef(j,i)
+
+    enddo
+  enddo
+
+  call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1))
+
+  do a = 1, nucl_num ! loop over the nuclei
+    tmp_int = 0d0
+
+    !do tmp_j = 1, tmp_list_size
+    !  do tmp_i = 1, tmp_list_size
+    !    do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+    !      mu = nucl_aos(a,b)
+
+    !      tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu))
+
+    !                             !  (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+    !                             !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+    !    enddo
+    !  enddo
+    !enddo
+
+    allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a)))
+
+    do tmp_i = 1, tmp_list_size
+      do b = 1, nucl_n_aos(a)
+        mu = nucl_aos(a,b)
+
+        tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i)
+
+      enddo
+    enddo
+
+    do b = 1, nucl_n_aos(a)
+      mu = nucl_aos(a,b)
+      do tmp_i = 1, tmp_list_size
+
+        tmp_CS(tmp_i,b) = CS(tmp_i,mu)
+
+      enddo
+    enddo
+
+    call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1))
+
+    do tmp_j = 1, tmp_list_size
+      do tmp_i = 1, tmp_list_size
+
+        tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i))
+
+      enddo
+    enddo
+
+    deallocate(tmp_mo_coef2,tmp_CS)
+
+    ! Calculation
+    do tmp_j = 1, tmp_list_size
+      do tmp_i = 1, tmp_list_size
+
+        beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) +  (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2
+
+      enddo
+    enddo
+
+  enddo
+
+  H = 0d0
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j)
+  enddo
+
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) < max_elem) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Min elem H:', max_elem
+
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) > max_elem) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Max elem H:', max_elem
+
+  max_elem = 1d10
+  do tmp_k = 1, tmp_n
+    if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Near 0 elem H:', max_elem
+
+  ! Deallocation
+  deallocate(beta,tmp_int)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in hessian_PM:', t3
+
+  print*,''
+  print*,'---End hessian_PM---'
+  print*,''
+
+end
+
+#+END_SRC
+
+** Criterion
+*** Criterion PM (old)
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine compute_crit_pipek(criterion)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Pipek-Mezey localization criterion
+  END_DOC
+
+  double precision, intent(out) :: criterion
+  double precision, allocatable :: tmp_int(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho
+
+  ! Allocation
+  allocate(tmp_int(mo_num, mo_num))
+
+  criterion = 0d0
+
+  do a = 1, nucl_num ! loop over the nuclei
+    tmp_int = 0d0
+
+    do i = 1, mo_num
+      do rho = 1, ao_num ! loop over all the AOs
+        do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+          mu = nucl_aos(a,b)
+
+          tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) &
+                                 + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i))
+
+        enddo
+      enddo
+    enddo
+
+    do i = 1, mo_num
+      criterion = criterion + tmp_int(i,i)**2
+    enddo
+
+  enddo
+
+  criterion = - criterion
+
+  deallocate(tmp_int)
+
+end
+#+END_SRC
+
+*** Criterion PM
+
+The criterion is computed as
+\begin{align*}
+\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ <i|P_A|i> \right]^2
+\end{align*}
+with
+\begin{align*}
+<s|P_A|t> = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right]
+\end{align*}
+
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine criterion_PM(tmp_list_size,tmp_list,criterion)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Pipek-Mezey localization criterion
+  END_DOC
+
+  integer, intent(in)           :: tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: criterion
+  double precision, allocatable :: tmp_int(:,:),CS(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho
+
+  print*,''
+  print*,'---criterion_PM---'
+
+  ! Allocation
+  allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num))
+
+  ! Initialization
+  criterion = 0d0
+
+  call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1))
+
+  do a = 1, nucl_num ! loop over the nuclei
+    tmp_int = 0d0
+
+      do tmp_i = 1, tmp_list_size
+        i = tmp_list(tmp_i)
+        do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+          mu = nucl_aos(a,b)
+
+          tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu))
+
+                                 !  (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+                                 !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+      enddo
+    enddo
+
+    do tmp_i = 1, tmp_list_size
+      criterion = criterion + tmp_int(tmp_i,tmp_i)**2
+    enddo
+
+  enddo
+
+  criterion = - criterion
+
+  deallocate(tmp_int,CS)
+
+  print*,'---End criterion_PM---'
+  print*,''
+
+end
+#+END_SRC
+
+*** Criterion PM v3
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Pipek-Mezey localization criterion
+  END_DOC
+
+  integer, intent(in)           :: tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: criterion
+  double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c
+  double precision              :: t1,t2,t3
+
+  print*,''
+  print*,'---criterion_PM_v3---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size))
+  allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size))
+
+  criterion = 0d0
+
+  ! submatrix of the mo_coef
+  do tmp_i = 1, tmp_list_size
+    i = tmp_list(tmp_i)
+    do j = 1, ao_num
+
+      tmp_mo_coef(j,tmp_i) = mo_coef(j,i)
+
+    enddo
+  enddo
+
+  ! ao_overlap(ao_num,ao_num)
+  ! mo_coef(ao_num,mo_num)
+  call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1))
+
+  do a = 1, nucl_num ! loop over the nuclei
+
+    do j = 1, tmp_list_size
+      do i = 1, tmp_list_size
+        tmp_int(i,j) = 0d0
+      enddo
+    enddo
+
+    !do tmp_j = 1, tmp_list_size
+    !  do tmp_i = 1, tmp_list_size
+    !    do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+    !      mu = nucl_aos(a,b)
+
+    !      tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu))
+
+    !                             !  (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+    !                             !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+    !    enddo
+    !  enddo
+    !enddo
+
+    allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a)))
+
+    do tmp_i = 1, tmp_list_size
+      do b = 1, nucl_n_aos(a)
+        mu = nucl_aos(a,b)
+
+        tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i)
+
+      enddo
+    enddo
+
+    do b = 1, nucl_n_aos(a)
+      mu = nucl_aos(a,b)
+      do tmp_i = 1, tmp_list_size
+
+         tmp_CS(tmp_i,b) = CS(tmp_i,mu)
+
+      enddo
+    enddo
+
+    call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1))
+
+    ! Integrals
+    do tmp_j = 1, tmp_list_size
+      do tmp_i = 1, tmp_list_size
+
+        tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i))
+
+      enddo
+    enddo
+
+    deallocate(tmp_mo_coef2,tmp_CS)
+
+    ! Criterion
+    do tmp_i = 1, tmp_list_size
+      criterion = criterion + tmp_int(tmp_i,tmp_i)**2
+    enddo
+
+  enddo
+
+  criterion = - criterion
+
+  deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in criterion_PM_v3:', t3
+
+  print*,'---End criterion_PM_v3---'
+  print*,''
+
+end
+#+END_SRC
+
+*** Criterion FB (old)
+
+The criterion is just computed as
+
+\begin{align*}
+C = - \sum_i^{mo_{num}} (<i|x|i>^2 + <i|y|i>^2 + <i|z|i>^2)
+\end{align*}
+
+The minus sign is here in order to minimize this criterion
+
+Output:
+| criterion | double precision | criterion for the Foster-Boys localization |
+
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine criterion_FB_old(criterion)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Foster-Boys localization criterion
+  END_DOC
+
+  double precision, intent(out) :: criterion
+  integer                       :: i
+
+  ! Criterion (= \sum_i <i|r|i>^2 )
+  criterion = 0d0
+  do i = 1, mo_num
+    criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2
+  enddo
+  criterion = - criterion
+
+end subroutine
+#+END_SRC
+
+*** Criterion FB
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine criterion_FB(tmp_list_size, tmp_list, criterion)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Foster-Boys localization criterion
+  END_DOC
+
+  integer, intent(in)           :: tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: criterion
+  integer                       :: i, tmp_i
+
+  ! Criterion (= - \sum_i <i|r|i>^2 )
+  criterion = 0d0
+  do tmp_i = 1, tmp_list_size
+    i = tmp_list(tmp_i)
+    criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2
+  enddo
+  criterion = - criterion
+
+end subroutine
+#+END_SRC
+
+** Theta
+
+In:
+| n | integer | number of MOs in the considered MO class |
+| l | integer | list of MOs of the considered class      |
+
+Out:
+| m_x(n,n) | double precision | Matrix containing the rotation angle between all the different |
+|          |                  | pairs of MOs to apply the rotations (need a minus sign)        |
+| max_elem | double precision | Maximal angle in absolute value                                |
+
+$$\cos(4 \theta) = \frac{-A{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$
+$$\sin(4 \theta) = \frac{B{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$
+$$\tan(4 \theta) = \frac{\sin(4 \theta)}{\cos(4 \theta)}$$
+where $\theta$ is in fact $\theta_{ij}$
+
+For Foster-Boys localization:
+$$A_{ij} = <i|r|j>^2 - \frac{1}{4} (<i|r|i> - <j|r|j>)^2$$
+$$B_{ij} = <i|r|j> (<i|r|i> - <j|r|j>)$$
+
+
+For Pipek-Mezey localization:
+$$A_{ij} = \sum_A <i|P_A|j>^2 - \frac{1}{4} (<i|P_A|i> - <j|P_A|j>)^2$$
+$$B_{ij} = \sum_A <i|P_A|j> (<i|P_A|i> - <j|P_A|j>)$$
+with
+$$<i|P_A|j> = \frac{1}{2} \sum_\rho \sum_{\mu \in A} ( c_\rho^{i*} S_{\rho
+\mu} c_\mu^j + c_\mu^{i*} S_{\mu \rho} c_\rho^j)$$
+$i,j$ MOs
+$\mu, \rho$ AOs
+$A$ nucleus
+$S$ overlap matrix
+$c$ MO coefficient
+$r$ position operator
+
+#+begin_src f90 :tangle localization_sub.irp.f
+subroutine theta_FB(l, n, m_x, max_elem)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs
+  ! Warning: you must give - the angles to build the rotation matrix...
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)           :: n, l(n)
+  double precision, intent(out) :: m_x(n,n), max_elem
+
+  integer                       :: i,j, tmp_i, tmp_j
+  double precision, allocatable :: cos4theta(:,:), sin4theta(:,:)
+  double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:)
+  integer                       :: idx_i,idx_j
+
+  allocate(cos4theta(n, n), sin4theta(n, n))
+  allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n))
+
+  do tmp_j = 1, n
+    j = l(tmp_j)
+    do tmp_i = 1, n
+      i = l(tmp_i)
+      A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 &
+                     + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 &
+                     + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2
+    enddo
+    A(j,j) = 0d0
+  enddo
+
+  do tmp_j = 1, n
+    j = l(tmp_j)
+    do tmp_i = 1, n
+      i = l(tmp_i)
+      B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) &
+                     + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) &
+                     + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))
+    enddo
+  enddo
+
+  !do tmp_j = 1, n
+  !  j = l(tmp_j)
+  !  do tmp_i = 1, n
+  !    i = l(tmp_i)
+  !    beta(tmp_i,tmp_j) =  (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 &
+  !               + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 &
+  !               + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2
+  !  enddo
+  !enddo
+
+  !do tmp_j = 1, n
+  !  j = l(tmp_j)
+  !  do tmp_i = 1, n
+  !    i = l(tmp_i)
+  !    gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) &
+  !                       + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) &
+  !                       + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)))
+  !  enddo
+  !enddo
+
+  !
+  !do j = 1, n
+  !  do i = 1, n
+  !    cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2)
+  !  enddo
+  !enddo
+
+  !do j = 1, n
+  !  do i = 1, n
+  !    sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2)
+  !  enddo
+  !enddo
+
+  ! Theta
+  do j = 1, n
+    do i = 1, n
+      m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j))
+      !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j))
+    enddo
+  enddo
+
+  ! Enforce a perfect antisymmetry
+  do j = 1, n-1
+    do i = j+1, n
+      m_x(j,i) = - m_x(i,j)
+    enddo
+  enddo
+  do i = 1, n
+    m_x(i,i) = 0d0
+  enddo
+
+  ! Max
+  max_elem = 0d0
+  do j = 1, n-1
+    do i = j+1, n
+      if (dabs(m_x(i,j)) > dabs(max_elem)) then
+        max_elem = m_x(i,j)
+        !idx_i = i
+        !idx_j = j
+      endif
+    enddo
+  enddo
+
+  ! Debug
+  !print*,''
+  !print*,'sin/B'
+  !do i = 1, n
+  !  write(*,'(100F10.4)') sin4theta(i,:)
+  !  !B(i,:)
+  !enddo
+  !print*,'cos/A'
+  !do i = 1, n
+  !  write(*,'(100F10.4)') cos4theta(i,:)
+  !  !A(i,:)
+  !enddo
+  !print*,'X'
+  !!m_x = 0d0
+  !!m_x(idx_i,idx_j) = max_elem
+  !!m_x(idx_j,idx_i) = -max_elem
+  !do i = 1, n
+  !  write(*,'(100F10.4)') m_x(i,:)
+  !enddo
+  !print*,idx_i,idx_j,max_elem
+
+  max_elem = dabs(max_elem)
+
+  deallocate(cos4theta, sin4theta)
+  deallocate(A,B,beta,gamma)
+
+end
+#+end_src
+
+#+begin_src f90 :comments org :tangle localization_sub.irp.f
+subroutine theta_PM(l, n, m_x, max_elem)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs
+  ! Warning: you must give - the angles to build the rotation matrix...
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)           :: n, l(n)
+  double precision, intent(out) :: m_x(n,n), max_elem
+
+  integer                       :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j
+  double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:)
+
+  allocate(Aij(n,n), Bij(n,n), Pa(n,n))
+
+  do a = 1, nucl_num ! loop over the nuclei
+    Pa = 0d0 ! Initialization for each nuclei
+
+    ! Loop over the MOs of the a given mo_class to compute <i|P_a|j>
+    do tmp_j = 1, n
+      j = l(tmp_j)
+      do tmp_i = 1, n
+         i = l(tmp_i)
+        do rho = 1, ao_num ! loop over all the AOs
+          do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+            mu = nucl_aos(a,b) ! AO centered on atom a
+
+            Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+                                   + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+          enddo
+        enddo
+      enddo
+    enddo
+
+    ! A
+    do j = 1, n
+      do i = 1, n
+        Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2
+      enddo
+    enddo
+
+    ! B
+    do j = 1, n
+      do i = 1, n
+        Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j))
+      enddo
+    enddo
+
+  enddo
+
+  ! Theta
+  do j = 1, n
+    do i = 1, n
+      m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j))
+    enddo
+  enddo
+
+  ! Enforce a perfect antisymmetry
+  do j = 1, n-1
+    do i = j+1, n
+      m_x(j,i) = - m_x(i,j)
+    enddo
+  enddo
+  do i = 1, n
+    m_x(i,i) = 0d0
+  enddo
+
+  ! Max
+  max_elem = 0d0
+  do j = 1, n-1
+    do i = j+1, n
+      if (dabs(m_x(i,j)) > dabs(max_elem)) then
+        max_elem = m_x(i,j)
+        idx_i = i
+        idx_j = j
+      endif
+    enddo
+  enddo
+
+  ! Debug
+  !do i = 1, n
+  !  write(*,'(100F10.4)') m_x(i,:)
+  !enddo
+  !print*,'Max',idx_i,idx_j,max_elem
+
+  max_elem = dabs(max_elem)
+
+  deallocate(Aij,Bij,Pa)
+
+end
+#+end_src
+
+** Spatial extent
+
+The spatial extent of an orbital $i$ is computed as
+\begin{align*}
+\sum_{\lambda=x,y,z}\sqrt{<i|\lambda^2|i> - <i|\lambda|i>^2}
+\end{align*}
+
+From that we can also compute the average and the standard deviation
+
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine compute_spatial_extent(spatial_extent)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the spatial extent of the MOs
+  END_DOC
+
+  double precision, intent(out) :: spatial_extent(mo_num)
+  double precision              :: average_core, average_act, average_inact, average_virt
+  double precision              :: std_var_core, std_var_act, std_var_inact, std_var_virt
+  integer                       :: i,j,k,l
+
+  spatial_extent = 0d0
+
+  do i = 1, mo_num
+    spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2
+  enddo
+  do i = 1, mo_num
+    spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2
+  enddo
+  do i = 1, mo_num
+    spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2
+  enddo
+
+  do i = 1, mo_num
+    spatial_extent(i) = dsqrt(spatial_extent(i))
+  enddo
+
+  average_core = 0d0
+  std_var_core = 0d0
+  if (dim_list_core_orb >= 2) then
+    call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core)
+    call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core)
+  endif
+
+  average_act = 0d0
+  std_var_act = 0d0
+  if (dim_list_act_orb >= 2) then
+    call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act)
+    call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act)
+  endif
+
+  average_inact = 0d0
+  std_var_inact = 0d0
+  if (dim_list_inact_orb >= 2) then
+    call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact)
+    call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact)
+  endif
+
+  average_virt = 0d0
+  std_var_virt = 0d0
+  if (dim_list_virt_orb >= 2) then
+    call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt)
+    call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt)
+  endif
+
+  print*,''
+  print*,'============================='
+  print*,'  Spatial extent of the MOs'
+  print*,'============================='
+  print*,''
+
+  print*, 'elec_num:', elec_num
+  print*, 'elec_alpha_num:', elec_alpha_num
+  print*, 'elec_beta_num:', elec_beta_num
+  print*, 'core:', dim_list_core_orb
+  print*, 'act:', dim_list_act_orb
+  print*, 'inact:', dim_list_inact_orb
+  print*, 'virt:', dim_list_virt_orb
+  print*, 'mo_num:', mo_num
+  print*,''
+
+  print*,'-- Core MOs --'
+  print*,'Average:', average_core
+  print*,'Std var:', std_var_core
+  print*,''
+
+  print*,'-- Active MOs --'
+  print*,'Average:', average_act
+  print*,'Std var:', std_var_act
+  print*,''
+
+  print*,'-- Inactive MOs --'
+  print*,'Average:', average_inact
+  print*,'Std var:', std_var_inact
+  print*,''
+
+  print*,'-- Virtual MOs --'
+  print*,'Average:', average_virt
+  print*,'Std var:', std_var_virt
+  print*,''
+
+  print*,'Spatial extent:'
+  do i = 1, mo_num
+    print*, i, spatial_extent(i)
+  enddo
+
+end
+#+END_SRC
+
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine compute_average_sp_ext(spatial_extent, list, list_size, average)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the average spatial extent of the MOs
+  END_DOC
+
+  integer, intent(in) :: list_size, list(list_size)
+  double precision, intent(in) :: spatial_extent(mo_num)
+  double precision, intent(out) :: average
+  integer :: i, tmp_i
+
+  average = 0d0
+  do tmp_i = 1, list_size
+    i = list(tmp_i)
+    average = average + spatial_extent(i)
+  enddo
+
+  average = average / DBLE(list_size)
+
+end
+#+END_SRC
+
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the standard deviation of the spatial extent of the MOs
+  END_DOC
+
+  integer, intent(in)           :: list_size, list(list_size)
+  double precision, intent(in)  :: spatial_extent(mo_num)
+  double precision, intent(in)  :: average
+  double precision, intent(out) :: std_var
+  integer                       :: i, tmp_i
+
+  std_var = 0d0
+
+  do tmp_i = 1, list_size
+    i = list(tmp_i)
+    std_var = std_var + (spatial_extent(i) - average)**2
+  enddo
+
+  std_var = dsqrt(1d0/DBLE(list_size) * std_var)
+
+end
+#+END_SRC
+
+** Utils
+
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine apply_pre_rotation()
+
+  implicit none
+
+  BEGIN_DOC
+  ! Apply a rotation between the MOs
+  END_DOC
+
+  double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:)
+  double precision              :: t1,t2,t3
+  integer                       :: i,j,tmp_i,tmp_j
+  integer                       :: info
+  logical                       :: enforce_step_cancellation
+
+  print*,'---apply_pre_rotation---'
+  call wall_time(t1)
+
+  allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num))
+
+  ! Initialization of the matrix
+  pre_rot = 0d0
+
+  if (kick_in_mos) then
+    ! Pre rotation for core MOs
+    if (dim_list_core_orb >= 2) then
+      do tmp_j = 1, dim_list_core_orb
+        j = list_core(tmp_j)
+        do tmp_i = 1, dim_list_core_orb
+          i = list_core(tmp_i)
+          if (i > j) then
+            pre_rot(i,j) = angle_pre_rot
+          elseif (i < j) then
+            pre_rot(i,j) = - angle_pre_rot
+          else
+            pre_rot(i,j) = 0d0
+          endif
+        enddo
+      enddo
+    endif
+
+    ! Pre rotation for active MOs
+    if (dim_list_act_orb >= 2) then
+      do tmp_j = 1, dim_list_act_orb
+        j = list_act(tmp_j)
+        do tmp_i = 1, dim_list_act_orb
+          i = list_act(tmp_i)
+          if (i > j) then
+            pre_rot(i,j) = angle_pre_rot
+          elseif (i < j) then
+            pre_rot(i,j) = - angle_pre_rot
+          else
+            pre_rot(i,j) = 0d0
+          endif
+        enddo
+      enddo
+    endif
+
+    ! Pre rotation for inactive MOs
+    if (dim_list_inact_orb >= 2) then
+      do tmp_j = 1, dim_list_inact_orb
+        j = list_inact(tmp_j)
+        do tmp_i = 1, dim_list_inact_orb
+          i = list_inact(tmp_i)
+          if (i > j) then
+            pre_rot(i,j) = angle_pre_rot
+          elseif (i < j) then
+            pre_rot(i,j) = - angle_pre_rot
+          else
+            pre_rot(i,j) = 0d0
+          endif
+        enddo
+      enddo
+    endif
+
+    ! Pre rotation for virtual MOs
+    if (dim_list_virt_orb >= 2) then
+      do tmp_j = 1, dim_list_virt_orb
+        j = list_virt(tmp_j)
+        do tmp_i = 1, dim_list_virt_orb
+          i = list_virt(tmp_i)
+          if (i > j) then
+            pre_rot(i,j) = angle_pre_rot
+          elseif (i < j) then
+            pre_rot(i,j) = - angle_pre_rot
+          else
+            pre_rot(i,j) = 0d0
+          endif
+        enddo
+      enddo
+    endif
+
+    ! Nothing for deleted ones
+
+    ! Compute pre rotation matrix from pre_rot
+    call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation)
+
+    if (enforce_step_cancellation) then
+      print*, 'Cancellation of the pre rotation, too big error in the rotation matrix'
+      print*, 'Reduce the angle for the pre rotation, abort'
+      call abort
+    endif
+
+    ! New Mos (we don't car eabout the previous MOs prev_mos)
+    call apply_mo_rotation(R,prev_mos)
+
+    ! Update the things related to mo_coef
+    TOUCH mo_coef
+    call save_mos
+  endif
+
+  deallocate(pre_rot, prev_mos, R)
+
+  call wall_time(t2)
+  t3 = t2-t1
+  print*,'Time in apply_pre_rotation:', t3
+  print*,'---End apply_pre_rotation---'
+
+end
+#+END_SRC
+
+#+BEGIN_SRC  f90 :comments org :tangle localization_sub.irp.f
+subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x)
+
+  implicit none
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(in)  :: v_grad(tmp_n)
+  double precision, intent(in)  :: H(tmp_n, tmp_n)
+  double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size)
+  !double precision, allocatable :: x(:)
+  double precision              :: lambda , accu, max_elem
+  integer                       :: i,j,tmp_i,tmp_j,tmp_k
+
+  ! Allocation
+  !allocate(x(tmp_n))
+
+  ! Level shifted hessian
+  lambda = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) < lambda) then
+      lambda = H(tmp_k,tmp_k)
+    endif
+  enddo
+
+  ! min element in the hessian
+  if (lambda < 0d0) then
+    lambda = -lambda + 1d-6
+  endif
+
+  print*, 'lambda', lambda
+
+  ! Good
+  do tmp_k = 1, tmp_n
+    if (ABS(H(tmp_k,tmp_k)) > 1d-6) then
+       tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k))
+      !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k))
+    endif
+  enddo
+
+  ! 1D tmp -> 2D tmp
+  tmp_m_x = 0d0
+  do tmp_j = 1, tmp_list_size - 1
+    do tmp_i = tmp_j + 1, tmp_list_size
+      call mat_to_vec_index(tmp_i,tmp_j,tmp_k)
+      tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k)
+    enddo
+  enddo
+
+  ! Antisym
+  do tmp_i = 1, tmp_list_size - 1
+    do tmp_j = tmp_i + 1, tmp_list_size
+      tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i)
+    enddo
+  enddo
+
+  ! Deallocation
+  !deallocate(x)
+
+end subroutine
+#+END_SRC
+
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo)
+  implicit none
+  BEGIN_DOC
+  ! Transform A from the |AO| basis to the |MO| basis
+  !
+  ! $C^\dagger.A_{ao}.C$
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA_mo
+  double precision, intent(in)   :: A_ao(LDA_ao,ao_num)
+  double precision, intent(out)  :: A_mo(LDA_mo,mo_num)
+  double precision, allocatable  :: T(:,:)
+
+  allocate ( T(ao_num,mo_num) )
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
+
+  call dgemm('N','N', ao_num, mo_num, ao_num,                    &
+      1.d0, A_ao,LDA_ao,                                             &
+      mo_coef, size(mo_coef,1),                                      &
+      0.d0, T, size(T,1))
+
+  call dgemm('T','N', mo_num, mo_num, ao_num,                &
+      1.d0, mo_coef,size(mo_coef,1),                                 &
+      T, ao_num,                                                     &
+      0.d0, A_mo, size(A_mo,1))
+
+  deallocate(T)
+end
+#+END_SRC
+
+#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f
+subroutine run_sort_by_fock_energies()
+
+  implicit none
+
+  BEGIN_DOC
+  ! Saves the current MOs ordered by diagonal element of the Fock operator.
+  END_DOC
+
+  integer                        :: i,j,k,l,tmp_i,tmp_k,tmp_list_size
+  integer, allocatable           :: iorder(:)
+  double precision, allocatable  :: fock_energies_tmp(:), tmp_mo_coef(:,:)
+  integer, allocatable           :: tmp_list(:)
+
+!  allocate(iorder(mo_num), fock_energies_tmp(mo_num), new_mo_coef(ao_num, mo_num))
+!
+!  do i = 1, mo_num
+!    fock_energies_tmp(i) = Fock_matrix_diag_mo(i)
+!    print*,'fock_energies_tmp(i) = ',fock_energies_tmp(i)
+!    iorder(i) = i
+!  enddo
+!
+!  print*,''
+!  print*,'Sorting by Fock energies'
+!  print*,''
+!
+!  call dsort(fock_energies_tmp, iorder, mo_num)
+!
+!  do i = 1, mo_num
+!    k = iorder(i)
+!    print*,'fock_energies_new(i) = ',fock_energies_tmp(i)
+!    do j = 1, ao_num
+!      new_mo_coef(j,i) = mo_coef(j,k)
+!    enddo
+!  enddo
+
+  ! Test
+  do l = 1, 4
+    if (l==1) then ! core
+      tmp_list_size = dim_list_core_orb
+    elseif (l==2) then ! act
+      tmp_list_size = dim_list_act_orb
+    elseif (l==3) then ! inact
+      tmp_list_size = dim_list_inact_orb
+    else ! virt
+      tmp_list_size = dim_list_virt_orb
+    endif
+
+    if (tmp_list_size >= 2) then
+      ! Allocation tmp array
+      allocate(tmp_list(tmp_list_size))
+
+      ! To give the list of MOs in a mo_class
+      if (l==1) then ! core
+        tmp_list = list_core
+      elseif (l==2) then
+        tmp_list = list_act
+      elseif (l==3) then
+        tmp_list = list_inact
+      else
+        tmp_list = list_virt
+      endif
+      print*,'MO class: ',trim(mo_class(tmp_list(1)))
+
+      allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size))
+      !print*,'MOs before sorting them by f_p^p energies:'
+      do i = 1, tmp_list_size
+        tmp_i = tmp_list(i)
+        fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i)
+        iorder(i) = i
+        !print*, tmp_i, fock_energies_tmp(i)
+      enddo
+
+      call dsort(fock_energies_tmp, iorder, tmp_list_size)
+
+      print*,'MOs after sorting them by f_p^p energies:'
+      do i = 1, tmp_list_size
+        k = iorder(i)
+        tmp_k = tmp_list(k)
+        print*, tmp_k, fock_energies_tmp(k)
+        do j = 1, ao_num
+          tmp_mo_coef(j,k) = mo_coef(j,tmp_k)
+        enddo
+      enddo
+
+      ! Update the MOs after sorting them by energies
+      do i = 1, tmp_list_size
+        tmp_i = tmp_list(i)
+        do j = 1, ao_num
+          mo_coef(j,tmp_i) = tmp_mo_coef(j,i)
+        enddo
+      enddo
+
+      if (debug_hf) then
+        touch mo_coef
+        print*,'HF energy:', HF_energy
+      endif
+      print*,''
+
+      deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef)
+    endif
+
+  enddo
+
+  touch mo_coef
+  call save_mos
+
+end
+
+#+END_SRC
+
diff --git a/src/mo_localization/localization_sub.irp.f b/src/mo_localization/localization_sub.irp.f
new file mode 100644
index 00000000..39442a12
--- /dev/null
+++ b/src/mo_localization/localization_sub.irp.f
@@ -0,0 +1,2055 @@
+! Gathering
+! Gradient/hessian/criterion for the localization:
+! They are chosen in function of the localization method
+
+! Gradient:
+
+! qp_edit :
+! | localization_method | method for the localization |
+
+! Input:
+! | tmp_n                   | integer          | Number of parameters in the MO subspace           |
+! | tmp_list_size           | integer          | Number of MOs in the mo_class we want to localize |
+! | tmp_list(tmp_list_size) | integer          | MOs in the mo_class                               |
+
+! Output:
+! | v_grad(tmp_n)           | double precision | Gradient in the subspace                          |
+! | max_elem                | double precision | Maximal element in the gradient                   |
+! | norm_grad               | double precision | Norm of the gradient                              |
+
+
+
+subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+
+  include 'pi.h'
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the gradient of the chosen localization method
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad
+
+  if (localization_method == 'boys') then
+    call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+    !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+  elseif (localization_method== 'pipek') then
+    call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+  else
+    print*,'Unkown method:'//localization_method
+    call abort
+  endif
+
+end
+
+
+
+! Hessian:
+
+! Output:
+! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace        |
+! | max_elem       | double precision | Maximal element in the gradient |
+! | norm_grad      | double precision | Norm of the gradient            |
+
+
+subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H)
+
+  include 'pi.h'
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the diagonal hessian of the chosen localization method
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: H(tmp_n, tmp_n)
+
+  if (localization_method == 'boys') then
+    call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H)
+    !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging
+  elseif (localization_method == 'pipek') then
+    call hessian_PM(tmp_n, tmp_list_size, tmp_list, H)
+  else
+    print*,'Unkown method: '//localization_method
+    call abort
+  endif
+
+end
+
+
+
+! Criterion:
+
+! Output:
+! | criterion | double precision | Criterion for the orbital localization |
+
+
+subroutine criterion_localization(tmp_list_size, tmp_list,criterion)
+
+  include 'pi.h'
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the localization criterion of the chosen localization method
+  END_DOC
+
+  integer, intent(in)           :: tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: criterion
+
+  if (localization_method == 'boys') then
+    call criterion_FB(tmp_list_size, tmp_list, criterion)
+  elseif (localization_method == 'pipek') then
+    !call criterion_PM(tmp_list_size, tmp_list,criterion)
+    call criterion_PM_v3(tmp_list_size, tmp_list, criterion)
+  else
+    print*,'Unkown method: '//localization_method
+    call abort
+  endif
+
+end
+
+
+
+! Subroutine to update the datas needed for the localization
+
+subroutine update_data_localization()
+
+  include 'pi.h'
+
+  implicit none
+
+  if (localization_method == 'boys') then
+    ! Update the dipoles
+    call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num)
+    call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num)
+    call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num)
+  elseif (localization_method == 'pipek') then
+    ! Nothing required
+  else
+    print*,'Unkown method: '//localization_method
+    call abort
+  endif
+end
+
+
+
+! Angles:
+
+! Output:
+! | tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace |
+! | max_elem                              | double precision | Maximal angle                            |
+
+
+
+subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem)
+
+  include 'pi.h'
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the rotation angles between the MOs for the chosen localization method
+  END_DOC
+
+  integer, intent(in)           :: tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem
+
+  if (localization_method == 'boys') then
+    call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem)
+  elseif (localization_method== 'pipek') then
+    call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem)
+  else
+    print*,'Unkown method: '//localization_method
+    call abort
+  endif
+
+end
+
+! Gradient
+! Input:
+! | tmp_n                   | integer          | Number of parameters in the MO subspace           |
+! | tmp_list_size           | integer          | Number of MOs in the mo_class we want to localize |
+! | tmp_list(tmp_list_size) | integer          | MOs in the mo_class                               |
+
+! Output:
+! | v_grad(tmp_n)           | double precision | Gradient in the subspace                          |
+! | max_elem                | double precision | Maximal element in the gradient                   |
+! | norm_grad               | double precision | Norm of the gradient                              |
+
+! Internal:
+! | m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form |
+! | i,j,k               | integer          | indexes in the full space   |
+! | tmp_i,tmp_j,tmp_k   | integer          | indexes in the subspace     |
+! | t*                  | double precision | to compute the time         |
+
+
+subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the gradient for the Foster-Boys localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad
+  double precision, allocatable :: m_grad(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k
+  double precision              :: t1, t2, t3
+
+  print*,''
+  print*,'---gradient_FB---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(m_grad(tmp_list_size, tmp_list_size))
+
+  ! Calculation
+  do tmp_j = 1, tmp_list_size
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, tmp_list_size
+      i = tmp_list(tmp_i)
+      m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) &
+                           +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) &
+                           +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))
+    enddo
+  enddo
+
+  ! 2D -> 1D
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    v_grad(tmp_k) = m_grad(tmp_i,tmp_j)
+  enddo
+
+  ! Maximum element in the gradient
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (ABS(v_grad(tmp_k)) > max_elem) then
+      max_elem = ABS(v_grad(tmp_k))
+    endif
+  enddo
+
+  ! Norm of the gradient
+  norm_grad = 0d0
+  do tmp_k = 1, tmp_n
+    norm_grad = norm_grad + v_grad(tmp_k)**2
+  enddo
+  norm_grad = dsqrt(norm_grad)
+
+  print*, 'Maximal element in the gradient:', max_elem
+  print*, 'Norm of the gradient:', norm_grad
+
+  ! Deallocation
+  deallocate(m_grad)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in gradient_FB:', t3
+
+  print*,''
+  print*,'---End gradient_FB---'
+  print*,''
+
+end subroutine
+
+! Gradient (OMP)
+
+subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+
+  use omp_lib
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the gradient for the Foster-Boys localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad
+  double precision, allocatable :: m_grad(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k
+  double precision              :: t1, t2, t3
+
+  print*,''
+  print*,'---gradient_FB_omp---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(m_grad(tmp_list_size, tmp_list_size))
+
+  ! Initialization omp
+  call omp_set_max_active_levels(1)
+
+  !$OMP PARALLEL                                                             &
+      !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k)                                         &
+      !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) &
+      !$OMP DEFAULT(NONE)
+
+  ! Calculation
+  !$OMP DO
+  do tmp_j = 1, tmp_list_size
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, tmp_list_size
+      i = tmp_list(tmp_i)
+      m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) &
+                           +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) &
+                           +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))
+    enddo
+  enddo
+  !$OMP END DO
+
+  ! 2D -> 1D
+  !$OMP DO
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    v_grad(tmp_k) = m_grad(tmp_i,tmp_j)
+  enddo
+  !$OMP END DO
+
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  ! Maximum element in the gradient
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (ABS(v_grad(tmp_k)) > max_elem) then
+      max_elem = ABS(v_grad(tmp_k))
+    endif
+  enddo
+
+  ! Norm of the gradient
+  norm_grad = 0d0
+  do tmp_k = 1, tmp_n
+    norm_grad = norm_grad + v_grad(tmp_k)**2
+  enddo
+  norm_grad = dsqrt(norm_grad)
+
+  print*, 'Maximal element in the gradient:', max_elem
+  print*, 'Norm of the gradient:', norm_grad
+
+  ! Deallocation
+  deallocate(m_grad)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in gradient_FB_omp:', t3
+
+  print*,''
+  print*,'---End gradient_FB_omp---'
+  print*,''
+
+end subroutine
+
+! Hessian
+
+! Output:
+! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace        |
+! | max_elem       | double precision | Maximal element in the gradient |
+! | norm_grad      | double precision | Norm of the gradient            |
+
+! Internal:
+! Internal:
+! | beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian |
+! | i,j,k             | integer          | indexes in the full space                             |
+! | tmp_i,tmp_j,tmp_k | integer          | indexes in the subspace                               |
+! | t*                | double precision | to compute the time                                   |
+
+
+subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the diagonal hessian for the Foster-Boys localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: H(tmp_n, tmp_n)
+  double precision, allocatable :: beta(:,:)
+  integer                       :: i,j,tmp_k,tmp_i, tmp_j
+  double precision              :: max_elem, t1,t2,t3
+
+  print*,''
+  print*,'---hessian_FB---'
+  print*,''
+
+  call wall_time(t1)
+
+
+  ! Allocation
+  allocate(beta(tmp_list_size,tmp_list_size))
+
+  ! Calculation
+  do tmp_j = 1, tmp_list_size
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, tmp_list_size
+      i = tmp_list(tmp_i)
+      beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 &
+                         +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 &
+                         +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2
+    enddo
+  enddo
+
+  ! Diagonal of the hessian
+  H = 0d0
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j)
+  enddo
+
+  ! Min elem
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) < max_elem) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Min elem H:', max_elem
+
+  ! Max elem
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) > max_elem) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Max elem H:', max_elem
+
+  ! Near 0
+  max_elem = 1d10
+  do tmp_k = 1, tmp_n
+    if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Near 0 elem H:', max_elem
+
+  ! Deallocation
+  deallocate(beta)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in hessian_FB:', t3
+
+  print*,''
+  print*,'---End hessian_FB---'
+  print*,''
+
+end subroutine
+
+! Hessian (OMP)
+
+subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the diagonal hessian for the Foster-Boys localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: H(tmp_n, tmp_n)
+  double precision, allocatable :: beta(:,:)
+  integer                       :: i,j,tmp_k,tmp_i,tmp_j
+  double precision              :: max_elem, t1,t2,t3
+
+  print*,''
+  print*,'---hessian_FB_omp---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(beta(tmp_list_size,tmp_list_size))
+
+  ! Initialization omp
+  call omp_set_max_active_levels(1)
+
+  !$OMP PARALLEL                                                             &
+      !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k)                                         &
+      !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) &
+      !$OMP DEFAULT(NONE)
+
+
+  ! Calculation
+  !$OMP DO
+  do tmp_j = 1, tmp_list_size
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, tmp_list_size
+      i = tmp_list(tmp_i)
+      beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 &
+                         +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 &
+                         +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2
+    enddo
+  enddo
+  !$OMP END DO
+
+  ! Initialization
+  !$OMP DO
+  do j = 1, tmp_n
+    do i = 1, tmp_n
+      H(i,j) = 0d0
+    enddo
+  enddo
+  !$OMP END DO
+
+  ! Diagonalm of the hessian
+  !$OMP DO
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j)
+  enddo
+  !$OMP END DO
+
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  ! Min elem
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) < max_elem) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Min elem H:', max_elem
+
+  ! Max elem
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) > max_elem) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Max elem H:', max_elem
+
+  ! Near 0
+  max_elem = 1d10
+  do tmp_k = 1, tmp_n
+    if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Near 0 elem H:', max_elem
+
+  ! Deallocation
+  deallocate(beta)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in hessian_FB_omp:', t3
+
+  print*,''
+  print*,'---End hessian_FB_omp---'
+  print*,''
+
+end subroutine
+
+! Gradient v1
+
+subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute gradient for the Pipek-Mezey localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad
+  double precision, allocatable :: m_grad(:,:), tmp_int(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho
+
+  ! Allocation
+  allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size))
+
+  ! Initialization
+  m_grad = 0d0
+
+  do a = 1, nucl_num ! loop over the nuclei
+     tmp_int = 0d0 ! Initialization for each nuclei
+
+     ! Loop over the MOs of the a given mo_class to compute <i|P_a|j>
+     do tmp_j = 1, tmp_list_size
+        j = tmp_list(tmp_j)
+        do tmp_i = 1, tmp_list_size
+           i = tmp_list(tmp_i)
+           do rho = 1, ao_num ! loop over all the AOs
+              do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+                 mu = nucl_aos(a,b) ! AO centered on atom a
+
+                 tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+                      + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+              enddo
+           enddo
+        enddo
+     enddo
+
+     ! Gradient
+     do tmp_j = 1, tmp_list_size
+        do tmp_i = 1, tmp_list_size
+
+           m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) +  4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))
+
+        enddo
+     enddo
+
+  enddo
+
+  ! 2D -> 1D
+  do tmp_k = 1, tmp_n
+     call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+     v_grad(tmp_k) = m_grad(tmp_i,tmp_j)
+  enddo
+
+  ! Maximum element in the gradient
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+     if (ABS(v_grad(tmp_k)) > max_elem) then
+        max_elem = ABS(v_grad(tmp_k))
+     endif
+  enddo
+
+  ! Norm of the gradient
+  norm_grad = 0d0
+  do tmp_k = 1, tmp_n
+     norm_grad = norm_grad + v_grad(tmp_k)**2
+  enddo
+  norm_grad = dsqrt(norm_grad)
+
+  print*, 'Maximal element in the gradient:', max_elem
+  print*, 'Norm of the gradient:', norm_grad
+
+  ! Deallocation
+  deallocate(m_grad,tmp_int)
+
+end subroutine grad_pipek
+
+! Gradient
+
+! The gradient is
+
+! \begin{align*}
+! \left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM}
+! \end{align*}
+! with
+! \begin{align*}
+! \gamma_{st}^{PM} = \sum_{A=1}^N <s|P_A|t> \left[ <s| P_A |s> - <t|P_A|t> \right]
+! \end{align*}
+
+! \begin{align*}
+! <s|P_A|t> = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right]
+! \end{align*}
+! $\sum_{\rho}$ -> sum over all the AOs
+! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A
+! $c^t$ -> expansion coefficient of orbital |t>
+
+! Input:
+! | tmp_n                   | integer          | Number of parameters in the MO subspace           |
+! | tmp_list_size           | integer          | Number of MOs in the mo_class we want to localize |
+! | tmp_list(tmp_list_size) | integer          | MOs in the mo_class                               |
+
+! Output:
+! | v_grad(tmp_n)           | double precision | Gradient in the subspace                          |
+! | max_elem                | double precision | Maximal element in the gradient                   |
+! | norm_grad               | double precision | Norm of the gradient                              |
+
+! Internal:
+! | m_grad(tmp_list_size,tmp_list_size)       | double precision | Gradient in a 2D array                                   |
+! | tmp_int(tmp_list_size,tmp_list_size)      |                  | Temporary array to store the integrals                   |
+! | tmp_accu(tmp_list_size,tmp_list_size)     |                  | Temporary array to store a matrix                        |
+! |                                           |                  | product and compute tmp_int                              |
+! | CS(tmp_list_size,ao_num)                  |                  | Array to store the result of mo_coef * ao_overlap        |
+! | tmp_mo_coef(ao_num,tmp_list_size)         |                  | Array to store just the useful MO coefficients           |
+! |                                           |                  | depending of the mo_class                                |
+! | tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) |                  | Array to store just the useful MO coefficients           |
+! |                                           |                  | depending of the nuclei                                  |
+! | tmp_CS(tmp_list_size,nucl_n_aos(a))       |                  | Array to store just the useful mo_coef * ao_overlap      |
+! |                                           |                  | values depending of the nuclei                           |
+! | a                                         |                  | index to loop over the nuclei                            |
+! | b                                         |                  | index to loop over the AOs which belongs to the nuclei a |
+! | mu                                        |                  | index to refer to an AO which belongs to the nuclei a    |
+! | rho                                       |                  | index to loop over all the AOs                           |
+
+
+subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute gradient for the Pipek-Mezey localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad
+  double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho
+  double precision              :: t1,t2,t3
+
+  print*,''
+  print*,'---gradient_PM---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size))
+  allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size))
+
+
+  ! submatrix of the mo_coef
+  do tmp_i = 1, tmp_list_size
+    i = tmp_list(tmp_i)
+    do j = 1, ao_num
+
+      tmp_mo_coef(j,tmp_i) = mo_coef(j,i)
+
+    enddo
+  enddo
+
+  call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1))
+
+  m_grad = 0d0
+
+  do a = 1, nucl_num ! loop over the nuclei
+    tmp_int = 0d0
+
+    !do tmp_j = 1, tmp_list_size
+    !  do tmp_i = 1, tmp_list_size
+    !    do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+    !      mu = nucl_aos(a,b)
+
+    !      tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu))
+
+    !                             !  (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+    !                             !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+    !    enddo
+    !  enddo
+    !enddo
+
+    allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a)))
+
+    do tmp_i = 1, tmp_list_size
+      do b = 1, nucl_n_aos(a)
+        mu = nucl_aos(a,b)
+
+        tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i)
+
+      enddo
+    enddo
+
+    do b = 1, nucl_n_aos(a)
+      mu = nucl_aos(a,b)
+      do tmp_i = 1, tmp_list_size
+
+        tmp_CS(tmp_i,b) = CS(tmp_i,mu)
+
+      enddo
+    enddo
+
+    call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1))
+
+    do tmp_j = 1, tmp_list_size
+      do tmp_i = 1, tmp_list_size
+
+        tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i))
+
+      enddo
+    enddo
+
+    deallocate(tmp_mo_coef2,tmp_CS)
+
+    do tmp_j = 1, tmp_list_size
+      do tmp_i = 1, tmp_list_size
+
+        m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) +  4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))
+
+      enddo
+    enddo
+
+  enddo
+
+  ! 2D -> 1D
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    v_grad(tmp_k) = m_grad(tmp_i,tmp_j)
+  enddo
+
+  ! Maximum element in the gradient
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (ABS(v_grad(tmp_k)) > max_elem) then
+      max_elem = ABS(v_grad(tmp_k))
+    endif
+  enddo
+
+  ! Norm of the gradient
+  norm_grad = 0d0
+  do tmp_k = 1, tmp_n
+    norm_grad = norm_grad + v_grad(tmp_k)**2
+  enddo
+  norm_grad = dsqrt(norm_grad)
+
+  print*, 'Maximal element in the gradient:', max_elem
+  print*, 'Norm of the gradient:', norm_grad
+
+  ! Deallocation
+  deallocate(m_grad,tmp_int,CS,tmp_mo_coef)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in gradient_PM:', t3
+
+  print*,''
+  print*,'---End gradient_PM---'
+  print*,''
+
+end
+
+! Hessian v1
+
+subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute diagonal hessian for the Pipek-Mezey localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: H(tmp_n, tmp_n)
+  double precision, allocatable :: beta(:,:),tmp_int(:,:)
+  integer                       :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu
+  double precision              :: max_elem
+
+  ! Allocation
+  allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size))
+
+  beta = 0d0
+
+  do a = 1, nucl_num
+    tmp_int = 0d0
+
+    do tmp_j = 1, tmp_list_size
+      j = tmp_list(tmp_j)
+      do tmp_i = 1, tmp_list_size
+        i = tmp_list(tmp_i)
+        do rho = 1, ao_num
+          do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+            mu = nucl_aos(a,b)
+
+            tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+                                   + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+          enddo
+        enddo
+      enddo
+    enddo
+
+    ! Calculation
+    do tmp_j = 1, tmp_list_size
+      do tmp_i = 1, tmp_list_size
+
+        beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) +  (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2
+
+      enddo
+    enddo
+
+  enddo
+
+  H = 0d0
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j)
+  enddo
+
+!  max_elem = 0d0
+!  do tmp_k = 1, tmp_n
+!    if (H(tmp_k,tmp_k) < max_elem) then
+!      max_elem = H(tmp_k,tmp_k)
+!    endif
+!  enddo
+!  print*, 'Min elem H:', max_elem
+!
+!  max_elem = 0d0
+!  do tmp_k = 1, tmp_n
+!    if (H(tmp_k,tmp_k) > max_elem) then
+!      max_elem = H(tmp_k,tmp_k)
+!    endif
+!  enddo
+!  print*, 'Max elem H:', max_elem
+!
+!  max_elem = 1d10
+!  do tmp_k = 1, tmp_n
+!    if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then
+!      max_elem = H(tmp_k,tmp_k)
+!    endif
+!  enddo
+!  print*, 'Near 0 elem H:', max_elem
+
+  ! Deallocation
+  deallocate(beta,tmp_int)
+
+end
+
+! Hessian
+
+! The hessian is
+! \begin{align*}
+! \left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM}
+! \end{align*}
+! \begin{align*}
+! \beta_{st}^{PM} = \sum_{A=1}^N \left( <s|P_A|t>^2 - \frac{1}{4} \left[<s|P_A|s> - <t|P_A|t> \right]^2 \right)
+! \end{align*}
+
+! with
+! \begin{align*}
+! <s|P_A|t> = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right]
+! \end{align*}
+! $\sum_{\rho}$ -> sum over all the AOs
+! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A
+! $c^t$ -> expansion coefficient of orbital |t>
+
+
+subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute diagonal hessian for the Pipek-Mezey localization
+  END_DOC
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: H(tmp_n, tmp_n)
+  double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:)
+  integer                       :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu
+  double precision              :: max_elem, t1,t2,t3
+
+  print*,''
+  print*,'---hessian_PM---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size))
+  allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size))
+
+  beta = 0d0
+
+  do tmp_i = 1, tmp_list_size
+    i = tmp_list(tmp_i)
+    do j = 1, ao_num
+
+      tmp_mo_coef(j,tmp_i) = mo_coef(j,i)
+
+    enddo
+  enddo
+
+  call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1))
+
+  do a = 1, nucl_num ! loop over the nuclei
+    tmp_int = 0d0
+
+    !do tmp_j = 1, tmp_list_size
+    !  do tmp_i = 1, tmp_list_size
+    !    do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+    !      mu = nucl_aos(a,b)
+
+    !      tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu))
+
+    !                             !  (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+    !                             !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+    !    enddo
+    !  enddo
+    !enddo
+
+    allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a)))
+
+    do tmp_i = 1, tmp_list_size
+      do b = 1, nucl_n_aos(a)
+        mu = nucl_aos(a,b)
+
+        tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i)
+
+      enddo
+    enddo
+
+    do b = 1, nucl_n_aos(a)
+      mu = nucl_aos(a,b)
+      do tmp_i = 1, tmp_list_size
+
+        tmp_CS(tmp_i,b) = CS(tmp_i,mu)
+
+      enddo
+    enddo
+
+    call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1))
+
+    do tmp_j = 1, tmp_list_size
+      do tmp_i = 1, tmp_list_size
+
+        tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i))
+
+      enddo
+    enddo
+
+    deallocate(tmp_mo_coef2,tmp_CS)
+
+    ! Calculation
+    do tmp_j = 1, tmp_list_size
+      do tmp_i = 1, tmp_list_size
+
+        beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) +  (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2
+
+      enddo
+    enddo
+
+  enddo
+
+  H = 0d0
+  do tmp_k = 1, tmp_n
+    call vec_to_mat_index(tmp_k,tmp_i,tmp_j)
+    H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j)
+  enddo
+
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) < max_elem) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Min elem H:', max_elem
+
+  max_elem = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) > max_elem) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Max elem H:', max_elem
+
+  max_elem = 1d10
+  do tmp_k = 1, tmp_n
+    if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then
+      max_elem = H(tmp_k,tmp_k)
+    endif
+  enddo
+  print*, 'Near 0 elem H:', max_elem
+
+  ! Deallocation
+  deallocate(beta,tmp_int)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in hessian_PM:', t3
+
+  print*,''
+  print*,'---End hessian_PM---'
+  print*,''
+
+end
+
+! Criterion PM (old)
+
+subroutine compute_crit_pipek(criterion)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Pipek-Mezey localization criterion
+  END_DOC
+
+  double precision, intent(out) :: criterion
+  double precision, allocatable :: tmp_int(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho
+
+  ! Allocation
+  allocate(tmp_int(mo_num, mo_num))
+
+  criterion = 0d0
+
+  do a = 1, nucl_num ! loop over the nuclei
+    tmp_int = 0d0
+
+    do i = 1, mo_num
+      do rho = 1, ao_num ! loop over all the AOs
+        do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+          mu = nucl_aos(a,b)
+
+          tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) &
+                                 + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i))
+
+        enddo
+      enddo
+    enddo
+
+    do i = 1, mo_num
+      criterion = criterion + tmp_int(i,i)**2
+    enddo
+
+  enddo
+
+  criterion = - criterion
+
+  deallocate(tmp_int)
+
+end
+
+! Criterion PM
+
+! The criterion is computed as
+! \begin{align*}
+! \mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ <i|P_A|i> \right]^2
+! \end{align*}
+! with
+! \begin{align*}
+! <s|P_A|t> = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right]
+! \end{align*}
+
+
+subroutine criterion_PM(tmp_list_size,tmp_list,criterion)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Pipek-Mezey localization criterion
+  END_DOC
+
+  integer, intent(in)           :: tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: criterion
+  double precision, allocatable :: tmp_int(:,:),CS(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho
+
+  print*,''
+  print*,'---criterion_PM---'
+
+  ! Allocation
+  allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num))
+
+  ! Initialization
+  criterion = 0d0
+
+  call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1))
+
+  do a = 1, nucl_num ! loop over the nuclei
+    tmp_int = 0d0
+
+      do tmp_i = 1, tmp_list_size
+        i = tmp_list(tmp_i)
+        do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+          mu = nucl_aos(a,b)
+
+          tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu))
+
+                                 !  (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+                                 !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+      enddo
+    enddo
+
+    do tmp_i = 1, tmp_list_size
+      criterion = criterion + tmp_int(tmp_i,tmp_i)**2
+    enddo
+
+  enddo
+
+  criterion = - criterion
+
+  deallocate(tmp_int,CS)
+
+  print*,'---End criterion_PM---'
+  print*,''
+
+end
+
+! Criterion PM v3
+
+subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Pipek-Mezey localization criterion
+  END_DOC
+
+  integer, intent(in)           :: tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: criterion
+  double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:)
+  integer                       :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c
+  double precision              :: t1,t2,t3
+
+  print*,''
+  print*,'---criterion_PM_v3---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size))
+  allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size))
+
+  criterion = 0d0
+
+  ! submatrix of the mo_coef
+  do tmp_i = 1, tmp_list_size
+    i = tmp_list(tmp_i)
+    do j = 1, ao_num
+
+      tmp_mo_coef(j,tmp_i) = mo_coef(j,i)
+
+    enddo
+  enddo
+
+  ! ao_overlap(ao_num,ao_num)
+  ! mo_coef(ao_num,mo_num)
+  call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1))
+
+  do a = 1, nucl_num ! loop over the nuclei
+
+    do j = 1, tmp_list_size
+      do i = 1, tmp_list_size
+        tmp_int(i,j) = 0d0
+      enddo
+    enddo
+
+    !do tmp_j = 1, tmp_list_size
+    !  do tmp_i = 1, tmp_list_size
+    !    do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+    !      mu = nucl_aos(a,b)
+
+    !      tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu))
+
+    !                             !  (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+    !                             !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+    !    enddo
+    !  enddo
+    !enddo
+
+    allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a)))
+
+    do tmp_i = 1, tmp_list_size
+      do b = 1, nucl_n_aos(a)
+        mu = nucl_aos(a,b)
+
+        tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i)
+
+      enddo
+    enddo
+
+    do b = 1, nucl_n_aos(a)
+      mu = nucl_aos(a,b)
+      do tmp_i = 1, tmp_list_size
+
+         tmp_CS(tmp_i,b) = CS(tmp_i,mu)
+
+      enddo
+    enddo
+
+    call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1))
+
+    ! Integrals
+    do tmp_j = 1, tmp_list_size
+      do tmp_i = 1, tmp_list_size
+
+        tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i))
+
+      enddo
+    enddo
+
+    deallocate(tmp_mo_coef2,tmp_CS)
+
+    ! Criterion
+    do tmp_i = 1, tmp_list_size
+      criterion = criterion + tmp_int(tmp_i,tmp_i)**2
+    enddo
+
+  enddo
+
+  criterion = - criterion
+
+  deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in criterion_PM_v3:', t3
+
+  print*,'---End criterion_PM_v3---'
+  print*,''
+
+end
+
+! Criterion FB (old)
+
+! The criterion is just computed as
+
+! \begin{align*}
+! C = - \sum_i^{mo_{num}} (<i|x|i>^2 + <i|y|i>^2 + <i|z|i>^2)
+! \end{align*}
+
+! The minus sign is here in order to minimize this criterion
+
+! Output:
+! | criterion | double precision | criterion for the Foster-Boys localization |
+
+
+subroutine criterion_FB_old(criterion)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Foster-Boys localization criterion
+  END_DOC
+
+  double precision, intent(out) :: criterion
+  integer                       :: i
+
+  ! Criterion (= \sum_i <i|r|i>^2 )
+  criterion = 0d0
+  do i = 1, mo_num
+    criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2
+  enddo
+  criterion = - criterion
+
+end subroutine
+
+! Criterion FB
+
+subroutine criterion_FB(tmp_list_size, tmp_list, criterion)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the Foster-Boys localization criterion
+  END_DOC
+
+  integer, intent(in)           :: tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(out) :: criterion
+  integer                       :: i, tmp_i
+
+  ! Criterion (= - \sum_i <i|r|i>^2 )
+  criterion = 0d0
+  do tmp_i = 1, tmp_list_size
+    i = tmp_list(tmp_i)
+    criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2
+  enddo
+  criterion = - criterion
+
+end subroutine
+
+subroutine theta_FB(l, n, m_x, max_elem)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs
+  ! Warning: you must give - the angles to build the rotation matrix...
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)           :: n, l(n)
+  double precision, intent(out) :: m_x(n,n), max_elem
+
+  integer                       :: i,j, tmp_i, tmp_j
+  double precision, allocatable :: cos4theta(:,:), sin4theta(:,:)
+  double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:)
+  integer                       :: idx_i,idx_j
+
+  allocate(cos4theta(n, n), sin4theta(n, n))
+  allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n))
+
+  do tmp_j = 1, n
+    j = l(tmp_j)
+    do tmp_i = 1, n
+      i = l(tmp_i)
+      A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 &
+                     + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 &
+                     + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2
+    enddo
+    A(j,j) = 0d0
+  enddo
+
+  do tmp_j = 1, n
+    j = l(tmp_j)
+    do tmp_i = 1, n
+      i = l(tmp_i)
+      B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) &
+                     + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) &
+                     + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))
+    enddo
+  enddo
+
+  !do tmp_j = 1, n
+  !  j = l(tmp_j)
+  !  do tmp_i = 1, n
+  !    i = l(tmp_i)
+  !    beta(tmp_i,tmp_j) =  (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 &
+  !               + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 &
+  !               + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2
+  !  enddo
+  !enddo
+
+  !do tmp_j = 1, n
+  !  j = l(tmp_j)
+  !  do tmp_i = 1, n
+  !    i = l(tmp_i)
+  !    gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) &
+  !                       + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) &
+  !                       + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)))
+  !  enddo
+  !enddo
+
+  !
+  !do j = 1, n
+  !  do i = 1, n
+  !    cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2)
+  !  enddo
+  !enddo
+
+  !do j = 1, n
+  !  do i = 1, n
+  !    sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2)
+  !  enddo
+  !enddo
+
+  ! Theta
+  do j = 1, n
+    do i = 1, n
+      m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j))
+      !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j))
+    enddo
+  enddo
+
+  ! Enforce a perfect antisymmetry
+  do j = 1, n-1
+    do i = j+1, n
+      m_x(j,i) = - m_x(i,j)
+    enddo
+  enddo
+  do i = 1, n
+    m_x(i,i) = 0d0
+  enddo
+
+  ! Max
+  max_elem = 0d0
+  do j = 1, n-1
+    do i = j+1, n
+      if (dabs(m_x(i,j)) > dabs(max_elem)) then
+        max_elem = m_x(i,j)
+        !idx_i = i
+        !idx_j = j
+      endif
+    enddo
+  enddo
+
+  ! Debug
+  !print*,''
+  !print*,'sin/B'
+  !do i = 1, n
+  !  write(*,'(100F10.4)') sin4theta(i,:)
+  !  !B(i,:)
+  !enddo
+  !print*,'cos/A'
+  !do i = 1, n
+  !  write(*,'(100F10.4)') cos4theta(i,:)
+  !  !A(i,:)
+  !enddo
+  !print*,'X'
+  !!m_x = 0d0
+  !!m_x(idx_i,idx_j) = max_elem
+  !!m_x(idx_j,idx_i) = -max_elem
+  !do i = 1, n
+  !  write(*,'(100F10.4)') m_x(i,:)
+  !enddo
+  !print*,idx_i,idx_j,max_elem
+
+  max_elem = dabs(max_elem)
+
+  deallocate(cos4theta, sin4theta)
+  deallocate(A,B,beta,gamma)
+
+end
+
+subroutine theta_PM(l, n, m_x, max_elem)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs
+  ! Warning: you must give - the angles to build the rotation matrix...
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)           :: n, l(n)
+  double precision, intent(out) :: m_x(n,n), max_elem
+
+  integer                       :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j
+  double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:)
+
+  allocate(Aij(n,n), Bij(n,n), Pa(n,n))
+
+  do a = 1, nucl_num ! loop over the nuclei
+    Pa = 0d0 ! Initialization for each nuclei
+
+    ! Loop over the MOs of the a given mo_class to compute <i|P_a|j>
+    do tmp_j = 1, n
+      j = l(tmp_j)
+      do tmp_i = 1, n
+         i = l(tmp_i)
+        do rho = 1, ao_num ! loop over all the AOs
+          do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a
+            mu = nucl_aos(a,b) ! AO centered on atom a
+
+            Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) &
+                                   + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j))
+
+          enddo
+        enddo
+      enddo
+    enddo
+
+    ! A
+    do j = 1, n
+      do i = 1, n
+        Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2
+      enddo
+    enddo
+
+    ! B
+    do j = 1, n
+      do i = 1, n
+        Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j))
+      enddo
+    enddo
+
+  enddo
+
+  ! Theta
+  do j = 1, n
+    do i = 1, n
+      m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j))
+    enddo
+  enddo
+
+  ! Enforce a perfect antisymmetry
+  do j = 1, n-1
+    do i = j+1, n
+      m_x(j,i) = - m_x(i,j)
+    enddo
+  enddo
+  do i = 1, n
+    m_x(i,i) = 0d0
+  enddo
+
+  ! Max
+  max_elem = 0d0
+  do j = 1, n-1
+    do i = j+1, n
+      if (dabs(m_x(i,j)) > dabs(max_elem)) then
+        max_elem = m_x(i,j)
+        idx_i = i
+        idx_j = j
+      endif
+    enddo
+  enddo
+
+  ! Debug
+  !do i = 1, n
+  !  write(*,'(100F10.4)') m_x(i,:)
+  !enddo
+  !print*,'Max',idx_i,idx_j,max_elem
+
+  max_elem = dabs(max_elem)
+
+  deallocate(Aij,Bij,Pa)
+
+end
+
+! Spatial extent
+
+! The spatial extent of an orbital $i$ is computed as
+! \begin{align*}
+! \sum_{\lambda=x,y,z}\sqrt{<i|\lambda^2|i> - <i|\lambda|i>^2}
+! \end{align*}
+
+! From that we can also compute the average and the standard deviation
+
+
+subroutine compute_spatial_extent(spatial_extent)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the spatial extent of the MOs
+  END_DOC
+
+  double precision, intent(out) :: spatial_extent(mo_num)
+  double precision              :: average_core, average_act, average_inact, average_virt
+  double precision              :: std_var_core, std_var_act, std_var_inact, std_var_virt
+  integer                       :: i,j,k,l
+
+  spatial_extent = 0d0
+
+  do i = 1, mo_num
+    spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2
+  enddo
+  do i = 1, mo_num
+    spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2
+  enddo
+  do i = 1, mo_num
+    spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2
+  enddo
+
+  do i = 1, mo_num
+    spatial_extent(i) = dsqrt(spatial_extent(i))
+  enddo
+
+  average_core = 0d0
+  std_var_core = 0d0
+  if (dim_list_core_orb >= 2) then
+    call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core)
+    call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core)
+  endif
+
+  average_act = 0d0
+  std_var_act = 0d0
+  if (dim_list_act_orb >= 2) then
+    call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act)
+    call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act)
+  endif
+
+  average_inact = 0d0
+  std_var_inact = 0d0
+  if (dim_list_inact_orb >= 2) then
+    call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact)
+    call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact)
+  endif
+
+  average_virt = 0d0
+  std_var_virt = 0d0
+  if (dim_list_virt_orb >= 2) then
+    call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt)
+    call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt)
+  endif
+
+  print*,''
+  print*,'============================='
+  print*,'  Spatial extent of the MOs'
+  print*,'============================='
+  print*,''
+
+  print*, 'elec_num:', elec_num
+  print*, 'elec_alpha_num:', elec_alpha_num
+  print*, 'elec_beta_num:', elec_beta_num
+  print*, 'core:', dim_list_core_orb
+  print*, 'act:', dim_list_act_orb
+  print*, 'inact:', dim_list_inact_orb
+  print*, 'virt:', dim_list_virt_orb
+  print*, 'mo_num:', mo_num
+  print*,''
+
+  print*,'-- Core MOs --'
+  print*,'Average:', average_core
+  print*,'Std var:', std_var_core
+  print*,''
+
+  print*,'-- Active MOs --'
+  print*,'Average:', average_act
+  print*,'Std var:', std_var_act
+  print*,''
+
+  print*,'-- Inactive MOs --'
+  print*,'Average:', average_inact
+  print*,'Std var:', std_var_inact
+  print*,''
+
+  print*,'-- Virtual MOs --'
+  print*,'Average:', average_virt
+  print*,'Std var:', std_var_virt
+  print*,''
+
+  print*,'Spatial extent:'
+  do i = 1, mo_num
+    print*, i, spatial_extent(i)
+  enddo
+
+end
+
+subroutine compute_average_sp_ext(spatial_extent, list, list_size, average)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the average spatial extent of the MOs
+  END_DOC
+
+  integer, intent(in) :: list_size, list(list_size)
+  double precision, intent(in) :: spatial_extent(mo_num)
+  double precision, intent(out) :: average
+  integer :: i, tmp_i
+
+  average = 0d0
+  do tmp_i = 1, list_size
+    i = list(tmp_i)
+    average = average + spatial_extent(i)
+  enddo
+
+  average = average / DBLE(list_size)
+
+end
+
+subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Compute the standard deviation of the spatial extent of the MOs
+  END_DOC
+
+  integer, intent(in)           :: list_size, list(list_size)
+  double precision, intent(in)  :: spatial_extent(mo_num)
+  double precision, intent(in)  :: average
+  double precision, intent(out) :: std_var
+  integer                       :: i, tmp_i
+
+  std_var = 0d0
+
+  do tmp_i = 1, list_size
+    i = list(tmp_i)
+    std_var = std_var + (spatial_extent(i) - average)**2
+  enddo
+
+  std_var = dsqrt(1d0/DBLE(list_size) * std_var)
+
+end
+
+! Utils
+
+
+subroutine apply_pre_rotation()
+
+  implicit none
+
+  BEGIN_DOC
+  ! Apply a rotation between the MOs
+  END_DOC
+
+  double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:)
+  double precision              :: t1,t2,t3
+  integer                       :: i,j,tmp_i,tmp_j
+  integer                       :: info
+  logical                       :: enforce_step_cancellation
+
+  print*,'---apply_pre_rotation---'
+  call wall_time(t1)
+
+  allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num))
+
+  ! Initialization of the matrix
+  pre_rot = 0d0
+
+  if (kick_in_mos) then
+    ! Pre rotation for core MOs
+    if (dim_list_core_orb >= 2) then
+      do tmp_j = 1, dim_list_core_orb
+        j = list_core(tmp_j)
+        do tmp_i = 1, dim_list_core_orb
+          i = list_core(tmp_i)
+          if (i > j) then
+            pre_rot(i,j) = angle_pre_rot
+          elseif (i < j) then
+            pre_rot(i,j) = - angle_pre_rot
+          else
+            pre_rot(i,j) = 0d0
+          endif
+        enddo
+      enddo
+    endif
+
+    ! Pre rotation for active MOs
+    if (dim_list_act_orb >= 2) then
+      do tmp_j = 1, dim_list_act_orb
+        j = list_act(tmp_j)
+        do tmp_i = 1, dim_list_act_orb
+          i = list_act(tmp_i)
+          if (i > j) then
+            pre_rot(i,j) = angle_pre_rot
+          elseif (i < j) then
+            pre_rot(i,j) = - angle_pre_rot
+          else
+            pre_rot(i,j) = 0d0
+          endif
+        enddo
+      enddo
+    endif
+
+    ! Pre rotation for inactive MOs
+    if (dim_list_inact_orb >= 2) then
+      do tmp_j = 1, dim_list_inact_orb
+        j = list_inact(tmp_j)
+        do tmp_i = 1, dim_list_inact_orb
+          i = list_inact(tmp_i)
+          if (i > j) then
+            pre_rot(i,j) = angle_pre_rot
+          elseif (i < j) then
+            pre_rot(i,j) = - angle_pre_rot
+          else
+            pre_rot(i,j) = 0d0
+          endif
+        enddo
+      enddo
+    endif
+
+    ! Pre rotation for virtual MOs
+    if (dim_list_virt_orb >= 2) then
+      do tmp_j = 1, dim_list_virt_orb
+        j = list_virt(tmp_j)
+        do tmp_i = 1, dim_list_virt_orb
+          i = list_virt(tmp_i)
+          if (i > j) then
+            pre_rot(i,j) = angle_pre_rot
+          elseif (i < j) then
+            pre_rot(i,j) = - angle_pre_rot
+          else
+            pre_rot(i,j) = 0d0
+          endif
+        enddo
+      enddo
+    endif
+
+    ! Nothing for deleted ones
+
+    ! Compute pre rotation matrix from pre_rot
+    call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation)
+
+    if (enforce_step_cancellation) then
+      print*, 'Cancellation of the pre rotation, too big error in the rotation matrix'
+      print*, 'Reduce the angle for the pre rotation, abort'
+      call abort
+    endif
+
+    ! New Mos (we don't car eabout the previous MOs prev_mos)
+    call apply_mo_rotation(R,prev_mos)
+
+    ! Update the things related to mo_coef
+    TOUCH mo_coef
+    call save_mos
+  endif
+
+  deallocate(pre_rot, prev_mos, R)
+
+  call wall_time(t2)
+  t3 = t2-t1
+  print*,'Time in apply_pre_rotation:', t3
+  print*,'---End apply_pre_rotation---'
+
+end
+
+subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x)
+
+  implicit none
+
+  integer, intent(in)           :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+  double precision, intent(in)  :: v_grad(tmp_n)
+  double precision, intent(in)  :: H(tmp_n, tmp_n)
+  double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size)
+  !double precision, allocatable :: x(:)
+  double precision              :: lambda , accu, max_elem
+  integer                       :: i,j,tmp_i,tmp_j,tmp_k
+
+  ! Allocation
+  !allocate(x(tmp_n))
+
+  ! Level shifted hessian
+  lambda = 0d0
+  do tmp_k = 1, tmp_n
+    if (H(tmp_k,tmp_k) < lambda) then
+      lambda = H(tmp_k,tmp_k)
+    endif
+  enddo
+
+  ! min element in the hessian
+  if (lambda < 0d0) then
+    lambda = -lambda + 1d-6
+  endif
+
+  print*, 'lambda', lambda
+
+  ! Good
+  do tmp_k = 1, tmp_n
+    if (ABS(H(tmp_k,tmp_k)) > 1d-6) then
+       tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k))
+      !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k))
+    endif
+  enddo
+
+  ! 1D tmp -> 2D tmp
+  tmp_m_x = 0d0
+  do tmp_j = 1, tmp_list_size - 1
+    do tmp_i = tmp_j + 1, tmp_list_size
+      call mat_to_vec_index(tmp_i,tmp_j,tmp_k)
+      tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k)
+    enddo
+  enddo
+
+  ! Antisym
+  do tmp_i = 1, tmp_list_size - 1
+    do tmp_j = tmp_i + 1, tmp_list_size
+      tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i)
+    enddo
+  enddo
+
+  ! Deallocation
+  !deallocate(x)
+
+end subroutine
+
+subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo)
+  implicit none
+  BEGIN_DOC
+  ! Transform A from the |AO| basis to the |MO| basis
+  !
+  ! $C^\dagger.A_{ao}.C$
+  END_DOC
+  integer, intent(in)            :: LDA_ao,LDA_mo
+  double precision, intent(in)   :: A_ao(LDA_ao,ao_num)
+  double precision, intent(out)  :: A_mo(LDA_mo,mo_num)
+  double precision, allocatable  :: T(:,:)
+
+  allocate ( T(ao_num,mo_num) )
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T
+
+  call dgemm('N','N', ao_num, mo_num, ao_num,                    &
+      1.d0, A_ao,LDA_ao,                                             &
+      mo_coef, size(mo_coef,1),                                      &
+      0.d0, T, size(T,1))
+
+  call dgemm('T','N', mo_num, mo_num, ao_num,                &
+      1.d0, mo_coef,size(mo_coef,1),                                 &
+      T, ao_num,                                                     &
+      0.d0, A_mo, size(A_mo,1))
+
+  deallocate(T)
+end
+
+subroutine run_sort_by_fock_energies()
+
+  implicit none
+
+  BEGIN_DOC
+  ! Saves the current MOs ordered by diagonal element of the Fock operator.
+  END_DOC
+
+  integer                        :: i,j,k,l,tmp_i,tmp_k,tmp_list_size
+  integer, allocatable           :: iorder(:)
+  double precision, allocatable  :: fock_energies_tmp(:), tmp_mo_coef(:,:)
+  integer, allocatable           :: tmp_list(:)
+
+!  allocate(iorder(mo_num), fock_energies_tmp(mo_num), new_mo_coef(ao_num, mo_num))
+!
+!  do i = 1, mo_num
+!    fock_energies_tmp(i) = Fock_matrix_diag_mo(i)
+!    print*,'fock_energies_tmp(i) = ',fock_energies_tmp(i)
+!    iorder(i) = i
+!  enddo
+!
+!  print*,''
+!  print*,'Sorting by Fock energies'
+!  print*,''
+!
+!  call dsort(fock_energies_tmp, iorder, mo_num)
+!
+!  do i = 1, mo_num
+!    k = iorder(i)
+!    print*,'fock_energies_new(i) = ',fock_energies_tmp(i)
+!    do j = 1, ao_num
+!      new_mo_coef(j,i) = mo_coef(j,k)
+!    enddo
+!  enddo
+
+  ! Test
+  do l = 1, 4
+    if (l==1) then ! core
+      tmp_list_size = dim_list_core_orb
+    elseif (l==2) then ! act
+      tmp_list_size = dim_list_act_orb
+    elseif (l==3) then ! inact
+      tmp_list_size = dim_list_inact_orb
+    else ! virt
+      tmp_list_size = dim_list_virt_orb
+    endif
+
+    if (tmp_list_size >= 2) then
+      ! Allocation tmp array
+      allocate(tmp_list(tmp_list_size))
+
+      ! To give the list of MOs in a mo_class
+      if (l==1) then ! core
+        tmp_list = list_core
+      elseif (l==2) then
+        tmp_list = list_act
+      elseif (l==3) then
+        tmp_list = list_inact
+      else
+        tmp_list = list_virt
+      endif
+      print*,'MO class: ', trim(mo_class(tmp_list(1)))
+
+      allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size))
+      !print*,'MOs before sorting them by f_p^p energies:'
+      do i = 1, tmp_list_size
+        tmp_i = tmp_list(i)
+        fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i)
+        iorder(i) = i
+        !print*, tmp_i, fock_energies_tmp(i)
+      enddo
+
+      call dsort(fock_energies_tmp, iorder, tmp_list_size)
+
+      print*,'MOs after sorting them by f_p^p energies:'
+      do i = 1, tmp_list_size
+        k = iorder(i)
+        tmp_k = tmp_list(k)
+        print*, tmp_k, fock_energies_tmp(k)
+        do j = 1, ao_num
+          tmp_mo_coef(j,k) = mo_coef(j,tmp_k)
+        enddo
+      enddo
+
+      ! Update the MOs after sorting them by energies
+      do i = 1, tmp_list_size
+        tmp_i = tmp_list(i)
+        do j = 1, ao_num
+          mo_coef(j,tmp_i) = tmp_mo_coef(j,i)
+        enddo
+      enddo
+
+      if (debug_hf) then
+        touch mo_coef
+        print*,'HF energy:', HF_energy
+      endif
+      print*,''
+
+      deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef)
+    endif
+
+  enddo
+
+  touch mo_coef
+  call save_mos
+
+end
diff --git a/src/mo_two_e_ints/core_quantities.irp.f b/src/mo_two_e_ints/core_quantities.irp.f
index 3642365e..2afdcf89 100644
--- a/src/mo_two_e_ints/core_quantities.irp.f
+++ b/src/mo_two_e_ints/core_quantities.irp.f
@@ -59,3 +59,45 @@ BEGIN_PROVIDER [ double precision, h_core_ri, (mo_num, mo_num) ]
  enddo
 END_PROVIDER
 
+
+BEGIN_PROVIDER [ double precision, h_act_ri, (mo_num, mo_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Active Hamiltonian with 3-index exchange integrals:
+ !
+ ! $\tilde{h}{pq} = h_{pq} - \frac{1}{2}\sum_{k} g(pk,kq)$
+ END_DOC
+
+ integer :: i,j, k
+ integer :: p,q, r
+ ! core-core contribution
+ h_act_ri = core_fock_operator
+ !print *,' Bef----hact(1,14)=',h_act_ri(4,14)
+ ! act-act contribution
+ do p=1,n_act_orb
+   j=list_act(p)
+   do q=1,n_act_orb
+     i=list_act(q)
+     h_act_ri(i,j) = mo_one_e_integrals(i,j)
+   enddo
+   do r=1,n_act_orb
+     k=list_act(r)
+     do q=1,n_act_orb
+       i=list_act(q)
+       h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j)
+     enddo
+   enddo
+ enddo
+ ! core-act contribution
+ !do p=1,n_act_orb
+ !  j=list_core(p)
+ !  do k=1,n_core_orb
+ !    do q=1,n_act_orb
+ !      i=list_act(q)
+ !      h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j)
+ !    enddo
+ !  enddo
+ !enddo
+ !print *,' Aft----hact(1,14)=',h_act_ri(4,14), mo_one_e_integrals(4,14)
+END_PROVIDER
+
diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f
index c2bff2e8..f68465c7 100644
--- a/src/utils/integration.irp.f
+++ b/src/utils/integration.irp.f
@@ -56,7 +56,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
   !               * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 )
   !               * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 )
   !
-  ! WARNING ::: IF fact_k is too smal then: 
+  ! WARNING ::: IF fact_k is too smal then:
   ! returns a "s" function centered in zero
   ! with an inifinite exponent and a zero polynom coef
   END_DOC
@@ -86,13 +86,11 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
   !DIR$ FORCEINLINE
   call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center)
   if (fact_k < thresh) then
-    ! IF fact_k is too smal then: 
+    ! IF fact_k is too smal then:
     ! returns a "s" function centered in zero
     ! with an inifinite exponent and a zero polynom coef
     P_center = 0.d0
     p = 1.d+15
-    P_new = 0.d0
-    iorder = 0
     fact_k = 0.d0
     return
   endif
@@ -129,6 +127,91 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
 
 end
 
+!---
+
+subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorder,alpha,beta,a,b,A_center,B_center,n_points)
+  BEGIN_DOC
+  ! Transforms the product of
+  !          (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta)
+  ! into
+  !        fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 )
+  !               * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 )
+  !               * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 )
+  !
+  ! WARNING                      :: : IF fact_k is too smal then:
+  ! returns a "s" function centered in zero
+  ! with an inifinite exponent and a zero polynom coef
+  END_DOC
+  implicit none
+  include 'constants.include.F'
+  integer, intent(in)            :: n_points, ldp
+  integer, intent(in)            :: a(3),b(3)         ! powers : (x-xa)**a_x = (x-A(1))**a(1)
+  double precision, intent(in)   :: alpha, beta       ! exponents
+  double precision, intent(in)   :: A_center(n_points,3) ! A center
+  double precision, intent(in)   :: B_center (3)         ! B center
+  double precision, intent(out)  :: P_center(n_points,3) ! new center
+  double precision, intent(out)  :: p                 ! new exponent
+  double precision, intent(out)  :: fact_k(n_points)            ! constant factor
+  double precision, intent(out)  :: P_new(n_points,0:ldp,3)! polynomial
+  integer, intent(out)           :: iorder(3)         ! i_order(i) = order of the polynomials
+
+  double precision, allocatable  :: P_a(:,:,:), P_b(:,:,:)
+
+  integer                        :: n_new,i,j, ipoint, lda, ldb, xyz
+
+  call gaussian_product_v(alpha,A_center,beta,B_center,fact_k,p,P_center,n_points)
+
+  if ( ior(ior(b(1),b(2)),b(3)) == 0 ) then  ! b == (0,0,0)
+
+    lda = maxval(a)
+    ldb = 0
+    allocate(P_a(n_points,0:lda,3), P_b(n_points,0:0,3))
+
+    call recentered_poly2_v0(P_a,lda,A_center,P_center,a,P_b,B_center,P_center,n_points)
+
+    iorder(1:3) = a(1:3)
+    do ipoint=1,n_points
+      do xyz=1,3
+        P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz)
+        do i=1,a(xyz)
+          P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz)
+        enddo
+      enddo
+    enddo
+
+    return
+
+  endif
+
+  lda = maxval(a)
+  ldb = maxval(b)
+  allocate(P_a(n_points,0:lda,3), P_b(n_points,0:ldb,3))
+
+  call recentered_poly2_v(P_a,lda,A_center,P_center,a,P_b,ldb,B_center,P_center,b,n_points)
+
+  iorder(1:3) = a(1:3) + b(1:3)
+
+  do xyz=1,3
+    if (b(xyz) == 0) then
+      do ipoint=1,n_points
+        P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz)
+        do i=1,a(xyz)
+          P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz)
+        enddo
+      enddo
+    else
+      do i=0,iorder(xyz)
+        do ipoint=1,n_points
+          P_new(ipoint,i,xyz) = 0.d0
+        enddo
+      enddo
+      call multiply_poly_v(P_a(1,0,xyz), a(xyz),P_b(1,0,xyz),b(xyz),P_new(1,0,xyz),ldp,n_points)
+    endif
+  enddo
+
+end
+
+!-
 
 subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim)
   BEGIN_DOC
@@ -231,6 +314,59 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp)
   xp(3) = (a*xa(3)+b*xb(3))*p_inv
 end subroutine
 
+!---
+subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points)
+  implicit none
+  BEGIN_DOC
+  ! Gaussian product in 1D.
+  ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2}
+  ! Using multiple A centers
+  END_DOC
+
+  integer, intent(in)            :: n_points
+  double precision, intent(in)   :: a,b         ! Exponents
+  double precision, intent(in)   :: xa(n_points,3),xb(3) ! Centers
+  double precision, intent(out)  :: p ! New exponent
+  double precision, intent(out)  :: xp(n_points,3)       ! New center
+  double precision, intent(out)  :: k(n_points) ! Constant
+
+  double precision               :: p_inv
+
+  integer :: ipoint
+  ASSERT (a>0.)
+  ASSERT (b>0.)
+
+  double precision               :: xab(3), ab, ap, bp, bpxb(3)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab
+
+  p = a+b
+  p_inv = 1.d0/(a+b)
+  ab = a*b*p_inv
+  ap = a*p_inv
+  bp = b*p_inv
+  bpxb(1) = bp*xb(1)
+  bpxb(2) = bp*xb(2)
+  bpxb(3) = bp*xb(3)
+
+  do ipoint=1,n_points
+    xab(1) = xa(ipoint,1)-xb(1)
+    xab(2) = xa(ipoint,2)-xb(2)
+    xab(3) = xa(ipoint,3)-xb(3)
+    k(ipoint) = ab*(xab(1)*xab(1)+xab(2)*xab(2)+xab(3)*xab(3))
+    if (k(ipoint) > 40.d0) then
+      k(ipoint)=0.d0
+      xp(ipoint,1) = 0.d0
+      xp(ipoint,2) = 0.d0
+      xp(ipoint,3) = 0.d0
+    else
+      k(ipoint) = dexp(-k(ipoint))
+      xp(ipoint,1) = ap*xa(ipoint,1)+bpxb(1)
+      xp(ipoint,2) = ap*xa(ipoint,2)+bpxb(2)
+      xp(ipoint,3) = ap*xa(ipoint,3)+bpxb(3)
+    endif
+  enddo
+end subroutine
+
 
 
 
@@ -269,6 +405,46 @@ subroutine gaussian_product_x(a,xa,b,xb,k,p,xp)
 end subroutine
 
 
+!-
+
+subroutine gaussian_product_x_v(a,xa,b,xb,k,p,xp,n_points)
+  implicit none
+  BEGIN_DOC
+  ! Gaussian product in 1D with multiple xa
+  ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2}
+  END_DOC
+
+  integer, intent(in) :: n_points
+  double precision  , intent(in) :: a,b      ! Exponents
+  double precision  , intent(in) :: xa(n_points),xb    ! Centers
+  double precision  , intent(out) :: p(n_points)       ! New exponent
+  double precision  , intent(out) :: xp(n_points) ! New center
+  double precision  , intent(out) :: k(n_points)       ! Constant
+
+  double precision               :: p_inv
+  integer :: ipoint
+
+  ASSERT (a>0.)
+  ASSERT (b>0.)
+
+  double precision               :: xab, ab
+
+  p = a+b
+  p_inv = 1.d0/(a+b)
+  ab = a*b*p_inv
+  do ipoint = 1, n_points
+    xab = xa(ipoint)-xb
+    k(ipoint) = ab*xab*xab
+    if (k(ipoint) > 40.d0) then
+      k(ipoint)=0.d0
+      cycle
+    endif
+    k(ipoint) = exp(-k(ipoint))
+    xp(ipoint) = (a*xa(ipoint)+b*xb)*p_inv
+  enddo
+end subroutine
+
+
 
 
 
@@ -313,6 +489,45 @@ subroutine multiply_poly(b,nb,c,nc,d,nd)
 
 end
 
+subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points)
+  implicit none
+  BEGIN_DOC
+  ! Multiply pairs of polynomials
+  ! D(t) += B(t)*C(t)
+  END_DOC
+
+  integer, intent(in)            :: nb, nc, n_points
+  integer, intent(in)            :: nd
+  double precision, intent(in)   :: b(n_points,0:nb), c(n_points,0:nc)
+  double precision, intent(inout) :: d(n_points,0:nd)
+
+  integer                        :: ib, ic, id, k, ipoint
+  if (nd < nb+nc) then
+     print *, nd,  nb, nc
+     print *, irp_here, ': nd < nb+nc'
+     stop 1
+  endif
+
+  do ic = 0,nc
+    do ipoint=1, n_points
+      d(ipoint,ic) = d(ipoint,ic) + c(ipoint,ic) * b(ipoint,0)
+    enddo
+  enddo
+
+  do ib=1,nb
+    do ipoint=1, n_points
+      d(ipoint, ib) = d(ipoint, ib) + c(ipoint,0) * b(ipoint, ib)
+    enddo
+    do ic = 1,nc
+      do ipoint=1, n_points
+        d(ipoint, ib+ic) = d(ipoint, ib+ic) + c(ipoint,ic) * b(ipoint, ib)
+      enddo
+    enddo
+  enddo
+
+end
+
+
 subroutine add_poly(b,nb,c,nc,d,nd)
   implicit none
   BEGIN_DOC
@@ -404,22 +619,152 @@ subroutine recentered_poly2(P_new,x_A,x_P,a,P_new2,x_B,x_Q,b)
   do i =  minab+1,min(b,20)
     P_new2(i) =  binom_transp(b-i,b) * pows_b(b-i)
   enddo
-  do i =  101,a
+  do i =  21,a
     P_new(i) =  binom_func(a,a-i) * pows_a(a-i)
   enddo
-  do i =  101,b
+  do i =  21,b
     P_new2(i) =  binom_func(b,b-i) * pows_b(b-i)
   enddo
 end
 
-subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) 
+!-
+subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points)
+  implicit none
+  BEGIN_DOC
+  ! Recenter two polynomials
+  END_DOC
+  integer, intent(in)            :: a(3),b(3), n_points, lda, ldb
+  double precision, intent(in)   :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3)
+  double precision, intent(out)  :: P_new(n_points,0:lda,3),P_new2(n_points,0:ldb,3)
+  double precision               :: binom_func
+  integer                        :: i,j,k,l, minab(3), maxab(3),ipoint, xyz
+  double precision, allocatable  :: pows_a(:,:), pows_b(:,:)
+  double precision :: fa, fb
+
+  maxab(1:3) = max(a(1:3),b(1:3))
+  minab(1:3) = max(min(a(1:3),b(1:3)),(/0,0,0/))
+
+  allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) )
+
+
+  do xyz=1,3
+    if ((a(xyz)<0).or.(b(xyz)<0) ) cycle
+    do ipoint=1,n_points
+      pows_a(ipoint,0) = 1.d0
+      pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz))
+      pows_b(ipoint,0) = 1.d0
+      pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz))
+    enddo
+    do i =  2,maxab(xyz)
+      do ipoint=1,n_points
+        pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1)
+        pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1)
+      enddo
+    enddo
+    do ipoint=1,n_points
+      P_new (ipoint,0,xyz) =  pows_a(ipoint,a(xyz))
+      P_new2(ipoint,0,xyz) =  pows_b(ipoint,b(xyz))
+    enddo
+    do i =  1,min(minab(xyz),20)
+      fa =  binom_transp(a(xyz)-i,a(xyz))
+      fb =  binom_transp(b(xyz)-i,b(xyz))
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+        P_new2(ipoint,i,xyz) =  fb * pows_b(ipoint,b(xyz)-i)
+      enddo
+    enddo
+    do i =  minab(xyz)+1,min(a(xyz),20)
+      fa =  binom_transp(a(xyz)-i,a(xyz))
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+    do i =  minab(xyz)+1,min(b(xyz),20)
+      fb =  binom_transp(b(xyz)-i,b(xyz))
+      do ipoint=1,n_points
+        P_new2(ipoint,i,xyz) =  fb * pows_b(ipoint,b(xyz)-i)
+      enddo
+    enddo
+    do i =  21,a(xyz)
+      fa =  binom_func(a(xyz),a(xyz)-i)
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+    do i =  21,b(xyz)
+      fb = binom_func(b(xyz),b(xyz)-i)
+      do ipoint=1,n_points
+        P_new2(ipoint,i,xyz) =  fb * pows_b(ipoint,b(xyz)-i)
+      enddo
+    enddo
+  enddo
+end
+
+
+subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points)
+  implicit none
+  BEGIN_DOC
+  ! Recenter two polynomials. Special case for b=(0,0,0)
+  END_DOC
+  integer, intent(in)            :: a(3), n_points, lda
+  double precision, intent(in)   :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3)
+  double precision, intent(out)  :: P_new(n_points,0:lda,3),P_new2(n_points,3)
+  double precision               :: binom_func
+  integer                        :: i,j,k,l, xyz, ipoint, maxab(3)
+  double precision, allocatable  :: pows_a(:,:), pows_b(:,:)
+  double precision               :: fa
+
+  maxab(1:3) = max(a(1:3),(/0,0,0/))
+
+  allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) )
+
+  do xyz=1,3
+    if (a(xyz)<0) cycle
+    do ipoint=1,n_points
+      pows_a(ipoint,0) = 1.d0
+      pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz))
+      pows_b(ipoint,0) = 1.d0
+      pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz))
+    enddo
+    do i = 2,maxab(xyz)
+      do ipoint=1,n_points
+        pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1)
+        pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1)
+      enddo
+    enddo
+    do ipoint=1,n_points
+      P_new (ipoint,0,xyz) =  pows_a(ipoint,a(xyz))
+      P_new2(ipoint,xyz) =  pows_b(ipoint,0)
+    enddo
+    do i =  1,min(a(xyz),20)
+      fa = binom_transp(a(xyz)-i,a(xyz))
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+    do i = 21,a(xyz)
+      fa = binom_func(a(xyz),a(xyz)-i)
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+
+  enddo !xyz
+
+  deallocate(pows_a, pows_b)
+end
+
+!--
+!--
+
+subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol)
 
   BEGIN_DOC
-  ! 
+  !
   ! Transform the pol centerd on A:
-  !       [ \sum_i ax_i (x-x_A)^i ] [ \sum_j ay_j (y-y_A)^j ] [ \sum_k az_k (z-z_A)^k ] 
+  !       [ \sum_i ax_i (x-x_A)^i ] [ \sum_j ay_j (y-y_A)^j ] [ \sum_k az_k (z-z_A)^k ]
   ! to a pol centered on B
-  !       [ \sum_i bx_i (x-x_B)^i ] [ \sum_j by_j (y-y_B)^j ] [ \sum_k bz_k (z-z_B)^k ] 
+  !       [ \sum_i bx_i (x-x_B)^i ] [ \sum_j by_j (y-y_B)^j ] [ \sum_k bz_k (z-z_B)^k ]
   !
   END_DOC
 
@@ -437,7 +782,7 @@ subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol)
 
   do i = 1, 3
     Lmax = iorder(i)
-    call pol_modif_center_x( A_center(i), B_center(i), Lmax, A_pol(0:Lmax, i), B_pol(0:Lmax, i) ) 
+    call pol_modif_center_x( A_center(i), B_center(i), Lmax, A_pol(0:Lmax, i), B_pol(0:Lmax, i) )
   enddo
 
   return
@@ -445,14 +790,14 @@ end subroutine pol_modif_center
 
 
 
-subroutine pol_modif_center_x(A_center, B_center, iorder, A_pol, B_pol) 
+subroutine pol_modif_center_x(A_center, B_center, iorder, A_pol, B_pol)
 
   BEGIN_DOC
-  ! 
+  !
   ! Transform the pol centerd on A:
-  !       [ \sum_i ax_i (x-x_A)^i ] 
+  !       [ \sum_i ax_i (x-x_A)^i ]
   ! to a pol centered on B
-  !       [ \sum_i bx_i (x-x_B)^i ] 
+  !       [ \sum_i bx_i (x-x_B)^i ]
   !
   ! bx_i = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) j! / [ i! (j-i)! ]
   !      = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) binom_func(j,i)
@@ -591,7 +936,7 @@ double precision function rint_sum(n_pt_out,rho,d1)
       u_inv=1.d0/dsqrt(rho)
       u=rho*u_inv
       rint_sum=0.5d0*u_inv*sqpi*derf(u) *d1(0)
-!      print *, 0, d1(0), 0.5d0*u_inv*sqpi*derf(u) 
+!      print *, 0, d1(0), 0.5d0*u_inv*sqpi*derf(u)
     endif
 
     do i=2,n_pt_out,2
diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index 809f594b..2f84e753 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -1136,6 +1136,104 @@ subroutine ortho_svd(A,LDA,m,n)
 
 end
 
+! QR to orthonormalize CSFs does not work :-(
+!subroutine ortho_qr_withB(A,LDA,B,m,n)
+!  implicit none
+!  BEGIN_DOC
+!  ! Orthogonalization using Q.R factorization
+!  !
+!  ! A : Overlap Matrix
+!  !
+!  ! LDA : leftmost dimension of A
+!  !
+!  ! m : Number of rows of A
+!  !
+!  ! n : Number of columns of A
+!  !
+!  ! B : Output orthogonal basis
+!  !
+!  END_DOC
+!  integer, intent(in)            :: m,n, LDA
+!  double precision, intent(inout) :: A(LDA,n)
+!  double precision, intent(inout) :: B(LDA,n)
+!
+!  integer                        :: LWORK, INFO
+!  integer, allocatable           :: jpvt(:)
+!  double precision, allocatable  :: TAU(:), WORK(:)
+!  double precision, allocatable  :: C(:,:)
+!  double precision :: norm
+!  integer :: i,j
+!
+!  allocate (TAU(min(m,n)), WORK(1))
+!  allocate (jpvt(n))
+!  !print *," In function ortho"
+!  B = A
+!
+!  jpvt(1:n)=1
+!
+!  LWORK=-1
+!  call dgeqp3( m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
+!
+!  ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+!  LWORK=max(n,int(WORK(1)))
+!
+!  deallocate(WORK)
+!  allocate(WORK(LWORK))
+!  call dgeqp3(m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO )
+!  print *,A
+!  print *,jpvt
+!  deallocate(WORK,TAU)
+!  !stop
+!
+!  !LWORK=-1
+!  !call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
+!  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+!  !LWORK=max(n,int(WORK(1)))
+!
+!  !deallocate(WORK)
+!  !allocate(WORK(LWORK))
+!  !call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO )
+!
+!  !LWORK=-1
+!  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
+!  !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648
+!  !LWORK=max(n,int(WORK(1)))
+!
+!  !deallocate(WORK)
+!  !allocate(WORK(LWORK))
+!  !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO)
+!  !
+!  !allocate(C(LDA,n))
+!  !call dgemm('N','N',m,n,n,1.0d0,B,LDA,A,LDA,0.0d0,C,LDA)
+!  !norm = 0.0d0
+!  !B = 0.0d0
+!  !!print *,C
+!  !do i=1,m
+!  !  norm = 0.0d0
+!  !  do j=1,n
+!  !    norm = norm + C(j,i)*C(j,i)
+!  !  end do
+!  !  norm = 1.0d0/dsqrt(norm)
+!  !  do j=1,n
+!  !    B(j,i) = C(j,i)
+!  !  end do
+!  !end do
+!  !print *,B
+!
+!
+!  !deallocate(WORK,TAU)
+!end
+
+!subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf")
+!  use iso_c_binding
+!  integer(c_int32_t), value      :: LDA
+!  integer(c_int32_t), value      :: m
+!  integer(c_int32_t), value      :: n
+!  integer(c_int16_t)             :: A(LDA,n)
+!  integer(c_int16_t)             :: B(LDA,n)
+!  call ortho_qr_withB(A,LDA,B,m,n)
+!end subroutine ortho_qr_csf
+
 subroutine ortho_qr(A,LDA,m,n)
   implicit none
   BEGIN_DOC
diff --git a/src/utils/loc.f b/src/utils/loc.f
index 6e5d7345..02693281 100644
--- a/src/utils/loc.f
+++ b/src/utils/loc.f
@@ -1,6 +1,6 @@
 c************************************************************************
       subroutine maxovl(n,m,s,t,w)
-C 
+C
 C     This subprogram contains an iterative procedure to find the
 C     unitary transformation of a set of n vectors which maximizes
 C     the sum of their square overlaps with a set of m reference
@@ -10,7 +10,7 @@ C     S: overlap matrix <ref|vec>
 C     T: rotation matrix
 C     W: new overlap matrix
 C
-C 
+C
       implicit real*8(a-h,o-y),logical*1(z)
 !      parameter (id1=700)
 !      dimension s(id1,id1),t(id1,id1),w(id1,id1)
@@ -29,23 +29,26 @@ C      conv=1.d-6
      * 5x,'following the principle of maximum overlap with a set of',
      * i3,' reference vectors'/5x,'required convergence on rotation ',
      * 'angle =',f13.10///5x,'Starting overlap matrix'/)
-      do 6 i=1,m
-      write (6,145) i
-    6 write (6,150) (s(i,j),j=1,n)
+      do i=1,m
+        write (6,145) i
+        write (6,150) (s(i,j),j=1,n)
+      end do
     8 mm=m-1
       if (m.lt.n) mm=m
       iter=0
-      do 20 j=1,n
-      do 16 i=1,n
-      t(i,j)=0.d0
-   16 continue
-      do 18 i=1,m
-   18 w(i,j)=s(i,j)
-   20 t(j,j)=1.d0
+      do j=1,n
+        do i=1,n
+          t(i,j)=0.d0
+        end do
+        do i=1,m
+          w(i,j)=s(i,j)
+        enddo
+        t(j,j)=1.d0
+      enddo
       sum=0.d0
-      do 10 i=1,m
-      sum=sum+s(i,i)*s(i,i)
-   10 continue
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      end do
       sum=sum/m
       if (zprt) write (6,12) sum
    12 format (//5x,'Average square overlap =',f10.6)
@@ -54,18 +57,18 @@ C      conv=1.d-6
       j=1
    21 if (j.ge.last) goto 30
       sum=0.d0
-                                                                
-      do 22 i=1,n
-   22 sum=sum+s(i,j)*s(i,j)
+      do i=1,n
+        sum=sum+s(i,j)*s(i,j)
+      enddo
       if (sum.gt.small) goto 28
-      do 24 i=1,n
-      sij=s(i,j)
-      s(i,j)=-s(i,last)
-      s(i,last)=sij
-      tij=t(i,j)
-      t(i,j)=-t(i,last)
-      t(i,last)=tij
-   24 continue
+      do i=1,n
+        sij=s(i,j)
+        s(i,j)=-s(i,last)
+        s(i,last)=sij
+        tij=t(i,j)
+        t(i,j)=-t(i,last)
+        t(i,last)=tij
+      end do
       last=last-1
       goto 21
    28 j=j+1
@@ -101,17 +104,18 @@ C      conv=1.d-6
       sine=1.d0
    34 delta=sine*(a*sine+b*cosine)
       if (zprt.and.delta.lt.0.d0) write (6,71) i,j,a,b,sine,cosine,delta
-      do 35 k=1,m
-      p=s(k,i)*cosine-s(k,j)*sine
-      q=s(k,i)*sine+s(k,j)*cosine
-      s(k,i)=p
-   35 s(k,j)=q
-      do 40 k=1,n
-      p=t(k,i)*cosine-t(k,j)*sine
-      q=t(k,i)*sine+t(k,j)*cosine
-      t(k,i)=p
-      t(k,j)=q
-   40 continue
+      do k=1,m
+        p=s(k,i)*cosine-s(k,j)*sine
+        q=s(k,i)*sine+s(k,j)*cosine
+        s(k,i)=p
+        s(k,j)=q
+      enddo
+      do k=1,n
+        p=t(k,i)*cosine-t(k,j)*sine
+        q=t(k,i)*sine+t(k,j)*cosine
+        t(k,i)=p
+        t(k,j)=q
+      enddo
    45 d=dabs(sine)
       if (d.le.amax) goto 50
       imax=i
@@ -132,43 +136,50 @@ C      conv=1.d-6
      * 'in subroutine maxovl ***'//)
       stop
   100 continue
-      do 120 j=1,n
-      if (s(j,j).gt.0.d0) goto 120
-      do 105 i=1,m
-  105 s(i,j)=-s(i,j)
-      do 110 i=1,n
-  110 t(i,j)=-t(i,j)
-  120 continue
+      do j=1,n
+        if (s(j,j).gt.0.d0) cycle
+        do i=1,m
+          s(i,j)=-s(i,j)
+        enddo
+        do i=1,n
+          t(i,j)=-t(i,j)
+        enddo
+      enddo
       sum=0.d0
-      do 125 i=1,m
-  125 sum=sum+s(i,i)*s(i,i)
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      enddo
       sum=sum/m
-      do 122 i=1,m
-      do 122 j=1,n
-      sw=s(i,j)
-      s(i,j)=w(i,j)
-  122 w(i,j)=sw
+      do i=1,m
+        do j=1,n
+          sw=s(i,j)
+          s(i,j)=w(i,j)
+          w(i,j)=sw
+        enddo
+      enddo
       if (.not.zprt) return
       write (6,12) sum
       write (6,130)
   130 format (//5x,'transformation matrix')
-      do 140 i=1,n
-      write (6,145) i
-  140 write (6,150) (t(i,j),j=1,n)
+      do i=1,n
+        write (6,145) i
+        write (6,150) (t(i,j),j=1,n)
+      enddo
   145 format (i8)
   150 format (2x,10f12.8)
       write (6,160)
   160 format (//5x,'new overlap matrix'/)
-      do 170 i=1,m
-      write (6,145) i
-  170 write (6,150) (w(i,j),j=1,n)
+      do i=1,m
+        write (6,145) i
+        write (6,150) (w(i,j),j=1,n)
+      enddo
       return
       end
 
 
 c************************************************************************
       subroutine maxovl_no_print(n,m,s,t,w)
-C 
+C
 C     This subprogram contains an iterative procedure to find the
 C     unitary transformation of a set of n vectors which maximizes
 C     the sum of their square overlaps with a set of m reference
@@ -178,7 +189,7 @@ C     S: overlap matrix <ref|vec>
 C     T: rotation matrix
 C     W: new overlap matrix
 C
-C 
+C
       implicit real*8(a-h,o-y),logical*1(z)
       parameter (id1=300)
       dimension s(id1,id1),t(id1,id1),w(id1,id1)
@@ -193,17 +204,19 @@ C      conv=1.d-6
     8 mm=m-1
       if (m.lt.n) mm=m
       iter=0
-      do 20 j=1,n
-      do 16 i=1,n
-      t(i,j)=0.d0
-   16 continue
-      do 18 i=1,m
-   18 w(i,j)=s(i,j)
-   20 t(j,j)=1.d0
+      do j=1,n
+        do i=1,n
+          t(i,j)=0.d0
+        enddo
+        do i=1,m
+          w(i,j)=s(i,j)
+        enddo
+        t(j,j)=1.d0
+      enddo
       sum=0.d0
-      do 10 i=1,m
-      sum=sum+s(i,i)*s(i,i)
-   10 continue
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      enddo
       sum=sum/m
    12 format (//5x,'Average square overlap =',f10.6)
       if (n.eq.1) goto 100
@@ -211,18 +224,19 @@ C      conv=1.d-6
       j=1
    21 if (j.ge.last) goto 30
       sum=0.d0
-                                                                
-      do 22 i=1,n
-   22 sum=sum+s(i,j)*s(i,j)
+
+      do i=1,n
+        sum=sum+s(i,j)*s(i,j)
+      enddo
       if (sum.gt.small) goto 28
-      do 24 i=1,n
-      sij=s(i,j)
-      s(i,j)=-s(i,last)
-      s(i,last)=sij
-      tij=t(i,j)
-      t(i,j)=-t(i,last)
-      t(i,last)=tij
-   24 continue
+      do i=1,n
+        sij=s(i,j)
+        s(i,j)=-s(i,last)
+        s(i,last)=sij
+        tij=t(i,j)
+        t(i,j)=-t(i,last)
+        t(i,last)=tij
+      end do
       last=last-1
       goto 21
    28 j=j+1
@@ -232,50 +246,52 @@ C      conv=1.d-6
       jmax=0
       dmax=0.d0
       amax=0.d0
-      do 60 i=1,mm
-      ip=i+1
-      do 50 j=ip,n
-      a=s(i,j)*s(i,j)-s(i,i)*s(i,i)
-      b=-s(i,i)*s(i,j)
-      if (j.gt.m) goto 31
-      a=a+s(j,i)*s(j,i)-s(j,j)*s(j,j)
-      b=b+s(j,i)*s(j,j)
-   31 b=b+b
-      if (a.eq.0.d0) goto 32
-      ba=b/a
-      if (dabs(ba).gt.small) goto 32
-      if (a.gt.0.d0) goto 33
-      tang=-0.5d0*ba
-      cosine=1.d0/dsqrt(1.d0+tang*tang)
-      sine=tang*cosine
-      goto 34
-   32 tang=0.d0
-      if (b.ne.0.d0) tang=(a+dsqrt(a*a+b*b))/b
-      cosine=1.d0/dsqrt(1.d0+tang*tang)
-      sine=tang*cosine
-      goto 34
-   33 cosine=0.d0
-      sine=1.d0
-   34 delta=sine*(a*sine+b*cosine)
-      do 35 k=1,m
-      p=s(k,i)*cosine-s(k,j)*sine
-      q=s(k,i)*sine+s(k,j)*cosine
-      s(k,i)=p
-   35 s(k,j)=q
-      do 40 k=1,n
-      p=t(k,i)*cosine-t(k,j)*sine
-      q=t(k,i)*sine+t(k,j)*cosine
-      t(k,i)=p
-      t(k,j)=q
-   40 continue
-   45 d=dabs(sine)
-      if (d.le.amax) goto 50
-      imax=i
-      jmax=j
-      amax=d
-      dmax=delta
-   50 continue
-   60 continue
+      do i=1,mm
+        ip=i+1
+        do j=ip,n
+          a=s(i,j)*s(i,j)-s(i,i)*s(i,i)
+          b=-s(i,i)*s(i,j)
+          if (j.gt.m) goto 31
+          a=a+s(j,i)*s(j,i)-s(j,j)*s(j,j)
+          b=b+s(j,i)*s(j,j)
+  31      b=b+b
+          if (a.eq.0.d0) goto 32
+          ba=b/a
+          if (dabs(ba).gt.small) goto 32
+          if (a.gt.0.d0) goto 33
+          tang=-0.5d0*ba
+          cosine=1.d0/dsqrt(1.d0+tang*tang)
+          sine=tang*cosine
+          goto 34
+  32      tang=0.d0
+          if (b.ne.0.d0) tang=(a+dsqrt(a*a+b*b))/b
+          cosine=1.d0/dsqrt(1.d0+tang*tang)
+          sine=tang*cosine
+          goto 34
+  33      cosine=0.d0
+          sine=1.d0
+  34      delta=sine*(a*sine+b*cosine)
+          do k=1,m
+            p=s(k,i)*cosine-s(k,j)*sine
+            q=s(k,i)*sine+s(k,j)*cosine
+            s(k,i)=p
+            s(k,j)=q
+          enddo
+          do k=1,n
+            p=t(k,i)*cosine-t(k,j)*sine
+            q=t(k,i)*sine+t(k,j)*cosine
+            t(k,i)=p
+            t(k,j)=q
+          enddo
+  45      d=dabs(sine)
+          if (d.le.amax) goto 50
+          imax=i
+          jmax=j
+          amax=d
+          dmax=delta
+  50      continue
+        end do
+      end do
    70 format (' iter=',i4,' largest rotation=',f12.8,
      * ', vectors',i3,' and',i3,', incr. of diag. squares=',g12.5)
    71 format (' i,j,a,b,sin,cos,delta =',2i3,5f10.5)
@@ -285,22 +301,27 @@ C      conv=1.d-6
      * 'in subroutine maxovl ***'//)
       stop
   100 continue
-      do 120 j=1,n
-      if (s(j,j).gt.0.d0) goto 120
-      do 105 i=1,m
-  105 s(i,j)=-s(i,j)
-      do 110 i=1,n
-  110 t(i,j)=-t(i,j)
-  120 continue
+      do j=1,n
+        if (s(j,j).gt.0.d0) cycle
+        do i=1,m
+          s(i,j)=-s(i,j)
+        enddo
+        do i=1,n
+          t(i,j)=-t(i,j)
+        enddo
+      enddo
       sum=0.d0
-      do 125 i=1,m
-  125 sum=sum+s(i,i)*s(i,i)
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      enddo
       sum=sum/m
-      do 122 i=1,m
-      do 122 j=1,n
-      sw=s(i,j)
-      s(i,j)=w(i,j)
-  122 w(i,j)=sw
+      do i=1,m
+        do j=1,n
+          sw=s(i,j)
+          s(i,j)=w(i,j)
+          w(i,j)=sw
+        enddo
+      enddo
       return
       end
 
diff --git a/src/utils/map_module.f90 b/src/utils/map_module.f90
index 98e73470..ceaec874 100644
--- a/src/utils/map_module.f90
+++ b/src/utils/map_module.f90
@@ -238,11 +238,11 @@ subroutine cache_map_sort(map)
       iorder(i) = i
     enddo
     if (cache_key_kind == 2) then
-      call i2radix_sort(map%key,iorder,map%n_elements,-1)
+      call i2sort(map%key,iorder,map%n_elements,-1)
     else if (cache_key_kind == 4) then
-      call iradix_sort(map%key,iorder,map%n_elements,-1)
+      call isort(map%key,iorder,map%n_elements,-1)
     else if (cache_key_kind == 8) then
-      call i8radix_sort(map%key,iorder,map%n_elements,-1)
+      call i8sort(map%key,iorder,map%n_elements,-1)
     endif
     if (integral_kind == 4) then
       call set_order(map%value,iorder,map%n_elements)
diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f
index 9c1d2445..a62c657e 100644
--- a/src/utils/one_e_integration.irp.f
+++ b/src/utils/one_e_integration.irp.f
@@ -92,9 +92,9 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,&
   overlap = overlap_x * overlap_y * overlap_z
 
 end
-              
+
 ! ---
-              
+
 subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, lower_exp_val, dx, nx)
 
   BEGIN_DOC
@@ -151,4 +151,71 @@ subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, over
 end
 
 
+! ---
 
+subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,&
+      power_B,overlap,dim, n_points)
+  implicit none
+  BEGIN_DOC
+  !.. math::
+  !
+  !   S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2)  (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx \\
+  !   S = S_x S_y S_z
+  !
+  END_DOC
+  include 'constants.include.F'
+  integer,intent(in)             :: dim, n_points
+  double precision,intent(in)    :: A_center(n_points,3),B_center(3)  ! center of the x1 functions
+  double precision, intent(in)   :: alpha,beta
+  integer,intent(in)             :: power_A(3), power_B(3) ! power of the x1 functions
+  double precision, intent(out)  :: overlap(n_points)
+  double precision               :: F_integral_tab(0:max_dim)
+  double precision               :: p, overlap_x, overlap_y, overlap_z
+  double precision, allocatable  :: P_new(:,:,:),P_center(:,:),fact_p(:), fact_pp(:), pp(:)
+  integer                        :: iorder_p(3), ipoint, ldp
+  integer                        :: nmax
+  double precision               :: F_integral
+
+  ldp = maxval( power_A(1:3) + power_B(1:3) )
+  allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points), &
+           fact_pp(n_points), pp(n_points))
+
+  call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,n_points)
+
+  nmax = maxval(iorder_p)
+  do i=0, nmax
+    F_integral_tab(i) = F_integral(i,p)
+  enddo
+
+  integer                        :: i
+
+  call gaussian_product_v(alpha,A_center,beta,B_center,fact_pp,pp,P_center,n_points)
+
+  do ipoint=1,n_points
+    if(fact_p(ipoint).lt.1d-20)then
+      overlap(ipoint) = 1.d-10
+      cycle
+    endif
+
+    overlap_x = P_new(ipoint,0,1) * F_integral_tab(0)
+    do i = 1,iorder_p(1)
+      overlap_x = overlap_x + P_new(ipoint,i,1) * F_integral_tab(i)
+    enddo
+
+    overlap_y = P_new(ipoint,0,2) * F_integral_tab(0)
+    do i = 1,iorder_p(2)
+      overlap_y = overlap_y + P_new(ipoint,i,2) * F_integral_tab(i)
+    enddo
+
+    overlap_z = P_new(ipoint,0,3) * F_integral_tab(0)
+    do i = 1,iorder_p(3)
+      overlap_z = overlap_z + P_new(ipoint,i,3) * F_integral_tab(i)
+    enddo
+
+    overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_pp(ipoint)
+  enddo
+
+  deallocate(P_new, P_center, fact_p, pp, fact_pp)
+end
+
+! ---
diff --git a/src/utils/qsort.c b/src/utils/qsort.c
new file mode 100644
index 00000000..c011b35a
--- /dev/null
+++ b/src/utils/qsort.c
@@ -0,0 +1,373 @@
+/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */
+#include <stdlib.h>
+#include <stdint.h>
+
+struct int16_t_comp {
+  int16_t    x;
+  int32_t i;
+};
+
+int compare_int16_t( const void * l, const void * r )
+{
+  const int16_t * restrict _l= l;
+  const int16_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int16_t_comp), compare_int16_t);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int16_t_noidx(int16_t* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int16_t), compare_int16_t);
+}
+
+
+struct int16_t_comp_big {
+  int16_t    x;
+  int64_t i;
+};
+
+int compare_int16_t_big( const void * l, const void * r )
+{
+  const int16_t * restrict _l= l;
+  const int16_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int16_t_comp_big), compare_int16_t_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int16_t_noidx_big(int16_t* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int16_t), compare_int16_t_big);
+}
+
+
+struct int32_t_comp {
+  int32_t    x;
+  int32_t i;
+};
+
+int compare_int32_t( const void * l, const void * r )
+{
+  const int32_t * restrict _l= l;
+  const int32_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int32_t_comp), compare_int32_t);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int32_t_noidx(int32_t* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int32_t), compare_int32_t);
+}
+
+
+struct int32_t_comp_big {
+  int32_t    x;
+  int64_t i;
+};
+
+int compare_int32_t_big( const void * l, const void * r )
+{
+  const int32_t * restrict _l= l;
+  const int32_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int32_t_comp_big), compare_int32_t_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int32_t_noidx_big(int32_t* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int32_t), compare_int32_t_big);
+}
+
+
+struct int64_t_comp {
+  int64_t    x;
+  int32_t i;
+};
+
+int compare_int64_t( const void * l, const void * r )
+{
+  const int64_t * restrict _l= l;
+  const int64_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int64_t_comp), compare_int64_t);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int64_t_noidx(int64_t* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int64_t), compare_int64_t);
+}
+
+
+struct int64_t_comp_big {
+  int64_t    x;
+  int64_t i;
+};
+
+int compare_int64_t_big( const void * l, const void * r )
+{
+  const int64_t * restrict _l= l;
+  const int64_t * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct int64_t_comp_big), compare_int64_t_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_int64_t_noidx_big(int64_t* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(int64_t), compare_int64_t_big);
+}
+
+
+struct double_comp {
+  double    x;
+  int32_t i;
+};
+
+int compare_double( const void * l, const void * r )
+{
+  const double * restrict _l= l;
+  const double * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct double_comp* A = malloc(isize * sizeof(struct double_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct double_comp), compare_double);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_double_noidx(double* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(double), compare_double);
+}
+
+
+struct double_comp_big {
+  double    x;
+  int64_t i;
+};
+
+int compare_double_big( const void * l, const void * r )
+{
+  const double * restrict _l= l;
+  const double * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct double_comp_big), compare_double_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_double_noidx_big(double* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(double), compare_double_big);
+}
+
+
+struct float_comp {
+  float    x;
+  int32_t i;
+};
+
+int compare_float( const void * l, const void * r )
+{
+  const float * restrict _l= l;
+  const float * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct float_comp* A = malloc(isize * sizeof(struct float_comp));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct float_comp), compare_float);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_float_noidx(float* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(float), compare_float);
+}
+
+
+struct float_comp_big {
+  float    x;
+  int64_t i;
+};
+
+int compare_float_big( const void * l, const void * r )
+{
+  const float * restrict _l= l;
+  const float * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) {
+  struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct float_comp_big), compare_float_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_float_noidx_big(float* A, int64_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(float), compare_float_big);
+}
+/* Generated C file:1 ends here */
diff --git a/src/utils/qsort.org b/src/utils/qsort.org
new file mode 100644
index 00000000..abf0d54a
--- /dev/null
+++ b/src/utils/qsort.org
@@ -0,0 +1,169 @@
+#+TITLE: Quick sort binding for Fortran
+
+* C template
+
+#+NAME: c_template
+#+BEGIN_SRC c
+struct TYPE_comp_big {
+  TYPE    x;
+  int32_t i;
+};
+
+int compare_TYPE_big( const void * l, const void * r )
+{
+  const TYPE * restrict _l= l;
+  const TYPE * restrict _r= r;
+  if( *_l > *_r ) return 1;
+  if( *_l < *_r ) return -1;
+  return 0;
+}
+
+void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) {
+  struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big));
+  if (A == NULL) return;
+
+  for (int i=0 ; i<isize ; ++i) {
+    A[i].x = A_in[i];
+    A[i].i = iorder[i];
+  }
+
+  qsort( (void*) A, (size_t) isize, sizeof(struct TYPE_comp_big), compare_TYPE_big);
+
+  for (int i=0 ; i<isize ; ++i) {
+    A_in[i] = A[i].x;
+    iorder[i] = A[i].i;
+  }
+  free(A);
+}
+
+void qsort_TYPE_noidx_big(TYPE* A, int32_t isize) {
+  qsort( (void*) A, (size_t) isize, sizeof(TYPE), compare_TYPE_big);
+}
+#+END_SRC
+
+* Fortran template
+
+#+NAME:f_template
+#+BEGIN_SRC f90
+subroutine Lsort_big_c(A, iorder, isize) bind(C, name="qsort_TYPE_big")
+  use iso_c_binding
+  integer(c_int32_t), value :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_TYPE)         :: A(isize)
+end subroutine Lsort_big_c
+
+subroutine Lsort_noidx_big_c(A, isize) bind(C, name="qsort_TYPE_noidx_big")
+  use iso_c_binding
+  integer(c_int32_t), value :: isize
+  real   (c_TYPE)         :: A(isize)
+end subroutine Lsort_noidx_big_c
+
+#+END_SRC
+
+#+NAME:f_template2
+#+BEGIN_SRC f90
+subroutine Lsort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_TYPE)         :: A(isize)
+  call Lsort_big_c(A, iorder, isize)
+end subroutine Lsort_big
+
+subroutine Lsort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  real   (c_TYPE)    :: A(isize)
+  call Lsort_noidx_big_c(A, isize)
+end subroutine Lsort_noidx_big
+
+#+END_SRC
+
+* Python scripts for type replacements
+
+#+NAME: replaced
+#+begin_src python :results output :noweb yes
+data = """
+<<c_template>>
+"""
+for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
+    print( data.replace("TYPE", typ).replace("_big", "") )
+    print( data.replace("int32_t", "int64_t").replace("TYPE", typ) )
+#+end_src
+
+#+NAME: replaced_f
+#+begin_src python :results output :noweb yes
+data = """
+<<f_template>>
+"""
+c1 = {
+    "int16_t": "i2",
+    "int32_t": "i",
+    "int64_t": "i8",
+    "double": "d",
+    "float": ""
+}
+c2 = {
+    "int16_t": "integer",
+    "int32_t": "integer",
+    "int64_t": "integer",
+    "double": "real",
+    "float": "real"
+}
+
+for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") )
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) )
+#+end_src
+
+#+NAME: replaced_f2
+#+begin_src python :results output :noweb yes
+data = """
+<<f_template2>>
+"""
+c1 = {
+    "int16_t": "i2",
+    "int32_t": "i",
+    "int64_t": "i8",
+    "double": "d",
+    "float": ""
+}
+c2 = {
+    "int16_t": "integer",
+    "int32_t": "integer",
+    "int64_t": "integer",
+    "double": "real",
+    "float": "real"
+}
+
+for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") )
+    print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) )
+#+end_src
+
+* Generated C file
+  
+#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes
+#include <stdlib.h>
+#include <stdint.h>
+<<replaced()>>
+#+END_SRC
+
+* Generated Fortran file
+
+#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes
+module qsort_module
+  use iso_c_binding
+  
+  interface
+     <<replaced_f()>>
+  end interface
+
+end module qsort_module
+
+<<replaced_f2()>>
+
+#+END_SRC
+
diff --git a/src/utils/qsort_module.f90 b/src/utils/qsort_module.f90
new file mode 100644
index 00000000..a72a4f9e
--- /dev/null
+++ b/src/utils/qsort_module.f90
@@ -0,0 +1,347 @@
+module qsort_module
+  use iso_c_binding
+  
+  interface
+     
+     subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_c
+     
+     subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_noidx_c
+     
+     
+     
+     subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_big_c
+     
+     subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer   (c_int16_t)         :: A(isize)
+     end subroutine i2sort_noidx_big_c
+     
+     
+     
+     subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_c
+     
+     subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_noidx_c
+     
+     
+     
+     subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_big_c
+     
+     subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer   (c_int32_t)         :: A(isize)
+     end subroutine isort_noidx_big_c
+     
+     
+     
+     subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_c
+     
+     subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_noidx_c
+     
+     
+     
+     subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_big_c
+     
+     subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer   (c_int64_t)         :: A(isize)
+     end subroutine i8sort_noidx_big_c
+     
+     
+     
+     subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_c
+     
+     subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_noidx_c
+     
+     
+     
+     subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_big_c
+     
+     subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       real   (c_double)         :: A(isize)
+     end subroutine dsort_noidx_big_c
+     
+     
+     
+     subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       integer(c_int32_t)        :: iorder(isize)
+       real   (c_float)         :: A(isize)
+     end subroutine sort_c
+     
+     subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx")
+       use iso_c_binding
+       integer(c_int32_t), value :: isize
+       real   (c_float)         :: A(isize)
+     end subroutine sort_noidx_c
+     
+     
+     
+     subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       integer(c_int64_t)        :: iorder(isize)
+       real   (c_float)         :: A(isize)
+     end subroutine sort_big_c
+     
+     subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big")
+       use iso_c_binding
+       integer(c_int64_t), value :: isize
+       real   (c_float)         :: A(isize)
+     end subroutine sort_noidx_big_c
+     
+     
+     
+  end interface
+
+end module qsort_module
+
+
+subroutine i2sort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  integer   (c_int16_t)         :: A(isize)
+  call i2sort_c(A, iorder, isize)
+end subroutine i2sort
+
+subroutine i2sort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  integer   (c_int16_t)    :: A(isize)
+  call i2sort_noidx_c(A, isize)
+end subroutine i2sort_noidx
+
+
+
+subroutine i2sort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  integer   (c_int16_t)         :: A(isize)
+  call i2sort_big_c(A, iorder, isize)
+end subroutine i2sort_big
+
+subroutine i2sort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  integer   (c_int16_t)    :: A(isize)
+  call i2sort_noidx_big_c(A, isize)
+end subroutine i2sort_noidx_big
+
+
+
+subroutine isort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  integer   (c_int32_t)         :: A(isize)
+  call isort_c(A, iorder, isize)
+end subroutine isort
+
+subroutine isort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  integer   (c_int32_t)    :: A(isize)
+  call isort_noidx_c(A, isize)
+end subroutine isort_noidx
+
+
+
+subroutine isort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  integer   (c_int32_t)         :: A(isize)
+  call isort_big_c(A, iorder, isize)
+end subroutine isort_big
+
+subroutine isort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  integer   (c_int32_t)    :: A(isize)
+  call isort_noidx_big_c(A, isize)
+end subroutine isort_noidx_big
+
+
+
+subroutine i8sort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  integer   (c_int64_t)         :: A(isize)
+  call i8sort_c(A, iorder, isize)
+end subroutine i8sort
+
+subroutine i8sort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  integer   (c_int64_t)    :: A(isize)
+  call i8sort_noidx_c(A, isize)
+end subroutine i8sort_noidx
+
+
+
+subroutine i8sort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  integer   (c_int64_t)         :: A(isize)
+  call i8sort_big_c(A, iorder, isize)
+end subroutine i8sort_big
+
+subroutine i8sort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  integer   (c_int64_t)    :: A(isize)
+  call i8sort_noidx_big_c(A, isize)
+end subroutine i8sort_noidx_big
+
+
+
+subroutine dsort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_double)         :: A(isize)
+  call dsort_c(A, iorder, isize)
+end subroutine dsort
+
+subroutine dsort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  real   (c_double)    :: A(isize)
+  call dsort_noidx_c(A, isize)
+end subroutine dsort_noidx
+
+
+
+subroutine dsort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  real   (c_double)         :: A(isize)
+  call dsort_big_c(A, iorder, isize)
+end subroutine dsort_big
+
+subroutine dsort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  real   (c_double)    :: A(isize)
+  call dsort_noidx_big_c(A, isize)
+end subroutine dsort_noidx_big
+
+
+
+subroutine sort(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int32_t)        :: isize
+  integer(c_int32_t)        :: iorder(isize)
+  real   (c_float)         :: A(isize)
+  call sort_c(A, iorder, isize)
+end subroutine sort
+
+subroutine sort_noidx(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int32_t) :: isize
+  real   (c_float)    :: A(isize)
+  call sort_noidx_c(A, isize)
+end subroutine sort_noidx
+
+
+
+subroutine sort_big(A, iorder, isize) 
+  use qsort_module
+  use iso_c_binding
+  integer(c_int64_t)        :: isize
+  integer(c_int64_t)        :: iorder(isize)
+  real   (c_float)         :: A(isize)
+  call sort_big_c(A, iorder, isize)
+end subroutine sort_big
+
+subroutine sort_noidx_big(A, isize)
+  use iso_c_binding
+  use qsort_module
+  integer(c_int64_t) :: isize
+  real   (c_float)    :: A(isize)
+  call sort_noidx_big_c(A, isize)
+end subroutine sort_noidx_big
diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f
index a63eb4a3..089c3871 100644
--- a/src/utils/sort.irp.f
+++ b/src/utils/sort.irp.f
@@ -1,222 +1,4 @@
 BEGIN_TEMPLATE
- subroutine insertion_$Xsort (x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the insertion sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  $type                          :: xtmp
-  integer                        :: i, i0, j, jmax
-
-  do i=2,isize
-    xtmp = x(i)
-    i0 = iorder(i)
-    j=i-1
-    do while (j>0)
-      if ((x(j) <= xtmp)) exit
-      x(j+1) = x(j)
-      iorder(j+1) = iorder(j)
-      j=j-1
-    enddo
-    x(j+1) = xtmp
-    iorder(j+1) = i0
-  enddo
- end subroutine insertion_$Xsort
-
- subroutine quick_$Xsort(x, iorder, isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the quicksort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer, external              :: omp_get_num_threads
-  call rec_$X_quicksort(x,iorder,isize,1,isize,nproc)
- end
-
- recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level)
-  implicit none
-  integer, intent(in)            :: isize, first, last, level
-  integer,intent(inout)          :: iorder(isize)
-  $type, intent(inout)           :: x(isize)
-  $type                          :: c, tmp
-  integer                        :: itmp
-  integer                        :: i, j
-
-  if(isize<2)return
-
-  c = x( shiftr(first+last,1) )
-  i = first
-  j = last
-  do
-    do while (x(i) < c)
-      i=i+1
-    end do
-    do while (c < x(j))
-      j=j-1
-    end do
-    if (i >= j) exit
-    tmp  = x(i)
-    x(i) = x(j)
-    x(j) = tmp
-    itmp      = iorder(i)
-    iorder(i) = iorder(j)
-    iorder(j) = itmp
-    i=i+1
-    j=j-1
-  enddo
-  if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then
-    if (first < i-1) then
-      call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2)
-    endif
-    if (j+1 < last) then
-      call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2)
-    endif
-  else
-    if (first < i-1) then
-      call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2)
-    endif
-    if (j+1 < last) then
-      call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2)
-    endif
-  endif
- end
-
- subroutine heap_$Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the heap sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-
-  integer                        :: i, k, j, l, i0
-  $type                          :: xtemp
-
-  l = isize/2+1
-  k = isize
-  do while (.True.)
-    if (l>1) then
-      l=l-1
-      xtemp = x(l)
-      i0 = iorder(l)
-    else
-      xtemp = x(k)
-      i0 = iorder(k)
-      x(k) = x(1)
-      iorder(k) = iorder(1)
-      k = k-1
-      if (k == 1) then
-        x(1) = xtemp
-        iorder(1) = i0
-        exit
-      endif
-    endif
-    i=l
-    j = shiftl(l,1)
-    do while (j<k)
-      if ( x(j) < x(j+1) ) then
-        j=j+1
-      endif
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    enddo
-    if (j==k) then
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    endif
-    x(i) = xtemp
-    iorder(i) = i0
-  enddo
- end subroutine heap_$Xsort
-
- subroutine heap_$Xsort_big(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the heap sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  ! This is a version for very large arrays where the indices need
-  ! to be in integer*8 format
-  END_DOC
-  integer*8,intent(in)           :: isize
-  $type,intent(inout)            :: x(isize)
-  integer*8,intent(inout)        :: iorder(isize)
-
-  integer*8                      :: i, k, j, l, i0
-  $type                          :: xtemp
-
-  l = isize/2+1
-  k = isize
-  do while (.True.)
-    if (l>1) then
-      l=l-1
-      xtemp = x(l)
-      i0 = iorder(l)
-    else
-      xtemp = x(k)
-      i0 = iorder(k)
-      x(k) = x(1)
-      iorder(k) = iorder(1)
-      k = k-1
-      if (k == 1) then
-        x(1) = xtemp
-        iorder(1) = i0
-        exit
-      endif
-    endif
-    i=l
-    j = shiftl(l,1)
-    do while (j<k)
-      if ( x(j) < x(j+1) ) then
-        j=j+1
-      endif
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    enddo
-    if (j==k) then
-      if (xtemp < x(j)) then
-        x(i) = x(j)
-        iorder(i) = iorder(j)
-        i = j
-        j = shiftl(j,1)
-      else
-        j = k+1
-      endif
-    endif
-    x(i) = xtemp
-    iorder(i) = i0
-  enddo
-
- end subroutine heap_$Xsort_big
 
  subroutine sorted_$Xnumber(x,isize,n)
   implicit none
@@ -250,220 +32,6 @@ SUBST [ X, type ]
 END_TEMPLATE
 
 
-!---------------------- INTEL
-IRP_IF INTEL
-
-BEGIN_TEMPLATE
- subroutine $Xsort(x,iorder,isize)
-  use intel
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  character, allocatable         :: tmp(:)
-  if (isize < 2) return
-  call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n)
-  allocate(tmp(n))
-  call ippsSortRadixIndexAscend_$ityp(x, $n, iorder, isize, tmp)
-  deallocate(tmp)
-  iorder(1:isize) = iorder(1:isize)+1
-  call $Xset_order(x,iorder,isize)
- end
-
- subroutine $Xsort_noidx(x,isize)
-  use intel
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer                        :: n
-  character, allocatable         :: tmp(:)
-  if (isize < 2) return
-  call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n)
-  allocate(tmp(n))
-  call ippsSortRadixAscend_$ityp_I(x, isize, tmp)
-  deallocate(tmp)
- end
-
-SUBST [ X, type, ityp, n, ippsz ]
-   ; real ; 32f ; 4 ; 13 ;;
- i ; integer ; 32s ; 4 ; 11 ;;
- i2 ; integer*2 ; 16s ; 2 ; 7 ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-!  call sorted_$Xnumber(x,isize,n)
-!  if (isize == n) then
-!    return
-!  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-!    call heap_$Xsort(x,iorder,isize)
-    call quick_$Xsort(x,iorder,isize)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
- d ; double precision ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-  call sorted_$Xnumber(x,isize,n)
-  if (isize == n) then
-    return
-  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-    call $Xradix_sort(x,iorder,isize,-1)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
- i8 ; integer*8 ;;
-END_TEMPLATE
-
-!---------------------- END INTEL
-IRP_ELSE
-!---------------------- NON-INTEL
-BEGIN_TEMPLATE
-
- subroutine $Xsort_noidx(x,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer, allocatable           :: iorder(:)
-  integer                        :: i
-  allocate(iorder(isize))
-  do i=1,isize
-   iorder(i)=i
-  enddo
-  call $Xsort(x,iorder,isize)
-  deallocate(iorder)
- end subroutine $Xsort_noidx
-
-SUBST [ X, type ]
-   ; real ;;
- d ; double precision ;;
- i ; integer ;;
- i8 ; integer*8 ;;
- i2 ; integer*2 ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-!  call sorted_$Xnumber(x,isize,n)
-!  if (isize == n) then
-!    return
-!  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-!    call heap_$Xsort(x,iorder,isize)
-    call quick_$Xsort(x,iorder,isize)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
-   ; real ;;
- d ; double precision ;;
-END_TEMPLATE
-
-BEGIN_TEMPLATE
-
- subroutine $Xsort(x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize).
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  END_DOC
-  integer,intent(in)             :: isize
-  $type,intent(inout)            :: x(isize)
-  integer,intent(inout)          :: iorder(isize)
-  integer                        :: n
-  if (isize < 2) then
-    return
-  endif
-  call sorted_$Xnumber(x,isize,n)
-  if (isize == n) then
-    return
-  endif
-  if ( isize < 32) then
-    call insertion_$Xsort(x,iorder,isize)
-  else
-    call $Xradix_sort(x,iorder,isize,-1)
-  endif
- end subroutine $Xsort
-
-SUBST [ X, type ]
- i ; integer ;;
- i8 ; integer*8 ;;
- i2 ; integer*2 ;;
-END_TEMPLATE
-
-IRP_ENDIF
-!---------------------- END NON-INTEL
-
-
 
 BEGIN_TEMPLATE
  subroutine $Xset_order(x,iorder,isize)
@@ -489,47 +57,6 @@ BEGIN_TEMPLATE
   deallocate(xtmp)
  end
 
-SUBST [ X, type ]
-   ; real ;;
- d ; double precision ;;
- i ; integer ;;
- i8; integer*8 ;;
- i2; integer*2 ;;
-END_TEMPLATE
-
-
-BEGIN_TEMPLATE
- subroutine insertion_$Xsort_big (x,iorder,isize)
-  implicit none
-  BEGIN_DOC
-  ! Sort array x(isize) using the insertion sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  ! This is a version for very large arrays where the indices need
-  ! to be in integer*8 format
-  END_DOC
-  integer*8,intent(in)           :: isize
-  $type,intent(inout)            :: x(isize)
-  integer*8,intent(inout)        :: iorder(isize)
-  $type                          :: xtmp
-  integer*8                      :: i, i0, j, jmax
-
-  do i=2_8,isize
-    xtmp = x(i)
-    i0 = iorder(i)
-    j = i-1_8
-    do while (j>0_8)
-      if (x(j)<=xtmp) exit
-      x(j+1_8) = x(j)
-      iorder(j+1_8) = iorder(j)
-      j = j-1_8
-    enddo
-    x(j+1_8) = xtmp
-    iorder(j+1_8) = i0
-  enddo
-
- end subroutine insertion_$Xsort_big
-
  subroutine $Xset_order_big(x,iorder,isize)
   implicit none
   BEGIN_DOC
@@ -563,223 +90,3 @@ SUBST [ X, type ]
 END_TEMPLATE
 
 
-BEGIN_TEMPLATE
-
-recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix)
-  implicit none
-
-  BEGIN_DOC
-  ! Sort integer array x(isize) using the radix sort algorithm.
-  ! iorder in input should be (1,2,3,...,isize), and in output
-  ! contains the new order of the elements.
-  ! iradix should be -1 in input.
-  END_DOC
-  integer*$int_type, intent(in)  :: isize
-  integer*$int_type, intent(inout) :: iorder(isize)
-  integer*$type, intent(inout)   :: x(isize)
-  integer, intent(in)            :: iradix
-  integer                        :: iradix_new
-  integer*$type, allocatable     :: x2(:), x1(:)
-  integer*$type                  :: i4               ! data type
-  integer*$int_type, allocatable :: iorder1(:),iorder2(:)
-  integer*$int_type              :: i0, i1, i2, i3, i ! index type
-  integer*$type                  :: mask
-  integer                        :: err
-  !DIR$ ATTRIBUTES ALIGN : 128   :: iorder1,iorder2, x2, x1
-
-  if (isize < 2) then
-    return
-  endif
-
-  if (iradix == -1) then ! Sort Positive and negative
-
-    allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to allocate arrays'
-      stop
-    endif
-
-    i1=1_$int_type
-    i2=1_$int_type
-    do i=1_$int_type,isize
-      if (x(i) < 0_$type) then
-        iorder1(i1) = iorder(i)
-        x1(i1) = -x(i)
-        i1 = i1+1_$int_type
-      else
-        iorder2(i2) = iorder(i)
-        x2(i2) = x(i)
-        i2 = i2+1_$int_type
-      endif
-    enddo
-    i1=i1-1_$int_type
-    i2=i2-1_$int_type
-
-    do i=1_$int_type,i2
-      iorder(i1+i) = iorder2(i)
-      x(i1+i) = x2(i)
-    enddo
-    deallocate(x2,iorder2,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x2, iorder2'
-      stop
-    endif
-
-
-    if (i1 > 1_$int_type) then
-      call $Xradix_sort$big(x1,iorder1,i1,-2)
-      do i=1_$int_type,i1
-        x(i) = -x1(1_$int_type+i1-i)
-        iorder(i) = iorder1(1_$int_type+i1-i)
-      enddo
-    endif
-
-    if (i2>1_$int_type) then
-      call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2)
-    endif
-
-    deallocate(x1,iorder1,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x1, iorder1'
-      stop
-    endif
-    return
-
-  else if (iradix == -2) then ! Positive
-
-    ! Find most significant bit
-
-    i0 = 0_$int_type
-    i4 = maxval(x)
-
-    iradix_new = max($integer_size-1-leadz(i4),1)
-    mask = ibset(0_$type,iradix_new)
-
-    allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to allocate arrays'
-      stop
-    endif
-
-    i1=1_$int_type
-    i2=1_$int_type
-
-    do i=1_$int_type,isize
-      if (iand(mask,x(i)) == 0_$type) then
-        iorder1(i1) = iorder(i)
-        x1(i1) = x(i)
-        i1 = i1+1_$int_type
-      else
-        iorder2(i2) = iorder(i)
-        x2(i2) = x(i)
-        i2 = i2+1_$int_type
-      endif
-    enddo
-    i1=i1-1_$int_type
-    i2=i2-1_$int_type
-
-    do i=1_$int_type,i1
-      iorder(i0+i) = iorder1(i)
-      x(i0+i) = x1(i)
-    enddo
-    i0 = i0+i1
-    i3 = i0
-    deallocate(x1,iorder1,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x1, iorder1'
-      stop
-    endif
-
-
-    do i=1_$int_type,i2
-      iorder(i0+i) = iorder2(i)
-      x(i0+i) = x2(i)
-    enddo
-    i0 = i0+i2
-    deallocate(x2,iorder2,stat=err)
-    if (err /= 0) then
-      print *,  irp_here, ': Unable to deallocate arrays x2, iorder2'
-      stop
-    endif
-
-
-    if (i3>1_$int_type) then
-      call $Xradix_sort$big(x,iorder,i3,iradix_new-1)
-    endif
-
-    if (isize-i3>1_$int_type) then
-      call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1)
-    endif
-
-    return
-  endif
-
-  ASSERT (iradix >= 0)
-
-  if (isize < 48) then
-    call insertion_$Xsort$big(x,iorder,isize)
-    return
-  endif
-
-
-  allocate(x2(isize),iorder2(isize),stat=err)
-  if (err /= 0) then
-    print *,  irp_here, ': Unable to allocate arrays x1, iorder1'
-    stop
-  endif
-
-
-  mask = ibset(0_$type,iradix)
-  i0=1_$int_type
-  i1=1_$int_type
-
-  do i=1_$int_type,isize
-    if (iand(mask,x(i)) == 0_$type) then
-      iorder(i0) = iorder(i)
-      x(i0) = x(i)
-      i0 = i0+1_$int_type
-    else
-      iorder2(i1) = iorder(i)
-      x2(i1) = x(i)
-      i1 = i1+1_$int_type
-    endif
-  enddo
-  i0=i0-1_$int_type
-  i1=i1-1_$int_type
-
-  do i=1_$int_type,i1
-    iorder(i0+i) = iorder2(i)
-    x(i0+i) = x2(i)
-  enddo
-
-  deallocate(x2,iorder2,stat=err)
-  if (err /= 0) then
-    print *,  irp_here, ': Unable to allocate arrays x2, iorder2'
-    stop
-  endif
-
-
-  if (iradix == 0) then
-    return
-  endif
-
-
-  if (i1>1_$int_type) then
-    call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1)
-  endif
-  if (i0>1) then
-    call $Xradix_sort$big(x,iorder,i0,iradix-1)
-  endif
-
- end
-
-SUBST [ X, type, integer_size, is_big, big, int_type ]
- i  ; 4 ; 32 ; .False. ;      ; 4 ;;
- i8 ; 8 ; 64 ; .False. ;      ; 4 ;;
- i2 ; 2 ; 16 ; .False. ;      ; 4 ;;
- i  ; 4 ; 32 ; .True.  ; _big ; 8 ;;
- i8 ; 8 ; 64 ; .True.  ; _big ; 8 ;;
-END_TEMPLATE
-
-
-
diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f
index 184d8052..c9e35ee1 100644
--- a/src/utils/util.irp.f
+++ b/src/utils/util.irp.f
@@ -430,3 +430,28 @@ subroutine lowercase(txt,n)
   enddo
 end
 
+subroutine v2_over_x(v,x,res)
+
+  !BEGIN_DOC
+  ! Two by two diagonalization to avoid the divergence in v^2/x when x goes to 0
+  !END_DOC
+
+  implicit none
+
+  double precision, intent(in)  :: v, x
+  double precision, intent(out) :: res
+
+  double precision :: delta_E, tmp, val
+
+  res = 0d0
+  delta_E = x
+  if (v == 0.d0) return
+
+  val = 2d0 * v
+  tmp = dsqrt(delta_E * delta_E + val * val)
+  if (delta_E < 0.d0) then
+      tmp = -tmp
+  endif
+  res = 0.5d0 * (tmp - delta_E)
+
+end
diff --git a/src/utils_trust_region/EZFIO.cfg b/src/utils_trust_region/EZFIO.cfg
new file mode 100644
index 00000000..9c9f6248
--- /dev/null
+++ b/src/utils_trust_region/EZFIO.cfg
@@ -0,0 +1,89 @@
+[thresh_delta]
+type: double precision
+doc: Threshold to stop the optimization if the radius of the trust region delta < thresh_delta
+interface: ezfio,provider,ocaml
+default: 1.e-10
+
+[thresh_rho]
+type: double precision
+doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. thresh_rho)
+interface: ezfio,provider,ocaml
+default: 0.1
+
+[thresh_eig]
+type: double precision
+doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm
+interface: ezfio,provider,ocaml
+default: 1.e-12
+
+[thresh_model]
+type: double precision
+doc: If if ABS(criterion - criterion_model) < thresh_model, the program exit the trust region algorithm
+interface: ezfio,provider,ocaml
+default: 1.e-12
+
+[absolute_eig]
+type: logical
+doc: If True, the algorithm replace the eigenvalues of the hessian by their absolute value to compute the step (in the trust region)
+interface: ezfio,provider,ocaml
+default: false
+
+[thresh_wtg]
+type: double precision
+doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several order of magnitude to avoid numerical problem. If the research of the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[thresh_wtg2]
+type: double precision
+doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. There is no particular reason to put a different value that thresh_wtg, but it can be useful one day
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[avoid_saddle]
+type: logical
+doc: Test to avoid saddle point, active if true
+interface: ezfio,provider,ocaml
+default: false
+
+[version_avoid_saddle]
+type: integer
+doc: cf. trust region, not stable
+interface: ezfio,provider,ocaml
+default: 3
+
+[thresh_rho_2]
+type: double precision
+doc: Threshold for the step acceptance for the research of lambda in the trust region algorithm, if (rho_2 .geq. thresh_rho_2) the step is accepted, else the step is rejected
+interface: ezfio,provider,ocaml
+default: 0.1
+
+[thresh_cc]
+type: double precision
+doc: Threshold to stop the research of the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc)
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[thresh_model_2]
+type: double precision
+doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the research of the optimal lambda in the trust region algorithm it prints a warning
+interface: ezfio,provider,ocaml
+default: 1.e-12
+
+[version_lambda_search]
+type: integer
+doc: Research of the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 ->  1/||x||^2 - 1/delta^2 .eq. 0
+interface: ezfio,provider,ocaml
+default: 2
+
+[nb_it_max_lambda]
+type: integer
+doc: Maximal number of iterations for the research of the optimal lambda in the trust region algorithm
+interface: ezfio,provider,ocaml
+default: 100
+
+[nb_it_max_pre_search]
+type: integer
+doc: Maximal number of iterations for the pre-research of the optimal lambda in the trust region algorithm
+interface: ezfio,provider,ocaml
+default: 40
diff --git a/src/utils_trust_region/NEED b/src/utils_trust_region/NEED
new file mode 100644
index 00000000..1a65ce38
--- /dev/null
+++ b/src/utils_trust_region/NEED
@@ -0,0 +1 @@
+hartree_fock
diff --git a/src/utils_trust_region/README.rst b/src/utils_trust_region/README.rst
new file mode 100644
index 00000000..6a0689b6
--- /dev/null
+++ b/src/utils_trust_region/README.rst
@@ -0,0 +1,5 @@
+============
+trust_region
+============
+
+The documentation can be found in the org files.
diff --git a/src/utils_trust_region/TANGLE_org_mode.sh b/src/utils_trust_region/TANGLE_org_mode.sh
new file mode 100755
index 00000000..059cbe7d
--- /dev/null
+++ b/src/utils_trust_region/TANGLE_org_mode.sh
@@ -0,0 +1,7 @@
+#!/bin/sh   
+
+list='ls *.org'
+for element in $list    
+do   
+		emacs --batch $element -f org-babel-tangle
+done
diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f
new file mode 100644
index 00000000..eac17275
--- /dev/null
+++ b/src/utils_trust_region/algo_trust.irp.f
@@ -0,0 +1,248 @@
+! Algorithm for the trust region
+
+! step_in_trust_region:
+! Computes the step in the trust region (delta)
+! (automatically sets at the iteration 0 and which evolves during the
+! process in function of the evolution of rho). The step is computing by
+! constraining its norm with a lagrange multiplier.
+! Since the calculation of the step is based on the Newton method, an
+! estimation of the gain in energy is given using the Taylors series
+! truncated at the second order (criterion_model).
+! If (DABS(criterion-criterion_model) < 1d-12) then 
+!   must_exit = .True.
+! else 
+!   must_exit = .False.
+
+! This estimation of the gain in energy is used by
+! is_step_cancel_trust_region to say if the step is accepted or cancelled. 
+
+! If the step must be cancelled, the calculation restart from the same
+! hessian and gradient and recomputes the step but in a smaller trust
+! region and so on until the step is accepted. If the step is accepted
+! the hessian and the gradient are recomputed to produce a new step.
+
+! Example:
+
+
+!  !### Initialization ###
+!  delta = 0d0
+!  nb_iter = 0 ! Must start at 0 !!!
+!  rho = 0.5d0
+!  not_converged = .True.
+!
+!  ! ### TODO ###
+!  ! Compute the criterion before the loop
+!  call #your_criterion(prev_criterion)
+!
+!  do while (not_converged)
+!    ! ### TODO ## 
+!    ! Call your gradient
+!    ! Call you hessian
+!    call #your_gradient(v_grad) (1D array)
+!    call #your_hessian(H) (2D array) 
+!
+!    ! ### TODO ###
+!    ! Diagonalization of the hessian 
+!    call diagonalization_hessian(n,H,e_val,w)
+!
+!    cancel_step = .True. ! To enter in the loop just after 
+!    ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
+!    do while (cancel_step)
+!
+!      ! Hessian,gradient,Criterion -> x 
+!      call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit)
+!
+!      if (must_exit) then
+!        ! ### Message ###
+!        ! if step_in_trust_region sets must_exit on true for numerical reasons
+!        print*,'algo_trust1 sends the message : Exit'
+!        !### exit ###
+!      endif
+!
+!      !### TODO ###  
+!      ! Compute x -> m_x
+!      ! Compute m_x -> R
+!      ! Apply R and keep the previous MOs...
+!      ! Update/touch 
+!      ! Compute the new criterion/energy -> criterion
+!
+!      call #your_routine_1D_to_2D_antisymmetric_array(x,m_x)
+!      call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R)
+!      call #your_routine_to_apply_the_rotation_matrix(R,prev_mos)
+!
+!      TOUCH #your_variables
+!      
+!      call #your_criterion(criterion)
+!
+!      ! Criterion -> step accepted or rejected 
+!      call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
+!
+!      ! ### TODO ###
+!      !if (cancel_step) then
+!      ! Cancel the previous step (mo_coef = prev_mos if you keep them...)
+!      !endif
+!      #if (cancel_step) then
+!        #mo_coef = prev_mos
+!      #endif
+!
+!    enddo
+!
+!    !call save_mos() !### depend of the time for 1 iteration
+!
+!    ! To exit the external loop if must_exit = .True.
+!    if (must_exit) then
+!      !### exit ###
+!    endif 
+!
+!    ! Step accepted, nb iteration + 1
+!    nb_iter = nb_iter + 1
+!
+!    ! ### TODO ###
+!    !if (###Conditions###) then
+!    ! no_converged = .False.
+!    !endif
+!    #if (#your_conditions) then
+!    #  not_converged = .False.
+!    #endif
+!
+!  enddo
+
+
+
+! Variables:
+
+! Input:
+! | n              | integer          | m*(m-1)/2                                                                      |
+! | m              | integer          | number of mo in the mo_class                                                   |
+! | H(n,n)         | double precision | Hessian                                                                        |
+! | v_grad(n)      | double precision | Gradient                                                                       |
+! | W(n,n)         | double precision | Eigenvectors of the hessian                                                    |
+! | e_val(n)       | double precision | Eigenvalues of the hessian                                                     |
+! | criterion      | double precision | Actual criterion                                                               |
+! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration |
+! | rho            | double precision | Given by is_step_cancel_trus_region                                            |
+! |                |                  | Agreement between the real function and the Taylor series (2nd order)          |
+! | nb_iter        | integer          | Actual number of iterations                                                    |
+
+! Input/output:
+! | delta           | double precision | Radius of the trust region                                                      |
+
+! Output:
+! | criterion_model | double precision | Predicted criterion after the rotation                                          |
+! | x(n)            | double precision | Step                                                                            |
+! | must_exit       | logical          | If the program must exit the loop                                               |
+
+
+subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the step and the expected criterion/energy after the step
+  END_DOC
+
+  implicit none
+
+  ! in
+  integer, intent(in)             :: n, nb_iter
+  double precision, intent(in)    :: H(n,n), W(n,n), v_grad(n)
+  double precision, intent(in)    :: rho, prev_criterion
+
+  ! inout
+  double precision, intent(inout) :: delta, e_val(n)
+
+  ! out
+  double precision, intent(out)   :: criterion_model, x(n)
+  logical, intent(out)            :: must_exit
+
+  ! internal
+  integer :: info
+
+  must_exit = .False.
+  
+  call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta)
+
+  call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model)
+
+  ! exit if DABS(prev_criterion - criterion_model) < 1d-12
+  if (DABS(prev_criterion - criterion_model) < thresh_model) then
+    print*,''
+    print*,'###############################################################################'
+    print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region'
+    print*,'###############################################################################'
+    print*,''
+    must_exit = .True.
+  endif
+
+  if (delta < thresh_delta) then
+    print*,''
+    print*,'##############################################'
+    print*,'Delta <', thresh_delta, 'stop the trust region'
+    print*,'##############################################'
+    print*,''
+    must_exit = .True.
+  endif
+
+  ! Add after the call to this subroutine, a statement:
+  ! "if (must_exit) then
+  !   exit
+  ! endif"
+  ! in order to exit the optimization loop
+
+end subroutine
+
+
+
+! Variables:
+
+! Input:
+! | nb_iter         | integer          | actual number of iterations                    |
+! | prev_criterion  | double precision | criterion before the application of the step x |
+! | criterion       | double precision | criterion after the application of the step x  |
+! | criterion_model | double precision | predicted criterion after the application of x |
+
+! Output:
+! | rho         | double precision | Agreement between the predicted criterion and the real new criterion |
+! | cancel_step | logical          | If the step must be cancelled                                        |
+
+
+subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute if the step should be cancelled
+  END_DOC
+
+  implicit none
+ 
+  ! in
+  double precision, intent(in)  :: prev_criterion, criterion, criterion_model
+  
+  ! inout
+  integer, intent(inout)        :: nb_iter
+
+  ! out
+  logical, intent(out)          :: cancel_step
+  double precision, intent(out) :: rho
+
+  ! Computes rho
+  call trust_region_rho(prev_criterion,criterion,criterion_model,rho)
+  
+  if (nb_iter == 0) then
+    nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled  
+  endif
+
+  ! If rho < thresh_rho -> give something in output to cancel the step
+  if (rho >= thresh_rho) then !0.1d0) then
+     ! The step is accepted
+     cancel_step = .False.
+  else
+     ! The step is rejected
+     cancel_step = .True.
+     print*, '***********************'
+     print*, 'Step cancel : rho <', thresh_rho
+     print*, '***********************'
+  endif
+  
+end subroutine
diff --git a/src/utils_trust_region/algo_trust.org b/src/utils_trust_region/algo_trust.org
new file mode 100644
index 00000000..aa836f98
--- /dev/null
+++ b/src/utils_trust_region/algo_trust.org
@@ -0,0 +1,593 @@
+* Algorithm for the trust region
+
+step_in_trust_region:
+Computes the step in the trust region (delta)
+(automatically sets at the iteration 0 and which evolves during the
+process in function of the evolution of rho). The step is computing by
+constraining its norm with a lagrange multiplier.
+Since the calculation of the step is based on the Newton method, an
+estimation of the gain in energy is given using the Taylors series
+truncated at the second order (criterion_model).
+If (DABS(criterion-criterion_model) < 1d-12) then 
+  must_exit = .True.
+else 
+  must_exit = .False.
+
+This estimation of the gain in energy is used by
+is_step_cancel_trust_region to say if the step is accepted or cancelled. 
+
+If the step must be cancelled, the calculation restart from the same
+hessian and gradient and recomputes the step but in a smaller trust
+region and so on until the step is accepted. If the step is accepted
+the hessian and the gradient are recomputed to produce a new step.
+
+Example:
+
+#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f
+!  !### Initialization ###
+!  delta = 0d0
+!  nb_iter = 0 ! Must start at 0 !!!
+!  rho = 0.5d0
+!  not_converged = .True.
+!
+!  ! ### TODO ###
+!  ! Compute the criterion before the loop
+!  call #your_criterion(prev_criterion)
+!
+!  do while (not_converged)
+!    ! ### TODO ## 
+!    ! Call your gradient
+!    ! Call you hessian
+!    call #your_gradient(v_grad) (1D array)
+!    call #your_hessian(H) (2D array) 
+!
+!    ! ### TODO ###
+!    ! Diagonalization of the hessian 
+!    call diagonalization_hessian(n,H,e_val,w)
+!
+!    cancel_step = .True. ! To enter in the loop just after 
+!    ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
+!    do while (cancel_step)
+!
+!      ! Hessian,gradient,Criterion -> x 
+!      call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit)
+!
+!      if (must_exit) then
+!        ! ### Message ###
+!        ! if step_in_trust_region sets must_exit on true for numerical reasons
+!        print*,'algo_trust1 sends the message : Exit'
+!        !### exit ###
+!      endif
+!
+!      !### TODO ###  
+!      ! Compute x -> m_x
+!      ! Compute m_x -> R
+!      ! Apply R and keep the previous MOs...
+!      ! Update/touch 
+!      ! Compute the new criterion/energy -> criterion
+!
+!      call #your_routine_1D_to_2D_antisymmetric_array(x,m_x)
+!      call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R)
+!      call #your_routine_to_apply_the_rotation_matrix(R,prev_mos)
+!
+!      TOUCH #your_variables
+!      
+!      call #your_criterion(criterion)
+!
+!      ! Criterion -> step accepted or rejected 
+!      call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
+!
+!      ! ### TODO ###
+!      !if (cancel_step) then
+!      ! Cancel the previous step (mo_coef = prev_mos if you keep them...)
+!      !endif
+!      #if (cancel_step) then
+!        #mo_coef = prev_mos
+!      #endif
+!
+!    enddo
+!
+!    !call save_mos() !### depend of the time for 1 iteration
+!
+!    ! To exit the external loop if must_exit = .True.
+!    if (must_exit) then
+!      !### exit ###
+!    endif 
+!
+!    ! Step accepted, nb iteration + 1
+!    nb_iter = nb_iter + 1
+!
+!    ! ### TODO ###
+!    !if (###Conditions###) then
+!    ! no_converged = .False.
+!    !endif
+!    #if (#your_conditions) then
+!    #  not_converged = .False.
+!    #endif
+!
+!  enddo
+#+END_SRC
+
+Variables:
+
+Input:
+| n              | integer          | m*(m-1)/2                                                                      |
+| m              | integer          | number of mo in the mo_class                                                   |
+| H(n,n)         | double precision | Hessian                                                                        |
+| v_grad(n)      | double precision | Gradient                                                                       |
+| W(n,n)         | double precision | Eigenvectors of the hessian                                                    |
+| e_val(n)       | double precision | Eigenvalues of the hessian                                                     |
+| criterion      | double precision | Actual criterion                                                               |
+| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration |
+| rho            | double precision | Given by is_step_cancel_trus_region                                            |
+|                |                  | Agreement between the real function and the Taylor series (2nd order)          |
+| nb_iter        | integer          | Actual number of iterations                                                    |
+
+Input/output:
+| delta           | double precision | Radius of the trust region                                                      |
+
+Output:
+| criterion_model | double precision | Predicted criterion after the rotation                                          |
+| x(n)            | double precision | Step                                                                            |
+| must_exit       | logical          | If the program must exit the loop                                               |
+
+#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f
+subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the step and the expected criterion/energy after the step
+  END_DOC
+
+  implicit none
+
+  ! in
+  integer, intent(in)             :: n, nb_iter
+  double precision, intent(in)    :: H(n,n), W(n,n), v_grad(n)
+  double precision, intent(in)    :: rho, prev_criterion
+
+  ! inout
+  double precision, intent(inout) :: delta, e_val(n)
+
+  ! out
+  double precision, intent(out)   :: criterion_model, x(n)
+  logical, intent(out)            :: must_exit
+
+  ! internal
+  integer :: info
+
+  must_exit = .False.
+  
+  call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta)
+
+  call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model)
+
+  ! exit if DABS(prev_criterion - criterion_model) < 1d-12
+  if (DABS(prev_criterion - criterion_model) < thresh_model) then
+    print*,''
+    print*,'###############################################################################'
+    print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region'
+    print*,'###############################################################################'
+    print*,''
+    must_exit = .True.
+  endif
+
+  if (delta < thresh_delta) then
+    print*,''
+    print*,'##############################################'
+    print*,'Delta <', thresh_delta, 'stop the trust region'
+    print*,'##############################################'
+    print*,''
+    must_exit = .True.
+  endif
+
+  ! Add after the call to this subroutine, a statement:
+  ! "if (must_exit) then
+  !   exit
+  ! endif"
+  ! in order to exit the optimization loop
+
+end subroutine
+#+END_SRC
+
+Variables:
+
+Input:
+| nb_iter         | integer          | actual number of iterations                    |
+| prev_criterion  | double precision | criterion before the application of the step x |
+| criterion       | double precision | criterion after the application of the step x  |
+| criterion_model | double precision | predicted criterion after the application of x |
+
+Output:
+| rho         | double precision | Agreement between the predicted criterion and the real new criterion |
+| cancel_step | logical          | If the step must be cancelled                                        |
+
+#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f
+subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute if the step should be cancelled
+  END_DOC
+
+  implicit none
+ 
+  ! in
+  double precision, intent(in)  :: prev_criterion, criterion, criterion_model
+  
+  ! inout
+  integer, intent(inout)        :: nb_iter
+
+  ! out
+  logical, intent(out)          :: cancel_step
+  double precision, intent(out) :: rho
+
+  ! Computes rho
+  call trust_region_rho(prev_criterion,criterion,criterion_model,rho)
+  
+  if (nb_iter == 0) then
+    nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled  
+  endif
+
+  ! If rho < thresh_rho -> give something in output to cancel the step
+  if (rho >= thresh_rho) then !0.1d0) then
+     ! The step is accepted
+     cancel_step = .False.
+  else
+     ! The step is rejected
+     cancel_step = .True.
+     print*, '***********************'
+     print*, 'Step cancel : rho <', thresh_rho
+     print*, '***********************'
+  endif
+  
+end subroutine
+#+END_SRC
+
+** Template for MOs
+#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt
+subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list)
+
+  implicit none
+
+  ! Variables
+
+  ! In
+  integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
+
+  ! Out
+  ! Rien ou un truc pour savoir si ça c'est bien passé
+  
+  ! Internal
+  double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:)
+  double precision, allocatable :: prev_mos(:,:)
+  double precision              :: criterion, prev_criterion, criterion_model
+  double precision              :: delta, rho
+  logical                       :: not_converged, cancel_step, must_exit, enforce_step_cancellation
+  integer                       :: nb_iter, info, nb_sub_iter
+  integer                       :: i,j,tmp_i,tmp_j
+
+  allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size))
+  allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num))
+  allocate(prev_mos(ao_num, mo_num))
+
+  ! Provide the criterion, but unnecessary because it's done 
+  ! automatically
+  PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
+
+  ! Initialization
+  delta = 0d0 
+  nb_iter = 0 ! Must start at 0 !!!
+  rho = 0.5d0 ! Must start at 0.5 
+  not_converged = .True. ! Must be true
+
+  ! Compute the criterion before the loop
+  prev_criterion = C_PROVIDER
+
+  do while (not_converged)
+
+      print*,''
+      print*,'******************'
+      print*,'Iteration', nb_iter
+      print*,'******************'
+      print*,''
+
+      ! The new hessian and gradient are computed at the end of the previous iteration
+      ! Diagonalization of the hessian 
+      call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W)
+
+      cancel_step = .True. ! To enter in the loop just after
+      nb_sub_iter = 0
+
+      ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
+      do while (cancel_step)
+
+          print*,'-----------------------------'
+          print*,'Iteration:', nb_iter
+          print*,'Sub iteration:', nb_sub_iter
+          print*,'-----------------------------'
+
+          ! Hessian,gradient,Criterion -> x 
+          call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, &
+               prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit)
+
+          if (must_exit) then
+              ! if step_in_trust_region sets must_exit on true for numerical reasons
+              print*,'trust_region_step_w_expected_e sent the message : Exit'
+              exit
+          endif
+
+          ! 1D tmp -> 2D tmp 
+          call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x)
+
+          ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size)
+          call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation)
+
+          if (enforce_step_cancellation) then
+            print*, 'Forces the step cancellation, too large error in the rotation matrix'
+            rho = 0d0
+            cycle
+          endif
+
+          ! tmp_R to R, subspace to full space
+          call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R)
+
+          ! Rotation of the MOs
+          call apply_mo_rotation(R, prev_mos)
+
+          ! touch mo_coef
+          call clear_mo_map ! Only if you are using the bi-electronic integrals
+          ! mo_coef becomes valid
+          ! And avoid the recomputation of the providers which depend of mo_coef
+          TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
+
+          ! To update the other parameters if needed
+          call #update_parameters()
+
+          ! To enforce the program to provide new criterion after the update
+          ! of the parameters
+          FREE C_PROVIDER
+          PROVIDE C_PROVIDER
+          criterion = C_PROVIDER
+
+          ! Criterion -> step accepted or rejected 
+          call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step)
+
+          ! Cancellation of the step ?
+          if (cancel_step) then
+              ! Replacement by the previous MOs
+              mo_coef = prev_mos
+              ! call save_mos() ! depends of the time for 1 iteration
+
+              ! No need to clear_mo_map since we don't recompute the gradient and the hessian
+              ! mo_coef becomes valid
+              ! Avoid the recomputation of the providers which depend of mo_coef
+              TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER
+          else
+              ! The step is accepted:
+              ! criterion -> prev criterion
+
+              ! The replacement "criterion -> prev criterion" is already done
+              ! in trust_region_rho, so if the criterion does not have a reason
+              ! to change, it will change nothing for the criterion and will 
+              ! force the program to provide the new hessian, gradient and 
+              ! convergence criterion for the next iteration.
+              ! But in the case of orbital optimization we diagonalize the CI 
+              ! matrix after the "FREE" statement, so the criterion will change
+      
+              FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
+              PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
+              prev_criterion = C_PROVIDER
+              
+          endif      
+
+          nb_sub_iter = nb_sub_iter + 1
+      enddo
+
+      ! call save_mos() ! depends of the time for 1 iteration
+
+      ! To exit the external loop if must_exit = .True.
+      if (must_exit) then
+          exit
+      endif 
+
+      ! Step accepted, nb iteration + 1
+      nb_iter = nb_iter + 1
+
+      ! Provide the convergence criterion
+      ! Provide the gradient and the hessian for the next iteration
+      PROVIDE cc_PROVIDER
+
+      ! To exit
+      if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then
+        not_converged = .False.
+      endif
+
+      if (nb_iter > optimization_max_nb_iter) then
+        not_converged = .False.
+      endif
+
+      if (delta < thresh_delta) then
+        not_converged = .False.
+      endif
+
+  enddo
+
+  ! Save the final MOs
+  call save_mos()
+
+  ! Diagonalization of the hessian 
+  ! (To see the eigenvalues at the end of the optimization)
+  call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W)
+  
+  deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos)
+
+end
+#+END_SRC
+
+** Cartesian version
+#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt
+subroutine algo_trust_cartesian_template(tmp_n)
+
+  implicit none
+
+  ! Variables
+
+  ! In
+  integer, intent(in) :: tmp_n
+  
+  ! Out
+  ! Rien ou un truc pour savoir si ça c'est bien passé
+  
+  ! Internal
+  double precision, allocatable :: e_val(:), W(:,:), tmp_x(:)
+  double precision              :: criterion, prev_criterion, criterion_model
+  double precision              :: delta, rho
+  logical                       :: not_converged, cancel_step, must_exit
+  integer                       :: nb_iter, nb_sub_iter
+  integer                       :: i,j
+
+  allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n))
+
+  PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER
+
+  ! Initialization
+  delta = 0d0 
+  nb_iter = 0 ! Must start at 0 !!!
+  rho = 0.5d0 ! Must start at 0.5 
+  not_converged = .True. ! Must be true
+
+  ! Compute the criterion before the loop
+  prev_criterion = C_PROVIDER
+
+  do while (not_converged)
+
+      print*,''
+      print*,'******************'
+      print*,'Iteration', nb_iter
+      print*,'******************'
+      print*,''
+
+      if (nb_iter > 0) then
+        PROVIDE H_PROVIDER g_PROVIDER
+      endif
+
+      ! Diagonalization of the hessian 
+      call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W)
+
+      cancel_step = .True. ! To enter in the loop just after
+      nb_sub_iter = 0
+
+      ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
+      do while (cancel_step)
+
+          print*,'-----------------------------'
+          print*,'Iteration:', nb_iter
+          print*,'Sub iteration:', nb_sub_iter
+          print*,'-----------------------------'
+
+          ! Hessian,gradient,Criterion -> x 
+          call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, &
+               prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit)
+
+          if (must_exit) then
+              ! if step_in_trust_region sets must_exit on true for numerical reasons
+              print*,'trust_region_step_w_expected_e sent the message : Exit'
+              exit
+          endif
+
+          ! New coordinates, check the sign 
+          X_PROVIDER = X_PROVIDER - tmp_x
+
+          ! touch X_PROVIDER
+          TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER 
+
+          ! To update the other parameters if needed
+          call #update_parameters()
+
+          ! New criterion
+          PROVIDE C_PROVIDER ! Unnecessary
+          criterion = C_PROVIDER
+
+          ! Criterion -> step accepted or rejected 
+          call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step)
+
+          ! Cancel the previous step
+          if (cancel_step) then
+              ! Replacement by the previous coordinates, check the sign 
+              X_PROVIDER = X_PROVIDER + tmp_x
+
+              ! Avoid the recomputation of the hessian and the gradient
+              TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER
+          endif      
+
+          nb_sub_iter = nb_sub_iter + 1
+      enddo
+
+      ! To exit the external loop if must_exit = .True.
+      if (must_exit) then
+          exit
+      endif 
+
+      ! Step accepted, nb iteration + 1
+      nb_iter = nb_iter + 1
+
+      PROVIDE cc_PROVIDER
+
+      ! To exit
+      if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then
+        not_converged = .False.
+      endif
+
+      if (nb_iter > optimization_max_nb_iter) then
+        not_converged = .False.
+      endif
+
+      if (delta < thresh_delta) then
+        not_converged = .False.
+      endif
+
+  enddo
+  
+ deallocate(e_val, W, tmp_x)
+
+end
+#+END_SRC
+
+** Script template
+#+BEGIN_SRC bash :tangle script_template_mos.sh
+#!/bin/bash
+
+your_file=
+
+your_C_PROVIDER=
+your_H_PROVIDER=
+your_g_PROVIDER=
+your_cc_PROVIDER=
+
+sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file
+sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file
+sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file
+sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file
+#+END_SRC
+
+#+BEGIN_SRC bash :tangle script_template_xyz.sh
+#!/bin/bash
+
+your_file=
+
+your_C_PROVIDER=
+your_X_PROVIDER=
+your_H_PROVIDER=
+your_g_PROVIDER=
+your_cc_PROVIDER=
+
+sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file
+sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file
+sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file
+sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file
+sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file
+#+END_SRC
+
diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f
new file mode 100644
index 00000000..e274ec11
--- /dev/null
+++ b/src/utils_trust_region/apply_mo_rotation.irp.f
@@ -0,0 +1,85 @@
+! Apply MO rotation
+! Subroutine to apply the rotation matrix to the coefficients of the
+! MOs.
+
+! New MOs = Old MOs . Rotation matrix
+
+! *Compute the new MOs with the previous MOs and a rotation matrix*
+
+! Provided:
+! | mo_num                 | integer          | number of MOs           |
+! | ao_num                 | integer          | number of AOs           |
+! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs |
+
+! Intent in:
+! | R(mo_num,mo_num) | double precision | rotation matrix |
+
+! Intent out:
+! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation |
+
+! Internal:
+! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation |
+! | i,j                    | integer          | indexes                      |
+
+subroutine apply_mo_rotation(R,prev_mos)
+  
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the new MOs knowing the rotation matrix
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  double precision, intent(in)  :: R(mo_num,mo_num)
+
+  ! out 
+  double precision, intent(out) :: prev_mos(ao_num,mo_num)
+  
+  ! internal
+  double precision, allocatable :: new_mos(:,:)
+  integer                       :: i,j
+  double precision              :: t1,t2,t3
+
+  print*,''
+  print*,'---apply_mo_rotation---'
+
+  call wall_time(t1)  
+
+  ! Allocation
+  allocate(new_mos(ao_num,mo_num))
+  
+  ! Calculation
+
+  ! Product of old MOs (mo_coef) by Rotation matrix (R) 
+  call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1))
+
+  prev_mos = mo_coef
+  mo_coef = new_mos
+
+  !if (debug) then  
+  !  print*,'New mo_coef : '
+  !  do i = 1, mo_num
+  !    write(*,'(100(F10.5))') mo_coef(i,:)
+  !  enddo
+  !endif
+
+  ! Save the new MOs and change the label
+  mo_label = 'MCSCF'
+  !call save_mos
+  call ezfio_set_determinants_mo_label(mo_label)
+  
+  !print*,'Done, MOs saved'
+
+  ! Deallocation, end
+  deallocate(new_mos)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in apply mo rotation:', t3
+  print*,'---End apply_mo_rotation---'
+
+end subroutine
diff --git a/src/utils_trust_region/apply_mo_rotation.org b/src/utils_trust_region/apply_mo_rotation.org
new file mode 100644
index 00000000..918581b7
--- /dev/null
+++ b/src/utils_trust_region/apply_mo_rotation.org
@@ -0,0 +1,86 @@
+* Apply MO rotation
+Subroutine to apply the rotation matrix to the coefficients of the
+MOs.
+
+New MOs = Old MOs . Rotation matrix
+
+*Compute the new MOs with the previous MOs and a rotation matrix*
+
+Provided:
+| mo_num                 | integer          | number of MOs           |
+| ao_num                 | integer          | number of AOs           |
+| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs |
+
+Intent in:
+| R(mo_num,mo_num) | double precision | rotation matrix |
+
+Intent out:
+| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation |
+
+Internal:
+| new_mos(ao_num,mo_num) | double precision | MOs after the rotation |
+| i,j                    | integer          | indexes                      |
+#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f
+subroutine apply_mo_rotation(R,prev_mos)
+  
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the new MOs knowing the rotation matrix
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  double precision, intent(in)  :: R(mo_num,mo_num)
+
+  ! out 
+  double precision, intent(out) :: prev_mos(ao_num,mo_num)
+  
+  ! internal
+  double precision, allocatable :: new_mos(:,:)
+  integer                       :: i,j
+  double precision              :: t1,t2,t3
+
+  print*,''
+  print*,'---apply_mo_rotation---'
+
+  call wall_time(t1)  
+
+  ! Allocation
+  allocate(new_mos(ao_num,mo_num))
+  
+  ! Calculation
+
+  ! Product of old MOs (mo_coef) by Rotation matrix (R) 
+  call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1))
+
+  prev_mos = mo_coef
+  mo_coef = new_mos
+
+  !if (debug) then  
+  !  print*,'New mo_coef : '
+  !  do i = 1, mo_num
+  !    write(*,'(100(F10.5))') mo_coef(i,:)
+  !  enddo
+  !endif
+
+  ! Save the new MOs and change the label
+  mo_label = 'MCSCF'
+  !call save_mos
+  call ezfio_set_determinants_mo_label(mo_label)
+  
+  !print*,'Done, MOs saved'
+
+  ! Deallocation, end
+  deallocate(new_mos)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in apply mo rotation:', t3
+  print*,'---End apply_mo_rotation---'
+
+end subroutine
+#+END_SRC
diff --git a/src/utils_trust_region/mat_to_vec_index.irp.f b/src/utils_trust_region/mat_to_vec_index.irp.f
new file mode 100644
index 00000000..35e12232
--- /dev/null
+++ b/src/utils_trust_region/mat_to_vec_index.irp.f
@@ -0,0 +1,61 @@
+! Matrix to vector index
+
+! *Compute the index i of a vector element from the indexes p,q of a
+! matrix element*
+
+! Lower diagonal matrix (p,q), p > q -> vector (i)
+
+! If a matrix is antisymmetric it can be reshaped as a vector. And the
+! vector can be reshaped as an antisymmetric matrix
+
+! \begin{align*}
+! \begin{pmatrix}
+! 0 & -1 & -2 & -4 \\
+! 1 & 0  & -3 & -5 \\
+! 2 & 3 & 0  & -6  \\
+! 4 & 5 & 6 & 0
+! \end{pmatrix}
+! \Leftrightarrow
+! \begin{pmatrix}
+! 1 & 2 & 3 & 4 & 5 & 6
+! \end{pmatrix}
+! \end{align*}
+
+! !!! Here the algorithm only work for the lower diagonal !!!
+
+! Input:
+! | p,q | integer | indexes of a matrix element in the lower diagonal |
+! |     |         | p > q, q -> column                                |
+! |     |         | p -> row,                                         |
+! |     |         | q -> column                                       |
+
+! Input:
+! | i | integer | corresponding index in the vector |
+
+
+subroutine mat_to_vec_index(p,q,i)
+
+  include 'pi.h'
+
+  implicit none
+  
+  ! Variables
+  
+  ! in
+  integer, intent(in) :: p,q
+  
+  ! out
+  integer, intent(out) :: i 
+
+  ! internal
+  integer :: a,b
+  double precision :: da
+
+  ! Calculation
+ 
+  a = p-1
+  b = a*(a-1)/2
+  
+  i = q+b
+
+end subroutine
diff --git a/src/utils_trust_region/mat_to_vec_index.org b/src/utils_trust_region/mat_to_vec_index.org
new file mode 100644
index 00000000..50840584
--- /dev/null
+++ b/src/utils_trust_region/mat_to_vec_index.org
@@ -0,0 +1,63 @@
+* Matrix to vector index
+
+*Compute the index i of a vector element from the indexes p,q of a
+matrix element*
+
+Lower diagonal matrix (p,q), p > q -> vector (i)
+
+If a matrix is antisymmetric it can be reshaped as a vector. And the
+vector can be reshaped as an antisymmetric matrix
+
+\begin{align*}
+\begin{pmatrix}
+0 & -1 & -2 & -4 \\
+1 & 0  & -3 & -5 \\
+2 & 3 & 0  & -6  \\
+4 & 5 & 6 & 0
+\end{pmatrix}
+\Leftrightarrow
+\begin{pmatrix}
+1 & 2 & 3 & 4 & 5 & 6
+\end{pmatrix}
+\end{align*}
+
+!!! Here the algorithm only work for the lower diagonal !!!
+
+Input:
+| p,q | integer | indexes of a matrix element in the lower diagonal |
+|     |         | p > q, q -> column                                |
+|     |         | p -> row,                                         |
+|     |         | q -> column                                       |
+
+Input:
+| i | integer | corresponding index in the vector |
+
+#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f
+subroutine mat_to_vec_index(p,q,i)
+
+  include 'pi.h'
+
+  implicit none
+  
+  ! Variables
+  
+  ! in
+  integer, intent(in) :: p,q
+  
+  ! out
+  integer, intent(out) :: i 
+
+  ! internal
+  integer :: a,b
+  double precision :: da
+
+  ! Calculation
+ 
+  a = p-1
+  b = a*(a-1)/2
+  
+  i = q+b
+
+end subroutine
+#+END_SRC
+
diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h
new file mode 100644
index 00000000..bbfabfec
--- /dev/null
+++ b/src/utils_trust_region/pi.h
@@ -0,0 +1,2 @@
+  !logical, parameter :: debug=.False.
+  double precision, parameter :: pi = 3.1415926535897932d0
diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f
new file mode 100644
index 00000000..4738fd67
--- /dev/null
+++ b/src/utils_trust_region/rotation_matrix.irp.f
@@ -0,0 +1,443 @@
+! Rotation matrix
+
+! *Build a rotation matrix from an antisymmetric matrix*
+
+! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as :
+! $$
+! \textbf{R}=\exp(\textbf{A})
+! $$
+
+! So :
+! \begin{align*}
+! \textbf{R}=& \exp(\textbf{A}) \\
+! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\
+! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A}
+! \end{align*}
+
+! With :
+! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$
+! $\tau$ : $\sqrt{-x}$
+! $x$ : eigenvalues of $\textbf{A}^2$
+
+! Input:
+! | A(n,n) | double precision | antisymmetric matrix                                            |
+! | n      | integer          | number of columns of the A matrix                               |
+! | LDA    | integer          | specifies the leading dimension of A, must be at least max(1,n) |
+! | LDR    | integer          | specifies the leading dimension of R, must be at least max(1,n) |
+
+! Output:
+! | R(n,n) | double precision | Rotation matrix                                      |
+! | info   | integer          | if info = 0, the execution is successful             |
+! |        |                  | if info = k, the k-th parameter has an illegal value |
+! |        |                  | if info = -k, the algorithm failed                   |
+
+! Internal:
+! | B(n,n)        | double precision | B = A.A                                                       |
+! | work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork)                 |
+! | lwork         | integer          | dimension of the syev work array >= max(1, 3n-1)              |
+! | W(n,n)        | double precision | eigenvectors of B                                             |
+! | e_val(n)      | double precision | eigenvalues of B                                              |
+! | m_diag(n,n)   | double precision | diagonal matrix with the eigenvalues of B                     |
+! | cos_tau(n,n)  | double precision | diagonal matrix with cos(tau) values                          |
+! | sin_tau(n,n)  | double precision | diagonal matrix with sin cos(tau) values                      |
+! | tau_m1(n,n)   | double precision | diagonal matrix with (tau)^-1 values                          |
+! | part_1(n,n)   | double precision | matrix W.cos_tau.W^t                                          |
+! | part_1a(n,n)  | double precision | matrix cos_tau.W^t                                            |
+! | part_2(n,n)   | double precision | matrix W.tau_m1.sin_tau.W^t.A                                 |
+! | part_2a(n,n)  | double precision | matrix W^t.A                                                  |
+! | part_2b(n,n)  | double precision | matrix sin_tau.W^t.A                                          |
+! | part_2c(n,n)  | double precision | matrix tau_m1.sin_tau.W^t.A                                   |
+! | RR_t(n,n)     | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 |
+! | norm          | integer          | norm of R.R^t-1, must be equal to 0                           |
+! | i,j           | integer          | indexes                                                       |
+
+! Functions:
+! | dnrm2  | double precision | Lapack function, compute the norm of a matrix |
+! | disnan | logical          | Lapack function, check if an element is NaN   |
+
+
+
+subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Rotation matrix to rotate the molecular orbitals.
+  ! If the rotation is too large the transformation is not unitary and must be cancelled.
+  END_DOC
+
+  include 'pi.h'
+
+  ! Variables
+
+  ! in
+  integer, intent(in)             :: n,LDA,LDR
+  double precision, intent(inout) :: A(LDA,n)
+
+  ! out
+  double precision, intent(out)   :: R(LDR,n)
+  integer, intent(out)            :: info
+  logical, intent(out)            :: enforce_step_cancellation
+
+  ! internal
+  double precision, allocatable   :: B(:,:) 
+  double precision, allocatable   :: work(:,:) 
+  double precision, allocatable   :: W(:,:), e_val(:)
+  double precision, allocatable   :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:)
+  double precision, allocatable   :: part_1(:,:),part_1a(:,:)
+  double precision, allocatable   :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:)
+  double precision, allocatable   :: RR_t(:,:)
+  integer                         :: i,j
+  integer                         :: info2, lwork ! for dsyev
+  double precision                :: norm, max_elem, max_elem_A, t1,t2,t3
+
+  ! function
+  double precision              :: dnrm2
+  logical                       :: disnan
+
+  print*,''
+  print*,'---rotation_matrix---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(B(n,n))
+  allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n))
+  allocate(W(n,n),e_val(n))
+  allocate(part_1(n,n),part_1a(n,n))
+  allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n))
+  allocate(RR_t(n,n))
+
+! Pre-conditions
+
+! Initialization
+info=0
+enforce_step_cancellation = .False.
+
+! Size of matrix A must be at least 1 by 1
+if (n<1) then
+  info = 3
+  print*, 'WARNING: invalid parameter 5'
+  print*, 'n<1'
+  return
+endif
+
+! Leading dimension of A must be >= n
+if (LDA < n) then
+  info = 25
+  print*, 'WARNING: invalid parameter 2 or 5'
+  print*, 'LDA < n'
+  return
+endif
+
+! Leading dimension of A must be >= n
+if (LDR < n) then
+  info = 4
+  print*, 'WARNING: invalid parameter 4'
+  print*, 'LDR < n'
+  return
+endif
+
+! Matrix elements of A must by non-NaN
+do j = 1, n
+  do i = 1, n
+    if (disnan(A(i,j))) then
+      info=1
+      print*, 'WARNING: invalid parameter 1'
+      print*, 'NaN element in A matrix'
+      return
+    endif
+  enddo
+enddo
+
+do i = 1, n
+  if (A(i,i) /= 0d0) then
+    print*, 'WARNING: matrix A is not antisymmetric'
+    print*, 'Non 0 element on the diagonal', i, A(i,i)
+    call ABORT
+  endif
+enddo
+
+do j = 1, n
+  do i = 1, n
+    if (A(i,j)+A(j,i)>1d-16) then
+      print*, 'WANRING: matrix A is not antisymmetric'
+      print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i)
+      print*, 'diff:', A(i,j)+A(j,i)
+      call ABORT 
+    endif
+  enddo
+enddo
+
+! Fix for too big elements ! bad idea better to cancel if the error is too big
+!do j = 1, n
+!  do i = 1, n
+!    A(i,j) = mod(A(i,j),2d0*pi)
+!    if (dabs(A(i,j)) > pi) then
+!      A(i,j) = 0d0
+!    endif
+!  enddo
+!enddo
+
+max_elem_A = 0d0
+do j = 1, n
+  do i = 1, n
+    if (ABS(A(i,j)) > ABS(max_elem_A)) then
+      max_elem_A = A(i,j)
+    endif
+  enddo
+enddo
+print*,'max element in A', max_elem_A
+
+if (ABS(max_elem_A) > 2 * pi) then
+   print*,''
+   print*,'WARNING: ABS(max_elem_A) > 2 pi '
+   print*,''
+endif
+
+! B=A.A
+!     - Calculation of the matrix $\textbf{B} = \textbf{A}^2$
+!     - Diagonalization of $\textbf{B}$ 
+!     W, the eigenvectors
+!     e_val, the eigenvalues
+
+
+! Compute B=A.A
+
+call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1))
+
+! Copy B in W, diagonalization will put the eigenvectors in W
+W=B
+
+! Diagonalization of B
+! Eigenvalues -> e_val
+! Eigenvectors -> W
+lwork = 3*n-1
+allocate(work(lwork,n))
+
+print*,'Starting diagonalization ...'
+
+call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2)
+
+deallocate(work)
+
+if (info2 == 0) then
+   print*, 'Diagonalization : Done'
+elseif (info2 < 0) then
+   print*, 'WARNING: error in the diagonalization'
+   print*, 'Illegal value of the ', info2,'-th parameter'
+else
+   print*, "WARNING: Diagonalization failed to converge"
+endif
+
+! Tau^-1, cos(tau), sin(tau)
+!     $$\tau = \sqrt{-x}$$
+!     - Calculation of $\cos(\tau)$  $\Leftrightarrow$ $\cos(\sqrt{-x})$
+!     - Calculation of $\sin(\tau)$  $\Leftrightarrow$ $\sin(\sqrt{-x})$
+!     - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$
+!     These matrices are diagonals
+
+! Diagonal matrix m_diag
+do j = 1, n
+   if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems
+      e_val(j) = 0.d0
+   else
+      e_val(j) = - e_val(j)
+   endif
+enddo
+
+m_diag = 0.d0
+do i = 1, n
+   m_diag(i,i) = e_val(i)
+enddo
+
+! cos_tau
+do j = 1, n
+   do i = 1, n
+      if (i==j) then
+         cos_tau(i,j) = dcos(dsqrt(e_val(i)))
+      else
+         cos_tau(i,j) = 0d0
+      endif
+   enddo
+enddo
+
+! sin_tau
+do j = 1, n
+   do i = 1, n
+      if (i==j) then
+         sin_tau(i,j) = dsin(dsqrt(e_val(i)))
+      else
+         sin_tau(i,j) = 0d0
+      endif
+   enddo
+enddo
+
+! Debug, display the cos_tau and sin_tau matrix
+!if (debug) then
+!   print*, 'cos_tau'
+!   do i = 1, n
+!      print*, cos_tau(i,:)
+!   enddo
+!   print*, 'sin_tau'
+!   do i = 1, n
+!      print*, sin_tau(i,:)
+!   enddo
+!endif
+
+! tau^-1
+do j = 1, n
+   do i = 1, n
+      if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small
+         tau_m1(i,j) = 1d0/(dsqrt(e_val(i)))
+      else
+         tau_m1(i,j) = 0d0
+      endif
+   enddo
+enddo
+
+max_elem = 0d0
+do i = 1, n
+   if (ABS(tau_m1(i,i)) > ABS(max_elem)) then
+      max_elem = tau_m1(i,i)
+   endif
+enddo
+print*,'max elem tau^-1:', max_elem
+
+! Debug
+!print*,'eigenvalues:'
+!do i = 1, n 
+!  print*, e_val(i)
+!enddo
+
+!Debug, display tau^-1
+!if (debug) then
+!   print*, 'tau^-1'
+!   do i = 1, n
+!      print*,tau_m1(i,:)
+!   enddo
+!endif
+
+! Rotation matrix 
+!     \begin{align*}
+!     \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
+!     \end{align*}
+!     \begin{align*}
+!     \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger}
+!     \end{align*}
+!     \begin{align*}
+!     \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
+!     \end{align*}
+
+!     First:
+!     part_1 = dgemm(W, dgemm(cos_tau, W^t))
+!     part_1a = dgemm(cos_tau, W^t)
+!     part_1 = dgemm(W, part_1a)
+!     And:
+!     part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A))))
+!     part_2a = dgemm(W^t, A)
+!     part_2b = dgemm(sin_tau, part_2a)
+!     part_2c = dgemm(tau_m1, part_2b)
+!     part_2 = dgemm(W, part_2c)
+!     Finally:
+!     Rotation matrix, R = part_1+part_2
+
+!     If $R$ is a rotation matrix:
+!     $R.R^T=R^T.R=\textbf{1}$
+
+! part_1
+call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1))
+call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1))
+
+! part_2
+call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1))
+call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1))
+call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1))
+call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1))
+
+! Rotation matrix R
+R = part_1 + part_2
+
+! Matrix check
+! R.R^t and R^t.R must be equal to identity matrix
+do j = 1, n
+   do i=1,n
+      if (i==j) then
+         RR_t(i,j) = 1d0
+      else
+         RR_t(i,j) = 0d0
+      endif
+   enddo
+enddo
+
+call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1))
+
+norm = dnrm2(n*n,RR_t,1)
+print*, 'Rotation matrix check, norm R.R^T = ', norm 
+
+! Debug
+!if (debug) then
+!   print*, 'RR_t'
+!   do i = 1, n
+!      print*, RR_t(i,:)
+!   enddo
+!endif
+
+! Post conditions
+
+! Check if R.R^T=1
+max_elem = 0d0
+do j = 1, n
+   do i = 1, n 
+      if (ABS(RR_t(i,j)) > ABS(max_elem)) then
+         max_elem = RR_t(i,j)
+      endif
+   enddo
+enddo
+
+print*, 'Max error in R.R^T:', max_elem
+print*, 'e_val(1):', e_val(1)
+print*, 'e_val(n):', e_val(n)
+print*, 'max elem in A:', max_elem_A
+
+if (ABS(max_elem) > 1d-12) then
+  print*, 'WARNING: max error in R.R^T > 1d-12'
+  print*, 'Enforce the step cancellation'
+  enforce_step_cancellation = .True.
+endif
+
+! Matrix elements of R must by non-NaN
+do j = 1,n
+   do i = 1,LDR
+      if (disnan(R(i,j))) then
+         info = 666
+         print*, 'NaN in rotation matrix'
+         call ABORT
+      endif
+   enddo
+enddo
+
+! Display
+!if (debug) then
+!   print*,'Rotation matrix :'
+!   do i = 1, n
+!      write(*,'(100(F10.5))') R(i,:)
+!   enddo
+!endif
+
+! Deallocation, end
+
+deallocate(B)
+  deallocate(m_diag,cos_tau,sin_tau,tau_m1)
+  deallocate(W,e_val)
+  deallocate(part_1,part_1a)
+  deallocate(part_2,part_2a,part_2b,part_2c)
+  deallocate(RR_t)
+
+  call wall_time(t2)
+  t3 = t2-t1
+  print*,'Time in rotation matrix:', t3
+
+  print*,'---End rotation_matrix---'
+
+end subroutine
diff --git a/src/utils_trust_region/rotation_matrix.org b/src/utils_trust_region/rotation_matrix.org
new file mode 100644
index 00000000..73ba0298
--- /dev/null
+++ b/src/utils_trust_region/rotation_matrix.org
@@ -0,0 +1,454 @@
+* Rotation matrix
+
+*Build a rotation matrix from an antisymmetric matrix*
+
+Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as :
+$$
+\textbf{R}=\exp(\textbf{A})
+$$
+
+So :
+\begin{align*}
+\textbf{R}=& \exp(\textbf{A}) \\
+=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\
+=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A}
+\end{align*}
+
+With :
+$\textbf{W}$ : eigenvectors of $\textbf{A}^2$
+$\tau$ : $\sqrt{-x}$
+$x$ : eigenvalues of $\textbf{A}^2$
+
+Input:
+| A(n,n) | double precision | antisymmetric matrix                                            |
+| n      | integer          | number of columns of the A matrix                               |
+| LDA    | integer          | specifies the leading dimension of A, must be at least max(1,n) |
+| LDR    | integer          | specifies the leading dimension of R, must be at least max(1,n) |
+
+Output:
+| R(n,n) | double precision | Rotation matrix                                      |
+| info   | integer          | if info = 0, the execution is successful             |
+|        |                  | if info = k, the k-th parameter has an illegal value |
+|        |                  | if info = -k, the algorithm failed                   |
+
+Internal:
+| B(n,n)        | double precision | B = A.A                                                       |
+| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork)                 |
+| lwork         | integer          | dimension of the syev work array >= max(1, 3n-1)              |
+| W(n,n)        | double precision | eigenvectors of B                                             |
+| e_val(n)      | double precision | eigenvalues of B                                              |
+| m_diag(n,n)   | double precision | diagonal matrix with the eigenvalues of B                     |
+| cos_tau(n,n)  | double precision | diagonal matrix with cos(tau) values                          |
+| sin_tau(n,n)  | double precision | diagonal matrix with sin cos(tau) values                      |
+| tau_m1(n,n)   | double precision | diagonal matrix with (tau)^-1 values                          |
+| part_1(n,n)   | double precision | matrix W.cos_tau.W^t                                          |
+| part_1a(n,n)  | double precision | matrix cos_tau.W^t                                            |
+| part_2(n,n)   | double precision | matrix W.tau_m1.sin_tau.W^t.A                                 |
+| part_2a(n,n)  | double precision | matrix W^t.A                                                  |
+| part_2b(n,n)  | double precision | matrix sin_tau.W^t.A                                          |
+| part_2c(n,n)  | double precision | matrix tau_m1.sin_tau.W^t.A                                   |
+| RR_t(n,n)     | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 |
+| norm          | integer          | norm of R.R^t-1, must be equal to 0                           |
+| i,j           | integer          | indexes                                                       |
+
+Functions:
+| dnrm2  | double precision | Lapack function, compute the norm of a matrix |
+| disnan | logical          | Lapack function, check if an element is NaN   |
+
+
+#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation)
+
+  implicit none
+
+  BEGIN_DOC
+  ! Rotation matrix to rotate the molecular orbitals.
+  ! If the rotation is too large the transformation is not unitary and must be cancelled.
+  END_DOC
+
+  include 'pi.h'
+
+  ! Variables
+
+  ! in
+  integer, intent(in)             :: n,LDA,LDR
+  double precision, intent(inout) :: A(LDA,n)
+
+  ! out
+  double precision, intent(out)   :: R(LDR,n)
+  integer, intent(out)            :: info
+  logical, intent(out)            :: enforce_step_cancellation
+
+  ! internal
+  double precision, allocatable   :: B(:,:) 
+  double precision, allocatable   :: work(:,:) 
+  double precision, allocatable   :: W(:,:), e_val(:)
+  double precision, allocatable   :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:)
+  double precision, allocatable   :: part_1(:,:),part_1a(:,:)
+  double precision, allocatable   :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:)
+  double precision, allocatable   :: RR_t(:,:)
+  integer                         :: i,j
+  integer                         :: info2, lwork ! for dsyev
+  double precision                :: norm, max_elem, max_elem_A, t1,t2,t3
+
+  ! function
+  double precision              :: dnrm2
+  logical                       :: disnan
+
+  print*,''
+  print*,'---rotation_matrix---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(B(n,n))
+  allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n))
+  allocate(W(n,n),e_val(n))
+  allocate(part_1(n,n),part_1a(n,n))
+  allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n))
+  allocate(RR_t(n,n))
+#+END_SRC
+
+** Pre-conditions
+#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! Initialization
+  info=0
+  enforce_step_cancellation = .False.
+
+  ! Size of matrix A must be at least 1 by 1
+  if (n<1) then
+    info = 3
+    print*, 'WARNING: invalid parameter 5'
+    print*, 'n<1'
+    return
+  endif
+
+  ! Leading dimension of A must be >= n
+  if (LDA < n) then
+    info = 25
+    print*, 'WARNING: invalid parameter 2 or 5'
+    print*, 'LDA < n'
+    return
+  endif
+
+  ! Leading dimension of A must be >= n
+  if (LDR < n) then
+    info = 4
+    print*, 'WARNING: invalid parameter 4'
+    print*, 'LDR < n'
+    return
+  endif
+
+  ! Matrix elements of A must by non-NaN
+  do j = 1, n
+    do i = 1, n
+      if (disnan(A(i,j))) then
+        info=1
+        print*, 'WARNING: invalid parameter 1'
+        print*, 'NaN element in A matrix'
+        return
+      endif
+    enddo
+  enddo
+
+  do i = 1, n
+    if (A(i,i) /= 0d0) then
+      print*, 'WARNING: matrix A is not antisymmetric'
+      print*, 'Non 0 element on the diagonal', i, A(i,i)
+      call ABORT
+    endif
+  enddo
+
+  do j = 1, n
+    do i = 1, n
+      if (A(i,j)+A(j,i)>1d-16) then
+        print*, 'WANRING: matrix A is not antisymmetric'
+        print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i)
+        print*, 'diff:', A(i,j)+A(j,i)
+        call ABORT 
+      endif
+    enddo
+  enddo
+
+  ! Fix for too big elements ! bad idea better to cancel if the error is too big
+  !do j = 1, n
+  !  do i = 1, n
+  !    A(i,j) = mod(A(i,j),2d0*pi)
+  !    if (dabs(A(i,j)) > pi) then
+  !      A(i,j) = 0d0
+  !    endif
+  !  enddo
+  !enddo
+
+  max_elem_A = 0d0
+  do j = 1, n
+    do i = 1, n
+      if (ABS(A(i,j)) > ABS(max_elem_A)) then
+        max_elem_A = A(i,j)
+      endif
+    enddo
+  enddo
+  print*,'max element in A', max_elem_A
+
+  if (ABS(max_elem_A) > 2 * pi) then
+     print*,''
+     print*,'WARNING: ABS(max_elem_A) > 2 pi '
+     print*,''
+  endif
+
+#+END_SRC
+
+** Calculations
+
+*** B=A.A
+    - Calculation of the matrix $\textbf{B} = \textbf{A}^2$
+    - Diagonalization of $\textbf{B}$ 
+    W, the eigenvectors
+    e_val, the eigenvalues
+
+    #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! Compute B=A.A
+
+  call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1))
+
+  ! Copy B in W, diagonalization will put the eigenvectors in W
+  W=B
+
+  ! Diagonalization of B
+  ! Eigenvalues -> e_val
+  ! Eigenvectors -> W
+  lwork = 3*n-1
+  allocate(work(lwork,n))
+
+  print*,'Starting diagonalization ...'
+
+  call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2)
+
+  deallocate(work)
+
+  if (info2 == 0) then
+     print*, 'Diagonalization : Done'
+  elseif (info2 < 0) then
+     print*, 'WARNING: error in the diagonalization'
+     print*, 'Illegal value of the ', info2,'-th parameter'
+  else
+     print*, "WARNING: Diagonalization failed to converge"
+  endif
+    #+END_SRC
+
+*** Tau^-1, cos(tau), sin(tau)
+    $$\tau = \sqrt{-x}$$
+    - Calculation of $\cos(\tau)$  $\Leftrightarrow$ $\cos(\sqrt{-x})$
+    - Calculation of $\sin(\tau)$  $\Leftrightarrow$ $\sin(\sqrt{-x})$
+    - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$
+    These matrices are diagonals
+    #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! Diagonal matrix m_diag
+  do j = 1, n
+     if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems
+        e_val(j) = 0.d0
+     else
+        e_val(j) = - e_val(j)
+     endif
+  enddo
+
+  m_diag = 0.d0
+  do i = 1, n
+     m_diag(i,i) = e_val(i)
+  enddo
+
+  ! cos_tau
+  do j = 1, n
+     do i = 1, n
+        if (i==j) then
+           cos_tau(i,j) = dcos(dsqrt(e_val(i)))
+        else
+           cos_tau(i,j) = 0d0
+        endif
+     enddo
+  enddo
+
+  ! sin_tau
+  do j = 1, n
+     do i = 1, n
+        if (i==j) then
+           sin_tau(i,j) = dsin(dsqrt(e_val(i)))
+        else
+           sin_tau(i,j) = 0d0
+        endif
+     enddo
+  enddo
+
+  ! Debug, display the cos_tau and sin_tau matrix
+  !if (debug) then
+  !   print*, 'cos_tau'
+  !   do i = 1, n
+  !      print*, cos_tau(i,:)
+  !   enddo
+  !   print*, 'sin_tau'
+  !   do i = 1, n
+  !      print*, sin_tau(i,:)
+  !   enddo
+  !endif
+
+  ! tau^-1
+  do j = 1, n
+     do i = 1, n
+        if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small
+           tau_m1(i,j) = 1d0/(dsqrt(e_val(i)))
+        else
+           tau_m1(i,j) = 0d0
+        endif
+     enddo
+  enddo
+
+  max_elem = 0d0
+  do i = 1, n
+     if (ABS(tau_m1(i,i)) > ABS(max_elem)) then
+        max_elem = tau_m1(i,i)
+     endif
+  enddo
+  print*,'max elem tau^-1:', max_elem
+
+  ! Debug
+  !print*,'eigenvalues:'
+  !do i = 1, n 
+  !  print*, e_val(i)
+  !enddo
+
+  !Debug, display tau^-1
+  !if (debug) then
+  !   print*, 'tau^-1'
+  !   do i = 1, n
+  !      print*,tau_m1(i,:)
+  !   enddo
+  !endif
+    #+END_SRC
+
+*** Rotation matrix 
+    \begin{align*}
+    \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
+    \end{align*}
+    \begin{align*}
+    \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger}
+    \end{align*}
+    \begin{align*}
+    \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
+    \end{align*}
+
+    First:
+    part_1 = dgemm(W, dgemm(cos_tau, W^t))
+    part_1a = dgemm(cos_tau, W^t)
+    part_1 = dgemm(W, part_1a)
+    And:
+    part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A))))
+    part_2a = dgemm(W^t, A)
+    part_2b = dgemm(sin_tau, part_2a)
+    part_2c = dgemm(tau_m1, part_2b)
+    part_2 = dgemm(W, part_2c)
+    Finally:
+    Rotation matrix, R = part_1+part_2
+
+    If $R$ is a rotation matrix:
+    $R.R^T=R^T.R=\textbf{1}$
+    #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! part_1
+  call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1))
+  call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1))
+
+  ! part_2
+  call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1))
+  call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1))
+  call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1))
+  call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1))
+
+  ! Rotation matrix R
+  R = part_1 + part_2
+
+  ! Matrix check
+  ! R.R^t and R^t.R must be equal to identity matrix
+  do j = 1, n
+     do i=1,n
+        if (i==j) then
+           RR_t(i,j) = 1d0
+        else
+           RR_t(i,j) = 0d0
+        endif
+     enddo
+  enddo
+
+  call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1))
+
+  norm = dnrm2(n*n,RR_t,1)
+  print*, 'Rotation matrix check, norm R.R^T = ', norm 
+
+  ! Debug
+  !if (debug) then
+  !   print*, 'RR_t'
+  !   do i = 1, n
+  !      print*, RR_t(i,:)
+  !   enddo
+  !endif
+    #+END_SRC
+
+*** Post conditions
+    #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  ! Check if R.R^T=1
+  max_elem = 0d0
+  do j = 1, n
+     do i = 1, n 
+        if (ABS(RR_t(i,j)) > ABS(max_elem)) then
+           max_elem = RR_t(i,j)
+        endif
+     enddo
+  enddo
+
+  print*, 'Max error in R.R^T:', max_elem
+  print*, 'e_val(1):', e_val(1)
+  print*, 'e_val(n):', e_val(n)
+  print*, 'max elem in A:', max_elem_A
+
+  if (ABS(max_elem) > 1d-12) then
+    print*, 'WARNING: max error in R.R^T > 1d-12'
+    print*, 'Enforce the step cancellation'
+    enforce_step_cancellation = .True.
+  endif
+
+  ! Matrix elements of R must by non-NaN
+  do j = 1,n
+     do i = 1,LDR
+        if (disnan(R(i,j))) then
+           info = 666
+           print*, 'NaN in rotation matrix'
+           call ABORT
+        endif
+     enddo
+  enddo
+
+  ! Display
+  !if (debug) then
+  !   print*,'Rotation matrix :'
+  !   do i = 1, n
+  !      write(*,'(100(F10.5))') R(i,:)
+  !   enddo
+  !endif
+    #+END_SRC
+
+** Deallocation, end
+   #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
+  deallocate(B)
+  deallocate(m_diag,cos_tau,sin_tau,tau_m1)
+  deallocate(W,e_val)
+  deallocate(part_1,part_1a)
+  deallocate(part_2,part_2a,part_2b,part_2c)
+  deallocate(RR_t)
+
+  call wall_time(t2)
+  t3 = t2-t1
+  print*,'Time in rotation matrix:', t3
+
+  print*,'---End rotation_matrix---'
+
+end subroutine
+   #+END_SRC
+
diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f
new file mode 100644
index 00000000..bdd1f6ba
--- /dev/null
+++ b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f
@@ -0,0 +1,64 @@
+! Rotation matrix in a subspace to rotation matrix in the full space
+
+! Usually, we are using a list of MOs, for exemple the active ones. When
+! we compute a rotation matrix to rotate the MOs, we just compute a
+! rotation matrix for these MOs in order to reduce the size of the
+! matrix which has to be computed. Since the computation of a rotation
+! matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to
+! reuce the number of MOs involved.
+! After that we replace the rotation matrix in the full space by
+! building the elements of the rotation matrix in the full space from
+! the elements of the rotation matrix in the subspace and adding some 0
+! on the extradiagonal elements and some 1 on the diagonal elements,
+! for the MOs that are not involved in the rotation.
+
+! Provided:
+! | mo_num      | integer | Number of MOs                 |
+
+! Input:
+! | m           | integer          | Size of tmp_list, m <= mo_num   |
+! | tmp_list(m) | integer          | List of MOs                     |
+! | tmp_R(m,m)  | double precision | Rotation matrix in the space of |
+! |             |                  | the MOs containing by tmp_list  |
+
+! Output:
+! | R(mo_num,mo_num | double precision | Rotation matrix in the space |
+! |                 |                  | of all the MOs               |
+
+! Internal:
+! | i,j         | integer | indexes in the full space |
+! | tmp_i,tmp_j | integer | indexes in the subspace   |
+
+
+subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R)
+
+  BEGIN_DOC
+  ! Compute the full rotation matrix from a smaller one
+  END_DOC
+  
+  implicit none
+
+  ! in
+  integer, intent(in)           :: m, tmp_list(m)
+  double precision, intent(in)  :: tmp_R(m,m)
+  
+  ! out
+  double precision, intent(out) :: R(mo_num,mo_num)
+ 
+  ! internal
+  integer                       :: i,j,tmp_i,tmp_j
+
+  ! tmp_R to R, subspace to full space
+  R = 0d0
+  do i = 1, mo_num
+    R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital
+  enddo
+  do tmp_j = 1, m
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, m
+      i = tmp_list(tmp_i)
+      R(i,j) = tmp_R(tmp_i,tmp_j)
+    enddo
+  enddo
+ 
+end
diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.org b/src/utils_trust_region/sub_to_full_rotation_matrix.org
new file mode 100644
index 00000000..16434dc8
--- /dev/null
+++ b/src/utils_trust_region/sub_to_full_rotation_matrix.org
@@ -0,0 +1,65 @@
+* Rotation matrix in a subspace to rotation matrix in the full space
+
+Usually, we are using a list of MOs, for exemple the active ones. When
+we compute a rotation matrix to rotate the MOs, we just compute a
+rotation matrix for these MOs in order to reduce the size of the
+matrix which has to be computed. Since the computation of a rotation
+matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to
+reuce the number of MOs involved.
+After that we replace the rotation matrix in the full space by
+building the elements of the rotation matrix in the full space from
+the elements of the rotation matrix in the subspace and adding some 0
+on the extradiagonal elements and some 1 on the diagonal elements,
+for the MOs that are not involved in the rotation.
+
+Provided:
+| mo_num      | integer | Number of MOs                 |
+
+Input:
+| m           | integer          | Size of tmp_list, m <= mo_num   |
+| tmp_list(m) | integer          | List of MOs                     |
+| tmp_R(m,m)  | double precision | Rotation matrix in the space of |
+|             |                  | the MOs containing by tmp_list  |
+
+Output:
+| R(mo_num,mo_num | double precision | Rotation matrix in the space |
+|                 |                  | of all the MOs               |
+
+Internal:
+| i,j         | integer | indexes in the full space |
+| tmp_i,tmp_j | integer | indexes in the subspace   |
+
+#+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f 
+subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R)
+
+  BEGIN_DOC
+  ! Compute the full rotation matrix from a smaller one
+  END_DOC
+  
+  implicit none
+
+  ! in
+  integer, intent(in)           :: m, tmp_list(m)
+  double precision, intent(in)  :: tmp_R(m,m)
+  
+  ! out
+  double precision, intent(out) :: R(mo_num,mo_num)
+ 
+  ! internal
+  integer                       :: i,j,tmp_i,tmp_j
+
+  ! tmp_R to R, subspace to full space
+  R = 0d0
+  do i = 1, mo_num
+    R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital
+  enddo
+  do tmp_j = 1, m
+    j = tmp_list(tmp_j)
+    do tmp_i = 1, m
+      i = tmp_list(tmp_i)
+      R(i,j) = tmp_R(tmp_i,tmp_j)
+    enddo
+  enddo
+ 
+end
+#+END_SRC
diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f
new file mode 100644
index 00000000..b7d849d1
--- /dev/null
+++ b/src/utils_trust_region/trust_region_expected_e.irp.f
@@ -0,0 +1,119 @@
+! Predicted energy : e_model
+
+! *Compute the energy predicted by the Taylor series*
+
+! The energy is predicted using a Taylor expansion truncated at te 2nd
+! order :
+
+! \begin{align*}
+! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2)
+! \end{align*}
+
+! Input:
+! | n           | integer          | m*(m-1)/2                |
+! | v_grad(n)   | double precision | gradient                 |
+! | H(n,n)      | double precision | hessian                  |
+! | x(n)        | double precision | Step in the trust region |
+! | prev_energy | double precision | previous energy          |
+
+! Output:
+! | e_model | double precision | predicted energy after the rotation of the MOs |
+
+! Internal:
+! | part_1  | double precision | v_grad^T.x    |
+! | part_2  | double precision | 1/2 . x^T.H.x |
+! | part_2a | double precision | H.x           |
+! | i,j     | integer          | indexes       |
+
+! Function:
+! | ddot | double precision | dot product (Lapack) |
+
+
+subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model)
+   
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the expected criterion/energy after the application of the step x
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: v_grad(n),H(n,n),x(n)
+  double precision, intent(in)  :: prev_energy
+
+  ! out
+  double precision, intent(out) :: e_model
+
+  ! internal
+  double precision              :: part_1, part_2, t1,t2,t3
+  double precision, allocatable :: part_2a(:)
+
+  integer                       :: i,j
+
+  !Function
+  double precision              :: ddot
+
+  print*,''
+  print*,'---Trust_e_model---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(part_2a(n))
+
+! Calculations
+
+! part_1 corresponds to the product g.x
+! part_2a corresponds to the product H.x
+! part_2 corresponds to the product 0.5*(x^T.H.x)
+
+! TODO: remove the dot products
+
+
+! Product v_grad.x
+  part_1 = ddot(n,v_grad,1,x,1)
+ 
+  !if (debug) then
+    print*,'g.x : ', part_1
+  !endif  
+
+  ! Product H.x
+  call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1)
+
+  ! Product 1/2 . x^T.H.x
+  part_2 = 0.5d0 * ddot(n,x,1,part_2a,1)
+
+  !if (debug) then
+    print*,'1/2*x^T.H.x : ', part_2 
+  !endif
+
+  print*,'prev_energy', prev_energy
+
+  ! Sum
+  e_model = prev_energy + part_1 + part_2
+
+  ! Writing the predicted energy
+  print*, 'Predicted energy after the rotation : ', e_model
+  print*, 'Previous energy - predicted energy:', prev_energy - e_model
+  
+  ! Can be deleted, already in another subroutine
+  if (DABS(prev_energy - e_model) < 1d-12 ) then 
+    print*,'WARNING: ABS(prev_energy - e_model) < 1d-12'
+  endif
+
+  ! Deallocation
+  deallocate(part_2a)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust e model:', t3
+
+  print*,'---End trust_e_model---'
+  print*,''
+ 
+end subroutine
diff --git a/src/utils_trust_region/trust_region_expected_e.org b/src/utils_trust_region/trust_region_expected_e.org
new file mode 100644
index 00000000..58c8f804
--- /dev/null
+++ b/src/utils_trust_region/trust_region_expected_e.org
@@ -0,0 +1,121 @@
+* Predicted energy : e_model
+
+*Compute the energy predicted by the Taylor series*
+
+The energy is predicted using a Taylor expansion truncated at te 2nd
+order :
+
+\begin{align*}
+E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2)
+\end{align*}
+
+Input:
+| n           | integer          | m*(m-1)/2                |
+| v_grad(n)   | double precision | gradient                 |
+| H(n,n)      | double precision | hessian                  |
+| x(n)        | double precision | Step in the trust region |
+| prev_energy | double precision | previous energy          |
+
+Output:
+| e_model | double precision | predicted energy after the rotation of the MOs |
+
+Internal:
+| part_1  | double precision | v_grad^T.x    |
+| part_2  | double precision | 1/2 . x^T.H.x |
+| part_2a | double precision | H.x           |
+| i,j     | integer          | indexes       |
+
+Function:
+| ddot | double precision | dot product (Lapack) |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f
+subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model)
+   
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the expected criterion/energy after the application of the step x
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: v_grad(n),H(n,n),x(n)
+  double precision, intent(in)  :: prev_energy
+
+  ! out
+  double precision, intent(out) :: e_model
+
+  ! internal
+  double precision              :: part_1, part_2, t1,t2,t3
+  double precision, allocatable :: part_2a(:)
+
+  integer                       :: i,j
+
+  !Function
+  double precision              :: ddot
+
+  print*,''
+  print*,'---Trust_e_model---'
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(part_2a(n))
+#+END_SRC
+
+** Calculations
+
+part_1 corresponds to the product g.x
+part_2a corresponds to the product H.x
+part_2 corresponds to the product 0.5*(x^T.H.x)
+
+TODO: remove the dot products
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f
+  ! Product v_grad.x
+  part_1 = ddot(n,v_grad,1,x,1)
+ 
+  !if (debug) then
+    print*,'g.x : ', part_1
+  !endif  
+
+  ! Product H.x
+  call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1)
+
+  ! Product 1/2 . x^T.H.x
+  part_2 = 0.5d0 * ddot(n,x,1,part_2a,1)
+
+  !if (debug) then
+    print*,'1/2*x^T.H.x : ', part_2 
+  !endif
+
+  print*,'prev_energy', prev_energy
+
+  ! Sum
+  e_model = prev_energy + part_1 + part_2
+
+  ! Writing the predicted energy
+  print*, 'Predicted energy after the rotation : ', e_model
+  print*, 'Previous energy - predicted energy:', prev_energy - e_model
+  
+  ! Can be deleted, already in another subroutine
+  if (DABS(prev_energy - e_model) < 1d-12 ) then 
+    print*,'WARNING: ABS(prev_energy - e_model) < 1d-12'
+  endif
+
+  ! Deallocation
+  deallocate(part_2a)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust e model:', t3
+
+  print*,'---End trust_e_model---'
+  print*,''
+ 
+end subroutine 
+#+END_SRC 
diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f
new file mode 100644
index 00000000..85e7bf2a
--- /dev/null
+++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f
@@ -0,0 +1,1655 @@
+! Newton's method to find the optimal lambda
+
+! *Compute the lambda value for the trust region*
+
+! This subroutine uses the Newton method in order to find the optimal
+! lambda. This constant is added on the diagonal of the hessian to shift
+! the eiganvalues. It has a double role:
+! - ensure that the resulting hessian is positive definite for the
+!   Newton method
+! - constrain the step in the trust region, i.e.,
+!   $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius
+!   of the trust region.
+! We search $\lambda$ which minimizes
+! \begin{align*}
+!   f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2
+! \end{align*}
+! or
+! \begin{align*}
+!   \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2
+! \end{align*}
+! and gives obviously 0 in both cases. \newline
+
+! There are several cases:
+! - If $\textbf{H}$ is positive definite the interval containing the
+!   solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). 
+! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot
+!   \textbf{g} \neq 0$ then the interval containing
+!   the solution is  $\lambda \in (-h_1, \infty)$.
+! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot
+!   \textbf{g} = 0$ then the interval containing the solution is
+!   $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| <
+!   10^{-12}$ are not computed, so the term where $i = 1$ is
+!   automatically removed and this case becomes similar to the previous one.
+
+! So to avoid numerical problems (cf. trust_region) we start the
+! algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$,
+! with $\epsilon$ a little constant.
+! The research must be restricted to the interval containing the
+! solution. For that reason a little trust region in 1D is used.  
+
+! The Newton method to find the optimal $\lambda$ is :
+! \begin{align*}
+!   \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\
+! \end{align*}
+! $f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to
+! $\lambda$ at the l-th iteration,
+! $f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to
+! $\lambda$ at the l-th iteration.\newline
+
+! Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1}
+! f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as 
+! \begin{align*}
+!   y \leq \alpha
+! \end{align*}
+! with $\alpha$ a scalar representing the trust length (trust region in
+! 1D) where the function $f$ or $\tilde{f}$ is correctly describe by the
+! Taylor series truncated at the second order. Thus, if $y > \alpha$,
+! the constraint is applied as  
+! \begin{align*}
+!   y^* = \alpha \frac{y}{|y|}
+! \end{align*}
+! with $y^*$ the solution in the trust region. 
+
+! The size of the trust region evolves in function of $\rho$ as for the
+! trust region seen previously cf. trust_region, rho_model.
+! The prediction of the value of $f$ or $\tilde{f}$ is done using the
+! Taylor series truncated at the second order cf. "trust_region",
+! "trust_e_model". 
+
+! The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 -
+! \Delta^2)^2$ with respect to $\lambda$ are:
+! \begin{align*}
+!   \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+!   = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)
+!   \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+! \end{align*}
+! \begin{align*}
+! \frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+! \end{align*}
+
+! The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 -
+! 1/\Delta^2)^2$ with respect to $\lambda$ are:
+! \begin{align*}
+!   \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+!   &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+!        {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} 
+!      - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}}
+!        {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+!   &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+!        \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+!       - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+! \end{align*}
+
+! \begin{align*}
+!   \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+!   &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}
+!    {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} 
+!   - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}
+!    {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+!   &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}
+!    {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+!   - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}
+!    {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+! \end{align*}
+
+! Provided in qp_edit:
+! | thresh_rho_2          |
+! | thresh_cc             |
+! | nb_it_max_lambda      |
+! | version_lambda_search |
+! | nb_it_max_pre_search  |
+! see qp_edit for more details
+
+! Input:
+! | n          | integer          | m*(m-1)/2                  |
+! | e_val(n)   | double precision | eigenvalues of the hessian |
+! | tmp_wtg(n) | double precision | w_i^T.v_grad(i)            |
+! | delta      | double precision | delta for the trust region |
+
+! Output:
+! | lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step |
+! |        |                  | lambda > 0                                                               |
+
+! Internal:
+! | d1_N        | double precision | value of d1_norm_trust_region                                        |
+! | d2_N        | double precision | value of d2_norm_trust_region                                        |
+! | f_N         | double precision | value of f_norm_trust_region                                         |
+! | prev_f_N    | double precision | previous value of f_norm_trust_region                                |
+! | f_R         | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2               |
+! | prev_f_R    | double precision | previous value of f_R                                                |
+! | model       | double precision | predicted value of f_R from prev_f_R and y                           |
+! | d_1         | double precision | value of the first derivative                                        |
+! | d_2         | double precision | value of the second derivative                                       |
+! | y           | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda               |
+! | prev_lambda | double precision | previous value of lambda                                             |
+! | t1,t2,t3    | double precision | wall time                                                            |
+! | i           | integer          | index                                                                |
+! | epsilon     | double precision | little constant to avoid numerical problem                           |
+! | rho_2       | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R |
+! | version     | integer          | version of the root finding method                                   |
+
+! Function:
+! | d1_norm_trust_region         | double precision | first derivative with respect to lambda of  (norm(x)^2 - Delta^2)^2     |
+! | d2_norm_trust_region         | double precision | first derivative with respect to lambda of  (norm(x)^2 - Delta^2)^2     |
+! | d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of  (1/norm(x)^2 - 1/Delta^2)^2 |
+! | d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 |
+! | f_norm_trust_region          | double precision | value of norm(x)^2                                                      |
+
+
+
+subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Research the optimal lambda to constrain the step size in the trust region
+  END_DOC
+
+  implicit none
+  
+  ! Variables
+  
+  ! in
+  integer, intent(in)             :: n
+  double precision, intent(inout) :: e_val(n)
+  double precision, intent(in)    :: delta
+  double precision, intent(in)    :: tmp_wtg(n)
+
+  ! out
+  double precision, intent(out)   :: lambda
+
+  ! Internal
+  double precision                :: d1_N, d2_N, f_N, prev_f_N
+  double precision                :: prev_f_R, f_R
+  double precision                :: model
+  double precision                :: d_1, d_2
+  double precision                :: t1,t2,t3
+  integer                         :: i
+  double precision                :: epsilon
+  double precision                :: y
+  double precision                :: prev_lambda
+  double precision                :: rho_2
+  double precision                :: alpha
+  integer                         :: version
+
+  ! Functions
+  double precision                :: d1_norm_trust_region,d1_norm_trust_region_omp
+  double precision                :: d2_norm_trust_region, d2_norm_trust_region_omp
+  double precision                :: f_norm_trust_region, f_norm_trust_region_omp
+  double precision                :: d1_norm_inverse_trust_region
+  double precision                :: d2_norm_inverse_trust_region
+  double precision                :: d1_norm_inverse_trust_region_omp
+  double precision                :: d2_norm_inverse_trust_region_omp
+
+  print*,''
+  print*,'---Trust_newton---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! version_lambda_search
+  ! 1 -> ||x||^2 - delta^2 = 0,
+  ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better)
+  if (version_lambda_search == 1) then
+    print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0'
+  else
+    print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0'
+  endif
+  ! Version 2 is normally better
+
+
+
+! Resolution with the Newton method:
+
+
+  ! Initialization
+  epsilon = 1d-4
+  lambda =MAX(0d0, -e_val(1))
+  
+  ! Pre research of lambda to start near the optimal lambda
+  ! by adding a constant epsilon and changing the constant to
+  ! have ||x(lambda + epsilon)|| ~ delta, before setting
+  ! lambda = lambda + epsilon 
+  print*, 'Pre research of lambda:'
+  print*,'Initial lambda =', lambda
+  f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
+  print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta 
+  i = 1
+  
+  ! To increase lambda
+  if (f_N > delta**2) then
+    print*,'Increasing lambda...'
+    do while (f_N > delta**2 .and. i <= nb_it_max_pre_search)
+
+      ! Update the previous norm
+      prev_f_N = f_N
+      ! New epsilon
+      epsilon = epsilon * 2d0
+      ! New norm
+      f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
+
+      print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta
+      
+      ! Security
+      if (prev_f_N < f_N) then
+        print*,'WARNING, error: prev_f_N < f_N, exit'
+        epsilon = epsilon * 0.5d0
+        i = nb_it_max_pre_search + 1
+      endif
+
+      i = i + 1
+    enddo
+  
+  ! To reduce lambda
+  else
+     print*,'Reducing lambda...'
+     do while (f_N < delta**2 .and. i <= nb_it_max_pre_search)
+
+       ! Update the previous norm
+       prev_f_N = f_N  
+       ! New epsilon
+       epsilon = epsilon * 0.5d0
+       ! New norm
+       f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
+
+       print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta
+
+       ! Security
+       if (prev_f_N > f_N) then
+         print*,'WARNING, error: prev_f_N > f_N, exit'
+         epsilon = epsilon * 2d0
+         i = nb_it_max_pre_search + 1
+      endif
+
+      i = i + 1
+    enddo
+  endif
+
+  print*,'End of the pre research of lambda'
+  
+  ! New value of lambda
+  lambda = lambda + epsilon
+
+  print*, 'e_val(1):', e_val(1)
+  print*, 'Staring point, lambda =', lambda
+  
+  ! thresh_cc, threshold for the research of the optimal lambda
+  ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc
+  ! thresh_rho_2, threshold to cancel the step in the research
+  ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2
+  print*,'Threshold for the CC:', thresh_cc
+  print*,'Threshold for rho_2:', thresh_rho_2  
+
+  print*, 'w_1^T . g =', tmp_wtg(1)
+
+  ! Debug
+  !if (debug) then
+  !    print*, 'Iteration    rho_2    lambda    delta  ||x||  |1-(||x||^2/delta^2)|'
+  !endif
+
+  ! Initialization  
+  i = 1
+  f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2
+  model = 0d0           ! predicted value of (||x||^2 - delta^2)^2
+  prev_f_N = 0d0    ! previous value of ||x||^2
+  prev_f_R = 0d0    ! previous value of (||x||^2 - delta^2)^2
+  f_R = 0d0         ! value of (||x||^2 - delta^2)^2
+  rho_2 = 0d0       ! (prev_f_R - f_R)/(prev_f_R - m)
+  y = 0d0           ! step size
+  prev_lambda = 0d0 ! previous lambda
+
+    ! Derivatives
+    if (version_lambda_search == 1) then
+      d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2
+      d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2
+    else
+      d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+      d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+    endif
+
+    ! Trust length
+    alpha = DABS((1d0/d_2)*d_1)
+
+    ! Newton's method
+    do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc)
+      print*,'--------------------------------------'
+      print*,'Research of lambda, iteration:', i
+      print*,'--------------------------------------'
+
+      ! Update of f_N, f_R and the derivatives
+      prev_f_N = f_N 
+      if (version_lambda_search == 1) then
+        prev_f_R = (prev_f_N - delta**2)**2
+        d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2
+        d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2
+      else
+        prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2
+        d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+        d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+      endif
+      write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2  
+
+      ! Newton's step
+      y = -(1d0/DABS(d_2))*d_1
+
+      ! Constraint on y (the newton step)
+      if (DABS(y) > alpha) then
+        y = alpha * (y/DABS(y)) ! preservation of the sign of y
+      endif
+      write(*,'(a,E12.5)') ' Step length: ', y
+
+      ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series
+      model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2    
+
+      ! Updates lambda
+      prev_lambda = lambda
+      lambda = prev_lambda + y
+      print*,'prev lambda:', prev_lambda
+      print*,'new lambda:', lambda
+
+      ! Checks if lambda is in (-h_1, \infty)
+      if (lambda > MAX(0d0, -e_val(1))) then
+        ! New value of ||x(lambda)||^2
+        f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+
+        ! New f_R
+        if (version_lambda_search == 1) then
+          f_R = (f_N - delta**2)**2          ! new value of (||x(lambda)||^2 - delta^2)^2
+        else
+          f_R = (1d0/f_N - 1d0/delta**2)**2  ! new value of (1/||x(lambda)||^2 -1/delta^2)^2
+        endif
+        
+        if (version_lambda_search == 1) then
+          print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R
+          print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R
+          print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model
+        else
+          print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R
+          print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R
+          print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model
+        endif
+
+        print*,'previous - actual:', prev_f_R - f_R
+        print*,'previous - model:', prev_f_R - model
+
+        ! Check the gain
+        if (DABS(prev_f_R - model) < thresh_model_2) then
+          print*,''
+          print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity'
+          print*,''
+        endif        
+
+        ! Will be deleted
+        !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then
+        !  print*,''
+        !  print*,'WARNING: ABS(previous - model) <= 1d-16, exit'
+        !  print*,''
+        !  exit
+        !endif
+
+        ! Computes rho_2
+        rho_2 = (prev_f_R - f_R)/(prev_f_R - model)
+        print*,'rho_2:', rho_2               
+      else
+        rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty)
+        print*,'lambda < -e_val(1) ===> rho_2 = 0'
+      endif
+
+      ! Evolution of the trust length, alpha
+      if (rho_2 >= 0.75d0) then
+        alpha = 2d0 * alpha
+      elseif (rho_2 >= 0.5d0) then
+        alpha = alpha
+      elseif (rho_2 >= 0.25d0) then
+        alpha = 0.5d0 * alpha
+      else 
+        alpha = 0.25d0 * alpha
+      endif
+      write(*,'(a,E12.5)') ' New trust length alpha: ', alpha
+
+      ! cancellaion of the step if rho < 0.1
+      if (rho_2 < thresh_rho_2) then !0.1d0) then
+        lambda = prev_lambda
+        f_N = prev_f_N
+        print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda'
+      endif
+
+      print*,''
+      print*,'lambda, ||x||, delta:'
+      print*, lambda, dsqrt(f_N), delta
+      print*,'CC:', DABS(1d0 - f_N/delta**2)
+      print*,''
+      
+      i = i + 1
+    enddo
+
+  ! if trust newton failed
+  if (i > nb_it_max_lambda) then
+    print*,''
+    print*,'######################################################'
+    print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton'
+    print*,'The research of the optimal lambda has failed'
+    print*,'######################################################'
+    print*,''
+  endif
+
+  print*,'Number of iterations :', i
+  print*,'Value of lambda :', lambda
+  print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2
+  print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2
+  print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2
+
+  ! Time
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust_newton:', t3
+
+  print*,'' 
+  print*,'---End trust_newton---'
+  print*,''
+
+end subroutine
+
+! OMP: First derivative of (||x||^2 - Delta^2)^2
+
+! *Function to compute the first derivative of (||x||^2 - Delta^2)^2*
+
+! This function computes the first derivative of (||x||^2 - Delta^2)^2
+! with respect to lambda.
+
+! \begin{align*}
+! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+! = -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right)
+! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+! \end{align*}
+
+! \begin{align*}
+!   \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\
+!   \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+! \end{align*}
+
+! Provided:
+! | mo_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | accu1      | double precision | first sum of the formula           |
+! | accu2      | double precision | second sum of the formula          |
+! | tmp_accu1  | double precision | temporary array for the first sum  |
+! | tmp_accu2  | double precision | temporary array for the second sum |
+! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad     |
+! | i,j        | integer          | indexes                            |
+
+! Function:
+! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+
+
+function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+  
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+  double precision, intent(in)  :: delta
+   
+  ! Internal
+  double precision              :: wtg,accu1,accu2
+  integer                       :: i,j
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)
+
+  ! Functions
+  double precision              :: d1_norm_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n))
+
+  ! OMP
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2)  &
+      !$OMP DEFAULT(NONE)
+
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2 
+    endif
+  enddo
+  !$OMP END DO
+ 
+  !$OMP MASTER
+  do i = 1, n 
+    accu1 = accu1 + tmp_accu1(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (ABS(e_val(i)) > thresh_eig) then
+      tmp_accu2(i) =  tmp_wtg(i)**2 / (e_val(i) + lambda)**3 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2)
+
+  deallocate(tmp_accu1, tmp_accu2)
+
+end function
+
+! OMP: Second derivative of (||x||^2 - Delta^2)^2
+
+! *Function to compute the second derivative of (||x||^2 - Delta^2)^2*
+
+! This function computes the second derivative of (||x||^2 - Delta^2)^2
+! with respect to lambda.
+! \begin{align*}
+! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+! \end{align*}
+
+! \begin{align*}
+!   \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+!   \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+!   \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} 
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | accu1      | double precision | first sum of the formula           |
+! | accu2      | double precision | second sum of the formula          |
+! | accu3      | double precision | third sum of the formula           |
+! | tmp_accu1  | double precision | temporary array for the first sum  |
+! | tmp_accu2  | double precision | temporary array for the second sum |
+! | tmp_accu2  | double precision | temporary array for the third sum  |
+! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad     |
+! | i,j        | integer          | indexes                            |
+
+! Function:
+! | d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+
+
+function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+  
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+  double precision, intent(in)  :: delta
+
+  ! Functions
+  double precision              :: d2_norm_trust_region_omp
+  double precision              :: ddot
+
+  ! Internal
+  double precision              :: accu1,accu2,accu3
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
+  integer :: i, j
+  
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))
+
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg,    &
+      !$OMP accu1, accu2, accu3)                         &
+      !$OMP DEFAULT(NONE)
+
+  ! Initialization
+
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0 
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n 
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+  !$OMP DO
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+  !$OMP DO
+  do i = 1, n
+    tmp_accu3(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  ! Calculations
+
+  ! accu1
+  !$OMP DO
+  do i = 1, n
+    if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+  enddo
+  !$OMP END MASTER
+
+  ! accu2
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu2(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**3
+    endif
+  enddo
+  !$OMP END DO
+ 
+  ! accu3
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu3(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**4
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu3 = accu3 + tmp_accu3(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2)
+
+  deallocate(tmp_accu1, tmp_accu2, tmp_accu3)
+
+end function
+
+! OMP: Function value of ||x||^2
+
+! *Compute the value of ||x||^2*
+
+! This function computes the value of ||x(lambda)||^2
+
+! \begin{align*}
+! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+
+! Internal:
+! | tmp_wtg(n) | double precision | temporary array for W^T.v_grad   |
+! | tmp_fN     | double precision | temporary array for the function |
+! | i,j        | integer          | indexes                          |
+
+
+function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+
+  use omp_lib
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute ||x(lambda)||^2
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+ 
+  ! functions
+  double precision              :: f_norm_trust_region_omp
+ 
+  ! internal
+  double precision, allocatable :: tmp_fN(:)
+  integer                       :: i,j
+
+  ! Allocation
+  allocate(tmp_fN(n))
+
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp)    &
+      !$OMP DEFAULT(NONE)
+
+  ! Initialization
+
+  !$OMP MASTER
+  f_norm_trust_region_omp = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    tmp_fN(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  ! Calculations 
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+       tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+  !$OMP END DO
+  
+  !$OMP MASTER
+  do i = 1, n
+    f_norm_trust_region_omp =  f_norm_trust_region_omp + tmp_fN(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  deallocate(tmp_fN)
+
+end function
+
+! First derivative of (||x||^2 - Delta^2)^2
+! Version without omp
+
+! *Function to compute the first derivative of ||x||^2 - Delta*
+
+! This function computes the first derivative of (||x||^2 - Delta^2)^2
+! with respect to lambda.
+
+! \begin{align*}
+! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+! = 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)
+! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+! \end{align*}
+
+! \begin{align*}
+! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | accu1 | double precision | first sum of the formula               |
+! | accu2 | double precision | second sum of the formula              |
+! | wtg   | double precision | temporary variable to store W^T.v_grad |
+! | i,j   | integer          | indexes                                |
+
+! Function:
+! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+! | ddot                 | double precision | blas dot product                                                   |
+
+
+function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision             :: wtg, accu1, accu2
+  integer                      :: i, j
+
+  ! Functions
+  double precision             :: d1_norm_trust_region
+  double precision             :: ddot
+
+  ! Initialization
+  accu1 = 0d0
+  accu2 = 0d0
+
+  do i = 1, n
+    wtg = 0d0
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 
+    endif
+  enddo
+
+  do i = 1, n
+    wtg = 0d0
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 
+    endif
+  enddo
+
+  d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2)
+
+end function
+
+! Second derivative of (||x||^2 - Delta^2)^2
+! Version without OMP
+
+! *Function to compute the second derivative of ||x||^2 - Delta*
+
+
+! \begin{equation}
+! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+! \end{equation}
+
+! \begin{align*}
+! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+! \end{align*}
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | accu1 | double precision | first sum of the formula               |
+! | accu2 | double precision | second sum of the formula              |
+! | accu3 | double precision | third sum of the formula                |
+! | wtg   | double precision | temporary variable to store W^T.v_grad |
+! | i,j   | integer          | indexes                                |
+
+! Function:
+! | d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2       |
+! | ddot                 | double precision | blas dot product                                               |
+
+
+function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in) :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Functions
+  double precision :: d2_norm_trust_region
+  double precision :: ddot
+
+  ! Internal
+  double precision :: wtg,accu1,accu2,accu3
+  integer :: i, j
+
+  ! Initialization
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4
+    endif
+  enddo
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2
+    endif
+  enddo
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3
+    endif
+  enddo
+
+  d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2)
+
+end function
+
+! Function value of ||x||^2
+! Version without OMP
+
+! *Compute the value of ||x||^2*
+
+! This function computes the value of ||x(lambda)||^2
+
+! \begin{align*}
+! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | wtg   | double precision | temporary variable to store W^T.v_grad |
+! | i,j   | integer          | indexes                                |
+
+! Function:
+! | f_norm_trust_region | double precision | value of norm(x)^2 |
+! | ddot                | double precision | blas dot product   |
+
+
+
+function f_norm_trust_region(n,e_val,tmp_wtg,lambda)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute ||x(lambda)||^2
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+  
+  ! function
+  double precision             :: f_norm_trust_region
+  double precision             :: ddot
+
+  ! internal
+  integer                      :: i,j
+
+  ! Initialization
+  f_norm_trust_region = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then    
+      f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+
+end function
+
+! OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2
+! Version with OMP
+
+! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+! \begin{align*}
+!   \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+!   &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+!        {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} 
+!      - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}}
+!        {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+!   &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+!        \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+!       - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+! \end{align*}
+
+! \begin{align*}
+! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | wtg        | double precision | temporary variable to store W^T.v_grad |
+! | tmp_accu1  | double precision | temporary array for the first sum      |
+! | tmp_accu2  | double precision | temporary array for the second sum     |
+! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad         |
+! | i,j        | integer          | indexes                                |
+
+! Function:
+! | d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+
+function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+  double precision, intent(in)  :: delta
+
+  ! Internal
+  double precision              :: accu1, accu2
+  integer                       :: i,j
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)
+
+  ! Functions
+  double precision              :: d1_norm_inverse_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n))
+
+  ! OMP
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) &
+      !$OMP DEFAULT(NONE)
+  
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+
+!  !$OMP MASTER
+!  do i = 1, n
+!    if (ABS(e_val(i)+lambda) > 1d-12) then
+!      tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+!    endif
+!  enddo
+!  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+  enddo  
+  !$OMP END MASTER
+
+!  !$OMP MASTER
+!  do i = 1, n
+!    if (ABS(e_val(i)+lambda) > 1d-12) then
+!      tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3
+!    endif
+!  enddo
+!  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu2(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**3 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo  
+  !$OMP END MASTER
+  
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2))
+
+  deallocate(tmp_accu1, tmp_accu2)
+ 
+end
+
+! OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2
+! Version with OMP
+
+! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+! \begin{align*}
+!   \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+!   &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} 
+!   - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+!   &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+!   - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+! \end{align*}
+
+
+! \begin{align*}
+! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | wtg        | double precision | temporary variable to store W^T.v_grad |
+! | tmp_accu1  | double precision | temporary array for the first sum      |
+! | tmp_accu2  | double precision | temporary array for the second sum     |
+! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad         |
+! | i,j        | integer          | indexes                                |
+
+! Function:
+! | d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+
+function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision :: accu1, accu2, accu3
+  integer          :: i,j
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
+
+  ! Functions
+  double precision :: d2_norm_inverse_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))
+
+  ! OMP
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg,    &
+      !$OMP accu1, accu2, accu3)                         &
+      !$OMP DEFAULT(NONE)
+  
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    tmp_accu3(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+  enddo  
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu2(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**3 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo  
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu3(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**4
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu3 = accu3 + tmp_accu3(i)
+  enddo  
+  !$OMP END MASTER
+  
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+   d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) &
+    - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2)
+
+  deallocate(tmp_accu1,tmp_accu2,tmp_accu3)
+ 
+end
+
+! First derivative of (1/||x||^2 - 1/Delta^2)^2
+! Version without OMP
+
+! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+! \begin{align*}
+!   \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+!   &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+!        {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} 
+!      - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}}
+!        {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+!   &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+!        \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+!       - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+! \end{align*}
+! \begin{align*}
+! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} 
+! \end{align*}
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | wtg   | double precision | temporary variable to store W^T.v_grad |
+! | i,j   | integer          | indexes                                |
+
+! Function:
+! | d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+
+function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision             :: wtg, accu1, accu2
+  integer                      :: i,j
+
+  ! Functions
+  double precision             :: d1_norm_inverse_trust_region
+  
+  accu1 = 0d0
+  accu2 = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+  
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3
+    endif
+  enddo
+
+  d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2))
+ 
+end
+
+! Second derivative of (1/||x||^2 - 1/Delta^2)^2
+! Version without OMP
+
+! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+! \begin{align*}
+!   \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+!   &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} 
+!   - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+!   &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+!   - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+! \end{align*}
+
+! \begin{align*}
+! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+! \end{align*}
+
+! Provided:
+! | m_num | integer | number of MOs |
+
+! Input:
+! | n         | integer          | mo_num*(mo_num-1)/2         |
+! | e_val(n)  | double precision | eigenvalues of the hessian  |
+! | W(n,n)    | double precision | eigenvectors of the hessian |
+! | v_grad(n) | double precision | gradient                    |
+! | lambda    | double precision | Lagrange multiplier         |
+! | delta     | double precision | Delta of the trust region   |
+
+! Internal:
+! | wtg   | double precision | temporary variable to store W^T.v_grad |
+! | i,j   | integer          | indexes                                |
+
+! Function:
+! | d2_norm_inverse_trust_region | double precision | value of the first derivative |
+
+
+function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision             :: wtg, accu1, accu2, accu3
+  integer                      :: i,j
+
+  ! Functions
+  double precision             :: d2_norm_inverse_trust_region
+  
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+  
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3
+    endif
+  enddo
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4
+    endif
+  enddo
+
+  d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) &
+    - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2)
+  
+end
diff --git a/src/utils_trust_region/trust_region_optimal_lambda.org b/src/utils_trust_region/trust_region_optimal_lambda.org
new file mode 100644
index 00000000..b39c9a10
--- /dev/null
+++ b/src/utils_trust_region/trust_region_optimal_lambda.org
@@ -0,0 +1,1665 @@
+* Newton's method to find the optimal lambda
+
+*Compute the lambda value for the trust region*
+
+This subroutine uses the Newton method in order to find the optimal
+lambda. This constant is added on the diagonal of the hessian to shift
+the eiganvalues. It has a double role:
+- ensure that the resulting hessian is positive definite for the
+  Newton method
+- constrain the step in the trust region, i.e.,
+  $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius
+  of the trust region.
+We search $\lambda$ which minimizes
+\begin{align*}
+  f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2
+\end{align*}
+or
+\begin{align*}
+  \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2
+\end{align*}
+and gives obviously 0 in both cases. \newline
+
+There are several cases:
+- If $\textbf{H}$ is positive definite the interval containing the
+  solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). 
+- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot
+  \textbf{g} \neq 0$ then the interval containing
+  the solution is  $\lambda \in (-h_1, \infty)$.
+- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot
+  \textbf{g} = 0$ then the interval containing the solution is
+  $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| <
+  10^{-12}$ are not computed, so the term where $i = 1$ is
+  automatically removed and this case becomes similar to the previous one.
+
+So to avoid numerical problems (cf. trust_region) we start the
+algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$,
+with $\epsilon$ a little constant.
+The research must be restricted to the interval containing the
+solution. For that reason a little trust region in 1D is used.  
+
+The Newton method to find the optimal $\lambda$ is :
+\begin{align*}
+  \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\
+\end{align*}
+$f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to
+$\lambda$ at the l-th iteration,
+$f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to
+$\lambda$ at the l-th iteration.\newline
+
+Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1}
+f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as 
+\begin{align*}
+  y \leq \alpha
+\end{align*}
+with $\alpha$ a scalar representing the trust length (trust region in
+1D) where the function $f$ or $\tilde{f}$ is correctly describe by the
+Taylor series truncated at the second order. Thus, if $y > \alpha$,
+the constraint is applied as  
+\begin{align*}
+  y^* = \alpha \frac{y}{|y|}
+\end{align*}
+with $y^*$ the solution in the trust region. 
+
+The size of the trust region evolves in function of $\rho$ as for the
+trust region seen previously cf. trust_region, rho_model.
+The prediction of the value of $f$ or $\tilde{f}$ is done using the
+Taylor series truncated at the second order cf. "trust_region",
+"trust_e_model". 
+
+The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 -
+\Delta^2)^2$ with respect to $\lambda$ are:
+\begin{align*}
+  \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+  = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)
+  \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+\end{align*}
+\begin{align*}
+\frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+\end{align*}
+
+The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 -
+1/\Delta^2)^2$ with respect to $\lambda$ are:
+\begin{align*}
+  \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+  &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+       {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} 
+     - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}}
+       {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+  &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+       \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+      - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+\end{align*}
+
+\begin{align*}
+  \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+  &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}
+   {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} 
+  - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}
+   {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+  &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}
+   {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+  - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}
+   {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+\end{align*}
+
+Provided in qp_edit:
+| thresh_rho_2          |
+| thresh_cc             |
+| nb_it_max_lambda      |
+| version_lambda_search |
+| nb_it_max_pre_search  |
+see qp_edit for more details
+
+Input:
+| n          | integer          | m*(m-1)/2                  |
+| e_val(n)   | double precision | eigenvalues of the hessian |
+| tmp_wtg(n) | double precision | w_i^T.v_grad(i)            |
+| delta      | double precision | delta for the trust region |
+
+Output:
+| lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step |
+|        |                  | lambda > 0                                                               |
+
+Internal:
+| d1_N        | double precision | value of d1_norm_trust_region                                        |
+| d2_N        | double precision | value of d2_norm_trust_region                                        |
+| f_N         | double precision | value of f_norm_trust_region                                         |
+| prev_f_N    | double precision | previous value of f_norm_trust_region                                |
+| f_R         | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2               |
+| prev_f_R    | double precision | previous value of f_R                                                |
+| model       | double precision | predicted value of f_R from prev_f_R and y                           |
+| d_1         | double precision | value of the first derivative                                        |
+| d_2         | double precision | value of the second derivative                                       |
+| y           | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda               |
+| prev_lambda | double precision | previous value of lambda                                             |
+| t1,t2,t3    | double precision | wall time                                                            |
+| i           | integer          | index                                                                |
+| epsilon     | double precision | little constant to avoid numerical problem                           |
+| rho_2       | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R |
+| version     | integer          | version of the root finding method                                   |
+
+Function:
+| d1_norm_trust_region         | double precision | first derivative with respect to lambda of  (norm(x)^2 - Delta^2)^2     |
+| d2_norm_trust_region         | double precision | first derivative with respect to lambda of  (norm(x)^2 - Delta^2)^2     |
+| d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of  (1/norm(x)^2 - 1/Delta^2)^2 |
+| d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 |
+| f_norm_trust_region          | double precision | value of norm(x)^2                                                      |
+
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Research the optimal lambda to constrain the step size in the trust region
+  END_DOC
+
+  implicit none
+  
+  ! Variables
+  
+  ! in
+  integer, intent(in)             :: n
+  double precision, intent(inout) :: e_val(n)
+  double precision, intent(in)    :: delta
+  double precision, intent(in)    :: tmp_wtg(n)
+
+  ! out
+  double precision, intent(out)   :: lambda
+
+  ! Internal
+  double precision                :: d1_N, d2_N, f_N, prev_f_N
+  double precision                :: prev_f_R, f_R
+  double precision                :: model
+  double precision                :: d_1, d_2
+  double precision                :: t1,t2,t3
+  integer                         :: i
+  double precision                :: epsilon
+  double precision                :: y
+  double precision                :: prev_lambda
+  double precision                :: rho_2
+  double precision                :: alpha
+  integer                         :: version
+
+  ! Functions
+  double precision                :: d1_norm_trust_region,d1_norm_trust_region_omp
+  double precision                :: d2_norm_trust_region, d2_norm_trust_region_omp
+  double precision                :: f_norm_trust_region, f_norm_trust_region_omp
+  double precision                :: d1_norm_inverse_trust_region
+  double precision                :: d2_norm_inverse_trust_region
+  double precision                :: d1_norm_inverse_trust_region_omp
+  double precision                :: d2_norm_inverse_trust_region_omp
+
+  print*,''
+  print*,'---Trust_newton---'
+  print*,''
+
+  call wall_time(t1)
+
+  ! version_lambda_search
+  ! 1 -> ||x||^2 - delta^2 = 0,
+  ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better)
+  if (version_lambda_search == 1) then
+    print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0'
+  else
+    print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0'
+  endif
+  ! Version 2 is normally better
+#+END_SRC
+
+Resolution with the Newton method:
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+  ! Initialization
+  epsilon = 1d-4
+  lambda =MAX(0d0, -e_val(1))
+  
+  ! Pre research of lambda to start near the optimal lambda
+  ! by adding a constant epsilon and changing the constant to
+  ! have ||x(lambda + epsilon)|| ~ delta, before setting
+  ! lambda = lambda + epsilon 
+  print*, 'Pre research of lambda:'
+  print*,'Initial lambda =', lambda
+  f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
+  print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta 
+  i = 1
+  
+  ! To increase lambda
+  if (f_N > delta**2) then
+    print*,'Increasing lambda...'
+    do while (f_N > delta**2 .and. i <= nb_it_max_pre_search)
+
+      ! Update the previous norm
+      prev_f_N = f_N
+      ! New epsilon
+      epsilon = epsilon * 2d0
+      ! New norm
+      f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
+
+      print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta
+      
+      ! Security
+      if (prev_f_N < f_N) then
+        print*,'WARNING, error: prev_f_N < f_N, exit'
+        epsilon = epsilon * 0.5d0
+        i = nb_it_max_pre_search + 1
+      endif
+
+      i = i + 1
+    enddo
+  
+  ! To reduce lambda
+  else
+     print*,'Reducing lambda...'
+     do while (f_N < delta**2 .and. i <= nb_it_max_pre_search)
+
+       ! Update the previous norm
+       prev_f_N = f_N  
+       ! New epsilon
+       epsilon = epsilon * 0.5d0
+       ! New norm
+       f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon)
+
+       print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta
+
+       ! Security
+       if (prev_f_N > f_N) then
+         print*,'WARNING, error: prev_f_N > f_N, exit'
+         epsilon = epsilon * 2d0
+         i = nb_it_max_pre_search + 1
+      endif
+
+      i = i + 1
+    enddo
+  endif
+
+  print*,'End of the pre research of lambda'
+  
+  ! New value of lambda
+  lambda = lambda + epsilon
+
+  print*, 'e_val(1):', e_val(1)
+  print*, 'Staring point, lambda =', lambda
+  
+  ! thresh_cc, threshold for the research of the optimal lambda
+  ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc
+  ! thresh_rho_2, threshold to cancel the step in the research
+  ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2
+  print*,'Threshold for the CC:', thresh_cc
+  print*,'Threshold for rho_2:', thresh_rho_2  
+
+  print*, 'w_1^T . g =', tmp_wtg(1)
+
+  ! Debug
+  !if (debug) then
+  !    print*, 'Iteration    rho_2    lambda    delta  ||x||  |1-(||x||^2/delta^2)|'
+  !endif
+
+  ! Initialization  
+  i = 1
+  f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2
+  model = 0d0           ! predicted value of (||x||^2 - delta^2)^2
+  prev_f_N = 0d0    ! previous value of ||x||^2
+  prev_f_R = 0d0    ! previous value of (||x||^2 - delta^2)^2
+  f_R = 0d0         ! value of (||x||^2 - delta^2)^2
+  rho_2 = 0d0       ! (prev_f_R - f_R)/(prev_f_R - m)
+  y = 0d0           ! step size
+  prev_lambda = 0d0 ! previous lambda
+
+    ! Derivatives
+    if (version_lambda_search == 1) then
+      d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2
+      d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2
+    else
+      d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+      d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+    endif
+
+    ! Trust length
+    alpha = DABS((1d0/d_2)*d_1)
+
+    ! Newton's method
+    do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc)
+      print*,'--------------------------------------'
+      print*,'Research of lambda, iteration:', i
+      print*,'--------------------------------------'
+
+      ! Update of f_N, f_R and the derivatives
+      prev_f_N = f_N 
+      if (version_lambda_search == 1) then
+        prev_f_R = (prev_f_N - delta**2)**2
+        d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2
+        d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2
+      else
+        prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2
+        d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+        d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2
+      endif
+      write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2  
+
+      ! Newton's step
+      y = -(1d0/DABS(d_2))*d_1
+
+      ! Constraint on y (the newton step)
+      if (DABS(y) > alpha) then
+        y = alpha * (y/DABS(y)) ! preservation of the sign of y
+      endif
+      write(*,'(a,E12.5)') ' Step length: ', y
+
+      ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series
+      model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2    
+
+      ! Updates lambda
+      prev_lambda = lambda
+      lambda = prev_lambda + y
+      print*,'prev lambda:', prev_lambda
+      print*,'new lambda:', lambda
+
+      ! Checks if lambda is in (-h_1, \infty)
+      if (lambda > MAX(0d0, -e_val(1))) then
+        ! New value of ||x(lambda)||^2
+        f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+
+        ! New f_R
+        if (version_lambda_search == 1) then
+          f_R = (f_N - delta**2)**2          ! new value of (||x(lambda)||^2 - delta^2)^2
+        else
+          f_R = (1d0/f_N - 1d0/delta**2)**2  ! new value of (1/||x(lambda)||^2 -1/delta^2)^2
+        endif
+        
+        if (version_lambda_search == 1) then
+          print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R
+          print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R
+          print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model
+        else
+          print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R
+          print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R
+          print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model
+        endif
+
+        print*,'previous - actual:', prev_f_R - f_R
+        print*,'previous - model:', prev_f_R - model
+
+        ! Check the gain
+        if (DABS(prev_f_R - model) < thresh_model_2) then
+          print*,''
+          print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity'
+          print*,''
+        endif        
+
+        ! Will be deleted
+        !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then
+        !  print*,''
+        !  print*,'WARNING: ABS(previous - model) <= 1d-16, exit'
+        !  print*,''
+        !  exit
+        !endif
+
+        ! Computes rho_2
+        rho_2 = (prev_f_R - f_R)/(prev_f_R - model)
+        print*,'rho_2:', rho_2               
+      else
+        rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty)
+        print*,'lambda < -e_val(1) ===> rho_2 = 0'
+      endif
+
+      ! Evolution of the trust length, alpha
+      if (rho_2 >= 0.75d0) then
+        alpha = 2d0 * alpha
+      elseif (rho_2 >= 0.5d0) then
+        alpha = alpha
+      elseif (rho_2 >= 0.25d0) then
+        alpha = 0.5d0 * alpha
+      else 
+        alpha = 0.25d0 * alpha
+      endif
+      write(*,'(a,E12.5)') ' New trust length alpha: ', alpha
+
+      ! cancellaion of the step if rho < 0.1
+      if (rho_2 < thresh_rho_2) then !0.1d0) then
+        lambda = prev_lambda
+        f_N = prev_f_N
+        print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda'
+      endif
+
+      print*,''
+      print*,'lambda, ||x||, delta:'
+      print*, lambda, dsqrt(f_N), delta
+      print*,'CC:', DABS(1d0 - f_N/delta**2)
+      print*,''
+      
+      i = i + 1
+    enddo
+
+  ! if trust newton failed
+  if (i > nb_it_max_lambda) then
+    print*,''
+    print*,'######################################################'
+    print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton'
+    print*,'The research of the optimal lambda has failed'
+    print*,'######################################################'
+    print*,''
+  endif
+
+  print*,'Number of iterations :', i
+  print*,'Value of lambda :', lambda
+  print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2
+  print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2
+  print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2
+
+  ! Time
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust_newton:', t3
+
+  print*,'' 
+  print*,'---End trust_newton---'
+  print*,''
+
+end subroutine
+#+END_SRC
+
+* OMP: First derivative of (||x||^2 - Delta^2)^2
+
+*Function to compute the first derivative of (||x||^2 - Delta^2)^2*
+
+This function computes the first derivative of (||x||^2 - Delta^2)^2
+with respect to lambda.
+
+\begin{align*}
+\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+= -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right)
+\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+\end{align*}
+
+\begin{align*}
+  \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\
+  \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+\end{align*}
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| accu1      | double precision | first sum of the formula           |
+| accu2      | double precision | second sum of the formula          |
+| tmp_accu1  | double precision | temporary array for the first sum  |
+| tmp_accu2  | double precision | temporary array for the second sum |
+| tmp_wtg(n) | double precision | temporary array for W^t.v_grad     |
+| i,j        | integer          | indexes                            |
+
+Function:
+| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+  
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+  double precision, intent(in)  :: delta
+   
+  ! Internal
+  double precision              :: wtg,accu1,accu2
+  integer                       :: i,j
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)
+
+  ! Functions
+  double precision              :: d1_norm_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n))
+
+  ! OMP
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2)  &
+      !$OMP DEFAULT(NONE)
+
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2 
+    endif
+  enddo
+  !$OMP END DO
+ 
+  !$OMP MASTER
+  do i = 1, n 
+    accu1 = accu1 + tmp_accu1(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (ABS(e_val(i)) > thresh_eig) then
+      tmp_accu2(i) =  tmp_wtg(i)**2 / (e_val(i) + lambda)**3 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2)
+
+  deallocate(tmp_accu1, tmp_accu2)
+
+end function
+#+END_SRC
+
+* OMP: Second derivative of (||x||^2 - Delta^2)^2
+
+*Function to compute the second derivative of (||x||^2 - Delta^2)^2*
+
+This function computes the second derivative of (||x||^2 - Delta^2)^2
+with respect to lambda.
+\begin{align*}
+\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+\end{align*}
+
+\begin{align*}
+  \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+  \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+  \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} 
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| accu1      | double precision | first sum of the formula           |
+| accu2      | double precision | second sum of the formula          |
+| accu3      | double precision | third sum of the formula           |
+| tmp_accu1  | double precision | temporary array for the first sum  |
+| tmp_accu2  | double precision | temporary array for the second sum |
+| tmp_accu2  | double precision | temporary array for the third sum  |
+| tmp_wtg(n) | double precision | temporary array for W^t.v_grad     |
+| i,j        | integer          | indexes                            |
+
+Function:
+| d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+  
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+  double precision, intent(in)  :: delta
+
+  ! Functions
+  double precision              :: d2_norm_trust_region_omp
+  double precision              :: ddot
+
+  ! Internal
+  double precision              :: accu1,accu2,accu3
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
+  integer :: i, j
+  
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))
+
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg,    &
+      !$OMP accu1, accu2, accu3)                         &
+      !$OMP DEFAULT(NONE)
+
+  ! Initialization
+
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0 
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n 
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+  !$OMP DO
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+  !$OMP DO
+  do i = 1, n
+    tmp_accu3(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  ! Calculations
+
+  ! accu1
+  !$OMP DO
+  do i = 1, n
+    if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+  enddo
+  !$OMP END MASTER
+
+  ! accu2
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu2(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**3
+    endif
+  enddo
+  !$OMP END DO
+ 
+  ! accu3
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu3(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**4
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu3 = accu3 + tmp_accu3(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2)
+
+  deallocate(tmp_accu1, tmp_accu2, tmp_accu3)
+
+end function
+#+END_SRC
+
+* OMP: Function value of ||x||^2
+
+*Compute the value of ||x||^2*
+
+This function computes the value of ||x(lambda)||^2
+
+\begin{align*}
+||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+
+Internal:
+| tmp_wtg(n) | double precision | temporary array for W^T.v_grad   |
+| tmp_fN     | double precision | temporary array for the function |
+| i,j        | integer          | indexes                          |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+
+  use omp_lib
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute ||x(lambda)||^2
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+ 
+  ! functions
+  double precision              :: f_norm_trust_region_omp
+ 
+  ! internal
+  double precision, allocatable :: tmp_fN(:)
+  integer                       :: i,j
+
+  ! Allocation
+  allocate(tmp_fN(n))
+
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp)    &
+      !$OMP DEFAULT(NONE)
+
+  ! Initialization
+
+  !$OMP MASTER
+  f_norm_trust_region_omp = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    tmp_fN(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  ! Calculations 
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+       tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+  !$OMP END DO
+  
+  !$OMP MASTER
+  do i = 1, n
+    f_norm_trust_region_omp =  f_norm_trust_region_omp + tmp_fN(i)
+  enddo
+  !$OMP END MASTER
+
+  !$OMP END PARALLEL
+
+  deallocate(tmp_fN)
+
+end function
+#+END_SRC
+
+* First derivative of (||x||^2 - Delta^2)^2
+Version without omp
+
+*Function to compute the first derivative of ||x||^2 - Delta*
+
+This function computes the first derivative of (||x||^2 - Delta^2)^2
+with respect to lambda.
+
+\begin{align*}
+\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+= 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)
+\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right)
+\end{align*}
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| accu1 | double precision | first sum of the formula               |
+| accu2 | double precision | second sum of the formula              |
+| wtg   | double precision | temporary variable to store W^T.v_grad |
+| i,j   | integer          | indexes                                |
+
+Function:
+| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 |
+| ddot                 | double precision | blas dot product                                                   |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision             :: wtg, accu1, accu2
+  integer                      :: i, j
+
+  ! Functions
+  double precision             :: d1_norm_trust_region
+  double precision             :: ddot
+
+  ! Initialization
+  accu1 = 0d0
+  accu2 = 0d0
+
+  do i = 1, n
+    wtg = 0d0
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 
+    endif
+  enddo
+
+  do i = 1, n
+    wtg = 0d0
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 
+    endif
+  enddo
+
+  d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2)
+
+end function
+#+END_SRC
+
+* Second derivative of (||x||^2 - Delta^2)^2
+Version without OMP
+
+*Function to compute the second derivative of ||x||^2 - Delta*
+
+
+\begin{equation}
+\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 
+= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right]
+\end{equation}
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+\end{align*}
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| accu1 | double precision | first sum of the formula               |
+| accu2 | double precision | second sum of the formula              |
+| accu3 | double precision | third sum of the formula                |
+| wtg   | double precision | temporary variable to store W^T.v_grad |
+| i,j   | integer          | indexes                                |
+
+Function:
+| d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2       |
+| ddot                 | double precision | blas dot product                                               |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in) :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Functions
+  double precision :: d2_norm_trust_region
+  double precision :: ddot
+
+  ! Internal
+  double precision :: wtg,accu1,accu2,accu3
+  integer :: i, j
+
+  ! Initialization
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4
+    endif
+  enddo
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2
+    endif
+  enddo
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      !wtg = ddot(n,w(:,i),1,v_grad,1)
+      accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3
+    endif
+  enddo
+
+  d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2)
+
+end function
+#+END_SRC
+
+* Function value of ||x||^2
+Version without OMP
+
+*Compute the value of ||x||^2*
+
+This function computes the value of ||x(lambda)||^2
+
+\begin{align*}
+||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2}
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| wtg   | double precision | temporary variable to store W^T.v_grad |
+| i,j   | integer          | indexes                                |
+
+Function:
+| f_norm_trust_region | double precision | value of norm(x)^2 |
+| ddot                | double precision | blas dot product   |
+
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function f_norm_trust_region(n,e_val,tmp_wtg,lambda)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute ||x(lambda)||^2
+  END_DOC
+  
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+  
+  ! function
+  double precision             :: f_norm_trust_region
+  double precision             :: ddot
+
+  ! internal
+  integer                      :: i,j
+
+  ! Initialization
+  f_norm_trust_region = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then    
+      f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+
+end function
+#+END_SRC
+
+* OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2
+Version with OMP
+
+*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+\begin{align*}
+  \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+  &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+       {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} 
+     - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}}
+       {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+  &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+       \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+      - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+\end{align*}
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3}
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| wtg        | double precision | temporary variable to store W^T.v_grad |
+| tmp_accu1  | double precision | temporary array for the first sum      |
+| tmp_accu2  | double precision | temporary array for the second sum     |
+| tmp_wtg(n) | double precision | temporary array for W^t.v_grad         |
+| i,j        | integer          | indexes                                |
+
+Function:
+| d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: e_val(n)
+  double precision, intent(in)  :: tmp_wtg(n)
+  double precision, intent(in)  :: lambda
+  double precision, intent(in)  :: delta
+
+  ! Internal
+  double precision              :: accu1, accu2
+  integer                       :: i,j
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:)
+
+  ! Functions
+  double precision              :: d1_norm_inverse_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n))
+
+  ! OMP
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) &
+      !$OMP DEFAULT(NONE)
+  
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+
+!  !$OMP MASTER
+!  do i = 1, n
+!    if (ABS(e_val(i)+lambda) > 1d-12) then
+!      tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2
+!    endif
+!  enddo
+!  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+  enddo  
+  !$OMP END MASTER
+
+!  !$OMP MASTER
+!  do i = 1, n
+!    if (ABS(e_val(i)+lambda) > 1d-12) then
+!      tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3
+!    endif
+!  enddo
+!  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu2(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**3 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo  
+  !$OMP END MASTER
+  
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+  d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2))
+
+  deallocate(tmp_accu1, tmp_accu2)
+ 
+end
+#+END_SRC
+
+* OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2
+Version with OMP
+
+*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+\begin{align*}
+  \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+  &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} 
+  - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+  &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+  - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+\end{align*}
+
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| wtg        | double precision | temporary variable to store W^T.v_grad |
+| tmp_accu1  | double precision | temporary array for the first sum      |
+| tmp_accu2  | double precision | temporary array for the second sum     |
+| tmp_wtg(n) | double precision | temporary array for W^t.v_grad         |
+| i,j        | integer          | indexes                                |
+
+Function:
+| d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta)
+
+  use omp_lib
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: tmp_wtg(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision :: accu1, accu2, accu3
+  integer          :: i,j
+  double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:)
+
+  ! Functions
+  double precision :: d2_norm_inverse_trust_region_omp
+
+  ! Allocation
+  allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n))
+
+  ! OMP
+  call omp_set_max_active_levels(1)
+
+  ! OMP 
+  !$OMP PARALLEL                                         &
+      !$OMP PRIVATE(i,j)                                 &
+      !$OMP SHARED(n,lambda, e_val, thresh_eig,&
+      !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg,    &
+      !$OMP accu1, accu2, accu3)                         &
+      !$OMP DEFAULT(NONE)
+  
+  !$OMP MASTER
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+  !$OMP END MASTER
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu1(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO 
+  do i = 1, n
+    tmp_accu2(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    tmp_accu3(i) = 0d0
+  enddo
+  !$OMP END DO
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu1(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**2 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu1 = accu1 + tmp_accu1(i)
+  enddo  
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu2(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**3 
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu2 = accu2 + tmp_accu2(i)
+  enddo  
+  !$OMP END MASTER
+
+  !$OMP DO
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      tmp_accu3(i) = tmp_wtg(i)**2 /  (e_val(i) + lambda)**4
+    endif
+  enddo
+  !$OMP END DO
+
+  !$OMP MASTER
+  do i = 1, n
+    accu3 = accu3 + tmp_accu3(i)
+  enddo  
+  !$OMP END MASTER
+  
+  !$OMP END PARALLEL
+
+  call omp_set_max_active_levels(4)
+
+   d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) &
+    - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2)
+
+  deallocate(tmp_accu1,tmp_accu2,tmp_accu3)
+ 
+end
+#+END_SRC
+
+* First derivative of (1/||x||^2 - 1/Delta^2)^2
+Version without OMP
+
+*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+\begin{align*}
+  \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+  &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}}
+       {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} 
+     - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}}
+       {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\
+  &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}
+       \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+      - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right)
+\end{align*}
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} 
+\end{align*}
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| wtg   | double precision | temporary variable to store W^T.v_grad |
+| i,j   | integer          | indexes                                |
+
+Function:
+| d1_norm_inverse_trust_region | double precision | value of the first derivative |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision             :: wtg, accu1, accu2
+  integer                      :: i,j
+
+  ! Functions
+  double precision             :: d1_norm_inverse_trust_region
+  
+  accu1 = 0d0
+  accu2 = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+  
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3
+    endif
+  enddo
+
+  d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2))
+ 
+end
+#+END_SRC
+
+* Second derivative of (1/||x||^2 - 1/Delta^2)^2
+Version without OMP
+
+*Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2*
+
+This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2
+
+\begin{align*}
+  \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 
+  &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} 
+  - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\
+  &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3}
+  - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right]
+\end{align*}
+
+\begin{align*}
+\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\
+\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\
+\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4}
+\end{align*}
+
+Provided:
+| m_num | integer | number of MOs |
+
+Input:
+| n         | integer          | mo_num*(mo_num-1)/2         |
+| e_val(n)  | double precision | eigenvalues of the hessian  |
+| W(n,n)    | double precision | eigenvectors of the hessian |
+| v_grad(n) | double precision | gradient                    |
+| lambda    | double precision | Lagrange multiplier         |
+| delta     | double precision | Delta of the trust region   |
+
+Internal:
+| wtg   | double precision | temporary variable to store W^T.v_grad |
+| i,j   | integer          | indexes                                |
+
+Function:
+| d2_norm_inverse_trust_region | double precision | value of the first derivative |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f
+function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2
+  END_DOC
+
+  implicit none
+
+  ! Variables
+  
+  ! in
+  integer, intent(in)          :: n
+  double precision, intent(in) :: e_val(n)
+  double precision, intent(in) :: w(n,n)
+  double precision, intent(in) :: v_grad(n)
+  double precision, intent(in) :: lambda
+  double precision, intent(in) :: delta
+
+  ! Internal
+  double precision             :: wtg, accu1, accu2, accu3
+  integer                      :: i,j
+
+  ! Functions
+  double precision             :: d2_norm_inverse_trust_region
+  
+  accu1 = 0d0
+  accu2 = 0d0
+  accu3 = 0d0
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2
+    endif
+  enddo
+  
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3
+    endif
+  enddo
+
+  do i = 1, n
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      wtg = 0d0
+      do j = 1, n
+        wtg = wtg + w(j,i) * v_grad(j)
+      enddo
+      accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4
+    endif
+  enddo
+
+  d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) &
+    - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2)
+  
+end
+#+END_SRC
diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f
new file mode 100644
index 00000000..45738736
--- /dev/null
+++ b/src/utils_trust_region/trust_region_rho.irp.f
@@ -0,0 +1,121 @@
+! Agreement with the model: Rho
+
+! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)*
+
+! Rho represents the agreement between the model (the predicted energy
+! by the Taylor expansion truncated at the 2nd order) and the real
+! energy : 
+
+! \begin{equation}
+! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+! \end{equation}
+! With :
+! $E^{k}$ the energy at the previous iteration
+! $E^{k+1}$ the energy at the actual iteration
+! $m^{k+1}$ the predicted energy for the actual iteration
+! (cf. trust_e_model)
+
+! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$.
+! If $\rho \leq 0$ the previous energy is lower than the actual 
+! energy. We have to cancel the last step and use a smaller trust
+! region.
+! Here we cancel the last step if $\rho < 0.1$, because even if
+! the energy decreases, the agreement is bad, i.e., the Taylor expansion
+! truncated at the second order doesn't represent correctly the energy
+! landscape. So it's better to cancel the step and restart with a
+! smaller trust region.
+
+! Provided in qp_edit:
+! | thresh_rho |
+
+! Input:
+! | prev_energy | double precision | previous energy (energy before the rotation) |
+! | e_model     | double precision | predicted energy after the rotation          |
+
+! Output:
+! | rho         | double precision | the agreement between the model (predicted) and the real energy |
+! | prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy     |
+! |             |                  | else the previous energy doesn't change                         |
+
+! Internal:
+! | energy | double precision | energy (real) after the rotation |
+! | i      | integer          | index                            |
+! | t*     | double precision | time                             |
+
+
+subroutine trust_region_rho(prev_energy, energy,e_model,rho)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute rho, the agreement between the predicted criterion/energy and the real one
+  END_DOC
+
+  implicit none
+   
+  ! Variables
+
+  ! In
+  double precision, intent(inout) :: prev_energy
+  double precision, intent(in)    :: e_model, energy
+  
+  ! Out
+  double precision, intent(out)   :: rho
+
+  ! Internal
+  double precision                :: t1, t2, t3
+  integer                         :: i
+
+  print*,''
+  print*,'---Rho_model---'
+  
+  call wall_time(t1)
+
+! Rho
+! \begin{equation}
+! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+! \end{equation}
+
+! In function of $\rho$ th step can be accepted or cancelled.
+
+! If we cancel the last step (k+1), the previous energy (k) doesn't
+! change!
+! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) 
+
+
+! Already done in an other subroutine
+  !if (ABS(prev_energy - e_model) < 1d-12) then
+  !  print*,'WARNING: prev_energy - e_model < 1d-12'
+  !  print*,'=> rho will tend toward infinity'
+  !  print*,'Check you convergence criterion !'
+  !endif
+
+  rho = (prev_energy - energy) / (prev_energy - e_model)
+
+  print*, 'previous energy, prev_energy :', prev_energy
+  print*, 'predicted energy, e_model :', e_model
+  print*, 'real energy, energy :', energy
+  print*, 'prev_energy - energy :', prev_energy - energy
+  print*, 'prev_energy - e_model :', prev_energy - e_model
+  print*, 'Rho :', rho
+  print*, 'Threshold for rho:', thresh_rho
+
+  ! Modification of prev_energy in function of rho
+  if (rho < thresh_rho) then !0.1) then
+    ! the step is cancelled  
+    print*, 'Rho <', thresh_rho,', the previous energy does not changed'
+    print*, 'prev_energy :', prev_energy  
+  else
+    ! the step is accepted
+    prev_energy = energy
+    print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy
+  endif
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in rho model:', t3
+
+  print*,'---End rho_model---'
+  print*,''
+
+end subroutine
diff --git a/src/utils_trust_region/trust_region_rho.org b/src/utils_trust_region/trust_region_rho.org
new file mode 100644
index 00000000..9b25ee29
--- /dev/null
+++ b/src/utils_trust_region/trust_region_rho.org
@@ -0,0 +1,123 @@
+* Agreement with the model: Rho
+
+*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)*
+
+Rho represents the agreement between the model (the predicted energy
+by the Taylor expansion truncated at the 2nd order) and the real
+energy : 
+
+\begin{equation}
+\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+\end{equation}
+With :
+$E^{k}$ the energy at the previous iteration
+$E^{k+1}$ the energy at the actual iteration
+$m^{k+1}$ the predicted energy for the actual iteration
+(cf. trust_e_model)
+
+If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$.
+If $\rho \leq 0$ the previous energy is lower than the actual 
+energy. We have to cancel the last step and use a smaller trust
+region.
+Here we cancel the last step if $\rho < 0.1$, because even if
+the energy decreases, the agreement is bad, i.e., the Taylor expansion
+truncated at the second order doesn't represent correctly the energy
+landscape. So it's better to cancel the step and restart with a
+smaller trust region.
+
+Provided in qp_edit:
+| thresh_rho |
+
+Input:
+| prev_energy | double precision | previous energy (energy before the rotation) |
+| e_model     | double precision | predicted energy after the rotation          |
+
+Output:
+| rho         | double precision | the agreement between the model (predicted) and the real energy |
+| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy     |
+|             |                  | else the previous energy doesn't change                         |
+
+Internal:
+| energy | double precision | energy (real) after the rotation |
+| i      | integer          | index                            |
+| t*     | double precision | time                             |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f
+subroutine trust_region_rho(prev_energy, energy,e_model,rho)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute rho, the agreement between the predicted criterion/energy and the real one
+  END_DOC
+
+  implicit none
+   
+  ! Variables
+
+  ! In
+  double precision, intent(inout) :: prev_energy
+  double precision, intent(in)    :: e_model, energy
+  
+  ! Out
+  double precision, intent(out)   :: rho
+
+  ! Internal
+  double precision                :: t1, t2, t3
+  integer                         :: i
+
+  print*,''
+  print*,'---Rho_model---'
+  
+  call wall_time(t1)
+#+END_SRC
+
+** Rho
+\begin{equation}
+\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
+\end{equation}
+
+In function of $\rho$ th step can be accepted or cancelled.
+
+If we cancel the last step (k+1), the previous energy (k) doesn't
+change!
+If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) 
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f
+  ! Already done in an other subroutine
+  !if (ABS(prev_energy - e_model) < 1d-12) then
+  !  print*,'WARNING: prev_energy - e_model < 1d-12'
+  !  print*,'=> rho will tend toward infinity'
+  !  print*,'Check you convergence criterion !'
+  !endif
+
+  rho = (prev_energy - energy) / (prev_energy - e_model)
+
+  print*, 'previous energy, prev_energy :', prev_energy
+  print*, 'predicted energy, e_model :', e_model
+  print*, 'real energy, energy :', energy
+  print*, 'prev_energy - energy :', prev_energy - energy
+  print*, 'prev_energy - e_model :', prev_energy - e_model
+  print*, 'Rho :', rho
+  print*, 'Threshold for rho:', thresh_rho
+
+  ! Modification of prev_energy in function of rho
+  if (rho < thresh_rho) then !0.1) then
+    ! the step is cancelled  
+    print*, 'Rho <', thresh_rho,', the previous energy does not changed'
+    print*, 'prev_energy :', prev_energy  
+  else
+    ! the step is accepted
+    prev_energy = energy
+    print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy
+  endif
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in rho model:', t3
+
+  print*,'---End rho_model---'
+  print*,''
+
+end subroutine
+#+END_SRC
diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f
new file mode 100644
index 00000000..42aa6ed4
--- /dev/null
+++ b/src/utils_trust_region/trust_region_step.irp.f
@@ -0,0 +1,716 @@
+! Trust region
+
+! *Compute the next step with the trust region algorithm*
+
+! The Newton method is an iterative method to find a minimum of a given
+! function. It uses a Taylor series truncated at the second order of the
+! targeted function and gives its minimizer. The minimizer is taken as
+! the new position and the same thing is done. And by doing so
+! iteratively the method find a minimum, a local or global one depending
+! of the starting point and the convexity/nonconvexity of the targeted
+! function.  
+
+! The goal of the trust region is to constrain the step size of the
+! Newton method in a certain area around the actual position, where the 
+! Taylor series is a good approximation of the targeted function. This
+! area is called the "trust region".
+
+! In addition, in function of the agreement between the Taylor
+! development of the energy and the real energy, the size of the trust
+! region will be updated at each iteration. By doing so, the step sizes
+! are not too larges. In addition, since we add a criterion to cancel the
+! step if the energy increases (more precisely if rho < 0.1), so it's
+! impossible to diverge. \newline
+
+! References: \newline
+! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline
+! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline
+! ISBN: 978-0-387-40065-5 \newline
+
+! By using the first and the second derivatives, the Newton method gives
+! a step:
+! \begin{align*}
+!   \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot 
+!   \textbf{g}_{(k)}
+! \end{align*}
+! which leads to the minimizer of the Taylor series.
+! !!! Warning: the Newton method gives the minimizer if and only if
+! $\textbf{H}$ is positive definite, else it leads to a saddle point !!!
+! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm:
+! \begin{align*}
+!   ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)}
+! \end{align*}
+! which is equivalent to 
+! \begin{align*}
+!   \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2
+! \end{align*}
+
+! with: \newline
+! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of
+! size n) \newline
+! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n
+! matrix) \newline
+! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of
+! size n) \newline
+! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration
+! \newline
+
+! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a
+! hypersphere of radius $\Delta_{(k+1)}$.\newline
+
+! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and
+! $\textbf{H}$ is positive definite, the
+! solution is the step given by the Newton method
+! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$.
+! Else we have to constrain the step size. For simplicity we will remove
+! the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have
+! to put a constraint on $\textbf{x}$ with a Lagrange multiplier.
+! Starting from the Taylor series of a function E (here, the energy)
+! truncated at the 2nd order, we have:
+! \begin{align*}
+!   E(\textbf{x}) =  E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2}
+!   \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +
+!   \mathcal{O}(\textbf{x}^2)
+! \end{align*} 
+
+! With the constraint on the norm of $\textbf{x}$ we can write the
+! Lagrangian
+! \begin{align*}
+!   \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x}
+!   + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} 
+!   + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2)
+! \end{align*}
+! Where: \newline
+! $\lambda$ is the Lagrange multiplier \newline
+! $E$ is the energy at the k-th iteration $\Leftrightarrow
+! E(\textbf{x} = \textbf{0})$ \newline
+
+! To solve this equation, we search a stationary point where the first
+! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e.
+! \begin{align*}
+!   \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0
+! \end{align*}
+
+! The derivative is:
+! \begin{align*}
+!   \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
+!   = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} 
+! \end{align*}
+
+! So, we search $\textbf{x}$ such as:
+! \begin{align*}
+! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
+! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0
+! \end{align*}
+
+! We can rewrite that as:
+! \begin{align*}
+!   \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} 
+!   = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0
+! \end{align*}
+! with $\textbf{I}$ is the identity matrix. 
+
+! By doing so, the solution is:
+! \begin{align*}
+!   (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g}
+! \end{align*}
+! \begin{align*}
+!   \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
+! \end{align*}
+! with $\textbf{x}^T \textbf{x} = \Delta^2$.
+
+! We have to solve this previous equation to find this $\textbf{x}$ in the
+! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is
+! just a one dimension problem because we can express $\textbf{x}$ as a
+! function of $\lambda$: 
+! \begin{align*}
+!   \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
+! \end{align*}
+
+! We start from the fact that the hessian is diagonalizable. So we have: 
+! \begin{align*}
+!   \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T
+! \end{align*}
+! with: \newline
+! $\textbf{H}$, the hessian matrix \newline
+! $\textbf{W}$, the matrix containing the eigenvectors \newline
+! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline
+! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline
+! $h_i$, the i-th eigenvalue in ascending order \newline
+
+! Now we use the fact that adding a constant on the diagonal just shifts
+! the eigenvalues:
+! \begin{align*}
+!   \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} 
+!   +\textbf{I} \lambda) \cdot \textbf{W}^T
+! \end{align*}
+
+! By doing so we can express $\textbf{x}$ as a function of $\lambda$
+! \begin{align*}
+!   \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot 
+!   \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+! \end{align*}
+! with $\lambda \neq - h_i$.
+
+! An interesting thing in our case is the norm of $\textbf{x}$,
+! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of
+! the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have:
+! \begin{align*}
+!   ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot 
+!   \textbf{g})^2}{(h_i + \lambda)^2}
+! \end{align*}
+
+! So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. 
+! And if we study the properties of this function we see that: 
+! \begin{align*}
+!   \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0
+! \end{align*}
+! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: 
+! \begin{align*}
+!   \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty
+! \end{align*}
+
+! From these limits and knowing that $h_1$ is the lowest eigenvalue, we
+! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and
+! strictly decreasing function on the interval $\lambda \in
+! (-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which
+! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one
+! solution. 
+
+! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot
+! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly,
+! $\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the
+! Newton method is only defined for a positive definite hessian matrix,
+! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive
+! definite. Consequently, in the case where $\textbf{H}$ is not positive
+! definite, to ensure the positive definiteness, $\lambda$ must be
+! greater than $- h_1$.
+! \begin{align*}
+!   \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1
+! \end{align*} 
+
+! From that there are five cases:
+! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$
+! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot
+!   \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I}
+!   \lambda)$ 
+!   must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$
+! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
+!   \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing
+!   $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
+!   positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$)
+! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
+!   \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing
+!   $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
+!   positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is
+!   similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda =
+!   0)|| \leq \Delta$
+!   but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
+!   time a constant to ensure the condition $||\textbf{x}(\lambda =
+!   -h_1)|| = \Delta$ and escape from the saddle point
+
+! Thus to find the solution, we can write: 
+! \begin{align*}
+!   ||\textbf{x}(\lambda)|| = \Delta
+! \end{align*}
+! \begin{align*}
+!   ||\textbf{x}(\lambda)|| - \Delta = 0
+! \end{align*}
+
+! Taking the square of this equation
+! \begin{align*}
+!   (||\textbf{x}(\lambda)|| - \Delta)^2 = 0
+! \end{align*}
+! we have a function with one minimum for the optimal $\lambda$.
+! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve
+! \begin{align*}
+!   (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
+! \end{align*}
+
+! But in practice, it is more effective to solve:
+! \begin{align*}
+!   (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
+! \end{align*}
+
+! To do that, we just use the Newton method with "trust_newton" using
+! first and second derivative of $(||\textbf{x}(\lambda)||^2 -
+! \Delta^2)^2$ with respect to $\textbf{x}$.
+! This will give the optimal $\lambda$ to compute the
+! solution $\textbf{x}$ with the formula seen previously:
+! \begin{align*}
+!   \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
+!   \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+! \end{align*}
+
+! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our
+! step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$.
+
+
+
+
+! Evolution of the trust region
+
+! We initialize the trust region at the first iteration using a radius
+! \begin{align*}
+!   \Delta = ||\textbf{x}(\lambda=0)||
+! \end{align*}
+
+! And for the next iteration the trust region will evolves depending of
+! the agreement of the energy prediction based on the Taylor series
+! truncated at the 2nd order and the real energy. If the Taylor series
+! truncated at the 2nd order represents correctly the energy landscape
+! the trust region will be extent else it will be reduced. In order to
+! mesure this agreement we use the ratio rho cf. "rho_model" and
+! "trust_e_model". From that we use the following values:
+! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$,
+! - if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, 
+! - if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, 
+! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$.
+
+! In addition, if $\rho < 0.1$ the iteration is cancelled, so it
+! restarts with a smaller trust region until the energy decreases.
+
+
+
+
+! Summary
+
+! To summarize, knowing the hessian (eigenvectors and eigenvalues), the
+! gradient and the radius of the trust region we can compute the norm of
+! the Newton step  
+! \begin{align*}
+!   ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n 
+!   \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0
+! \end{align*}
+
+! - if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and
+!   $\textbf{x}(\lambda=0)$ is in the trust region and it is not
+!   necessary to put a constraint on $\textbf{x}$, the solution is the
+!   unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$.
+! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
+!   $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in
+!   the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda =
+!   -h_1)$, similarly to the previous case.
+!   But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
+!   time a constant to ensure the condition $||\textbf{x}(\lambda =
+!   -h_1)|| = \Delta$ and escape from the saddle point
+! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we
+!   have to search $\lambda \in (-h_1, \infty)$ such as
+!   $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method 
+!   \begin{align*}
+!     (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
+!   \end{align*}
+!   or
+!   \begin{align*}
+!     (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
+!   \end{align*}
+!   which is numerically more stable. And finally compute 
+!   \begin{align*}
+!     \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
+!     \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+!   \end{align*}
+! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we
+!   do exactly the same thing that the previous case but we search
+!   $\lambda \in (0, \infty)$ 
+! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
+!   $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the
+!   sum), again we do exactly the same thing that the previous case
+!   searching $\lambda \in (-h_1, \infty)$.
+  
+
+! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not
+! necessary in fact to remove the $j = 1$ in the sum since the term
+! where $h_i - \lambda < 10^{-6}$ are not computed.
+
+! After that, we take this vector $\textbf{x}^*$, called "x", and we do
+! the transformation to an antisymmetric matrix $\textbf{X}$, called
+! m_x. This matrix $\textbf{X}$ will be used to compute a rotation
+! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix".
+
+! NB: 
+! An improvement can be done using a elleptical trust region.
+
+
+
+
+! Code
+
+! Provided:
+! | mo_num | integer | number of MOs |
+
+! Cf. qp_edit in orbital optimization section, for some constants/thresholds
+
+! Input:
+! | m         | integer          | number of MOs                                |
+! | n         | integer          | m*(m-1)/2                                       |
+! | H(n, n)   | double precision | hessian                                         |
+! | v_grad(n) | double precision | gradient                                        |
+! | e_val(n)  | double precision | eigenvalues of the hessian                      |
+! | W(n, n)   | double precision | eigenvectors of the hessian                     |
+! | rho       | double precision | agreement between the model and the reality,    |
+! |           |                  | represents the quality of the energy prediction |
+! | nb_iter   | integer          | number of iteration                             |
+
+! Input/Ouput:
+! | delta | double precision | radius of the trust region |
+
+! Output:
+! | x(n)      | double precision | vector containing the step |
+
+! Internal:
+! | accu          | double precision | temporary variable to compute the step       |
+! | lambda        | double precision | lagrange multiplier                          |
+! | trust_radius2 | double precision | square of the radius of the trust region     |
+! | norm2_x       | double precision | norm^2 of the vector x                       |
+! | norm2_g       | double precision | norm^2 of the vector containing the gradient |
+! | tmp_wtg(n)    | double precision | tmp_wtg(i) = w_i^T . g                       |
+! | i, j, k       | integer          | indexes                                      |
+
+! Function:
+! | dnrm2                   | double precision | Blas function computing the norm       |
+! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) |
+
+
+subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compuet the step in the trust region
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)             :: n
+  double precision, intent(in)    :: v_grad(n), rho
+  integer, intent(inout)          :: nb_iter
+  double precision, intent(in)    :: e_val(n), w(n,n)
+
+  ! inout
+  double precision, intent(inout) :: delta
+
+  ! out
+  double precision, intent(out)   :: x(n)
+
+  ! Internal
+  double precision                :: accu, lambda, trust_radius2
+  double precision                :: norm2_x, norm2_g
+  double precision, allocatable   :: tmp_wtg(:)
+  integer                         :: i,j,k
+  double precision                :: t1,t2,t3
+  integer                         :: n_neg_eval
+
+
+  ! Functions
+  double precision                :: ddot, dnrm2
+  double precision                :: f_norm_trust_region_omp
+
+  print*,''
+  print*,'=================='
+  print*,'---Trust_region---'
+  print*,'=================='
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(tmp_wtg(n))
+
+! Initialization and norm
+
+! The norm of the step size will be useful for the trust region
+! algorithm. We start from a first guess and the radius of the trust
+! region will evolve during the optimization.
+
+! avoid_saddle is actually a test to avoid saddle points
+
+
+! Initialization of the Lagrange multiplier
+lambda = 0d0
+
+! List of w^T.g, to avoid the recomputation
+tmp_wtg = 0d0
+do j = 1, n
+  do i = 1, n
+    tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i)
+  enddo
+enddo
+
+! Replacement of the small tmp_wtg corresponding to a negative eigenvalue
+! in the case of avoid_saddle
+if (avoid_saddle .and. e_val(1) < - thresh_eig) then
+  i = 2
+  ! Number of negative eigenvalues
+  do while (e_val(i) < - thresh_eig)
+    if (tmp_wtg(i) < thresh_wtg2) then
+      if (version_avoid_saddle == 1) then  
+        tmp_wtg(i) = 1d0
+      elseif  (version_avoid_saddle == 2) then  
+        tmp_wtg(i) = DABS(e_val(i))
+      elseif  (version_avoid_saddle == 3) then  
+        tmp_wtg(i) = dsqrt(DABS(e_val(i)))
+      else
+        tmp_wtg(i) = thresh_wtg2
+      endif
+    endif
+    i = i + 1
+  enddo
+
+  ! For the fist one it's a little bit different
+  if (tmp_wtg(1) < thresh_wtg2) then 
+    tmp_wtg(1) = 0d0
+  endif
+
+endif 
+
+! Norm^2 of x, ||x||^2
+norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
+! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta
+! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm
+! Anyway if the step is too big it will be reduced
+print*,'||x||^2 :', norm2_x
+
+! Norm^2 of the gradient, ||v_grad||^2
+norm2_g = (dnrm2(n,v_grad,1))**2
+print*,'||grad||^2 :', norm2_g
+
+! Trust radius initialization
+
+!     At the first iteration (nb_iter = 0) we initialize the trust region
+!     with the norm of the step generate by the Newton's method ($\textbf{x}_1 =
+!     (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$,
+!     we compute this norm using f_norm_trust_region_omp as explain just
+!     below) 
+
+
+! trust radius
+if (nb_iter == 0) then
+   trust_radius2 = norm2_x 
+   ! To avoid infinite loop of cancellation of this first step
+   ! without changing delta
+   nb_iter = 1
+
+   ! Compute delta, delta = sqrt(trust_radius)
+   delta = dsqrt(trust_radius2)
+endif
+
+! Modification of the trust radius
+
+! In function of rho (which represents the agreement between the model
+! and the reality, cf. rho_model) the trust region evolves. We update
+! delta (the radius of the trust region).
+
+! To avoid too big trust region we put a maximum size.
+
+
+! Modification of the trust radius in function of rho
+if (rho >= 0.75d0) then
+   delta = 2d0 * delta
+elseif (rho >= 0.5d0) then
+   delta = delta
+elseif (rho >= 0.25d0) then
+   delta = 0.5d0 * delta
+else
+   delta = 0.25d0 * delta
+endif
+
+! Maximum size of the trust region
+!if (delta > 0.5d0 * n * pi) then
+!  delta = 0.5d0 * n * pi
+!  print*,'Delta > delta_max, delta = 0.5d0 * n * pi'
+!endif
+
+if (delta > 1d10) then
+  delta = 1d10
+endif
+
+print*, 'Delta :', delta
+
+! Calculation of the optimal lambda
+
+! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$
+! - If $||\textbf{x}|| > \Delta$  or $h_1 < 0$ we have to add a constant
+!   $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$
+! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the
+!   unconstrained one, $\lambda = 0$
+
+! You will find more details at the beginning
+
+
+! By giving delta, we search (||x||^2 - delta^2)^2 = 0
+! and not (||x||^2 - delta)^2 = 0
+
+! Research of lambda to solve ||x(lambda)|| = Delta 
+
+! Display
+print*, 'e_val(1) = ', e_val(1)
+print*, 'w_1^T.g =', tmp_wtg(1)
+
+! H positive definite 
+if (e_val(1) > - thresh_eig) then
+  norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
+  print*, '||x(0)||=', dsqrt(norm2_x)
+  print*, 'Delta=', delta
+
+  ! H positive definite, ||x(lambda = 0)|| <= Delta
+  if (dsqrt(norm2_x) <= delta) then 
+    print*, 'H positive definite, ||x(lambda = 0)|| <= Delta'
+    print*, 'lambda = 0, no lambda optimization'
+    lambda = 0d0
+
+  ! H positive definite, ||x(lambda = 0)|| > Delta
+  else
+    ! Constraint solution
+    print*, 'H positive definite, ||x(lambda = 0)|| > Delta' 
+    print*,'Computation of the optimal lambda...'
+    call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+  endif
+
+! H indefinite
+else
+  if (DABS(tmp_wtg(1)) < thresh_wtg) then
+    norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1))
+    print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) 
+  endif
+
+  ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta 
+  if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then
+    ! Add e_val(1) in order to have (H - e_val(1) I) positive definite
+    print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta'
+    print*, 'lambda = -e_val(1), no lambda optimization'
+    lambda = - e_val(1)
+
+  ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta
+  ! and
+  ! H indefinite, w_1^T.g =/= 0
+  else
+    ! Constraint solution/ add lambda
+    if (DABS(tmp_wtg(1)) < thresh_wtg) then
+       print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta'
+    else
+       print*, 'H indefinite, w_1^T.g =/= 0'
+    endif
+    print*, 'Computation of the optimal lambda...'
+    call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+    endif
+
+endif
+
+! Recomputation of the norm^2 of the step x
+norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+print*,''
+print*,'Summary after the trust region:'
+print*,'lambda:', lambda
+print*,'||x||:', dsqrt(norm2_x)
+print*,'delta:', delta
+
+! Calculation of the step x
+
+! x refers to $\textbf{x}^*$
+! We compute x in function of lambda using its formula :
+! \begin{align*}
+! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i 
+! + \lambda} \cdot \textbf{w}_i
+! \end{align*}
+
+
+! Initialisation
+x = 0d0
+
+! Calculation of the step x
+
+! Normal version
+if (.not. absolute_eig) then
+
+  do i = 1, n 
+    if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+      do j = 1, n
+        x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda)
+      enddo
+    endif
+  enddo
+
+! Version to use the absolute value of the eigenvalues
+else
+
+  do i = 1, n 
+    if (DABS(e_val(i)) > thresh_eig) then
+      do j = 1, n
+        x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda)
+      enddo
+    endif
+  enddo
+
+endif
+
+double precision :: beta, norm_x
+
+! Test
+! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1)
+! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first 
+! eigenvectors multiply by a constant to ensure the condition
+! ||x(lambda=-e_val(1))|| = delta and escape the saddle point
+if (avoid_saddle .and. e_val(1) < - thresh_eig) then
+  if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then
+
+    ! norm of x
+    norm_x = dnrm2(n,x,1)
+
+    ! Computes the coefficient for the w_1 
+    beta = delta**2 - norm_x**2
+
+    ! Updates the step x
+    x = x + W(:,1) * dsqrt(beta)
+
+    ! Recomputes the norm to check
+    norm_x = dnrm2(n,x,1)
+
+    print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):'
+    print*, '||x||', norm_x 
+  endif
+endif
+
+! Transformation of x
+
+! x is a vector of size n, so it can be write as a m by m
+! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index".
+
+
+!  ! Step transformation vector -> matrix
+!  ! Vector with n element -> mo_num by mo_num matrix
+!  do j = 1, m
+!     do i = 1, m
+!        if (i>j) then
+!           call mat_to_vec_index(i,j,k)
+!           m_x(i,j) = x(k)
+!        else
+!           m_x(i,j) = 0d0
+!        endif
+!     enddo
+!  enddo
+!
+!  ! Antisymmetrization of the previous matrix
+!  do j = 1, m
+!     do i = 1, m
+!        if (i<j) then
+!           m_x(i,j) = - m_x(j,i)
+!        endif
+!     enddo
+!  enddo
+
+! Deallocation, end
+
+
+deallocate(tmp_wtg)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust_region:', t3
+  print*,'======================'
+  print*,'---End trust_region---'
+  print*,'======================'
+  print*,''
+
+end
diff --git a/src/utils_trust_region/trust_region_step.org b/src/utils_trust_region/trust_region_step.org
new file mode 100644
index 00000000..331453a3
--- /dev/null
+++ b/src/utils_trust_region/trust_region_step.org
@@ -0,0 +1,726 @@
+* Trust region
+
+*Compute the next step with the trust region algorithm*
+
+The Newton method is an iterative method to find a minimum of a given
+function. It uses a Taylor series truncated at the second order of the
+targeted function and gives its minimizer. The minimizer is taken as
+the new position and the same thing is done. And by doing so
+iteratively the method find a minimum, a local or global one depending
+of the starting point and the convexity/nonconvexity of the targeted
+function.  
+
+The goal of the trust region is to constrain the step size of the
+Newton method in a certain area around the actual position, where the 
+Taylor series is a good approximation of the targeted function. This
+area is called the "trust region".
+
+In addition, in function of the agreement between the Taylor
+development of the energy and the real energy, the size of the trust
+region will be updated at each iteration. By doing so, the step sizes
+are not too larges. In addition, since we add a criterion to cancel the
+step if the energy increases (more precisely if rho < 0.1), so it's
+impossible to diverge. \newline
+
+References: \newline
+Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline
+https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline
+ISBN: 978-0-387-40065-5 \newline
+
+By using the first and the second derivatives, the Newton method gives
+a step:
+\begin{align*}
+  \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot 
+  \textbf{g}_{(k)}
+\end{align*}
+which leads to the minimizer of the Taylor series.
+!!! Warning: the Newton method gives the minimizer if and only if
+$\textbf{H}$ is positive definite, else it leads to a saddle point !!!
+But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm:
+\begin{align*}
+  ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)}
+\end{align*}
+which is equivalent to 
+\begin{align*}
+  \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2
+\end{align*}
+
+with: \newline
+$\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of
+size n) \newline
+$\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n
+matrix) \newline
+$\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of
+size n) \newline
+$\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration
+\newline
+
+Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a
+hypersphere of radius $\Delta_{(k+1)}$.\newline
+
+So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and
+$\textbf{H}$ is positive definite, the
+solution is the step given by the Newton method
+$\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$.
+Else we have to constrain the step size. For simplicity we will remove
+the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have
+to put a constraint on $\textbf{x}$ with a Lagrange multiplier.
+Starting from the Taylor series of a function E (here, the energy)
+truncated at the 2nd order, we have:
+\begin{align*}
+  E(\textbf{x}) =  E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2}
+  \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +
+  \mathcal{O}(\textbf{x}^2)
+\end{align*} 
+
+With the constraint on the norm of $\textbf{x}$ we can write the
+Lagrangian
+\begin{align*}
+  \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x}
+  + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} 
+  + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2)
+\end{align*}
+Where: \newline
+$\lambda$ is the Lagrange multiplier \newline
+$E$ is the energy at the k-th iteration $\Leftrightarrow
+E(\textbf{x} = \textbf{0})$ \newline
+
+To solve this equation, we search a stationary point where the first
+derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e.
+\begin{align*}
+  \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0
+\end{align*}
+
+The derivative is:
+\begin{align*}
+  \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
+  = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} 
+\end{align*}
+
+So, we search $\textbf{x}$ such as:
+\begin{align*}
+\frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
+= \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0
+\end{align*}
+
+We can rewrite that as:
+\begin{align*}
+  \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} 
+  = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0
+\end{align*}
+with $\textbf{I}$ is the identity matrix. 
+
+By doing so, the solution is:
+\begin{align*}
+  (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g}
+\end{align*}
+\begin{align*}
+  \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
+\end{align*}
+with $\textbf{x}^T \textbf{x} = \Delta^2$.
+
+We have to solve this previous equation to find this $\textbf{x}$ in the
+trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is
+just a one dimension problem because we can express $\textbf{x}$ as a
+function of $\lambda$: 
+\begin{align*}
+  \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
+\end{align*}
+
+We start from the fact that the hessian is diagonalizable. So we have: 
+\begin{align*}
+  \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T
+\end{align*}
+with: \newline
+$\textbf{H}$, the hessian matrix \newline
+$\textbf{W}$, the matrix containing the eigenvectors \newline
+$\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline
+$\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline
+$h_i$, the i-th eigenvalue in ascending order \newline
+
+Now we use the fact that adding a constant on the diagonal just shifts
+the eigenvalues:
+\begin{align*}
+  \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} 
+  +\textbf{I} \lambda) \cdot \textbf{W}^T
+\end{align*}
+
+By doing so we can express $\textbf{x}$ as a function of $\lambda$
+\begin{align*}
+  \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot 
+  \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+\end{align*}
+with $\lambda \neq - h_i$.
+
+An interesting thing in our case is the norm of $\textbf{x}$,
+because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of
+the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have:
+\begin{align*}
+  ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot 
+  \textbf{g})^2}{(h_i + \lambda)^2}
+\end{align*}
+
+So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. 
+And if we study the properties of this function we see that: 
+\begin{align*}
+  \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0
+\end{align*}
+and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: 
+\begin{align*}
+  \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty
+\end{align*}
+
+From these limits and knowing that $h_1$ is the lowest eigenvalue, we
+can conclude that $||\textbf{x}(\lambda)||$ is a continuous and
+strictly decreasing function on the interval $\lambda \in
+(-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which
+gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one
+solution. 
+
+Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot
+\textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly,
+$\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the
+Newton method is only defined for a positive definite hessian matrix,
+so $(\textbf{H} + \textbf{I} \lambda)$ must be positive
+definite. Consequently, in the case where $\textbf{H}$ is not positive
+definite, to ensure the positive definiteness, $\lambda$ must be
+greater than $- h_1$.
+\begin{align*}
+  \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1
+\end{align*} 
+
+From that there are five cases:
+- if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$
+- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot
+  \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I}
+  \lambda)$ 
+  must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$
+- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
+  \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing
+  $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
+  positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$)
+- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
+  \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing
+  $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
+  positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is
+  similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda =
+  0)|| \leq \Delta$
+  but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
+  time a constant to ensure the condition $||\textbf{x}(\lambda =
+  -h_1)|| = \Delta$ and escape from the saddle point
+
+Thus to find the solution, we can write: 
+\begin{align*}
+  ||\textbf{x}(\lambda)|| = \Delta
+\end{align*}
+\begin{align*}
+  ||\textbf{x}(\lambda)|| - \Delta = 0
+\end{align*}
+
+Taking the square of this equation
+\begin{align*}
+  (||\textbf{x}(\lambda)|| - \Delta)^2 = 0
+\end{align*}
+we have a function with one minimum for the optimal $\lambda$.
+Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve
+\begin{align*}
+  (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
+\end{align*}
+
+But in practice, it is more effective to solve:
+\begin{align*}
+  (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
+\end{align*}
+
+To do that, we just use the Newton method with "trust_newton" using
+first and second derivative of $(||\textbf{x}(\lambda)||^2 -
+\Delta^2)^2$ with respect to $\textbf{x}$.
+This will give the optimal $\lambda$ to compute the
+solution $\textbf{x}$ with the formula seen previously:
+\begin{align*}
+  \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
+  \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+\end{align*}
+
+The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our
+step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$.
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f 
+#+END_SRC
+
+** Evolution of the trust region
+
+We initialize the trust region at the first iteration using a radius
+\begin{align*}
+  \Delta = ||\textbf{x}(\lambda=0)||
+\end{align*}
+
+And for the next iteration the trust region will evolves depending of
+the agreement of the energy prediction based on the Taylor series
+truncated at the 2nd order and the real energy. If the Taylor series
+truncated at the 2nd order represents correctly the energy landscape
+the trust region will be extent else it will be reduced. In order to
+mesure this agreement we use the ratio rho cf. "rho_model" and
+"trust_e_model". From that we use the following values:
+- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$,
+- if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, 
+- if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, 
+- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$.
+
+In addition, if $\rho < 0.1$ the iteration is cancelled, so it
+restarts with a smaller trust region until the energy decreases.
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f 
+#+END_SRC
+
+** Summary
+
+To summarize, knowing the hessian (eigenvectors and eigenvalues), the
+gradient and the radius of the trust region we can compute the norm of
+the Newton step  
+\begin{align*}
+  ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n 
+  \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0
+\end{align*}
+
+- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and
+  $\textbf{x}(\lambda=0)$ is in the trust region and it is not
+  necessary to put a constraint on $\textbf{x}$, the solution is the
+  unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$.
+- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
+  $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in
+  the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda =
+  -h_1)$, similarly to the previous case.
+  But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
+  time a constant to ensure the condition $||\textbf{x}(\lambda =
+  -h_1)|| = \Delta$ and escape from the saddle point
+- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we
+  have to search $\lambda \in (-h_1, \infty)$ such as
+  $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method 
+  \begin{align*}
+    (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
+  \end{align*}
+  or
+  \begin{align*}
+    (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
+  \end{align*}
+  which is numerically more stable. And finally compute 
+  \begin{align*}
+    \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
+    \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
+  \end{align*}
+- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we
+  do exactly the same thing that the previous case but we search
+  $\lambda \in (0, \infty)$ 
+- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
+  $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the
+  sum), again we do exactly the same thing that the previous case
+  searching $\lambda \in (-h_1, \infty)$.
+  
+
+For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not
+necessary in fact to remove the $j = 1$ in the sum since the term
+where $h_i - \lambda < 10^{-6}$ are not computed.
+
+After that, we take this vector $\textbf{x}^*$, called "x", and we do
+the transformation to an antisymmetric matrix $\textbf{X}$, called
+m_x. This matrix $\textbf{X}$ will be used to compute a rotation
+matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix".
+
+NB: 
+An improvement can be done using a elleptical trust region.
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f 
+#+END_SRC
+
+** Code
+
+Provided:
+| mo_num | integer | number of MOs |
+
+Cf. qp_edit in orbital optimization section, for some constants/thresholds
+
+Input:
+| m         | integer          | number of MOs                                |
+| n         | integer          | m*(m-1)/2                                       |
+| H(n, n)   | double precision | hessian                                         |
+| v_grad(n) | double precision | gradient                                        |
+| e_val(n)  | double precision | eigenvalues of the hessian                      |
+| W(n, n)   | double precision | eigenvectors of the hessian                     |
+| rho       | double precision | agreement between the model and the reality,    |
+|           |                  | represents the quality of the energy prediction |
+| nb_iter   | integer          | number of iteration                             |
+
+Input/Ouput:
+| delta | double precision | radius of the trust region |
+
+Output:
+| x(n)      | double precision | vector containing the step |
+
+Internal:
+| accu          | double precision | temporary variable to compute the step       |
+| lambda        | double precision | lagrange multiplier                          |
+| trust_radius2 | double precision | square of the radius of the trust region     |
+| norm2_x       | double precision | norm^2 of the vector x                       |
+| norm2_g       | double precision | norm^2 of the vector containing the gradient |
+| tmp_wtg(n)    | double precision | tmp_wtg(i) = w_i^T . g                       |
+| i, j, k       | integer          | indexes                                      |
+
+Function:
+| dnrm2                   | double precision | Blas function computing the norm       |
+| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) |
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compuet the step in the trust region
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer, intent(in)             :: n
+  double precision, intent(in)    :: v_grad(n), rho
+  integer, intent(inout)          :: nb_iter
+  double precision, intent(in)    :: e_val(n), w(n,n)
+
+  ! inout
+  double precision, intent(inout) :: delta
+
+  ! out
+  double precision, intent(out)   :: x(n)
+
+  ! Internal
+  double precision                :: accu, lambda, trust_radius2
+  double precision                :: norm2_x, norm2_g
+  double precision, allocatable   :: tmp_wtg(:)
+  integer                         :: i,j,k
+  double precision                :: t1,t2,t3
+  integer                         :: n_neg_eval
+
+
+  ! Functions
+  double precision                :: ddot, dnrm2
+  double precision                :: f_norm_trust_region_omp
+
+  print*,''
+  print*,'=================='
+  print*,'---Trust_region---'
+  print*,'=================='
+
+  call wall_time(t1)
+
+  ! Allocation
+  allocate(tmp_wtg(n))
+#+END_SRC
+
+
+*** Initialization and norm
+
+The norm of the step size will be useful for the trust region
+algorithm. We start from a first guess and the radius of the trust
+region will evolve during the optimization.
+
+avoid_saddle is actually a test to avoid saddle points
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f                                                                                                                                                                                            
+  ! Initialization of the Lagrange multiplier
+  lambda = 0d0
+
+  ! List of w^T.g, to avoid the recomputation
+  tmp_wtg = 0d0
+  do j = 1, n
+    do i = 1, n
+      tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i)
+    enddo
+  enddo
+
+  ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue
+  ! in the case of avoid_saddle
+  if (avoid_saddle .and. e_val(1) < - thresh_eig) then
+    i = 2
+    ! Number of negative eigenvalues
+    do while (e_val(i) < - thresh_eig)
+      if (tmp_wtg(i) < thresh_wtg2) then
+        if (version_avoid_saddle == 1) then  
+          tmp_wtg(i) = 1d0
+        elseif  (version_avoid_saddle == 2) then  
+          tmp_wtg(i) = DABS(e_val(i))
+        elseif  (version_avoid_saddle == 3) then  
+          tmp_wtg(i) = dsqrt(DABS(e_val(i)))
+        else
+          tmp_wtg(i) = thresh_wtg2
+        endif
+      endif
+      i = i + 1
+    enddo
+
+    ! For the fist one it's a little bit different
+    if (tmp_wtg(1) < thresh_wtg2) then 
+      tmp_wtg(1) = 0d0
+    endif
+
+  endif 
+
+  ! Norm^2 of x, ||x||^2
+  norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
+  ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta
+  ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm
+  ! Anyway if the step is too big it will be reduced
+  print*,'||x||^2 :', norm2_x
+
+  ! Norm^2 of the gradient, ||v_grad||^2
+  norm2_g = (dnrm2(n,v_grad,1))**2
+  print*,'||grad||^2 :', norm2_g
+#+END_SRC
+
+*** Trust radius initialization
+
+    At the first iteration (nb_iter = 0) we initialize the trust region
+    with the norm of the step generate by the Newton's method ($\textbf{x}_1 =
+    (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$,
+    we compute this norm using f_norm_trust_region_omp as explain just
+    below) 
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  ! trust radius
+  if (nb_iter == 0) then
+     trust_radius2 = norm2_x 
+     ! To avoid infinite loop of cancellation of this first step
+     ! without changing delta
+     nb_iter = 1
+
+     ! Compute delta, delta = sqrt(trust_radius)
+     delta = dsqrt(trust_radius2)
+  endif
+#+END_SRC
+
+*** Modification of the trust radius
+
+In function of rho (which represents the agreement between the model
+and the reality, cf. rho_model) the trust region evolves. We update
+delta (the radius of the trust region).
+
+To avoid too big trust region we put a maximum size.
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  ! Modification of the trust radius in function of rho
+  if (rho >= 0.75d0) then
+     delta = 2d0 * delta
+  elseif (rho >= 0.5d0) then
+     delta = delta
+  elseif (rho >= 0.25d0) then
+     delta = 0.5d0 * delta
+  else
+     delta = 0.25d0 * delta
+  endif
+
+  ! Maximum size of the trust region
+  !if (delta > 0.5d0 * n * pi) then
+  !  delta = 0.5d0 * n * pi
+  !  print*,'Delta > delta_max, delta = 0.5d0 * n * pi'
+  !endif
+
+  if (delta > 1d10) then
+    delta = 1d10
+  endif
+
+  print*, 'Delta :', delta
+#+END_SRC 
+  
+*** Calculation of the optimal lambda
+
+We search the solution of $(||x||^2 - \Delta^2)^2 = 0$
+- If $||\textbf{x}|| > \Delta$  or $h_1 < 0$ we have to add a constant
+  $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$
+- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the
+  unconstrained one, $\lambda = 0$
+
+You will find more details at the beginning
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  ! By giving delta, we search (||x||^2 - delta^2)^2 = 0
+  ! and not (||x||^2 - delta)^2 = 0
+  
+  ! Research of lambda to solve ||x(lambda)|| = Delta 
+
+  ! Display
+  print*, 'e_val(1) = ', e_val(1)
+  print*, 'w_1^T.g =', tmp_wtg(1)
+  
+  ! H positive definite 
+  if (e_val(1) > - thresh_eig) then
+    norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
+    print*, '||x(0)||=', dsqrt(norm2_x)
+    print*, 'Delta=', delta
+
+    ! H positive definite, ||x(lambda = 0)|| <= Delta
+    if (dsqrt(norm2_x) <= delta) then 
+      print*, 'H positive definite, ||x(lambda = 0)|| <= Delta'
+      print*, 'lambda = 0, no lambda optimization'
+      lambda = 0d0
+
+    ! H positive definite, ||x(lambda = 0)|| > Delta
+    else
+      ! Constraint solution
+      print*, 'H positive definite, ||x(lambda = 0)|| > Delta' 
+      print*,'Computation of the optimal lambda...'
+      call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+    endif
+
+  ! H indefinite
+  else
+    if (DABS(tmp_wtg(1)) < thresh_wtg) then
+      norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1))
+      print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) 
+    endif
+
+    ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta 
+    if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then
+      ! Add e_val(1) in order to have (H - e_val(1) I) positive definite
+      print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta'
+      print*, 'lambda = -e_val(1), no lambda optimization'
+      lambda = - e_val(1)
+
+    ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta
+    ! and
+    ! H indefinite, w_1^T.g =/= 0
+    else
+      ! Constraint solution/ add lambda
+      if (DABS(tmp_wtg(1)) < thresh_wtg) then
+         print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta'
+      else
+         print*, 'H indefinite, w_1^T.g =/= 0'
+      endif
+      print*, 'Computation of the optimal lambda...'
+      call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
+      endif
+     
+  endif
+  
+  ! Recomputation of the norm^2 of the step x
+  norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
+  print*,''
+  print*,'Summary after the trust region:'
+  print*,'lambda:', lambda
+  print*,'||x||:', dsqrt(norm2_x)
+  print*,'delta:', delta
+#+END_SRC
+
+*** Calculation of the step x
+
+x refers to $\textbf{x}^*$
+We compute x in function of lambda using its formula :
+\begin{align*}
+\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i 
++ \lambda} \cdot \textbf{w}_i
+\end{align*}
+
+#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  ! Initialisation
+  x = 0d0
+
+  ! Calculation of the step x
+
+  ! Normal version
+  if (.not. absolute_eig) then
+
+    do i = 1, n 
+      if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
+        do j = 1, n
+          x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda)
+        enddo
+      endif
+    enddo
+
+  ! Version to use the absolute value of the eigenvalues
+  else
+
+    do i = 1, n 
+      if (DABS(e_val(i)) > thresh_eig) then
+        do j = 1, n
+          x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda)
+        enddo
+      endif
+    enddo
+
+  endif
+
+  double precision :: beta, norm_x
+ 
+  ! Test
+  ! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1)
+  ! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first 
+  ! eigenvectors multiply by a constant to ensure the condition
+  ! ||x(lambda=-e_val(1))|| = delta and escape the saddle point
+  if (avoid_saddle .and. e_val(1) < - thresh_eig) then
+    if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then
+
+      ! norm of x
+      norm_x = dnrm2(n,x,1)
+
+      ! Computes the coefficient for the w_1 
+      beta = delta**2 - norm_x**2
+
+      ! Updates the step x
+      x = x + W(:,1) * dsqrt(beta)
+
+      ! Recomputes the norm to check
+      norm_x = dnrm2(n,x,1)
+
+      print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):'
+      print*, '||x||', norm_x 
+    endif
+  endif
+#+END_SRC
+
+*** Transformation of x
+
+x is a vector of size n, so it can be write as a m by m
+antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index".
+
+    #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+!  ! Step transformation vector -> matrix
+!  ! Vector with n element -> mo_num by mo_num matrix
+!  do j = 1, m
+!     do i = 1, m
+!        if (i>j) then
+!           call mat_to_vec_index(i,j,k)
+!           m_x(i,j) = x(k)
+!        else
+!           m_x(i,j) = 0d0
+!        endif
+!     enddo
+!  enddo
+!
+!  ! Antisymmetrization of the previous matrix
+!  do j = 1, m
+!     do i = 1, m
+!        if (i<j) then
+!           m_x(i,j) = - m_x(j,i)
+!        endif
+!     enddo
+!  enddo
+    #+END_SRC
+
+*** Deallocation, end
+
+    #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
+  deallocate(tmp_wtg)
+
+  call wall_time(t2)
+  t3 = t2 - t1
+  print*,'Time in trust_region:', t3
+  print*,'======================'
+  print*,'---End trust_region---'
+  print*,'======================'
+  print*,''
+
+end
+    #+END_SRC
+ 
diff --git a/src/utils_trust_region/vec_to_mat_index.irp.f b/src/utils_trust_region/vec_to_mat_index.irp.f
new file mode 100644
index 00000000..5d68748b
--- /dev/null
+++ b/src/utils_trust_region/vec_to_mat_index.irp.f
@@ -0,0 +1,71 @@
+! Vector to matrix indexes
+  
+! *Compute the indexes p,q of a matrix element with the vector index i*
+
+! Vector (i) -> lower diagonal matrix (p,q), p > q
+
+! If a matrix is antisymmetric it can be reshaped as a vector. And the
+! vector can be reshaped as an antisymmetric matrix
+
+! \begin{align*}
+! \begin{pmatrix}
+! 0 & -1 & -2 & -4 \\
+! 1 & 0  & -3 & -5 \\
+! 2 & 3 & 0  & -6  \\
+! 4 & 5 & 6 & 0
+! \end{pmatrix}
+! \Leftrightarrow
+! \begin{pmatrix}
+! 1 & 2 & 3 & 4 & 5 & 6
+! \end{pmatrix}
+! \end{align*}
+
+! !!! Here the algorithm only work for the lower diagonal !!!
+
+! Input:
+! | i | integer | index in the vector |
+
+! Ouput:
+! | p,q | integer | corresponding indexes in the lower diagonal of a matrix |
+! |     |         | p > q,                                                  |
+! |     |         | p -> row,                                               |
+! |     |         | q -> column                                             |
+
+
+subroutine vec_to_mat_index(i,p,q)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing
+  ! its index i a vector
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer,intent(in)   :: i
+  
+  ! out
+  integer, intent(out) :: p,q
+  
+  ! internal 
+  integer              :: a,b
+  double precision     :: da
+
+  da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i)))
+  a = INT(da) 
+  if ((a*(a-1))/2==i) then
+    p = a-1
+  else
+    p = a
+  endif
+  b = p*(p-1)/2
+ 
+  ! Matrix element indexes
+  p = p + 1
+  q = i - b 
+
+end subroutine
diff --git a/src/utils_trust_region/vec_to_mat_index.org b/src/utils_trust_region/vec_to_mat_index.org
new file mode 100644
index 00000000..0a09fa86
--- /dev/null
+++ b/src/utils_trust_region/vec_to_mat_index.org
@@ -0,0 +1,72 @@
+* Vector to matrix indexes
+  
+*Compute the indexes p,q of a matrix element with the vector index i*
+
+Vector (i) -> lower diagonal matrix (p,q), p > q
+
+If a matrix is antisymmetric it can be reshaped as a vector. And the
+vector can be reshaped as an antisymmetric matrix
+
+\begin{align*}
+\begin{pmatrix}
+0 & -1 & -2 & -4 \\
+1 & 0  & -3 & -5 \\
+2 & 3 & 0  & -6  \\
+4 & 5 & 6 & 0
+\end{pmatrix}
+\Leftrightarrow
+\begin{pmatrix}
+1 & 2 & 3 & 4 & 5 & 6
+\end{pmatrix}
+\end{align*}
+
+!!! Here the algorithm only work for the lower diagonal !!!
+
+Input:
+| i | integer | index in the vector |
+
+Ouput:
+| p,q | integer | corresponding indexes in the lower diagonal of a matrix |
+|     |         | p > q,                                                  |
+|     |         | p -> row,                                               |
+|     |         | q -> column                                             |
+
+#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f
+subroutine vec_to_mat_index(i,p,q)
+
+  include 'pi.h'
+
+  BEGIN_DOC
+  ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing
+  ! its index i a vector
+  END_DOC
+
+  implicit none
+
+  ! Variables
+
+  ! in
+  integer,intent(in)   :: i
+  
+  ! out
+  integer, intent(out) :: p,q
+  
+  ! internal 
+  integer              :: a,b
+  double precision     :: da
+
+  da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i)))
+  a = INT(da) 
+  if ((a*(a-1))/2==i) then
+    p = a-1
+  else
+    p = a
+  endif
+  b = p*(p-1)/2
+ 
+  ! Matrix element indexes
+  p = p + 1
+  q = i - b 
+
+end subroutine
+#+END_SRC
diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f
new file mode 100644
index 00000000..9140b8d3
--- /dev/null
+++ b/src/utils_trust_region/vec_to_mat_v2.irp.f
@@ -0,0 +1,39 @@
+! Vect to antisymmetric matrix using mat_to_vec_index
+
+! Vector to antisymmetric matrix transformation using mat_to_vec_index
+! subroutine.
+
+! Can be done in OMP (for the first part and with omp critical for the second)
+
+
+subroutine vec_to_mat_v2(n,m,v_x,m_x)
+
+  BEGIN_DOC
+  ! Vector to antisymmetric matrix
+  END_DOC
+  
+  implicit none
+  
+  integer, intent(in) :: n,m
+  double precision, intent(in) :: v_x(n)
+  double precision, intent(out) :: m_x(m,m)
+
+  integer :: i,j,k
+
+  ! 1D -> 2D lower diagonal
+  m_x = 0d0
+  do j = 1, m - 1
+    do i = j + 1, m
+      call mat_to_vec_index(i,j,k)
+      m_x(i,j) = v_x(k)
+    enddo
+  enddo
+  
+  ! Antisym
+  do i = 1, m - 1
+    do j = i + 1, m
+      m_x(i,j) = - m_x(j,i) 
+    enddo
+  enddo
+
+end
diff --git a/src/utils_trust_region/vec_to_mat_v2.org b/src/utils_trust_region/vec_to_mat_v2.org
new file mode 100644
index 00000000..4e358a88
--- /dev/null
+++ b/src/utils_trust_region/vec_to_mat_v2.org
@@ -0,0 +1,40 @@
+* Vect to antisymmetric matrix using mat_to_vec_index
+
+Vector to antisymmetric matrix transformation using mat_to_vec_index
+subroutine.
+
+Can be done in OMP (for the first part and with omp critical for the second)
+
+#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f 
+subroutine vec_to_mat_v2(n,m,v_x,m_x)
+
+  BEGIN_DOC
+  ! Vector to antisymmetric matrix
+  END_DOC
+  
+  implicit none
+  
+  integer, intent(in) :: n,m
+  double precision, intent(in) :: v_x(n)
+  double precision, intent(out) :: m_x(m,m)
+
+  integer :: i,j,k
+
+  ! 1D -> 2D lower diagonal
+  m_x = 0d0
+  do j = 1, m - 1
+    do i = j + 1, m
+      call mat_to_vec_index(i,j,k)
+      m_x(i,j) = v_x(k)
+    enddo
+  enddo
+  
+  ! Antisym
+  do i = 1, m - 1
+    do j = i + 1, m
+      m_x(i,j) = - m_x(j,i) 
+    enddo
+  enddo
+
+end
+#+END_SRC