diff --git a/configure b/configure index b3ed7758..7d063b1d 100755 --- a/configure +++ b/configure @@ -369,7 +369,7 @@ else echo "" echo "${QP_ROOT}/build.ninja does not exist," echo "you need to specify the COMPILATION configuration file." - echo "See ./configure --help for more details." + echo "See ./configure -h for more details." echo "" fi diff --git a/external/Python/.gitignore b/external/Python/.gitignore index e69de29b..9d09b447 100644 --- a/external/Python/.gitignore +++ b/external/Python/.gitignore @@ -0,0 +1 @@ +docopt.py diff --git a/external/Python/docopt.py b/external/Python/docopt.py deleted file mode 100644 index 7b927e2f..00000000 --- a/external/Python/docopt.py +++ /dev/null @@ -1,579 +0,0 @@ -"""Pythonic command-line interface parser that will make you smile. - - * http://docopt.org - * Repository and issue-tracker: https://github.com/docopt/docopt - * Licensed under terms of MIT license (see LICENSE-MIT) - * Copyright (c) 2013 Vladimir Keleshev, vladimir@keleshev.com - -""" -import sys -import re - - -__all__ = ['docopt'] -__version__ = '0.6.2' - - -class DocoptLanguageError(Exception): - - """Error in construction of usage-message by developer.""" - - -class DocoptExit(SystemExit): - - """Exit in case user invoked program with incorrect arguments.""" - - usage = '' - - def __init__(self, message=''): - SystemExit.__init__(self, (message + '\n' + self.usage).strip()) - - -class Pattern(object): - - def __eq__(self, other): - return repr(self) == repr(other) - - def __hash__(self): - return hash(repr(self)) - - def fix(self): - self.fix_identities() - self.fix_repeating_arguments() - return self - - def fix_identities(self, uniq=None): - """Make pattern-tree tips point to same object if they are equal.""" - if not hasattr(self, 'children'): - return self - uniq = list(set(self.flat())) if uniq is None else uniq - for i, c in enumerate(self.children): - if not hasattr(c, 'children'): - assert c in uniq - self.children[i] = uniq[uniq.index(c)] - else: - c.fix_identities(uniq) - - def fix_repeating_arguments(self): - """Fix elements that should accumulate/increment values.""" - either = [list(c.children) for c in self.either.children] - for case in either: - for e in [c for c in case if case.count(c) > 1]: - if type(e) is Argument or type(e) is Option and e.argcount: - if e.value is None: - e.value = [] - elif type(e.value) is not list: - e.value = e.value.split() - if type(e) is Command or type(e) is Option and e.argcount == 0: - e.value = 0 - return self - - @property - def either(self): - """Transform pattern into an equivalent, with only top-level Either.""" - # Currently the pattern will not be equivalent, but more "narrow", - # although good enough to reason about list arguments. - ret = [] - groups = [[self]] - while groups: - children = groups.pop(0) - types = [type(c) for c in children] - if Either in types: - either = [c for c in children if type(c) is Either][0] - children.pop(children.index(either)) - for c in either.children: - groups.append([c] + children) - elif Required in types: - required = [c for c in children if type(c) is Required][0] - children.pop(children.index(required)) - groups.append(list(required.children) + children) - elif Optional in types: - optional = [c for c in children if type(c) is Optional][0] - children.pop(children.index(optional)) - groups.append(list(optional.children) + children) - elif AnyOptions in types: - optional = [c for c in children if type(c) is AnyOptions][0] - children.pop(children.index(optional)) - groups.append(list(optional.children) + children) - elif OneOrMore in types: - oneormore = [c for c in children if type(c) is OneOrMore][0] - children.pop(children.index(oneormore)) - groups.append(list(oneormore.children) * 2 + children) - else: - ret.append(children) - return Either(*[Required(*e) for e in ret]) - - -class ChildPattern(Pattern): - - def __init__(self, name, value=None): - self.name = name - self.value = value - - def __repr__(self): - return '%s(%r, %r)' % (self.__class__.__name__, self.name, self.value) - - def flat(self, *types): - return [self] if not types or type(self) in types else [] - - def match(self, left, collected=None): - collected = [] if collected is None else collected - pos, match = self.single_match(left) - if match is None: - return False, left, collected - left_ = left[:pos] + left[pos + 1:] - same_name = [a for a in collected if a.name == self.name] - if type(self.value) in (int, list): - if type(self.value) is int: - increment = 1 - else: - increment = ([match.value] if type(match.value) is str - else match.value) - if not same_name: - match.value = increment - return True, left_, collected + [match] - same_name[0].value += increment - return True, left_, collected - return True, left_, collected + [match] - - -class ParentPattern(Pattern): - - def __init__(self, *children): - self.children = list(children) - - def __repr__(self): - return '%s(%s)' % (self.__class__.__name__, - ', '.join(repr(a) for a in self.children)) - - def flat(self, *types): - if type(self) in types: - return [self] - return sum([c.flat(*types) for c in self.children], []) - - -class Argument(ChildPattern): - - def single_match(self, left): - for n, p in enumerate(left): - if type(p) is Argument: - return n, Argument(self.name, p.value) - return None, None - - @classmethod - def parse(class_, source): - name = re.findall('(<\S*?>)', source)[0] - value = re.findall('\[default: (.*)\]', source, flags=re.I) - return class_(name, value[0] if value else None) - - -class Command(Argument): - - def __init__(self, name, value=False): - self.name = name - self.value = value - - def single_match(self, left): - for n, p in enumerate(left): - if type(p) is Argument: - if p.value == self.name: - return n, Command(self.name, True) - else: - break - return None, None - - -class Option(ChildPattern): - - def __init__(self, short=None, long=None, argcount=0, value=False): - assert argcount in (0, 1) - self.short, self.long = short, long - self.argcount, self.value = argcount, value - self.value = None if value is False and argcount else value - - @classmethod - def parse(class_, option_description): - short, long, argcount, value = None, None, 0, False - options, _, description = option_description.strip().partition(' ') - options = options.replace(',', ' ').replace('=', ' ') - for s in options.split(): - if s.startswith('--'): - long = s - elif s.startswith('-'): - short = s - else: - argcount = 1 - if argcount: - matched = re.findall('\[default: (.*)\]', description, flags=re.I) - value = matched[0] if matched else None - return class_(short, long, argcount, value) - - def single_match(self, left): - for n, p in enumerate(left): - if self.name == p.name: - return n, p - return None, None - - @property - def name(self): - return self.long or self.short - - def __repr__(self): - return 'Option(%r, %r, %r, %r)' % (self.short, self.long, - self.argcount, self.value) - - -class Required(ParentPattern): - - def match(self, left, collected=None): - collected = [] if collected is None else collected - l = left - c = collected - for p in self.children: - matched, l, c = p.match(l, c) - if not matched: - return False, left, collected - return True, l, c - - -class Optional(ParentPattern): - - def match(self, left, collected=None): - collected = [] if collected is None else collected - for p in self.children: - m, left, collected = p.match(left, collected) - return True, left, collected - - -class AnyOptions(Optional): - - """Marker/placeholder for [options] shortcut.""" - - -class OneOrMore(ParentPattern): - - def match(self, left, collected=None): - assert len(self.children) == 1 - collected = [] if collected is None else collected - l = left - c = collected - l_ = None - matched = True - times = 0 - while matched: - # could it be that something didn't match but changed l or c? - matched, l, c = self.children[0].match(l, c) - times += 1 if matched else 0 - if l_ == l: - break - l_ = l - if times >= 1: - return True, l, c - return False, left, collected - - -class Either(ParentPattern): - - def match(self, left, collected=None): - collected = [] if collected is None else collected - outcomes = [] - for p in self.children: - matched, _, _ = outcome = p.match(left, collected) - if matched: - outcomes.append(outcome) - if outcomes: - return min(outcomes, key=lambda outcome: len(outcome[1])) - return False, left, collected - - -class TokenStream(list): - - def __init__(self, source, error): - self += source.split() if hasattr(source, 'split') else source - self.error = error - - def move(self): - return self.pop(0) if len(self) else None - - def current(self): - return self[0] if len(self) else None - - -def parse_long(tokens, options): - """long ::= '--' chars [ ( ' ' | '=' ) chars ] ;""" - long, eq, value = tokens.move().partition('=') - assert long.startswith('--') - value = None if eq == value == '' else value - similar = [o for o in options if o.long == long] - if tokens.error is DocoptExit and similar == []: # if no exact match - similar = [o for o in options if o.long and o.long.startswith(long)] - if len(similar) > 1: # might be simply specified ambiguously 2+ times? - raise tokens.error('%s is not a unique prefix: %s?' % - (long, ', '.join(o.long for o in similar))) - elif len(similar) < 1: - argcount = 1 if eq == '=' else 0 - o = Option(None, long, argcount) - options.append(o) - if tokens.error is DocoptExit: - o = Option(None, long, argcount, value if argcount else True) - else: - o = Option(similar[0].short, similar[0].long, - similar[0].argcount, similar[0].value) - if o.argcount == 0: - if value is not None: - raise tokens.error('%s must not have an argument' % o.long) - else: - if value is None: - if tokens.current() is None: - raise tokens.error('%s requires argument' % o.long) - value = tokens.move() - if tokens.error is DocoptExit: - o.value = value if value is not None else True - return [o] - - -def parse_shorts(tokens, options): - """shorts ::= '-' ( chars )* [ [ ' ' ] chars ] ;""" - token = tokens.move() - assert token.startswith('-') and not token.startswith('--') - left = token.lstrip('-') - parsed = [] - while left != '': - short, left = '-' + left[0], left[1:] - similar = [o for o in options if o.short == short] - if len(similar) > 1: - raise tokens.error('%s is specified ambiguously %d times' % - (short, len(similar))) - elif len(similar) < 1: - o = Option(short, None, 0) - options.append(o) - if tokens.error is DocoptExit: - o = Option(short, None, 0, True) - else: # why copying is necessary here? - o = Option(short, similar[0].long, - similar[0].argcount, similar[0].value) - value = None - if o.argcount != 0: - if left == '': - if tokens.current() is None: - raise tokens.error('%s requires argument' % short) - value = tokens.move() - else: - value = left - left = '' - if tokens.error is DocoptExit: - o.value = value if value is not None else True - parsed.append(o) - return parsed - - -def parse_pattern(source, options): - tokens = TokenStream(re.sub(r'([\[\]\(\)\|]|\.\.\.)', r' \1 ', source), - DocoptLanguageError) - result = parse_expr(tokens, options) - if tokens.current() is not None: - raise tokens.error('unexpected ending: %r' % ' '.join(tokens)) - return Required(*result) - - -def parse_expr(tokens, options): - """expr ::= seq ( '|' seq )* ;""" - seq = parse_seq(tokens, options) - if tokens.current() != '|': - return seq - result = [Required(*seq)] if len(seq) > 1 else seq - while tokens.current() == '|': - tokens.move() - seq = parse_seq(tokens, options) - result += [Required(*seq)] if len(seq) > 1 else seq - return [Either(*result)] if len(result) > 1 else result - - -def parse_seq(tokens, options): - """seq ::= ( atom [ '...' ] )* ;""" - result = [] - while tokens.current() not in [None, ']', ')', '|']: - atom = parse_atom(tokens, options) - if tokens.current() == '...': - atom = [OneOrMore(*atom)] - tokens.move() - result += atom - return result - - -def parse_atom(tokens, options): - """atom ::= '(' expr ')' | '[' expr ']' | 'options' - | long | shorts | argument | command ; - """ - token = tokens.current() - result = [] - if token in '([': - tokens.move() - matching, pattern = {'(': [')', Required], '[': [']', Optional]}[token] - result = pattern(*parse_expr(tokens, options)) - if tokens.move() != matching: - raise tokens.error("unmatched '%s'" % token) - return [result] - elif token == 'options': - tokens.move() - return [AnyOptions()] - elif token.startswith('--') and token != '--': - return parse_long(tokens, options) - elif token.startswith('-') and token not in ('-', '--'): - return parse_shorts(tokens, options) - elif token.startswith('<') and token.endswith('>') or token.isupper(): - return [Argument(tokens.move())] - else: - return [Command(tokens.move())] - - -def parse_argv(tokens, options, options_first=False): - """Parse command-line argument vector. - - If options_first: - argv ::= [ long | shorts ]* [ argument ]* [ '--' [ argument ]* ] ; - else: - argv ::= [ long | shorts | argument ]* [ '--' [ argument ]* ] ; - - """ - parsed = [] - while tokens.current() is not None: - if tokens.current() == '--': - return parsed + [Argument(None, v) for v in tokens] - elif tokens.current().startswith('--'): - parsed += parse_long(tokens, options) - elif tokens.current().startswith('-') and tokens.current() != '-': - parsed += parse_shorts(tokens, options) - elif options_first: - return parsed + [Argument(None, v) for v in tokens] - else: - parsed.append(Argument(None, tokens.move())) - return parsed - - -def parse_defaults(doc): - # in python < 2.7 you can't pass flags=re.MULTILINE - split = re.split('\n *(<\S+?>|-\S+?)', doc)[1:] - split = [s1 + s2 for s1, s2 in zip(split[::2], split[1::2])] - options = [Option.parse(s) for s in split if s.startswith('-')] - #arguments = [Argument.parse(s) for s in split if s.startswith('<')] - #return options, arguments - return options - - -def printable_usage(doc): - # in python < 2.7 you can't pass flags=re.IGNORECASE - usage_split = re.split(r'([Uu][Ss][Aa][Gg][Ee]:)', doc) - if len(usage_split) < 3: - raise DocoptLanguageError('"usage:" (case-insensitive) not found.') - if len(usage_split) > 3: - raise DocoptLanguageError('More than one "usage:" (case-insensitive).') - return re.split(r'\n\s*\n', ''.join(usage_split[1:]))[0].strip() - - -def formal_usage(printable_usage): - pu = printable_usage.split()[1:] # split and drop "usage:" - return '( ' + ' '.join(') | (' if s == pu[0] else s for s in pu[1:]) + ' )' - - -def extras(help, version, options, doc): - if help and any((o.name in ('-h', '--help')) and o.value for o in options): - print(doc.strip("\n")) - sys.exit() - if version and any(o.name == '--version' and o.value for o in options): - print(version) - sys.exit() - - -class Dict(dict): - def __repr__(self): - return '{%s}' % ',\n '.join('%r: %r' % i for i in sorted(self.items())) - - -def docopt(doc, argv=None, help=True, version=None, options_first=False): - """Parse `argv` based on command-line interface described in `doc`. - - `docopt` creates your command-line interface based on its - description that you pass as `doc`. Such description can contain - --options, , commands, which could be - [optional], (required), (mutually | exclusive) or repeated... - - Parameters - ---------- - doc : str - Description of your command-line interface. - argv : list of str, optional - Argument vector to be parsed. sys.argv[1:] is used if not - provided. - help : bool (default: True) - Set to False to disable automatic help on -h or --help - options. - version : any object - If passed, the object will be printed if --version is in - `argv`. - options_first : bool (default: False) - Set to True to require options preceed positional arguments, - i.e. to forbid options and positional arguments intermix. - - Returns - ------- - args : dict - A dictionary, where keys are names of command-line elements - such as e.g. "--verbose" and "", and values are the - parsed values of those elements. - - Example - ------- - >>> from docopt import docopt - >>> doc = ''' - Usage: - my_program tcp [--timeout=] - my_program serial [--baud=] [--timeout=] - my_program (-h | --help | --version) - - Options: - -h, --help Show this screen and exit. - --baud= Baudrate [default: 9600] - ''' - >>> argv = ['tcp', '127.0.0.1', '80', '--timeout', '30'] - >>> docopt(doc, argv) - {'--baud': '9600', - '--help': False, - '--timeout': '30', - '--version': False, - '': '127.0.0.1', - '': '80', - 'serial': False, - 'tcp': True} - - See also - -------- - * For video introduction see http://docopt.org - * Full documentation is available in README.rst as well as online - at https://github.com/docopt/docopt#readme - - """ - if argv is None: - argv = sys.argv[1:] - DocoptExit.usage = printable_usage(doc) - options = parse_defaults(doc) - pattern = parse_pattern(formal_usage(DocoptExit.usage), options) - # [default] syntax for argument is disabled - #for a in pattern.flat(Argument): - # same_name = [d for d in arguments if d.name == a.name] - # if same_name: - # a.value = same_name[0].value - argv = parse_argv(TokenStream(argv, DocoptExit), list(options), - options_first) - pattern_options = set(pattern.flat(Option)) - for ao in pattern.flat(AnyOptions): - doc_options = parse_defaults(doc) - ao.children = list(set(doc_options) - pattern_options) - #if any_options: - # ao.children += [Option(o.short, o.long, o.argcount) - # for o in argv if type(o) is Option] - extras(help, version, argv, doc) - matched, left, collected = pattern.fix().match(argv) - if matched and left == []: # better error message if left? - return Dict((a.name, a.value) for a in (pattern.flat() + collected)) - raise DocoptExit() diff --git a/include/.gitignore b/include/.gitignore index d52be2d2..afcb2de6 100644 --- a/include/.gitignore +++ b/include/.gitignore @@ -5,3 +5,4 @@ zconf.h zlib.h zmq_utils.h f77_zmq_free.h +f77_zmq.h diff --git a/include/f77_zmq.h b/include/f77_zmq.h deleted file mode 100644 index b19bb707..00000000 --- a/include/f77_zmq.h +++ /dev/null @@ -1,617 +0,0 @@ - integer EADDRINUSE - integer EADDRNOTAVAIL - integer EAFNOSUPPORT - integer ECONNABORTED - integer ECONNREFUSED - integer ECONNRESET - integer EFSM - integer EHOSTUNREACH - integer EINPROGRESS - integer EMSGSIZE - integer EMTHREAD - integer ENETDOWN - integer ENETRESET - integer ENETUNREACH - integer ENOBUFS - integer ENOCOMPATPROTO - integer ENOTCONN - integer ENOTSOCK - integer ENOTSUP - integer EPROTONOSUPPORT - integer ETERM - integer ETIMEDOUT - integer ZMQ_AFFINITY - integer ZMQ_BACKLOG - integer ZMQ_BINDTODEVICE - integer ZMQ_BLOCKY - integer ZMQ_CHANNEL - integer ZMQ_CLIENT - integer ZMQ_CONFLATE - integer ZMQ_CONNECT_RID - integer ZMQ_CONNECT_ROUTING_ID - integer ZMQ_CONNECT_TIMEOUT - integer ZMQ_CURRENT_EVENT_VERSION - integer ZMQ_CURRENT_EVENT_VERSION_DRAFT - integer ZMQ_CURVE - integer ZMQ_CURVE_PUBLICKEY - integer ZMQ_CURVE_SECRETKEY - integer ZMQ_CURVE_SERVER - integer ZMQ_CURVE_SERVERKEY - integer ZMQ_DEALER - integer ZMQ_DEFINED_STDINT - integer ZMQ_DELAY_ATTACH_ON_CONNECT - integer ZMQ_DGRAM - integer ZMQ_DISCONNECT_MSG - integer ZMQ_DISH - integer ZMQ_DONTWAIT - integer ZMQ_EVENTS - integer ZMQ_EVENT_ACCEPTED - integer ZMQ_EVENT_ACCEPT_FAILED - integer ZMQ_EVENT_ALL - integer ZMQ_EVENT_ALL_V1 - integer ZMQ_EVENT_ALL_V2 - integer ZMQ_EVENT_BIND_FAILED - integer ZMQ_EVENT_CLOSED - integer ZMQ_EVENT_CLOSE_FAILED - integer ZMQ_EVENT_CONNECTED - integer ZMQ_EVENT_CONNECT_DELAYED - integer ZMQ_EVENT_CONNECT_RETRIED - integer ZMQ_EVENT_DISCONNECTED - integer ZMQ_EVENT_HANDSHAKE_FAILED_AUTH - integer ZMQ_EVENT_HANDSHAKE_FAILED_NO_DETAIL - integer ZMQ_EVENT_HANDSHAKE_FAILED_PROTOCOL - integer ZMQ_EVENT_HANDSHAKE_SUCCEEDED - integer ZMQ_EVENT_LISTENING - integer ZMQ_EVENT_MONITOR_STOPPED - integer ZMQ_EVENT_PIPES_STATS - integer ZMQ_FAIL_UNROUTABLE - integer ZMQ_FD - integer ZMQ_FORWARDER - integer ZMQ_GATHER - integer ZMQ_GROUP_MAX_LENGTH - integer ZMQ_GSSAPI - integer ZMQ_GSSAPI_NT_HOSTBASED - integer ZMQ_GSSAPI_NT_KRB5_PRINCIPAL - integer ZMQ_GSSAPI_NT_USER_NAME - integer ZMQ_GSSAPI_PLAINTEXT - integer ZMQ_GSSAPI_PRINCIPAL - integer ZMQ_GSSAPI_PRINCIPAL_NAMETYPE - integer ZMQ_GSSAPI_SERVER - integer ZMQ_GSSAPI_SERVICE_PRINCIPAL - integer ZMQ_GSSAPI_SERVICE_PRINCIPAL_NAMETYPE - integer ZMQ_HANDSHAKE_IVL - integer ZMQ_HAS_CAPABILITIES - integer ZMQ_HAUSNUMERO - integer ZMQ_HEARTBEAT_IVL - integer ZMQ_HEARTBEAT_TIMEOUT - integer ZMQ_HEARTBEAT_TTL - integer ZMQ_HELLO_MSG - integer ZMQ_IDENTITY - integer ZMQ_IMMEDIATE - integer ZMQ_INVERT_MATCHING - integer ZMQ_IN_BATCH_SIZE - integer ZMQ_IO_THREADS - integer ZMQ_IO_THREADS_DFLT - integer ZMQ_IPC_FILTER_GID - integer ZMQ_IPC_FILTER_PID - integer ZMQ_IPC_FILTER_UID - integer ZMQ_IPV4ONLY - integer ZMQ_IPV6 - integer ZMQ_LAST_ENDPOINT - integer ZMQ_LINGER - integer ZMQ_LOOPBACK_FASTPATH - integer ZMQ_MAXMSGSIZE - integer ZMQ_MAX_MSGSZ - integer ZMQ_MAX_SOCKETS - integer ZMQ_MAX_SOCKETS_DFLT - integer ZMQ_MECHANISM - integer ZMQ_METADATA - integer ZMQ_MORE - integer ZMQ_MSG_T_SIZE - integer ZMQ_MULTICAST_HOPS - integer ZMQ_MULTICAST_LOOP - integer ZMQ_MULTICAST_MAXTPDU - integer ZMQ_NOBLOCK - integer ZMQ_NOTIFY_CONNECT - integer ZMQ_NOTIFY_DISCONNECT - integer ZMQ_NULL - integer ZMQ_ONLY_FIRST_SUBSCRIBE - integer ZMQ_OUT_BATCH_SIZE - integer ZMQ_PAIR - integer ZMQ_PEER - integer ZMQ_PLAIN - integer ZMQ_PLAIN_PASSWORD - integer ZMQ_PLAIN_SERVER - integer ZMQ_PLAIN_USERNAME - integer ZMQ_POLLERR - integer ZMQ_POLLIN - integer ZMQ_POLLITEMS_DFLT - integer ZMQ_POLLOUT - integer ZMQ_POLLPRI - integer ZMQ_PRIORITY - integer ZMQ_PROBE_ROUTER - integer ZMQ_PROTOCOL_ERROR_WS_UNSPECIFIED - integer ZMQ_PROTOCOL_ERROR_ZAP_BAD_REQUEST_ID - integer ZMQ_PROTOCOL_ERROR_ZAP_BAD_VERSION - integer ZMQ_PROTOCOL_ERROR_ZAP_INVALID_METADATA - integer ZMQ_PROTOCOL_ERROR_ZAP_INVALID_STATUS_CODE - integer ZMQ_PROTOCOL_ERROR_ZAP_MALFORMED_REPLY - integer ZMQ_PROTOCOL_ERROR_ZAP_UNSPECIFIED - integer ZMQ_PROTOCOL_ERROR_ZMTP_CRYPTOGRAPHIC - integer ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_METADATA - integer ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_SEQUENCE - integer ZMQ_PROTOCOL_ERROR_ZMTP_KEY_EXCHANGE - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_ERROR - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_HELLO - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_INITIATE - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_MESSAGE - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_READY - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_UNSPECIFIED - integer ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_WELCOME - integer ZMQ_PROTOCOL_ERROR_ZMTP_MECHANISM_MISMATCH - integer ZMQ_PROTOCOL_ERROR_ZMTP_UNEXPECTED_COMMAND - integer ZMQ_PROTOCOL_ERROR_ZMTP_UNSPECIFIED - integer ZMQ_PTR - integer ZMQ_PUB - integer ZMQ_PULL - integer ZMQ_PUSH - integer ZMQ_QUEUE - integer ZMQ_RADIO - integer ZMQ_RATE - integer ZMQ_RCVBUF - integer ZMQ_RCVHWM - integer ZMQ_RCVMORE - integer ZMQ_RCVTIMEO - integer ZMQ_RECONNECT_IVL - integer ZMQ_RECONNECT_IVL_MAX - integer ZMQ_RECONNECT_STOP - integer ZMQ_RECONNECT_STOP_AFTER_DISCONNECT - integer ZMQ_RECONNECT_STOP_CONN_REFUSED - integer ZMQ_RECONNECT_STOP_HANDSHAKE_FAILED - integer ZMQ_RECOVERY_IVL - integer ZMQ_REP - integer ZMQ_REQ - integer ZMQ_REQ_CORRELATE - integer ZMQ_REQ_RELAXED - integer ZMQ_ROUTER - integer ZMQ_ROUTER_BEHAVIOR - integer ZMQ_ROUTER_HANDOVER - integer ZMQ_ROUTER_MANDATORY - integer ZMQ_ROUTER_NOTIFY - integer ZMQ_ROUTER_RAW - integer ZMQ_ROUTING_ID - integer ZMQ_SCATTER - integer ZMQ_SERVER - integer ZMQ_SHARED - integer ZMQ_SNDBUF - integer ZMQ_SNDHWM - integer ZMQ_SNDMORE - integer ZMQ_SNDTIMEO - integer ZMQ_SOCKET_LIMIT - integer ZMQ_SOCKS_PASSWORD - integer ZMQ_SOCKS_PROXY - integer ZMQ_SOCKS_USERNAME - integer ZMQ_SRCFD - integer ZMQ_STREAM - integer ZMQ_STREAMER - integer ZMQ_STREAM_NOTIFY - integer ZMQ_SUB - integer ZMQ_SUBSCRIBE - integer ZMQ_TCP_ACCEPT_FILTER - integer ZMQ_TCP_KEEPALIVE - integer ZMQ_TCP_KEEPALIVE_CNT - integer ZMQ_TCP_KEEPALIVE_IDLE - integer ZMQ_TCP_KEEPALIVE_INTVL - integer ZMQ_TCP_MAXRT - integer ZMQ_THREAD_AFFINITY_CPU_ADD - integer ZMQ_THREAD_AFFINITY_CPU_REMOVE - integer ZMQ_THREAD_NAME_PREFIX - integer ZMQ_THREAD_PRIORITY - integer ZMQ_THREAD_PRIORITY_DFLT - integer ZMQ_THREAD_SAFE - integer ZMQ_THREAD_SCHED_POLICY - integer ZMQ_THREAD_SCHED_POLICY_DFLT - integer ZMQ_TOS - integer ZMQ_TYPE - integer ZMQ_UNSUBSCRIBE - integer ZMQ_USE_FD - integer ZMQ_VERSION - integer ZMQ_VERSION_MAJOR - integer ZMQ_VERSION_MINOR - integer ZMQ_VERSION_PATCH - integer ZMQ_VMCI_BUFFER_MAX_SIZE - integer ZMQ_VMCI_BUFFER_MIN_SIZE - integer ZMQ_VMCI_BUFFER_SIZE - integer ZMQ_VMCI_CONNECT_TIMEOUT - integer ZMQ_WSS_CERT_PEM - integer ZMQ_WSS_HOSTNAME - integer ZMQ_WSS_KEY_PEM - integer ZMQ_WSS_TRUST_PEM - integer ZMQ_WSS_TRUST_SYSTEM - integer ZMQ_XPUB - integer ZMQ_XPUB_MANUAL - integer ZMQ_XPUB_MANUAL_LAST_VALUE - integer ZMQ_XPUB_NODROP - integer ZMQ_XPUB_VERBOSE - integer ZMQ_XPUB_VERBOSER - integer ZMQ_XPUB_WELCOME_MSG - integer ZMQ_XREP - integer ZMQ_XREQ - integer ZMQ_XSUB - integer ZMQ_ZAP_DOMAIN - integer ZMQ_ZAP_ENFORCE_DOMAIN - integer ZMQ_ZERO_COPY_RECV - parameter(EADDRINUSE=156384717) - parameter(EADDRNOTAVAIL=156384718) - parameter(EAFNOSUPPORT=156384723) - parameter(ECONNABORTED=156384725) - parameter(ECONNREFUSED=156384719) - parameter(ECONNRESET=156384726) - parameter(EFSM=156384763) - parameter(EHOSTUNREACH=156384729) - parameter(EINPROGRESS=156384720) - parameter(EMSGSIZE=156384722) - parameter(EMTHREAD=156384766) - parameter(ENETDOWN=156384716) - parameter(ENETRESET=156384730) - parameter(ENETUNREACH=156384724) - parameter(ENOBUFS=156384715) - parameter(ENOCOMPATPROTO=156384764) - parameter(ENOTCONN=156384727) - parameter(ENOTSOCK=156384721) - parameter(ENOTSUP=156384713) - parameter(EPROTONOSUPPORT=156384714) - parameter(ETERM=156384765) - parameter(ETIMEDOUT=156384728) - parameter(ZMQ_AFFINITY=4) - parameter(ZMQ_BACKLOG=19) - parameter(ZMQ_BINDTODEVICE=92) - parameter(ZMQ_BLOCKY=70) - parameter(ZMQ_CHANNEL=20) - parameter(ZMQ_CLIENT=13) - parameter(ZMQ_CONFLATE=54) - parameter(ZMQ_CONNECT_RID=61) - parameter(ZMQ_CONNECT_ROUTING_ID=61) - parameter(ZMQ_CONNECT_TIMEOUT=79) - parameter(ZMQ_CURRENT_EVENT_VERSION=1) - parameter(ZMQ_CURRENT_EVENT_VERSION_DRAFT=2) - parameter(ZMQ_CURVE=2) - parameter(ZMQ_CURVE_PUBLICKEY=48) - parameter(ZMQ_CURVE_SECRETKEY=49) - parameter(ZMQ_CURVE_SERVER=47) - parameter(ZMQ_CURVE_SERVERKEY=50) - parameter(ZMQ_DEALER=5) - parameter(ZMQ_DEFINED_STDINT=1) - parameter(ZMQ_DELAY_ATTACH_ON_CONNECT=39) - parameter(ZMQ_DGRAM=18) - parameter(ZMQ_DISCONNECT_MSG=111) - parameter(ZMQ_DISH=15) - parameter(ZMQ_DONTWAIT=1) - parameter(ZMQ_EVENTS=15) - parameter(ZMQ_EVENT_ACCEPTED=32) - parameter(ZMQ_EVENT_ACCEPT_FAILED=64) - parameter(ZMQ_EVENT_ALL=65535) - parameter(ZMQ_EVENT_ALL_V1=65535) - parameter(ZMQ_EVENT_ALL_V2=131071) - parameter(ZMQ_EVENT_BIND_FAILED=16) - parameter(ZMQ_EVENT_CLOSED=128) - parameter(ZMQ_EVENT_CLOSE_FAILED=256) - parameter(ZMQ_EVENT_CONNECTED=1) - parameter(ZMQ_EVENT_CONNECT_DELAYED=2) - parameter(ZMQ_EVENT_CONNECT_RETRIED=4) - parameter(ZMQ_EVENT_DISCONNECTED=512) - parameter(ZMQ_EVENT_HANDSHAKE_FAILED_AUTH=16384) - parameter(ZMQ_EVENT_HANDSHAKE_FAILED_NO_DETAIL=2048) - parameter(ZMQ_EVENT_HANDSHAKE_FAILED_PROTOCOL=8192) - parameter(ZMQ_EVENT_HANDSHAKE_SUCCEEDED=4096) - parameter(ZMQ_EVENT_LISTENING=8) - parameter(ZMQ_EVENT_MONITOR_STOPPED=1024) - parameter(ZMQ_EVENT_PIPES_STATS=65536) - parameter(ZMQ_FAIL_UNROUTABLE=33) - parameter(ZMQ_FD=14) - parameter(ZMQ_FORWARDER=2) - parameter(ZMQ_GATHER=16) - parameter(ZMQ_GROUP_MAX_LENGTH=255) - parameter(ZMQ_GSSAPI=3) - parameter(ZMQ_GSSAPI_NT_HOSTBASED=0) - parameter(ZMQ_GSSAPI_NT_KRB5_PRINCIPAL=2) - parameter(ZMQ_GSSAPI_NT_USER_NAME=1) - parameter(ZMQ_GSSAPI_PLAINTEXT=65) - parameter(ZMQ_GSSAPI_PRINCIPAL=63) - parameter(ZMQ_GSSAPI_PRINCIPAL_NAMETYPE=90) - parameter(ZMQ_GSSAPI_SERVER=62) - parameter(ZMQ_GSSAPI_SERVICE_PRINCIPAL=64) - parameter(ZMQ_GSSAPI_SERVICE_PRINCIPAL_NAMETYPE=91) - parameter(ZMQ_HANDSHAKE_IVL=66) - parameter(ZMQ_HAS_CAPABILITIES=1) - parameter(ZMQ_HAUSNUMERO=156384712) - parameter(ZMQ_HEARTBEAT_IVL=75) - parameter(ZMQ_HEARTBEAT_TIMEOUT=77) - parameter(ZMQ_HEARTBEAT_TTL=76) - parameter(ZMQ_HELLO_MSG=110) - parameter(ZMQ_IDENTITY=5) - parameter(ZMQ_IMMEDIATE=39) - parameter(ZMQ_INVERT_MATCHING=74) - parameter(ZMQ_IN_BATCH_SIZE=101) - parameter(ZMQ_IO_THREADS=1) - parameter(ZMQ_IO_THREADS_DFLT=1) - parameter(ZMQ_IPC_FILTER_GID=60) - parameter(ZMQ_IPC_FILTER_PID=58) - parameter(ZMQ_IPC_FILTER_UID=59) - parameter(ZMQ_IPV4ONLY=31) - parameter(ZMQ_IPV6=42) - parameter(ZMQ_LAST_ENDPOINT=32) - parameter(ZMQ_LINGER=17) - parameter(ZMQ_LOOPBACK_FASTPATH=94) - parameter(ZMQ_MAXMSGSIZE=22) - parameter(ZMQ_MAX_MSGSZ=5) - parameter(ZMQ_MAX_SOCKETS=2) - parameter(ZMQ_MAX_SOCKETS_DFLT=1023) - parameter(ZMQ_MECHANISM=43) - parameter(ZMQ_METADATA=95) - parameter(ZMQ_MORE=1) - parameter(ZMQ_MSG_T_SIZE=6) - parameter(ZMQ_MULTICAST_HOPS=25) - parameter(ZMQ_MULTICAST_LOOP=96) - parameter(ZMQ_MULTICAST_MAXTPDU=84) - parameter(ZMQ_NOBLOCK=1) - parameter(ZMQ_NOTIFY_CONNECT=1) - parameter(ZMQ_NOTIFY_DISCONNECT=2) - parameter(ZMQ_NULL=0) - parameter(ZMQ_ONLY_FIRST_SUBSCRIBE=108) - parameter(ZMQ_OUT_BATCH_SIZE=102) - parameter(ZMQ_PAIR=0) - parameter(ZMQ_PEER=19) - parameter(ZMQ_PLAIN=1) - parameter(ZMQ_PLAIN_PASSWORD=46) - parameter(ZMQ_PLAIN_SERVER=44) - parameter(ZMQ_PLAIN_USERNAME=45) - parameter(ZMQ_POLLERR=4) - parameter(ZMQ_POLLIN=1) - parameter(ZMQ_POLLITEMS_DFLT=16) - parameter(ZMQ_POLLOUT=2) - parameter(ZMQ_POLLPRI=8) - parameter(ZMQ_PRIORITY=112) - parameter(ZMQ_PROBE_ROUTER=51) - parameter(ZMQ_PROTOCOL_ERROR_WS_UNSPECIFIED=805306368) - parameter(ZMQ_PROTOCOL_ERROR_ZAP_BAD_REQUEST_ID=536870914) - parameter(ZMQ_PROTOCOL_ERROR_ZAP_BAD_VERSION=536870915) - parameter(ZMQ_PROTOCOL_ERROR_ZAP_INVALID_METADATA=536870917) - parameter(ZMQ_PROTOCOL_ERROR_ZAP_INVALID_STATUS_CODE=536870916) - parameter(ZMQ_PROTOCOL_ERROR_ZAP_MALFORMED_REPLY=536870913) - parameter(ZMQ_PROTOCOL_ERROR_ZAP_UNSPECIFIED=536870912) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_CRYPTOGRAPHIC=285212673) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_METADATA=268435480) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_INVALID_SEQUENCE=268435458) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_KEY_EXCHANGE=268435459) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_ERROR=268435477) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_HELLO=268435475) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_INITIATE=268435476) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_MESSAGE=268435474) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_READY=268435478) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_UNSPECIFIED=268435473) - parameter( - & ZMQ_PROTOCOL_ERROR_ZMTP_MALFORMED_COMMAND_WELCOME=268435479) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_MECHANISM_MISMATCH=285212674) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_UNEXPECTED_COMMAND=268435457) - parameter(ZMQ_PROTOCOL_ERROR_ZMTP_UNSPECIFIED=268435456) - parameter(ZMQ_PTR=8) - parameter(ZMQ_PUB=1) - parameter(ZMQ_PULL=7) - parameter(ZMQ_PUSH=8) - parameter(ZMQ_QUEUE=3) - parameter(ZMQ_RADIO=14) - parameter(ZMQ_RATE=8) - parameter(ZMQ_RCVBUF=12) - parameter(ZMQ_RCVHWM=24) - parameter(ZMQ_RCVMORE=13) - parameter(ZMQ_RCVTIMEO=27) - parameter(ZMQ_RECONNECT_IVL=18) - parameter(ZMQ_RECONNECT_IVL_MAX=21) - parameter(ZMQ_RECONNECT_STOP=109) - parameter(ZMQ_RECONNECT_STOP_AFTER_DISCONNECT=3) - parameter(ZMQ_RECONNECT_STOP_CONN_REFUSED=1) - parameter(ZMQ_RECONNECT_STOP_HANDSHAKE_FAILED=2) - parameter(ZMQ_RECOVERY_IVL=9) - parameter(ZMQ_REP=4) - parameter(ZMQ_REQ=3) - parameter(ZMQ_REQ_CORRELATE=52) - parameter(ZMQ_REQ_RELAXED=53) - parameter(ZMQ_ROUTER=6) - parameter(ZMQ_ROUTER_BEHAVIOR=33) - parameter(ZMQ_ROUTER_HANDOVER=56) - parameter(ZMQ_ROUTER_MANDATORY=33) - parameter(ZMQ_ROUTER_NOTIFY=97) - parameter(ZMQ_ROUTER_RAW=41) - parameter(ZMQ_ROUTING_ID=5) - parameter(ZMQ_SCATTER=17) - parameter(ZMQ_SERVER=12) - parameter(ZMQ_SHARED=3) - parameter(ZMQ_SNDBUF=11) - parameter(ZMQ_SNDHWM=23) - parameter(ZMQ_SNDMORE=2) - parameter(ZMQ_SNDTIMEO=28) - parameter(ZMQ_SOCKET_LIMIT=3) - parameter(ZMQ_SOCKS_PASSWORD=100) - parameter(ZMQ_SOCKS_PROXY=68) - parameter(ZMQ_SOCKS_USERNAME=99) - parameter(ZMQ_SRCFD=2) - parameter(ZMQ_STREAM=11) - parameter(ZMQ_STREAMER=1) - parameter(ZMQ_STREAM_NOTIFY=73) - parameter(ZMQ_SUB=2) - parameter(ZMQ_SUBSCRIBE=6) - parameter(ZMQ_TCP_ACCEPT_FILTER=38) - parameter(ZMQ_TCP_KEEPALIVE=34) - parameter(ZMQ_TCP_KEEPALIVE_CNT=35) - parameter(ZMQ_TCP_KEEPALIVE_IDLE=36) - parameter(ZMQ_TCP_KEEPALIVE_INTVL=37) - parameter(ZMQ_TCP_MAXRT=80) - parameter(ZMQ_THREAD_AFFINITY_CPU_ADD=7) - parameter(ZMQ_THREAD_AFFINITY_CPU_REMOVE=8) - parameter(ZMQ_THREAD_NAME_PREFIX=9) - parameter(ZMQ_THREAD_PRIORITY=3) - parameter(ZMQ_THREAD_PRIORITY_DFLT=-1) - parameter(ZMQ_THREAD_SAFE=81) - parameter(ZMQ_THREAD_SCHED_POLICY=4) - parameter(ZMQ_THREAD_SCHED_POLICY_DFLT=-1) - parameter(ZMQ_TOS=57) - parameter(ZMQ_TYPE=16) - parameter(ZMQ_UNSUBSCRIBE=7) - parameter(ZMQ_USE_FD=89) - parameter(ZMQ_VERSION=40304) - parameter(ZMQ_VERSION_MAJOR=4) - parameter(ZMQ_VERSION_MINOR=3) - parameter(ZMQ_VERSION_PATCH=4) - parameter(ZMQ_VMCI_BUFFER_MAX_SIZE=87) - parameter(ZMQ_VMCI_BUFFER_MIN_SIZE=86) - parameter(ZMQ_VMCI_BUFFER_SIZE=85) - parameter(ZMQ_VMCI_CONNECT_TIMEOUT=88) - parameter(ZMQ_WSS_CERT_PEM=104) - parameter(ZMQ_WSS_HOSTNAME=106) - parameter(ZMQ_WSS_KEY_PEM=103) - parameter(ZMQ_WSS_TRUST_PEM=105) - parameter(ZMQ_WSS_TRUST_SYSTEM=107) - parameter(ZMQ_XPUB=9) - parameter(ZMQ_XPUB_MANUAL=71) - parameter(ZMQ_XPUB_MANUAL_LAST_VALUE=98) - parameter(ZMQ_XPUB_NODROP=69) - parameter(ZMQ_XPUB_VERBOSE=40) - parameter(ZMQ_XPUB_VERBOSER=78) - parameter(ZMQ_XPUB_WELCOME_MSG=72) - parameter(ZMQ_XREP=6) - parameter(ZMQ_XREQ=5) - parameter(ZMQ_XSUB=10) - parameter(ZMQ_ZAP_DOMAIN=55) - parameter(ZMQ_ZAP_ENFORCE_DOMAIN=93) - parameter(ZMQ_ZERO_COPY_RECV=10) - integer f77_zmq_bind - external f77_zmq_bind - integer f77_zmq_close - external f77_zmq_close - integer f77_zmq_connect - external f77_zmq_connect - integer f77_zmq_ctx_destroy - external f77_zmq_ctx_destroy - integer f77_zmq_ctx_get - external f77_zmq_ctx_get - integer*8 f77_zmq_ctx_new - external f77_zmq_ctx_new - integer f77_zmq_ctx_set - external f77_zmq_ctx_set - integer f77_zmq_ctx_shutdown - external f77_zmq_ctx_shutdown - integer f77_zmq_ctx_term - external f77_zmq_ctx_term - integer f77_zmq_disconnect - external f77_zmq_disconnect - integer f77_zmq_errno - external f77_zmq_errno - integer f77_zmq_getsockopt - external f77_zmq_getsockopt - integer f77_zmq_microsleep - external f77_zmq_microsleep - integer f77_zmq_msg_close - external f77_zmq_msg_close - integer f77_zmq_msg_copy - external f77_zmq_msg_copy - integer f77_zmq_msg_copy_from_data - external f77_zmq_msg_copy_from_data - integer f77_zmq_msg_copy_to_data - external f77_zmq_msg_copy_to_data - integer f77_zmq_msg_copy_to_data8 - external f77_zmq_msg_copy_to_data8 - integer*8 f77_zmq_msg_data - external f77_zmq_msg_data - integer*8 f77_zmq_msg_data_new - external f77_zmq_msg_data_new - integer f77_zmq_msg_destroy - external f77_zmq_msg_destroy - integer f77_zmq_msg_destroy_data - external f77_zmq_msg_destroy_data - integer f77_zmq_msg_get - external f77_zmq_msg_get - character*(64) f77_zmq_msg_gets - external f77_zmq_msg_gets - integer f77_zmq_msg_init - external f77_zmq_msg_init - integer f77_zmq_msg_init_data - external f77_zmq_msg_init_data - integer f77_zmq_msg_init_size - external f77_zmq_msg_init_size - integer f77_zmq_msg_more - external f77_zmq_msg_more - integer f77_zmq_msg_move - external f77_zmq_msg_move - integer*8 f77_zmq_msg_new - external f77_zmq_msg_new - integer f77_zmq_msg_recv - external f77_zmq_msg_recv - integer*8 f77_zmq_msg_recv8 - external f77_zmq_msg_recv8 - integer f77_zmq_msg_send - external f77_zmq_msg_send - integer*8 f77_zmq_msg_send8 - external f77_zmq_msg_send8 - integer f77_zmq_msg_set - external f77_zmq_msg_set - integer f77_zmq_msg_size - external f77_zmq_msg_size - integer*8 f77_zmq_msg_size8 - external f77_zmq_msg_size8 - integer f77_zmq_poll - external f77_zmq_poll - integer f77_zmq_pollitem_destroy - external f77_zmq_pollitem_destroy - integer*8 f77_zmq_pollitem_new - external f77_zmq_pollitem_new - integer f77_zmq_pollitem_revents - external f77_zmq_pollitem_revents - integer f77_zmq_pollitem_set_events - external f77_zmq_pollitem_set_events - integer f77_zmq_pollitem_set_socket - external f77_zmq_pollitem_set_socket - integer f77_zmq_proxy - external f77_zmq_proxy - integer f77_zmq_proxy_steerable - external f77_zmq_proxy_steerable - integer f77_zmq_recv - external f77_zmq_recv - integer*8 f77_zmq_recv8 - external f77_zmq_recv8 - integer f77_zmq_send - external f77_zmq_send - integer*8 f77_zmq_send8 - external f77_zmq_send8 - integer f77_zmq_send_const - external f77_zmq_send_const - integer*8 f77_zmq_send_const8 - external f77_zmq_send_const8 - integer f77_zmq_setsockopt - external f77_zmq_setsockopt - integer*8 f77_zmq_socket - external f77_zmq_socket - integer f77_zmq_socket_monitor - external f77_zmq_socket_monitor - character*(64) f77_zmq_strerror - external f77_zmq_strerror - integer f77_zmq_term - external f77_zmq_term - integer f77_zmq_unbind - external f77_zmq_unbind - integer f77_zmq_version - external f77_zmq_version - integer pthread_create - external pthread_create - integer pthread_create_arg - external pthread_create_arg - integer pthread_detach - external pthread_detach - integer pthread_join - external pthread_join diff --git a/ocaml/Message.ml b/ocaml/Message.ml index b7d77430..049203d7 100644 --- a/ocaml/Message.ml +++ b/ocaml/Message.ml @@ -63,11 +63,11 @@ end module Connect_msg : sig type t = Tcp | Inproc | Ipc - val create : typ:string -> t + val create : string -> t val to_string : t -> string end = struct type t = Tcp | Inproc | Ipc - let create ~typ = + let create typ = match typ with | "tcp" -> Tcp | "inproc" -> Inproc @@ -515,9 +515,9 @@ let of_string s = | Connect_ socket -> Connect (Connect_msg.create socket) | NewJob_ { state ; push_address_tcp ; push_address_inproc } -> - Newjob (Newjob_msg.create push_address_tcp push_address_inproc state) + Newjob (Newjob_msg.create ~address_tcp:push_address_tcp ~address_inproc:push_address_inproc ~state) | EndJob_ state -> - Endjob (Endjob_msg.create state) + Endjob (Endjob_msg.create ~state) | GetData_ { state ; client_id ; key } -> GetData (GetData_msg.create ~client_id ~state ~key) | PutData_ { state ; client_id ; key } -> diff --git a/ocaml/TaskServer.ml b/ocaml/TaskServer.ml index 92a6f5ca..ad827316 100644 --- a/ocaml/TaskServer.ml +++ b/ocaml/TaskServer.ml @@ -776,7 +776,7 @@ let run ~port = Zmq.Socket.create zmq_context Zmq.Socket.rep in Zmq.Socket.set_linger_period rep_socket 1_000_000; - bind_socket "REP" rep_socket port; + bind_socket ~socket_type:"REP" ~socket:rep_socket ~port; let initial_program_state = { queue = Queuing_system.create () ; diff --git a/ocaml/qp_run.ml b/ocaml/qp_run.ml index d096b15b..dfbab167 100644 --- a/ocaml/qp_run.ml +++ b/ocaml/qp_run.ml @@ -110,7 +110,7 @@ let run slave ?prefix exe ezfio_file = let task_thread = let thread = Thread.create ( fun () -> - TaskServer.run port_number ) + TaskServer.run ~port:port_number ) in thread (); in diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja index c0ba8c6a..0f70f4c4 100755 --- a/scripts/compilation/qp_create_ninja +++ b/scripts/compilation/qp_create_ninja @@ -121,6 +121,7 @@ def ninja_create_env_variable(pwd_config_file): l_string.append("LIB = {0} ".format(str_lib)) + l_string.append("CONFIG_FILE = {0}".format(pwd_config_file)) l_string.append("") return l_string diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f index ef43a78b..eb98994c 100644 --- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f @@ -19,11 +19,11 @@ subroutine phi_j_erf_mu_r_xyz_phi(i,j,mu_in, C_center, xyz_ints) return endif n_pt_in = n_pt_max_integrals - ! j + ! j num_A = ao_nucl(j) power_A(1:3)= ao_power(j,1:3) A_center(1:3) = nucl_coord(num_A,1:3) - ! i + ! i num_B = ao_nucl(i) power_B(1:3)= ao_power(i,1:3) B_center(1:3) = nucl_coord(num_B,1:3) @@ -33,19 +33,19 @@ subroutine phi_j_erf_mu_r_xyz_phi(i,j,mu_in, C_center, xyz_ints) do m=1,ao_prim_num(i) beta = ao_expo_ordered_transp(m,i) do mm = 1, 3 - ! (x phi_i ) * phi_j + ! (x phi_i ) * phi_j ! x * (x - B_x)^b_x = b_x (x - B_x)^b_x + 1 * (x - B_x)^{b_x+1} ! ! first contribution :: B_x (x - B_x)^b_x :: usual integral multiplied by B_x power_B_tmp = power_B - contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in) + contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in) xyz_ints(mm) += contrib * B_center(mm) * ao_coef_normalized_ordered_transp(l,j) & - * ao_coef_normalized_ordered_transp(m,i) - ! second contribution :: 1 * (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1 + * ao_coef_normalized_ordered_transp(m,i) + ! second contribution :: 1 * (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1 power_B_tmp(mm) += 1 - contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in) + contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in) xyz_ints(mm) += contrib * 1.d0 * ao_coef_normalized_ordered_transp(l,j) & - * ao_coef_normalized_ordered_transp(m,i) + * ao_coef_normalized_ordered_transp(m,i) enddo enddo enddo @@ -58,7 +58,7 @@ double precision function phi_j_erf_mu_r_phi(i, j, mu_in, C_center) BEGIN_DOC ! phi_j_erf_mu_r_phi = int dr phi_j(r) [erf(mu |r - C|)/|r-C|] phi_i(r) END_DOC - + implicit none integer, intent(in) :: i,j double precision, intent(in) :: mu_in, C_center(3) @@ -77,24 +77,24 @@ double precision function phi_j_erf_mu_r_phi(i, j, mu_in, C_center) n_pt_in = n_pt_max_integrals - ! j + ! j num_A = ao_nucl(j) power_A(1:3) = ao_power(j,1:3) A_center(1:3) = nucl_coord(num_A,1:3) - ! i + ! i num_B = ao_nucl(i) power_B(1:3) = ao_power(i,1:3) B_center(1:3) = nucl_coord(num_B,1:3) - + do l = 1, ao_prim_num(j) alpha = ao_expo_ordered_transp(l,j) do m = 1, ao_prim_num(i) beta = ao_expo_ordered_transp(m,i) - contrib = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) + contrib = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) - phi_j_erf_mu_r_phi += contrib * ao_coef_normalized_ordered_transp(l,j) * ao_coef_normalized_ordered_transp(m,i) + phi_j_erf_mu_r_phi += contrib * ao_coef_normalized_ordered_transp(l,j) * ao_coef_normalized_ordered_transp(m,i) enddo enddo @@ -124,11 +124,11 @@ subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints) return endif n_pt_in = n_pt_max_integrals - ! j + ! j num_A = ao_nucl(j) power_A(1:3)= ao_power(j,1:3) A_center(1:3) = nucl_coord(num_A,1:3) - ! i + ! i num_B = ao_nucl(i) power_B(1:3)= ao_power(i,1:3) B_center(1:3) = nucl_coord(num_B,1:3) @@ -141,7 +141,7 @@ subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints) call erfc_mu_gauss_xyz(C_center,delta,mu,A_center,B_center,power_A,power_B,alpha,beta,n_pt_in,xyz_ints) do mm = 1, 4 gauss_ints(mm) += xyz_ints(mm) * ao_coef_normalized_ordered_transp(l,j) & - * ao_coef_normalized_ordered_transp(m,i) + * ao_coef_normalized_ordered_transp(m,i) enddo enddo enddo @@ -161,7 +161,7 @@ subroutine erf_mu_gauss_ij_ao(i, j, mu, C_center, delta, gauss_ints) integer, intent(in) :: i, j double precision, intent(in) :: mu, C_center(3), delta double precision, intent(out) :: gauss_ints - + integer :: n_pt_in, l, m integer :: num_A, power_A(3), num_b, power_B(3) double precision :: alpha, beta, A_center(3), B_center(3), coef @@ -177,16 +177,16 @@ subroutine erf_mu_gauss_ij_ao(i, j, mu, C_center, delta, gauss_ints) n_pt_in = n_pt_max_integrals - ! j + ! j num_A = ao_nucl(j) power_A(1:3) = ao_power(j,1:3) A_center(1:3) = nucl_coord(num_A,1:3) - ! i + ! i num_B = ao_nucl(i) power_B(1:3) = ao_power(i,1:3) B_center(1:3) = nucl_coord(num_B,1:3) - + do l = 1, ao_prim_num(j) alpha = ao_expo_ordered_transp(l,j) do m = 1, ao_prim_num(i) @@ -219,7 +219,7 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) ! END_DOC - include 'utils/constants.include.F' + include 'utils/constants.include.F' implicit none @@ -275,6 +275,83 @@ end subroutine NAI_pol_x_mult_erf_ao ! --- +subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao, n_points + double precision, intent(in) :: mu_in, C_center(n_points,3) + double precision, intent(out) :: ints(n_points,3) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in + integer :: power_xA(3), m, ipoint + double precision :: A_center(3), B_center(3), alpha, beta, coef + double precision, allocatable :: integral(:) + double precision :: NAI_pol_mult_erf + + ints = 0.d0 + if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12) then + return + endif + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + n_pt_in = n_pt_max_integrals + + allocate(integral(n_points)) + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do m = 1, 3 + + power_xA = power_A + ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA(m) += 1 + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + ! First term = (x-Ax)**(ax+1) + call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points) + do ipoint=1,n_points + ints(ipoint,m) += integral(ipoint) * coef + enddo + + ! Second term = Ax * (x-Ax)**(ax) + call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points) + do ipoint=1,n_points + ints(ipoint,m) += A_center(m) * integral(ipoint) * coef + enddo + + enddo + enddo + enddo + deallocate(integral) + +end subroutine NAI_pol_x_mult_erf_ao_v + +! --- + double precision function NAI_pol_x_mult_erf_ao_x(i_ao, j_ao, mu_in, C_center) BEGIN_DOC @@ -662,6 +739,8 @@ end function NAI_pol_x_mult_erf_ao_with1s_z ! --- +! --- + subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints) BEGIN_DOC @@ -676,7 +755,7 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen ! END_DOC - include 'utils/constants.include.F' + include 'utils/constants.include.F' implicit none @@ -720,7 +799,7 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen do j = 1, ao_prim_num(j_ao) alphaj = ao_expo_ordered_transp (j,j_ao) - coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) ! First term = (x-Ax)**(ax+1) integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj & @@ -738,6 +817,91 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen end subroutine NAI_pol_x_mult_erf_ao_with1s +!-- + +subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_center, ints, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao, n_points + double precision, intent(in) :: beta, B_center(n_points,3), mu_in, C_center(n_points,3) + double precision, intent(out) :: ints(n_points,3) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m + double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi + + integer :: ipoint + double precision, allocatable :: integral(:) + + if(beta .lt. 1d-10) then + call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) + return + endif + + ints(:,:) = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then + return + endif + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + n_pt_in = n_pt_max_integrals + + allocate(integral(n_points)) + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do m = 1, 3 + + ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA = power_Ai + power_xA(m) += 1 + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + ! First term = (x-Ax)**(ax+1) + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, & + alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points) + do ipoint = 1, n_points + ints(ipoint,m) += integral(ipoint) * coef + enddo + + ! Second term = Ax * (x-Ax)**(ax) + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, & + alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points) + do ipoint = 1, n_points + ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef + enddo + + enddo + enddo + enddo + deallocate(integral) + +end subroutine NAI_pol_x_mult_erf_ao_with1s + + ! --- subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints) @@ -748,7 +912,7 @@ subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints) ! ! if m == 1 X(m) = x, m == 1 X(m) = y, m == 1 X(m) = z END_DOC - include 'utils/constants.include.F' + include 'utils/constants.include.F' integer, intent(in) :: i_ao,j_ao,m double precision, intent(in) :: mu_in, C_center(3) double precision, intent(out):: ints diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f index c058d0d8..facb6264 100644 --- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f @@ -22,15 +22,15 @@ subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints) power_B(1:3)= ao_power(j,1:3) B_center(1:3) = nucl_coord(num_B,1:3) do l=1,ao_prim_num(i) - alpha = ao_expo_ordered_transp(l,i) + alpha = ao_expo_ordered_transp(l,i) do k=1,ao_prim_num(j) - beta = ao_expo_ordered_transp(k,j) + beta = ao_expo_ordered_transp(k,j) call overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints_tmp) do m = 1, 3 gauss_ints(m) += gauss_ints_tmp(m) * ao_coef_normalized_ordered_transp(l,i) & - * ao_coef_normalized_ordered_transp(k,j) + * ao_coef_normalized_ordered_transp(k,j) enddo - enddo + enddo enddo end @@ -61,13 +61,13 @@ double precision function overlap_gauss_xyz_r12_ao_specific(D_center,delta,i,j,m power_B(1:3)= ao_power(j,1:3) B_center(1:3) = nucl_coord(num_B,1:3) do l=1,ao_prim_num(i) - alpha = ao_expo_ordered_transp(l,i) + alpha = ao_expo_ordered_transp(l,i) do k=1,ao_prim_num(j) - beta = ao_expo_ordered_transp(k,j) + beta = ao_expo_ordered_transp(k,j) gauss_int = overlap_gauss_xyz_r12_specific(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,mx) overlap_gauss_xyz_r12_ao_specific = gauss_int * ao_coef_normalized_ordered_transp(l,i) & - * ao_coef_normalized_ordered_transp(k,j) - enddo + * ao_coef_normalized_ordered_transp(k,j) + enddo enddo end @@ -90,13 +90,13 @@ subroutine overlap_gauss_r12_all_ao(D_center,delta,aos_ints) power_B(1:3)= ao_power(j,1:3) B_center(1:3) = nucl_coord(num_B,1:3) do l=1,ao_prim_num(i) - alpha = ao_expo_ordered_transp(l,i) + alpha = ao_expo_ordered_transp(l,i) do k=1,ao_prim_num(j) - beta = ao_expo_ordered_transp(k,j) + beta = ao_expo_ordered_transp(k,j) analytical_j = overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta) aos_ints(j,i) += analytical_j * ao_coef_normalized_ordered_transp(l,i) & - * ao_coef_normalized_ordered_transp(k,j) - enddo + * ao_coef_normalized_ordered_transp(k,j) + enddo enddo enddo enddo @@ -114,7 +114,7 @@ double precision function overlap_gauss_r12_ao(D_center, delta, i, j) implicit none integer, intent(in) :: i, j double precision, intent(in) :: D_center(3), delta - + integer :: power_A(3), power_B(3), l, k double precision :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j @@ -133,23 +133,75 @@ double precision function overlap_gauss_r12_ao(D_center, delta, i, j) B_center(1:3) = nucl_coord(ao_nucl(j),1:3) do l = 1, ao_prim_num(i) - alpha = ao_expo_ordered_transp (l,i) - coef1 = ao_coef_normalized_ordered_transp(l,i) + alpha = ao_expo_ordered_transp (l,i) + coef1 = ao_coef_normalized_ordered_transp(l,i) do k = 1, ao_prim_num(j) beta = ao_expo_ordered_transp(k,j) - coef = coef1 * ao_coef_normalized_ordered_transp(k,j) + coef = coef1 * ao_coef_normalized_ordered_transp(k,j) if(dabs(coef) .lt. 1d-12) cycle analytical_j = overlap_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta) overlap_gauss_r12_ao += coef * analytical_j - enddo + enddo enddo end function overlap_gauss_r12_ao +! -- + +subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) + + BEGIN_DOC + ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2} + END_DOC + + implicit none + integer, intent(in) :: i, j, n_points + double precision, intent(in) :: D_center(n_points,3), delta + double precision, intent(out) :: resv(n_points) + + integer :: power_A(3), power_B(3), l, k + double precision :: A_center(3), B_center(3), alpha, beta, coef, coef1 + double precision, allocatable :: analytical_j(:) + + double precision, external :: overlap_gauss_r12 + integer :: ipoint + + resv(:) = 0.d0 + if(ao_overlap_abs(j,i).lt.1.d-12) then + return + endif + + power_A(1:3) = ao_power(i,1:3) + power_B(1:3) = ao_power(j,1:3) + + A_center(1:3) = nucl_coord(ao_nucl(i),1:3) + B_center(1:3) = nucl_coord(ao_nucl(j),1:3) + + allocate(analytical_j(n_points)) + do l = 1, ao_prim_num(i) + alpha = ao_expo_ordered_transp (l,i) + coef1 = ao_coef_normalized_ordered_transp(l,i) + + do k = 1, ao_prim_num(j) + beta = ao_expo_ordered_transp(k,j) + coef = coef1 * ao_coef_normalized_ordered_transp(k,j) + + if(dabs(coef) .lt. 1d-12) cycle + + call overlap_gauss_r12_v(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta, analytical_j, n_points) + do ipoint=1, n_points + resv(ipoint) = resv(ipoint) + coef*analytical_j(ipoint) + enddo + enddo + enddo + deallocate(analytical_j) + +end + ! --- double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, delta, i, j) @@ -163,14 +215,13 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, implicit none integer, intent(in) :: i, j double precision, intent(in) :: B_center(3), beta, D_center(3), delta - + integer :: power_A1(3), power_A2(3), l, k double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef1, coef12, analytical_j double precision :: G_center(3), gama, fact_g, gama_inv double precision, external :: overlap_gauss_r12, overlap_gauss_r12_ao - ASSERT(beta .gt. 0.d0) if(beta .lt. 1d-10) then overlap_gauss_r12_ao_with1s = overlap_gauss_r12_ao(D_center, delta, i, j) return @@ -192,7 +243,7 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, fact_g = beta * delta * gama_inv * ( (B_center(1) - D_center(1)) * (B_center(1) - D_center(1)) & + (B_center(2) - D_center(2)) * (B_center(2) - D_center(2)) & + (B_center(3) - D_center(3)) * (B_center(3) - D_center(3)) ) - if(fact_g .gt. 80d0) return + if(fact_g .gt. 10d0) return fact_g = dexp(-fact_g) ! --- @@ -204,22 +255,118 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, A2_center(1:3) = nucl_coord(ao_nucl(j),1:3) do l = 1, ao_prim_num(i) - alpha1 = ao_expo_ordered_transp (l,i) + alpha1 = ao_expo_ordered_transp (l,i) coef1 = fact_g * ao_coef_normalized_ordered_transp(l,i) - !if(dabs(coef1) .lt. 1d-12) cycle + if(dabs(coef1) .lt. 1d-12) cycle do k = 1, ao_prim_num(j) alpha2 = ao_expo_ordered_transp (k,j) - coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) + coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) if(dabs(coef12) .lt. 1d-12) cycle analytical_j = overlap_gauss_r12(G_center, gama, A1_center, A2_center, power_A1, power_A2, alpha1, alpha2) overlap_gauss_r12_ao_with1s += coef12 * analytical_j - enddo + enddo enddo end function overlap_gauss_r12_ao_with1s ! --- +subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, resv, n_points) + BEGIN_DOC + ! + ! \int dr AO_i(r) AO_j(r) e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} + ! using an array of D_centers. + ! + END_DOC + + implicit none + integer, intent(in) :: i, j, n_points + double precision, intent(in) :: B_center(3), beta, D_center(n_points,3), delta + double precision, intent(out) :: resv(n_points) + + integer :: power_A1(3), power_A2(3), l, k + double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef1 + double precision :: coef12, coef12f + double precision :: gama, gama_inv + double precision :: bg, dg, bdg + + integer :: ipoint + + double precision, allocatable :: fact_g(:), G_center(:,:), analytical_j(:) + + if(ao_overlap_abs(j,i) .lt. 1.d-12) then + return + endif + + ASSERT(beta .gt. 0.d0) + + if(beta .lt. 1d-10) then + call overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) + return + endif + + resv(:) = 0.d0 + + ! e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} = fact_g e^{-gama |r - G|^2} + + gama = beta + delta + gama_inv = 1.d0 / gama + + power_A1(1:3) = ao_power(i,1:3) + power_A2(1:3) = ao_power(j,1:3) + + A1_center(1:3) = nucl_coord(ao_nucl(i),1:3) + A2_center(1:3) = nucl_coord(ao_nucl(j),1:3) + + allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) ) + + bg = beta * gama_inv + dg = delta * gama_inv + bdg = bg * delta + do ipoint=1,n_points + G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1) + G_center(ipoint,2) = bg * B_center(2) + dg * D_center(ipoint,2) + G_center(ipoint,3) = bg * B_center(3) + dg * D_center(ipoint,3) + fact_g(ipoint) = bdg * ( & + (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1)) & + + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2)) & + + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) ) + + if(fact_g(ipoint) < 10d0) then + fact_g(ipoint) = dexp(-fact_g(ipoint)) + else + fact_g(ipoint) = 0.d0 + endif + + enddo + + ! --- + + do l = 1, ao_prim_num(i) + alpha1 = ao_expo_ordered_transp (l,i) + coef1 = ao_coef_normalized_ordered_transp(l,i) + + do k = 1, ao_prim_num(j) + alpha2 = ao_expo_ordered_transp (k,j) + coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) + if(dabs(coef12) .lt. 1d-12) cycle + + call overlap_gauss_r12_v(G_center, gama, A1_center,& + A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points) + + do ipoint=1,n_points + coef12f = coef12 * fact_g(ipoint) + resv(ipoint) += coef12f * analytical_j(ipoint) + end do + + enddo + enddo + deallocate (fact_g, G_center, analytical_j ) + + +end + + diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 6e3cfed4..20796512 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -11,78 +11,65 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n implicit none integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), int_fit, expo_fit, coef_fit + double precision :: r(3), expo_fit, coef_fit double precision :: coef, beta, B_center(3) double precision :: tmp double precision :: wall0, wall1 + double precision, allocatable :: int_fit_v(:) double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points j1b_pen + provide mu_erf final_grid_points_transp j1b_pen call wall_time(wall0) - int2_grad1u2_grad2u2_j1b2 = 0.d0 + int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, n_max_fit_slat, & - !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2) - !$OMP DO - do ipoint = 1, n_points_final_grid - r(1) = final_grid_points(1,ipoint) - r(2) = final_grid_points(2,ipoint) - r(3) = final_grid_points(3,ipoint) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& + !$OMP coef_fit, expo_fit, int_fit_v, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& + !$OMP final_grid_points_transp, n_max_fit_slat, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,& + !$OMP ao_overlap_abs) - do i = 1, ao_num - do j = i, ao_num + allocate(int_fit_v(n_points_final_grid)) + !$OMP DO SCHEDULE(dynamic) + do i = 1, ao_num + do j = i, ao_num - tmp = 0.d0 - do i_fit = 1, n_max_fit_slat + if(ao_overlap_abs(j,i) .lt. 1.d-12) then + cycle + endif - expo_fit = expo_gauss_1_erf_x_2(i_fit) - coef_fit = coef_gauss_1_erf_x_2(i_fit) + do i_1s = 1, List_all_comb_b3_size - ! --- + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) - coef = List_all_comb_b3_coef (1) - beta = List_all_comb_b3_expo (1) - B_center(1) = List_all_comb_b3_cent(1,1) - B_center(2) = List_all_comb_b3_cent(2,1) - B_center(3) = List_all_comb_b3_cent(3,1) + do i_fit = 1, n_max_fit_slat - int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) - if(dabs(int_fit) .lt. 1d-10) cycle + expo_fit = expo_gauss_1_erf_x_2(i_fit) + coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef - tmp += -0.25d0 * coef * coef_fit * int_fit + call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & + expo_fit, i, j, int_fit_v, n_points_final_grid) - ! --- + do ipoint = 1, n_points_final_grid + int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) + enddo - do i_1s = 2, List_all_comb_b3_size + enddo - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) - - int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) - - tmp += -0.25d0 * coef * coef_fit * int_fit - enddo - - ! --- - - enddo - - int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = tmp - enddo - enddo - enddo + enddo + enddo + enddo !$OMP END DO + deallocate(int_fit_v) !$OMP END PARALLEL do ipoint = 1, n_points_final_grid @@ -96,7 +83,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n call wall_time(wall1) print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -109,79 +96,60 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final END_DOC implicit none - integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), int_fit, expo_fit, coef_fit - double precision :: coef, beta, B_center(3), tmp - double precision :: wall0, wall1 + integer :: i, j, ipoint, i_1s, i_fit + double precision :: r(3), expo_fit, coef_fit + double precision :: coef, beta, B_center(3), tmp + double precision :: wall0, wall1 + double precision, allocatable :: int_fit_v(:) - double precision, external :: overlap_gauss_r12_ao_with1s + double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points j1b_pen + provide mu_erf final_grid_points_transp j1b_pen call wall_time(wall0) - int2_u2_j1b2 = 0.d0 + int2_u2_j1b2(:,:,:) = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, n_max_fit_slat, & - !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_u2_j1b2) - !$OMP DO - do ipoint = 1, n_points_final_grid - r(1) = final_grid_points(1,ipoint) - r(2) = final_grid_points(2,ipoint) - r(3) = final_grid_points(3,ipoint) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& + !$OMP coef_fit, expo_fit, int_fit_v) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& + !$OMP final_grid_points_transp, n_max_fit_slat, & + !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u2_j1b2) + allocate(int_fit_v(n_points_final_grid)) + !$OMP DO SCHEDULE(dynamic) + do i = 1, ao_num + do j = i, ao_num - do i = 1, ao_num - do j = i, ao_num + do i_1s = 1, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) - tmp = 0.d0 do i_fit = 1, n_max_fit_slat expo_fit = expo_gauss_j_mu_x_2(i_fit) - coef_fit = coef_gauss_j_mu_x_2(i_fit) + coef_fit = coef_gauss_j_mu_x_2(i_fit) * coef - ! --- + call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & + expo_fit, i, j, int_fit_v, n_points_final_grid) - coef = List_all_comb_b3_coef (1) - beta = List_all_comb_b3_expo (1) - B_center(1) = List_all_comb_b3_cent(1,1) - B_center(2) = List_all_comb_b3_cent(2,1) - B_center(3) = List_all_comb_b3_cent(3,1) - - int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) - if(dabs(int_fit) .lt. 1d-10) cycle - - tmp += coef * coef_fit * int_fit - - ! --- - - do i_1s = 2, List_all_comb_b3_size - - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) - - int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) - - tmp += coef * coef_fit * int_fit + do ipoint = 1, n_points_final_grid + int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) enddo - ! --- - enddo - int2_u2_j1b2(j,i,ipoint) = tmp enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + deallocate(int_fit_v) + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -194,7 +162,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final call wall_time(wall1) print*, ' wall time for int2_u2_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -207,117 +175,98 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p END_DOC implicit none - integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), int_fit(3), expo_fit, coef_fit - double precision :: coef, beta, B_center(3), dist - double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp - double precision :: tmp_x, tmp_y, tmp_z - double precision :: wall0, wall1 + integer :: i, j, ipoint, i_1s, i_fit + double precision :: r(3), expo_fit, coef_fit + double precision :: coef, beta, B_center(3) + double precision :: alpha_1s, alpha_1s_inv, expo_coef_1s, coef_tmp + double precision :: tmp_x, tmp_y, tmp_z + double precision :: wall0, wall1 + double precision, allocatable :: int_fit_v(:,:), dist(:), centr_1s(:,:) - provide mu_erf final_grid_points j1b_pen + provide mu_erf final_grid_points_transp j1b_pen call wall_time(wall0) - int2_u_grad1u_x_j1b2 = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, alpha_1s, dist, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & - !$OMP tmp_x, tmp_y, tmp_z) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, n_max_fit_slat, & - !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) - !$OMP DO + allocate(dist(n_points_final_grid), centr_1s(n_points_final_grid,3)) do ipoint = 1, n_points_final_grid - r(1) = final_grid_points(1,ipoint) - r(2) = final_grid_points(2,ipoint) - r(3) = final_grid_points(3,ipoint) + r(1) = final_grid_points_transp(ipoint,1) + r(2) = final_grid_points_transp(ipoint,2) + r(3) = final_grid_points_transp(ipoint,3) - do i = 1, ao_num - do j = i, ao_num + dist(ipoint) = (B_center(1) - r(1)) * (B_center(1) - r(1)) & + + (B_center(2) - r(2)) * (B_center(2) - r(2)) & + + (B_center(3) - r(3)) * (B_center(3) - r(3)) + enddo - tmp_x = 0.d0 - tmp_y = 0.d0 - tmp_z = 0.d0 - do i_fit = 1, n_max_fit_slat + int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0 - expo_fit = expo_gauss_j_mu_1_erf(i_fit) - coef_fit = coef_gauss_j_mu_1_erf(i_fit) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& + !$OMP coef_fit, expo_fit, int_fit_v, alpha_1s, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & + !$OMP tmp_x, tmp_y, tmp_z) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& + !$OMP final_grid_points_transp, n_max_fit_slat, dist, & + !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) + allocate(int_fit_v(n_points_final_grid,3)) - ! --- + do i_1s = 1, List_all_comb_b3_size - coef = List_all_comb_b3_coef (1) - beta = List_all_comb_b3_expo (1) - B_center(1) = List_all_comb_b3_cent(1,1) - B_center(2) = List_all_comb_b3_cent(2,1) - B_center(3) = List_all_comb_b3_cent(3,1) - dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & - + (B_center(2) - r(2)) * (B_center(2) - r(2)) & - + (B_center(3) - r(3)) * (B_center(3) - r(3)) + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) - alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + do i_fit = 1, n_max_fit_slat - centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) - centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) - centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) + expo_fit = expo_gauss_j_mu_1_erf(i_fit) + coef_fit = coef_gauss_j_mu_1_erf(i_fit) * coef - expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist - coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) - if(dabs(coef_tmp) .lt. 1d-10) cycle - - call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) - if( (dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle + alpha_1s = beta + expo_fit + alpha_1s_inv = 1.d0 / alpha_1s - tmp_x += coef_tmp * int_fit(1) - tmp_y += coef_tmp * int_fit(2) - tmp_z += coef_tmp * int_fit(3) + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points_transp(ipoint,1) + r(2) = final_grid_points_transp(ipoint,2) + r(3) = final_grid_points_transp(ipoint,3) - ! --- + centr_1s(ipoint,1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) + centr_1s(ipoint,2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) + centr_1s(ipoint,3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) + enddo - do i_1s = 2, List_all_comb_b3_size - - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) - dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & - + (B_center(2) - r(2)) * (B_center(2) - r(2)) & - + (B_center(3) - r(3)) * (B_center(3) - r(3)) + expo_coef_1s = beta * expo_fit * alpha_1s_inv + !$OMP BARRIER + !$OMP DO SCHEDULE(dynamic) + do i = 1, ao_num + do j = i, ao_num + call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s,& + 1.d+9, final_grid_points_transp, int_fit_v, n_points_final_grid) - alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s - - centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) - centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) - centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) - - expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist - coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) - if(dabs(coef_tmp) .lt. 1d-10) cycle - - call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) - - tmp_x += coef_tmp * int_fit(1) - tmp_y += coef_tmp * int_fit(2) - tmp_z += coef_tmp * int_fit(3) + do ipoint = 1, n_points_final_grid + coef_tmp = coef_fit * dexp(-expo_coef_1s* dist(ipoint)) + int2_u_grad1u_x_j1b2(1,j,i,ipoint) = & + int2_u_grad1u_x_j1b2(1,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,1) + int2_u_grad1u_x_j1b2(2,j,i,ipoint) = & + int2_u_grad1u_x_j1b2(2,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,2) + int2_u_grad1u_x_j1b2(3,j,i,ipoint) = & + int2_u_grad1u_x_j1b2(3,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,3) enddo ! --- enddo - - int2_u_grad1u_x_j1b2(1,j,i,ipoint) = tmp_x - int2_u_grad1u_x_j1b2(2,j,i,ipoint) = tmp_y - int2_u_grad1u_x_j1b2(3,j,i,ipoint) = tmp_z enddo + !$OMP END DO NOWAIT + enddo enddo - !$OMP END DO - !$OMP END PARALLEL + deallocate(int_fit_v) + !$OMP END PARALLEL + + deallocate(dist) do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -332,7 +281,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p call wall_time(wall1) print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -360,11 +309,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & !$OMP coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & !$OMP final_grid_points, n_max_fit_slat, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_u_grad1u_j1b2) !$OMP DO do ipoint = 1, n_points_final_grid @@ -420,15 +369,13 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points + (B_center(3) - r(3)) * (B_center(3) - r(3)) alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + alpha_1s_inv = 1.d0 / alpha_1s centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) - if(dabs(coef_tmp) .lt. 1d-10) cycle - int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r) tmp += coef_tmp * int_fit @@ -456,7 +403,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points call wall_time(wall1) print*, ' wall time for int2_u_grad1u_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index ff9f8ae5..1178cc31 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -63,7 +63,6 @@ END_PROVIDER tmp_cent_z += tmp_alphaj * nucl_coord(j,3) enddo - ASSERT(List_all_comb_b2_expo(i) .gt. 0d0) if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle List_all_comb_b2_cent(1,i) = tmp_cent_x / List_all_comb_b2_expo(i) @@ -169,7 +168,7 @@ END_PROVIDER do j = 1, nucl_num tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j) - + print*,List_all_comb_b3(j,i),j1b_pen(j) List_all_comb_b3_expo(i) += tmp_alphaj List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1) List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2) @@ -177,8 +176,8 @@ END_PROVIDER enddo - ASSERT(List_all_comb_b3_expo(i) .gt. 0d0) if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle + ASSERT(List_all_comb_b3_expo(i) .gt. 0d0) List_all_comb_b3_cent(1,i) = List_all_comb_b3_cent(1,i) / List_all_comb_b3_expo(i) List_all_comb_b3_cent(2,i) = List_all_comb_b3_cent(2,i) / List_all_comb_b3_expo(i) diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f index 749227ea..1638aa9e 100644 --- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f +++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f @@ -1,67 +1,139 @@ - double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta) BEGIN_DOC ! Computes the following integral : ! - ! .. math:: - ! + ! .. math :: + ! ! \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) ! END_DOC - implicit none + implicit none include 'constants.include.F' - double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" - double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" - integer, intent(in) :: power_A(3),power_B(3) + double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" + double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" + integer, intent(in) :: power_A(3),power_B(3) - double precision :: overlap_x,overlap_y,overlap_z,overlap - ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 ) - double precision :: A_new(0:max_dim,3)! new polynom - double precision :: A_center_new(3) ! new center - integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A - double precision :: alpha_new ! new exponent - double precision :: fact_a_new ! constant factor - double precision :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr - integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1 - dim1=100 - thr = 1.d-10 - d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 ) == 0 + double precision :: overlap_x,overlap_y,overlap_z,overlap + ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 ) + double precision :: A_new(0:max_dim,3)! new polynom + double precision :: A_center_new(3) ! new center + integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A + double precision :: alpha_new ! new exponent + double precision :: fact_a_new ! constant factor + double precision :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr + integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1 + dim1=100 + thr = 1.d-10 + d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 ) == 0 - ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order - call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & - delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals) - ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 - accu = 0.d0 - do lx = 0, iorder_a_new(1) - coefx = A_new(lx,1) - if(dabs(coefx).lt.thr)cycle - iorder_tmp(1) = lx - do ly = 0, iorder_a_new(2) - coefy = A_new(ly,2) - coefxy = coefx * coefy - if(dabs(coefxy).lt.thr)cycle - iorder_tmp(2) = ly - do lz = 0, iorder_a_new(3) - coefz = A_new(lz,3) - coefxyz = coefxy * coefz - if(dabs(coefxyz).lt.thr)cycle - iorder_tmp(3) = lz - call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) - accu += coefxyz * overlap - enddo + ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order + call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,& + delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals) + ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 + accu = 0.d0 + do lx = 0, iorder_a_new(1) + coefx = A_new(lx,1) + if(dabs(coefx).lt.thr)cycle + iorder_tmp(1) = lx + do ly = 0, iorder_a_new(2) + coefy = A_new(ly,2) + coefxy = coefx * coefy + if(dabs(coefxy).lt.thr)cycle + iorder_tmp(2) = ly + do lz = 0, iorder_a_new(3) + coefz = A_new(lz,3) + coefxyz = coefxy * coefz + if(dabs(coefxyz).lt.thr)cycle + iorder_tmp(3) = lz + call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) + accu += coefxyz * overlap + enddo + enddo enddo - enddo - overlap_gauss_r12 = fact_a_new * accu + overlap_gauss_r12 = fact_a_new * accu end +!--- + +subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) + BEGIN_DOC + ! Computes the following integral : + ! + ! .. math :: + ! + ! \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) + ! using an array of D_centers + ! + END_DOC + + implicit none + include 'constants.include.F' + integer, intent(in) :: n_points + double precision, intent(in) :: D_center(n_points,3), delta ! pure gaussian "D" + double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" + integer, intent(in) :: power_A(3),power_B(3) + double precision, intent(out) :: rvec(n_points) + + double precision, allocatable :: overlap(:) + double precision :: overlap_x, overlap_y, overlap_z + + integer :: maxab + integer, allocatable :: iorder_a_new(:) + double precision, allocatable :: A_new(:,:,:), A_center_new(:,:) + double precision, allocatable :: fact_a_new(:) + double precision :: alpha_new + double precision :: accu,thr, coefxy + integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1, ipoint + + dim1=100 + thr = 1.d-10 + d(:) = 0 + + maxab = maxval(power_A(1:3)) + + allocate (A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), & + fact_a_new(n_points), iorder_a_new(3), overlap(n_points) ) + + call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & + alpha_new, fact_a_new, iorder_a_new , delta, alpha, d, power_A, & + D_center, A_center, n_points) + + do ipoint=1,n_points + rvec(ipoint) = 0.d0 + enddo + + do lx = 0, iorder_a_new(1) + iorder_tmp(1) = lx + do ly = 0, iorder_a_new(2) + iorder_tmp(2) = ly + do lz = 0, iorder_a_new(3) + iorder_tmp(3) = lz + call overlap_gaussian_xyz_v(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap,dim1,n_points) + do ipoint=1,n_points + rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * & + A_new(ipoint,ly,2) * & + A_new(ipoint,lz,3) * overlap(ipoint) + enddo + enddo + enddo + enddo + + do ipoint=1,n_points + rvec(ipoint) = rvec(ipoint) * fact_a_new(ipoint) + enddo + deallocate(A_new, A_center_new, fact_a_new, iorder_a_new, overlap) +end + +!--- +!--- subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints) BEGIN_DOC ! Computes the following integral : ! ! .. math:: - ! + ! ! gauss_ints(m) = \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) ! ! with m == 1 ==> x, m == 2 ==> y, m == 3 ==> z @@ -69,14 +141,14 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_ implicit none include 'constants.include.F' - double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" + double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" integer, intent(in) :: power_A(3),power_B(3) double precision, intent(out) :: gauss_ints(3) double precision :: overlap_x,overlap_y,overlap_z,overlap ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 ) - double precision :: A_new(0:max_dim,3)! new polynom + double precision :: A_new(0:max_dim,3)! new polynom double precision :: A_center_new(3) ! new center integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A integer :: power_B_new(3) @@ -88,8 +160,8 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_ thr = 1.d-10 d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 ) == 0 - ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order - call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & + ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order + call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals) ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 gauss_ints = 0.d0 @@ -99,12 +171,12 @@ subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_ iorder_tmp(1) = lx do ly = 0, iorder_a_new(2) coefy = A_new(ly,2) - coefxy = coefx * coefy + coefxy = coefx * coefy if(dabs(coefxy).lt.thr)cycle iorder_tmp(2) = ly do lz = 0, iorder_a_new(3) coefz = A_new(lz,3) - coefxyz = coefxy * coefz + coefxyz = coefxy * coefz if(dabs(coefxyz).lt.thr)cycle iorder_tmp(3) = lz do m = 1, 3 @@ -129,7 +201,7 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center ! Computes the following integral : ! ! .. math:: - ! + ! ! \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) ! ! with mx == 1 ==> x, mx == 2 ==> y, mx == 3 ==> z @@ -137,13 +209,13 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center implicit none include 'constants.include.F' - double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" + double precision, intent(in) :: D_center(3), delta ! pure gaussian "D" double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" integer, intent(in) :: power_A(3),power_B(3),mx double precision :: overlap_x,overlap_y,overlap_z,overlap ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 ) - double precision :: A_new(0:max_dim,3)! new polynom + double precision :: A_new(0:max_dim,3)! new polynom double precision :: A_center_new(3) ! new center integer :: iorder_a_new(3) ! i_order(i) = order of the new polynom ==> should be equal to power_A integer :: power_B_new(3) @@ -155,8 +227,8 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center thr = 1.d-10 d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 ) == 0 - ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order - call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & + ! New gaussian/polynom defined by :: new pol new center new expo cst fact new order + call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals) ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 overlap_gauss_xyz_r12_specific = 0.d0 @@ -166,12 +238,12 @@ double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center iorder_tmp(1) = lx do ly = 0, iorder_a_new(2) coefy = A_new(ly,2) - coefxy = coefx * coefy + coefxy = coefx * coefy if(dabs(coefxy).lt.thr)cycle iorder_tmp(2) = ly do lz = 0, iorder_a_new(3) coefz = A_new(lz,3) - coefxyz = coefxy * coefz + coefxyz = coefxy * coefz if(dabs(coefxyz).lt.thr)cycle iorder_tmp(3) = lz m = mx diff --git a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f index 263e9845..96625df5 100644 --- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f @@ -124,7 +124,7 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, ! Computes the following integral : ! ! .. math:: - ! + ! ! \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) ! \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$. ! @@ -197,6 +197,92 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, end function NAI_pol_mult_erf +! --- +subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, res_v, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! .. math:: + ! + ! \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) + ! \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + integer, intent(in) :: n_pt_in, n_points + integer, intent(in) :: power_A(3), power_B(3) + double precision, intent(in) :: C_center(n_points,3), A_center(3), B_center(3), alpha, beta, mu_in + double precision, intent(out) :: res_v(n_points) + + integer :: i, n_pt, n_pt_out, ipoint + double precision :: P_center(3) + double precision :: d(0:n_pt_in), coeff, dist, const, factor + double precision :: const_factor, dist_integral + double precision :: accu, p_inv, p, rho, p_inv_2 + double precision :: p_new + + double precision :: rint + + p = alpha + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + + dist = 0.d0 + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) + enddo + + do ipoint=1,n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const_factor = dist * rho + if(const_factor > 80.d0) then + res_V(ipoint) = 0.d0 + cycle + endif + + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new + + n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) + const = p * dist_integral * p_new * p_new + if(n_pt == 0) then + res_v(ipoint) = coeff * rint(0, const) + cycle + endif + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + p_new = p_new * p_new + call give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center(ipoint,1:3)& + , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) + + if(n_pt_out < 0) then + res_v(ipoint) = 0.d0 + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + res_v(ipoint) = accu * coeff + enddo + +end + ! --- double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & @@ -207,7 +293,7 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A ! Computes the following integral : ! ! .. math:: - ! + ! ! \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2) ! \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2) ! \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2) @@ -312,6 +398,131 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A end function NAI_pol_mult_erf_with1s +!-- + +subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2& + , beta, B_center, C_center, n_pt_in, mu_in, res_v, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! .. math :: + ! + ! \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2) + ! \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2) + ! \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2) + ! \exp(-\beta (r - B)^2) + ! \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + integer, intent(in) :: n_pt_in, n_points + integer, intent(in) :: power_A1(3), power_A2(3) + double precision, intent(in) :: C_center(n_points,3), A1_center(3), A2_center(3), B_center(n_points,3) + double precision, intent(in) :: alpha1, alpha2, beta, mu_in + double precision, intent(out) :: res_v(n_points) + + integer :: i, n_pt, n_pt_out, ipoint + double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 + double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor + double precision :: dist_integral + double precision :: d(0:n_pt_in), coeff, const, factor + double precision :: accu + double precision :: p_new, p_new2 + + double precision :: rint + + + ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2} + alpha12 = alpha1 + alpha2 + alpha12_inv = 1.d0 / alpha12 + alpha12_inv_2 = 0.5d0 * alpha12_inv + rho12 = alpha1 * alpha2 * alpha12_inv + A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv + A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv + A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv + dist12 = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1))& + + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))& + + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) + + const_factor12 = dist12 * rho12 + + if(const_factor12 > 80.d0) then + res_v(:) = 0.d0 + return + endif + + ! --- + + ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2} + p = alpha12 + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha12 * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + p_new2 = p_new * p_new + n_pt = 2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) & + + power_A1(3) + power_A2(3) ) + + do ipoint=1,n_points + + P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv + P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv + P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv + dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))& + + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))& + + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) + + const_factor = const_factor12 + dist * rho + if(const_factor > 80.d0) then + res_v(ipoint) = 0.d0 + cycle + endif + + dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))& + + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))& + + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3)) + + ! --- + + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new + + const = p * dist_integral * p_new2 + if(n_pt == 0) then + res_v(ipoint) = coeff * rint(0, const) + cycle + endif + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + + !TODO: VECTORIZE HERE + call give_polynomial_mult_center_one_e_erf_opt( & + A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3)& + , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center,1) + + if(n_pt_out < 0) then + res_v(ipoint) = 0.d0 + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + res_v(ipoint) = accu * coeff + end do + +end + +! --- ! --- subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center & @@ -432,10 +643,11 @@ end subroutine give_polynomial_mult_center_one_e_erf_opt ! --- + subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,& power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in) BEGIN_DOC - ! Returns the explicit polynomial in terms of the $t$ variable of the + ! Returns the explicit polynomial in terms of the $t$ variable of the ! following polynomial: ! ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$. diff --git a/src/ao_one_e_ints/pseudopot.f90 b/src/ao_one_e_ints/pseudopot.f90 index 141292d8..bad641ab 100644 --- a/src/ao_one_e_ints/pseudopot.f90 +++ b/src/ao_one_e_ints/pseudopot.f90 @@ -1095,9 +1095,9 @@ double precision function overlap_orb_ylm_grid(nptsgrid,r_orb,npower_orb,center_ implicit none !! PSEUDOS integer nptsgridmax,nptsgrid -double precision coefs_pseudo,ptsgrid parameter(nptsgridmax=50) -common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) +double precision coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) +common/pseudos/coefs_pseudo,ptsgrid !!!!! integer npower_orb(3),l,m,i double precision x,g_orb,two_pi,dx,dphi,term,orb_phi,ylm_real,sintheta,r_orb,phi,center_orb(3) @@ -1235,10 +1235,10 @@ end subroutine initpseudos(nptsgrid) implicit none integer nptsgridmax,nptsgrid,ik - double precision coefs_pseudo,ptsgrid double precision p,q,r,s parameter(nptsgridmax=50) - common/pseudos/coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) + double precision :: coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3) + common/pseudos/coefs_pseudo,ptsgrid p=1.d0/dsqrt(2.d0) q=1.d0/dsqrt(3.d0) diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index a72200f7..64b522d8 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -58,3 +58,18 @@ END_PROVIDER enddo END_PROVIDER + + +BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)] + implicit none + BEGIN_DOC + ! final_grid_points_transp(j,1:3) = (/ x, y, z /) of the jth grid point + END_DOC + integer :: i + do i=1,n_points_final_grid + final_grid_points_transp(i,1) = final_grid_points(1,i) + final_grid_points_transp(i,2) = final_grid_points(2,i) + final_grid_points_transp(i,3) = final_grid_points(3,i) + enddo +END_PROVIDER + diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index e32d4707..33f512cf 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -127,6 +127,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, implicit none integer :: ipoint + print*,'providing int2_grad1_u12_bimo_transp' + double precision :: wall0, wall1 + call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint) & @@ -142,6 +145,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, enddo !$OMP END DO !$OMP END PARALLEL + call wall_time(wall1) + print*,'Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 END_PROVIDER diff --git a/src/bitmask/core_inact_act_virt.irp.f b/src/bitmask/core_inact_act_virt.irp.f index d83d69e9..f3eaff32 100644 --- a/src/bitmask/core_inact_act_virt.irp.f +++ b/src/bitmask/core_inact_act_virt.irp.f @@ -173,10 +173,7 @@ BEGIN_PROVIDER [integer, n_core_inact_act_orb ] END_DOC n_core_inact_act_orb = (n_core_orb + n_inact_orb + n_act_orb) END_PROVIDER - - - BEGIN_PROVIDER [ integer(bit_kind), core_bitmask , (N_int,2) ] implicit none BEGIN_DOC @@ -443,5 +440,4 @@ BEGIN_PROVIDER [integer, list_all_but_del_orb, (n_all_but_del_orb)] endif enddo -END_PROVIDER - +END_PROVIDER diff --git a/src/cis/cis.irp.f b/src/cis/cis.irp.f index cc047622..ab2294ad 100644 --- a/src/cis/cis.irp.f +++ b/src/cis/cis.irp.f @@ -79,6 +79,6 @@ subroutine run call ezfio_set_cis_energy(CI_energy) psi_coef = ci_eigenvectors SOFT_TOUCH psi_coef - call save_wavefunction_truncated(thresh_save_wf) + call save_wavefunction_truncated(save_threshold) end diff --git a/src/csf/cfgCI_interface.f90 b/src/csf/cfgCI_interface.f90 index b701f0ec..73bd600d 100644 --- a/src/csf/cfgCI_interface.f90 +++ b/src/csf/cfgCI_interface.f90 @@ -46,6 +46,24 @@ module cfunctions real (kind=C_DOUBLE ),intent(out) :: csftodetmatrix(rowsmax,colsmax) end subroutine getCSFtoDETTransformationMatrix end interface + interface + subroutine gramSchmidt(A, m, n, B) bind(C, name='gramSchmidt') + import C_INT32_T, C_INT64_T, C_DOUBLE + integer(kind=C_INT32_T),value,intent(in) :: m + integer(kind=C_INT32_T),value,intent(in) :: n + real (kind=C_DOUBLE ),intent(in) :: A(m,n) + real (kind=C_DOUBLE ),intent(out) :: B(m,n) + end subroutine gramSchmidt + end interface + interface + subroutine gramSchmidt_qp(A, m, n, B) bind(C, name='gramSchmidt_qp') + import C_INT32_T, C_INT64_T, C_DOUBLE + integer(kind=C_INT32_T),value,intent(in) :: m + integer(kind=C_INT32_T),value,intent(in) :: n + real (kind=C_DOUBLE ),intent(in) :: A(m,n) + real (kind=C_DOUBLE ),intent(out) :: B(m,n) + end subroutine gramSchmidt_qp + end interface end module cfunctions subroutine f_dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC) & diff --git a/src/csf/cfgCI_utils.c b/src/csf/cfgCI_utils.c index 746de04e..3510db37 100644 --- a/src/csf/cfgCI_utils.c +++ b/src/csf/cfgCI_utils.c @@ -1,5 +1,6 @@ #include #include +#include #include "tree_utils.h" void int_to_bin_digit(int64_t in, int count, int* out) @@ -28,19 +29,19 @@ void getncsfs1(int *inpnsomo, int *inpms, int *outncsfs){ int nsomo = *inpnsomo; int ms = *inpms; int nparcoupl = (nsomo + ms)/2; - *outncsfs = binom(nsomo, nparcoupl); + *outncsfs = binom((double)nsomo, (double)nparcoupl); } void getncsfs(int NSOMO, int MS, int *outncsfs){ - int nparcoupl = (NSOMO + MS)/2; - int nparcouplp1 = ((NSOMO + MS)/2)+1; + int nparcoupl = (NSOMO + MS)/2; // n_alpha + int nparcouplp1 = ((NSOMO + MS)/2)+1; // n_alpha + 1 double tmpndets=0.0; if(NSOMO == 0){ (*outncsfs) = 1; return; } - tmpndets = binom(NSOMO, nparcoupl); - (*outncsfs) = round(tmpndets - binom(NSOMO, nparcouplp1)); + tmpndets = binom((double)NSOMO, (double)nparcoupl); + (*outncsfs) = round(tmpndets - binom((double)NSOMO, (double)nparcouplp1)); } #include @@ -252,6 +253,27 @@ void generateAllBFs(int64_t Isomo, int64_t MS, Tree *bftree, int *NBF, int *NSOM buildTreeDriver(bftree, *NSOMO, MS, NBF); } +void ortho_qr_csf(double *overlapMatrix, int lda, double *orthoMatrix, int rows, int cols); + +// QR to orthogonalize CSFs does not work +//void gramSchmidt_qp(double *overlapMatrix, int rows, int cols, double *orthoMatrix){ +// int i,j; +// //for(j=0;j 0){ + ndets = (int)binom((double)NSOMO, (double)nalpha); + } + else if(NSOMO == 0){ + ndets = 1; + } + else printf("Something is wrong in calcMEdetpair\n"); Tree dettree = (Tree){ .rootNode = NULL, .NBF = -1 }; dettree.rootNode = malloc(sizeof(Node)); @@ -1389,16 +1431,6 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int * } else{ - //int addr = -1; - //int inpdet[NSOMO]; - //inpdet[0] = 1; - //inpdet[1] = 1; - //inpdet[2] = 1; - //inpdet[3] = 0; - //inpdet[4] = 0; - //inpdet[5] = 0; - - //findAddofDetDriver(&dettree, NSOMO, inpdet, &addr); int detlist[ndets]; getDetlistDriver(&dettree, NSOMO, detlist); @@ -1411,6 +1443,9 @@ void convertBFtoDetBasis(int64_t Isomo, int MS, double **bftodetmatrixptr, int * generateAllBFs(Isomo, MS, &bftree, &NBF, &NSOMO); // Initialize transformation matrix + //printf("MS=%d NBF=%d ndets=%d NSOMO=%d\n",MS,NBF,ndets,NSOMO); + assert( NBF > 0); + assert( ndets > 0); (*bftodetmatrixptr) = malloc(NBF*ndets*sizeof(double)); (*rows) = NBF; (*cols) = ndets; @@ -1465,9 +1500,10 @@ void convertBFtoDetBasisWithArrayDims(int64_t Isomo, int MS, int rowsmax, int co getSetBits(Isomo, &NSOMO); int ndets = 0; int NBF = 0; - double dNSOMO = NSOMO*1.0; - double nalpha = (NSOMO + MS)/2.0; - ndets = (int)binom(dNSOMO, nalpha); + //double dNSOMO = NSOMO*1.0; + //double nalpha = (NSOMO + MS)/2.0; + int nalpha = (NSOMO + MS)/2; + ndets = (int)binom((double)NSOMO, (double)nalpha); Tree dettree = (Tree){ .rootNode = NULL, .NBF = -1 }; dettree.rootNode = malloc(sizeof(Node)); @@ -1551,6 +1587,7 @@ void getApqIJMatrixDims(int64_t Isomo, int64_t Jsomo, int64_t MS, int32_t *rowso getncsfs(NSOMOJ, MS, &NBFJ); (*rowsout) = NBFI; (*colsout) = NBFJ; + //exit(0); } void getApqIJMatrixDriver(int64_t Isomo, int64_t Jsomo, int orbp, int orbq, int64_t MS, int64_t NMO, double **CSFICSFJApqIJptr, int *rowsout, int *colsout){ @@ -1669,6 +1706,7 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int rowsbftodetI, colsbftodetI; + //printf(" 1Calling convertBFtoDetBasis Isomo=%ld MS=%ld\n",Isomo,MS); convertBFtoDetBasis(Isomo, MS, &bftodetmatrixI, &rowsbftodetI, &colsbftodetI); // Fill matrix @@ -1676,8 +1714,14 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int colsI = 0; //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); - //getOverlapMatrix(Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); + //printf("Isomo=%ld\n",Isomo); getOverlapMatrix_withDet(bftodetmatrixI, rowsbftodetI, colsbftodetI, Isomo, MS, &overlapMatrixI, &rowsI, &colsI, &NSOMO); + if(Isomo == 0){ + rowsI = 1; + colsI = 1; + } + + //printf("Isomo=%ld\n",Isomo); orthoMatrixI = malloc(rowsI*colsI*sizeof(double)); @@ -1689,6 +1733,7 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int rowsbftodetJ, colsbftodetJ; + //printf(" 2Calling convertBFtoDetBasis Jsomo=%ld MS=%ld\n",Jsomo,MS); convertBFtoDetBasis(Jsomo, MS, &bftodetmatrixJ, &rowsbftodetJ, &colsbftodetJ); int rowsJ = 0; @@ -1696,6 +1741,10 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in // Fill matrix //getOverlapMatrix(Jsomo, MS, &overlapMatrixJ, &rowsJ, &colsJ, &NSOMO); getOverlapMatrix_withDet(bftodetmatrixJ, rowsbftodetJ, colsbftodetJ, Jsomo, MS, &overlapMatrixJ, &rowsJ, &colsJ, &NSOMO); + if(Jsomo == 0){ + rowsJ = 1; + colsJ = 1; + } orthoMatrixJ = malloc(rowsJ*colsJ*sizeof(double)); @@ -1713,18 +1762,25 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in int transA=false; int transB=false; + //printf("1Calling blas\n"); + //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsbftodetI,colsbftodetI,rowsA,colsA); callBlasMatxMat(bftodetmatrixI, rowsbftodetI, colsbftodetI, ApqIJ, rowsA, colsA, bfIApqIJ, transA, transB); + //printf("done\n"); // now transform I in csf basis double *CSFIApqIJ = malloc(rowsI*colsA*sizeof(double)); transA = false; transB = false; + //printf("2Calling blas\n"); + //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,colsI,colsI,colsA); callBlasMatxMat(orthoMatrixI, rowsI, colsI, bfIApqIJ, colsI, colsA, CSFIApqIJ, transA, transB); // now transform J in BF basis double *CSFIbfJApqIJ = malloc(rowsI*rowsbftodetJ*sizeof(double)); transA = false; transB = true; + //printf("3Calling blas\n"); + //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,colsA,rowsbftodetJ,colsbftodetJ); callBlasMatxMat(CSFIApqIJ, rowsI, colsA, bftodetmatrixJ, rowsbftodetJ, colsbftodetJ, CSFIbfJApqIJ, transA, transB); // now transform J in CSF basis @@ -1735,13 +1791,14 @@ void getApqIJMatrixDriverArrayInp(int64_t Isomo, int64_t Jsomo, int32_t orbp, in double *tmpCSFICSFJApqIJ = malloc(rowsI*rowsJ*sizeof(double)); transA = false; transB = true; + //printf("4Calling blas\n"); + //printf("rowsA=%d colsA=%d\nrowB=%d colB=%d\n",rowsI,rowsbftodetJ,rowsJ,colsJ); callBlasMatxMat(CSFIbfJApqIJ, rowsI, rowsbftodetJ, orthoMatrixJ, rowsJ, colsJ, tmpCSFICSFJApqIJ, transA, transB); // Transfer to actual buffer in Fortran order for(int i = 0; i < rowsI; i++) for(int j = 0; j < rowsJ; j++) CSFICSFJApqIJ[j*rowsI + i] = tmpCSFICSFJApqIJ[i*rowsJ + j]; - // Garbage collection free(overlapMatrixI); free(overlapMatrixJ); diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f index f73362eb..cea7640c 100644 --- a/src/csf/configuration_CI_sigma_helpers.irp.f +++ b/src/csf/configuration_CI_sigma_helpers.irp.f @@ -1,3 +1,592 @@ +use bitmasks + + BEGIN_PROVIDER [ integer(bit_kind), alphasIcfg_list , (N_int,2,N_configuration,mo_num*(mo_num))] +&BEGIN_PROVIDER [ integer, NalphaIcfg_list, (N_configuration) ] + implicit none + !use bitmasks + BEGIN_DOC + ! Documentation for alphasI + ! Returns the associated alpha's for + ! the input configuration Icfg. + END_DOC + + integer :: idxI ! The id of the Ith CFG + integer(bit_kind) :: Icfg(N_int,2) + integer :: NalphaIcfg + logical,dimension(:,:),allocatable :: tableUniqueAlphas + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: ndiffAll + integer :: i,ii + integer :: j,jj + integer :: k,kk + integer :: kstart + integer :: kend + integer :: Nsomo_I + integer :: hole + integer :: p + integer :: q + integer :: countalphas + logical :: pqAlreadyGenQ + logical :: pqExistsQ + logical :: ppExistsQ + integer*8 :: MS + + double precision :: t0, t1 + call wall_time(t0) + + MS = elec_alpha_num-elec_beta_num + + allocate(tableUniqueAlphas(mo_num,mo_num)) + NalphaIcfg_list = 0 + + do idxI = 1, N_configuration + + Icfg = psi_configuration(:,:,idxI) + + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + do ii = 1,n_act_orb + i = list_act(ii) + if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = i + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do ii = 1,n_act_orb + i = list_act(ii) + if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = i + holetype(nholes) = 2 + endif + end do + + ! find vmos + listvmos = -1 + vmotype = -1 + nvmos = 0 + do ii = 1,n_act_orb + i = list_act(ii) + if(IAND(Idomo,(IBSET(0_8,i-1))) .EQ. 0) then + if(IAND(Isomo,(IBSET(0_8,i-1))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = i + vmotype(nvmos) = 1 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1) then + nvmos += 1 + listvmos(nvmos) = i + vmotype(nvmos) = 2 + end if + end if + end do + + tableUniqueAlphas = .FALSE. + + ! Now find the allowed (p,q) excitations + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + Nsomo_I = POPCNT(Isomo) + if(Nsomo_I .EQ. 0) then + kstart = 1 + else + kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)) + endif + kend = idxI-1 + + do i = 1,nholes + p = listholes(i) + do j = 1,nvmos + q = listvmos(j) + if(p .EQ. q) cycle + if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then + ! SOMO -> VMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = Idomo + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) + kend = idxI-1 + else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then + ! SOMO -> SOMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBSET(Idomo,q-1) + ! Check for Minimal alpha electrons (MS) + if(POPCNT(Jsomo).ge.MS)then + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) + kend = idxI-1 + else + cycle + endif + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then + ! DOMO -> VMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + kstart = cfg_seniority_index(Nsomo_I) + kend = idxI-1 + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then + ! DOMO -> SOMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + Jdomo = IBSET(Jdomo,q-1) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) + kend = idxI-1 + else + print*,"Something went wrong in obtain_associated_alphaI" + endif + ! Check for Minimal alpha electrons (MS) + if(POPCNT(Jsomo).lt.MS)then + cycle + endif + + ! Again, we don't have to search from 1 + ! we just use seniority to find the + ! first index with NSOMO - 2 to NSOMO + 2 + ! this is what is done in kstart, kend + + pqAlreadyGenQ = .FALSE. + ! First check if it can be generated before + do k = kstart, kend + diffSOMO = IEOR(Jsomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k))) + ndiffSOMO = POPCNT(diffSOMO) + if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle + diffDOMO = IEOR(Jdomo,iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then + if((ndiffSOMO+ndiffDOMO) .EQ. 0) then + pqAlreadyGenQ = .TRUE. + ppExistsQ = .TRUE. + EXIT + endif + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + pqAlreadyGenQ = .TRUE. + EXIT + endif + end do + + if(pqAlreadyGenQ) cycle + + pqExistsQ = .FALSE. + + if(.NOT. pqExistsQ) then + tableUniqueAlphas(p,q) = .TRUE. + endif + end do + end do + + !print *,tableUniqueAlphas(:,:) + + ! prune list of alphas + Isomo = Icfg(1,1) + Idomo = Icfg(1,2) + Jsomo = Icfg(1,1) + Jdomo = Icfg(1,2) + NalphaIcfg = 0 + do i = 1, nholes + p = listholes(i) + do j = 1, nvmos + q = listvmos(j) + if(p .EQ. q) cycle + if(tableUniqueAlphas(p,q)) then + if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then + ! SOMO -> VMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = Idomo + else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then + ! SOMO -> SOMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBSET(Idomo,q-1) + if(POPCNT(Jsomo).ge.MS)then + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) + kend = idxI-1 + else + cycle + endif + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then + ! DOMO -> VMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then + ! DOMO -> SOMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + Jdomo = IBSET(Jdomo,q-1) + else + print*,"Something went wrong in obtain_associated_alphaI" + endif + + ! SOMO + !print *,i,j,"|",NalphaIcfg, Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + if(POPCNT(Jsomo) .ge. NSOMOMin) then + NalphaIcfg += 1 + alphasIcfg_list(1,1,idxI,NalphaIcfg) = Jsomo + alphasIcfg_list(1,2,idxI,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + NalphaIcfg_list(idxI) = NalphaIcfg + endif + endif + end do + end do + + ! Check if this Icfg has been previously generated as a mono + ppExistsQ = .False. + Isomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,1)) + Idomo = iand(reunion_of_act_virt_bitmask(1,1),Icfg(1,2)) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) + do k = kstart, idxI-1 + diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) + ndiffSOMO = POPCNT(diffSOMO) + if (ndiffSOMO /= 2) cycle + diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4)) then + ppExistsQ = .TRUE. + EXIT + endif + end do + ! Diagonal part (pp,qq) + if(nholes > 0 .AND. (.NOT. ppExistsQ))then + ! SOMO + if(POPCNT(Jsomo) .ge. NSOMOMin) then + NalphaIcfg += 1 + alphasIcfg_list(1,1,idxI,NalphaIcfg) = Icfg(1,1) + alphasIcfg_list(1,2,idxI,NalphaIcfg) = Icfg(1,2) + NalphaIcfg_list(idxI) = NalphaIcfg + endif + endif + + NalphaIcfg = 0 + enddo ! end loop idxI + call wall_time(t1) + print *, 'Preparation : ', t1 - t0 + +END_PROVIDER + + subroutine obtain_associated_alphaI(idxI, Icfg, alphasIcfg, NalphaIcfg) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for alphasI + ! Returns the associated alpha's for + ! the input configuration Icfg. + END_DOC + + integer,intent(in) :: idxI ! The id of the Ith CFG + integer(bit_kind),intent(in) :: Icfg(N_int,2) + integer,intent(out) :: NalphaIcfg + integer(bit_kind),intent(out) :: alphasIcfg(N_int,2,*) + logical,dimension(:,:),allocatable :: tableUniqueAlphas + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: ndiffAll + integer :: i, ii + integer :: j, jj + integer :: k, kk + integer :: kstart + integer :: kend + integer :: Nsomo_I + integer :: hole + integer :: p + integer :: q + integer :: countalphas + logical :: pqAlreadyGenQ + logical :: pqExistsQ + logical :: ppExistsQ + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + !print*,"Input cfg" + !call debug_spindet(Isomo,1) + !call debug_spindet(Idomo,1) + + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + do ii = 1,n_act_orb + i = list_act(ii) + if(POPCNT(IAND(Isomo,IBSET(0_8,i-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = i + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do ii = 1,n_act_orb + i = list_act(ii) + if(POPCNT(IAND(Idomo,IBSET(0_8,i-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = i + holetype(nholes) = 2 + endif + end do + + ! find vmos + listvmos = -1 + vmotype = -1 + nvmos = 0 + do ii = 1,n_act_orb + i = list_act(ii) + !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) + if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = i + vmotype(nvmos) = 1 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) .EQ. 0 ) then + nvmos += 1 + listvmos(nvmos) = i + vmotype(nvmos) = 2 + end if + end do + + !print *,"Nvmo=",nvmos + !print *,listvmos + !print *,vmotype + + allocate(tableUniqueAlphas(mo_num,mo_num)) + tableUniqueAlphas = .FALSE. + + ! Now find the allowed (p,q) excitations + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + Nsomo_I = POPCNT(Isomo) + if(Nsomo_I .EQ. 0) then + kstart = 1 + else + kstart = cfg_seniority_index(max(NSOMOMin,Nsomo_I-2)) + endif + kend = idxI-1 + !print *,"Isomo" + !call debug_spindet(Isomo,1) + !call debug_spindet(Idomo,1) + + !print *,"Nholes=",nholes," Nvmos=",nvmos, " idxi=",idxI + !do i = 1,nholes + ! print *,i,"->",listholes(i) + !enddo + !do i = 1,nvmos + ! print *,i,"->",listvmos(i) + !enddo + + do i = 1,nholes + p = listholes(i) + do j = 1,nvmos + q = listvmos(j) + if(p .EQ. q) cycle + if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then + ! SOMO -> VMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = Idomo + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) + kend = idxI-1 + else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then + ! SOMO -> SOMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBSET(Idomo,q-1) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-4))) + kend = idxI-1 + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then + ! DOMO -> VMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + kstart = cfg_seniority_index(Nsomo_I) + kend = idxI-1 + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then + ! DOMO -> SOMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + Jdomo = IBSET(Jdomo,q-1) + kstart = max(1,cfg_seniority_index(max(NSOMOMin,Nsomo_I-2))) + kend = idxI-1 + else + print*,"Something went wrong in obtain_associated_alphaI" + endif + + ! Again, we don't have to search from 1 + ! we just use seniortiy to find the + ! first index with NSOMO - 2 to NSOMO + 2 + ! this is what is done in kstart, kend + + pqAlreadyGenQ = .FALSE. + ! First check if it can be generated before + do k = kstart, kend + diffSOMO = IEOR(Jsomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) + ndiffSOMO = POPCNT(diffSOMO) + if((ndiffSOMO .NE. 0) .AND. (ndiffSOMO .NE. 2)) cycle + diffDOMO = IEOR(Jdomo,iand(act_bitmask(1,1),psi_configuration(1,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + !if(POPCNT(IEOR(diffSOMO,diffDOMO)) .LE. 1 .AND. ndiffDOMO .LT. 3) then + if((ndiffSOMO+ndiffDOMO) .EQ. 0) then + pqAlreadyGenQ = .TRUE. + ppExistsQ = .TRUE. + EXIT + endif + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + pqAlreadyGenQ = .TRUE. + !EXIT + !ppExistsQ = .TRUE. + !print *,i,k,ndiffSOMO,ndiffDOMO + !call debug_spindet(Jsomo,1) + !call debug_spindet(Jdomo,1) + !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,1,k)),1) + !call debug_spindet(iand(reunion_of_act_virt_bitmask(1,1),psi_configuration(1,2,k)),1) + EXIT + endif + end do + + !print *,"(,",p,",",q,")",pqAlreadyGenQ + + if(pqAlreadyGenQ) cycle + + pqExistsQ = .FALSE. + ! now check if this exists in the selected list + !do k = idxI+1, N_configuration + ! diffSOMO = IEOR(OR(reunion_of_act_virt_bitmask(1,1),Jsomo),psi_configuration(1,1,k)) + ! diffDOMO = IEOR(OR(reunion_of_act_virt_bitmask(1,1),Jdomo),psi_configuration(1,2,k)) + ! ndiffSOMO = POPCNT(diffSOMO) + ! ndiffDOMO = POPCNT(diffDOMO) + ! if((ndiffSOMO + ndiffDOMO) .EQ. 0) then + ! pqExistsQ = .TRUE. + ! EXIT + ! endif + !end do + + if(.NOT. pqExistsQ) then + tableUniqueAlphas(p,q) = .TRUE. + !print *,p,q + !call debug_spindet(Jsomo,1) + !call debug_spindet(Jdomo,1) + endif + end do + end do + + !print *,tableUniqueAlphas(:,:) + + ! prune list of alphas + Isomo = Icfg(1,1) + Idomo = Icfg(1,2) + Jsomo = Icfg(1,1) + Jdomo = Icfg(1,2) + NalphaIcfg = 0 + do i = 1, nholes + p = listholes(i) + do j = 1, nvmos + q = listvmos(j) + if(p .EQ. q) cycle + if(tableUniqueAlphas(p,q)) then + if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 1) then + ! SOMO -> VMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = Idomo + else if(holetype(i) .EQ. 1 .AND. vmotype(j) .EQ. 2) then + ! SOMO -> SOMO + Jsomo = IBCLR(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBSET(Idomo,q-1) + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 1) then + ! DOMO -> VMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBSET(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + else if(holetype(i) .EQ. 2 .AND. vmotype(j) .EQ. 2) then + ! DOMO -> SOMO + Jsomo = IBSET(Isomo,p-1) + Jsomo = IBCLR(Jsomo,q-1) + Jdomo = IBCLR(Idomo,p-1) + Jdomo = IBSET(Jdomo,q-1) + else + print*,"Something went wrong in obtain_associated_alphaI" + endif + + ! SOMO + NalphaIcfg += 1 + !print *,i,j,"|",NalphaIcfg + alphasIcfg(1,1,NalphaIcfg) = Jsomo + alphasIcfg(1,2,NalphaIcfg) = IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + !print *,"I = ",idxI, " Na=",NalphaIcfg," - ",Jsomo, IOR(Jdomo,ISHFT(1_8,n_core_orb)-1) + endif + end do + end do + + ! Check if this Icfg has been previously generated as a mono + ppExistsQ = .False. + Isomo = iand(act_bitmask(1,1),Icfg(1,1)) + Idomo = iand(act_bitmask(1,1),Icfg(1,2)) + do k = 1, idxI-1 + diffSOMO = IEOR(Isomo,iand(act_bitmask(1,1),psi_configuration(1,1,k))) + diffDOMO = IEOR(Idomo,iand(act_bitmask(1,1),psi_configuration(1,2,k))) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffSOMO = POPCNT(diffSOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + if((ndiffSOMO+ndiffDOMO+nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + ppExistsQ = .TRUE. + EXIT + endif + end do + ! Diagonal part (pp,qq) + if(nholes > 0 .AND. (.NOT. ppExistsQ))then + ! SOMO + NalphaIcfg += 1 + !print *,p,q,"|",holetype(i),vmotype(j),NalphaIcfg + !call debug_spindet(Idomo,1) + !call debug_spindet(Jdomo,1) + alphasIcfg(1,1,NalphaIcfg) = Icfg(1,1) + alphasIcfg(1,2,NalphaIcfg) = Icfg(1,2) + endif + + end subroutine + function getNSOMO(Icfg) result(NSOMO) implicit none integer(bit_kind),intent(in) :: Icfg(N_int,2) @@ -8,98 +597,3 @@ NSOMO += POPCNT(Icfg(i,1)) enddo end function getNSOMO - -subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmodel) - implicit none - BEGIN_DOC - ! This function converts the orbital ids - ! in real space to those used in model space - ! in order to identify the matrices required - ! for the calculation of MEs. - ! - ! The type of excitations are ordered as follows: - ! Type 1 - SOMO -> SOMO - ! Type 2 - DOMO -> VMO - ! Type 3 - SOMO -> VMO - ! Type 4 - DOMO -> SOMO - END_DOC - integer(bit_kind),intent(in) :: Ialpha(N_int,2) - integer(bit_kind),intent(in) :: Jcfg(N_int,2) - integer,intent(in) :: p,q - integer,intent(in) :: extype - integer,intent(out) :: pmodel,qmodel - integer*8 :: Isomo - integer*8 :: Idomo - integer*8 :: Jsomo - integer*8 :: Jdomo - integer*8 :: mask - integer*8 :: Isomotmp - integer*8 :: Jsomotmp - integer :: pos0,pos0prev - - ! TODO Flag (print) when model space indices is > 64 - Isomo = Ialpha(1,1) - Idomo = Ialpha(1,2) - Jsomo = Jcfg(1,1) - Jdomo = Jcfg(1,2) - pos0prev = 0 - pmodel = p - qmodel = q - - if(p .EQ. q) then - pmodel = 1 - qmodel = 1 - else - !print *,"input pq=",p,q,"extype=",extype - !call debug_spindet(Isomo,1) - !call debug_spindet(Idomo,1) - !call debug_spindet(Jsomo,1) - !call debug_spindet(Jdomo,1) - select case(extype) - case (1) - ! SOMO -> SOMO - ! remove all domos - !print *,"type -> SOMO -> SOMO" - mask = ISHFT(1_8,p) - 1 - Isomotmp = IAND(Isomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) - mask = ISHFT(1_8,q) - 1 - Isomotmp = IAND(Isomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) - case (2) - ! DOMO -> VMO - ! remove all domos except one at p - !print *,"type -> DOMO -> VMO" - mask = ISHFT(1_8,p) - 1 - Jsomotmp = IAND(Jsomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) - mask = ISHFT(1_8,q) - 1 - Jsomotmp = IAND(Jsomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) - case (3) - ! SOMO -> VMO - !print *,"type -> SOMO -> VMO" - !Isomo = IEOR(Isomo,Jsomo) - mask = ISHFT(1_8,p) - 1 - Isomo = IAND(Isomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) - mask = ISHFT(1_8,q) - 1 - Jsomo = IAND(Jsomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) - case (4) - ! DOMO -> SOMO - ! remove all domos except one at p - !print *,"type -> DOMO -> SOMO" - !Isomo = IEOR(Isomo,Jsomo) - mask = ISHFT(1_8,p) - 1 - Jsomo = IAND(Jsomo,mask) - pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) - mask = ISHFT(1_8,q) - 1 - Isomo = IAND(Isomo,mask) - qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) - case default - print *,"something is wrong in convertOrbIdsToModelSpaceIds" - end select - endif - !print *,p,q,"model ids=",pmodel,qmodel -end subroutine convertOrbIdsToModelSpaceIds diff --git a/src/csf/configurations.irp.f b/src/csf/configurations.irp.f index 8e2a513c..c11a49a4 100644 --- a/src/csf/configurations.irp.f +++ b/src/csf/configurations.irp.f @@ -458,8 +458,9 @@ end END_PROVIDER - BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num) ] + BEGIN_PROVIDER [ integer, cfg_seniority_index, (0:elec_num+2) ] &BEGIN_PROVIDER [ integer, cfg_nsomo_max ] +&BEGIN_PROVIDER [ integer, cfg_nsomo_min ] implicit none BEGIN_DOC ! Returns the index in psi_configuration of the first cfg with @@ -467,9 +468,10 @@ END_PROVIDER ! ! cfg_nsomo_max : Max number of SOMO in the current wave function END_DOC - integer :: i, k, s, sold + integer :: i, k, s, sold, soldmin cfg_seniority_index(:) = -1 sold = -1 + soldmin = 2000 cfg_nsomo_max = 0 do i=1,N_configuration s = 0 @@ -482,6 +484,10 @@ END_PROVIDER cfg_seniority_index(s) = i cfg_nsomo_max = s endif + if (soldmin .GT. s ) then + soldmin = s + cfg_nsomo_min = s + endif enddo END_PROVIDER @@ -743,41 +749,112 @@ BEGIN_PROVIDER [ integer(bit_kind), dominant_dets_of_cfgs, (N_int,2,N_dominant_d enddo END_PROVIDER -subroutine binary_search_cfg(cfgInp,addcfg) +subroutine binary_search_cfg(cfgInp,addcfg,bit_tmp) use bitmasks implicit none BEGIN_DOC ! Documentation for binary_search - ! - ! Does a binary search to find + ! + ! Does a binary search to find ! the address of a configuration in a list of ! configurations. END_DOC integer(bit_kind), intent(in) :: cfgInp(N_int,2) integer , intent(out) :: addcfg - integer :: i,j,k,r,l - integer*8 :: key, key2 - logical :: found - !integer*8, allocatable :: bit_tmp(:) - !integer*8, external :: configuration_search_key + integer*8, intent(in) :: bit_tmp(0:N_configuration+1) - !allocate(bit_tmp(0:N_configuration)) - !bit_tmp(0) = 0 - do i=1,N_configuration - !bit_tmp(i) = configuration_search_key(psi_configuration(1,1,i),N_int) - found = .True. - do k=1,N_int - found = found .and. (psi_configuration(k,1,i) == cfgInp(k,1)) & - .and. (psi_configuration(k,2,i) == cfgInp(k,2)) - enddo - if (found) then - addcfg = i - exit + logical :: found + integer :: l, r, j, k + integer*8 :: key + + integer*8, external :: configuration_search_key + + key = configuration_search_key(cfgInp,N_int) + + ! Binary search + l = 0 + r = N_configuration+1 +IRP_IF WITHOUT_SHIFTRL + j = ishft(r-l,-1) +IRP_ELSE + j = shiftr(r-l,1) +IRP_ENDIF + do while (j>=1) + j = j+l + if (bit_tmp(j) == key) then + ! Find 1st element which matches the key + if (j > 1) then + do while (j>1 .and. bit_tmp(j-1) == key) + j = j-1 + enddo + endif + ! Find correct element matching the key + do while (bit_tmp(j) == key) + found = .True. + do k=1,N_int + found = found .and. (psi_configuration(k,1,j) == cfgInp(k,1))& + .and. (psi_configuration(k,2,j) == cfgInp(k,2)) + enddo + if (found) then + addcfg = j + return + endif + j = j+1 + enddo + addcfg = -1 + return + else if (bit_tmp(j) > key) then + r = j + else + l = j endif +IRP_IF WITHOUT_SHIFTRL + j = ishft(r-l,-1) +IRP_ELSE + j = shiftr(r-l,1) +IRP_ENDIF enddo + addcfg = -1 + return + end subroutine +!subroutine binary_search_cfg(cfgInp,addcfg) +! use bitmasks +! implicit none +! BEGIN_DOC +! ! Documentation for binary_search +! ! +! ! Does a binary search to find +! ! the address of a configuration in a list of +! ! configurations. +! END_DOC +! integer(bit_kind), intent(in) :: cfgInp(N_int,2) +! integer , intent(out) :: addcfg +! integer :: i,j,k,r,l +! integer*8 :: key, key2 +! logical :: found +! !integer*8, allocatable :: bit_tmp(:) +! !integer*8, external :: configuration_search_key +! +! !allocate(bit_tmp(0:N_configuration)) +! !bit_tmp(0) = 0 +! do i=1,N_configuration +! !bit_tmp(i) = configuration_search_key(psi_configuration(1,1,i),N_int) +! found = .True. +! do k=1,N_int +! found = found .and. (psi_configuration(k,1,i) == cfgInp(k,1)) & +! .and. (psi_configuration(k,2,i) == cfgInp(k,2)) +! enddo +! if (found) then +! addcfg = i +! exit +! endif +! enddo +! +!end subroutine +! BEGIN_PROVIDER [ integer, psi_configuration_to_psi_det, (2,N_configuration) ] &BEGIN_PROVIDER [ integer, psi_configuration_to_psi_det_data, (N_det) ] diff --git a/src/csf/conversion.irp.f b/src/csf/conversion.irp.f index fecc6123..bdd8b327 100644 --- a/src/csf/conversion.irp.f +++ b/src/csf/conversion.irp.f @@ -1,3 +1,16 @@ +BEGIN_PROVIDER [ double precision, psi_csf_coef, (N_csf, N_states) ] + implicit none + BEGIN_DOC + ! Wafe function in CSF basis + END_DOC + + double precision, allocatable :: buffer(:,:) + allocate ( buffer(N_det, N_states) ) + buffer(1:N_det, 1:N_states) = psi_coef(1:N_det, 1:N_states) + call convertWFfromDETtoCSF(N_states, buffer, psi_csf_coef) +END_PROVIDER + + subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out) use cfunctions use bitmasks @@ -12,7 +25,7 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out) double precision, intent(out) :: psi_coef_cfg_out(n_CSF,N_st) integer*8 :: Isomo, Idomo, mask integer(bit_kind) :: Ialpha(N_int) ,Ibeta(N_int) - integer :: rows, cols, i, j, k + integer :: rows, cols, i, j, k, salpha integer :: startdet, enddet integer :: ndetI integer :: getNSOMO @@ -26,6 +39,8 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out) integer s, bfIcfg integer countcsf + integer MS + MS = elec_alpha_num-elec_beta_num countcsf = 0 phasedet = 1.0d0 do i = 1,N_configuration @@ -44,12 +59,19 @@ subroutine convertWFfromDETtoCSF(N_st,psi_coef_det_in, psi_coef_cfg_out) enddo enddo - s = 0 + s = 0 ! s == total number of SOMOs do k=1,N_int if (psi_configuration(k,1,i) == 0_bit_kind) cycle s = s + popcnt(psi_configuration(k,1,i)) enddo - bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) + + if(iand(s,1) .EQ. 0) then + salpha = (s + MS)/2 + bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) + else + salpha = (s + MS)/2 + bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) + endif ! perhaps blocking with CFGs of same seniority ! can be more efficient @@ -80,7 +102,7 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det) double precision,intent(in) :: psi_coef_cfg_in(n_CSF,N_st) double precision,intent(out) :: psi_coef_det(N_det,N_st) double precision :: tmp_psi_coef_det(maxDetDimPerBF,N_st) - integer :: s, bfIcfg + integer :: s, bfIcfg, salpha integer :: countcsf integer(bit_kind) :: Ialpha(N_int), Ibeta(N_int) integer :: rows, cols, i, j, k @@ -91,6 +113,8 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det) double precision,allocatable :: tempCoeff (:,:) double precision :: phasedet integer :: idx + integer MS + MS = elec_alpha_num-elec_beta_num countcsf = 0 @@ -104,7 +128,8 @@ subroutine convertWFfromCSFtoDET(N_st,psi_coef_cfg_in, psi_coef_det) if (psi_configuration(k,1,i) == 0_bit_kind) cycle s = s + popcnt(psi_configuration(k,1,i)) enddo - bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) + salpha = (s + MS)/2 + bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) allocate(tempCoeff(bfIcfg,N_st)) diff --git a/src/csf/create_excitations.irp.f b/src/csf/create_excitations.irp.f index c1560148..1c59d579 100644 --- a/src/csf/create_excitations.irp.f +++ b/src/csf/create_excitations.irp.f @@ -226,7 +226,7 @@ subroutine generate_all_singles_cfg(cfg,singles,n_singles,Nint) enddo end -subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint) +subroutine generate_all_singles_cfg_with_type(bit_tmp,cfgInp,singles,idxs_singles,pq_singles,ex_type_singles,n_singles,Nint) implicit none use bitmasks BEGIN_DOC @@ -238,6 +238,7 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin ! ex_type_singles : on output contains type of excitations : ! END_DOC + integer*8, intent(in) :: bit_tmp(0:N_configuration+1) integer, intent(in) :: Nint integer, intent(inout) :: n_singles integer, intent(out) :: idxs_singles(*) @@ -248,20 +249,26 @@ subroutine generate_all_singles_cfg_with_type(cfgInp,singles,idxs_singles,pq_sin integer(bit_kind) :: Jdet(Nint,2) integer :: i,k, n_singles_ma, i_hole, i_particle, ex_type, addcfg + integer :: ii,kk integer(bit_kind) :: single(Nint,2) logical :: i_ok + n_singles = 0 !TODO !Make list of Somo and Domo for holes !Make list of Unocc and Somo for particles - do i_hole = 1+n_core_orb, n_core_orb + n_act_orb - do i_particle = 1+n_core_orb, n_core_orb + n_act_orb + !do i_hole = 1+n_core_orb, n_core_orb + n_act_orb + do ii = 1, n_act_orb + i_hole = list_act(ii) + !do i_particle = 1+n_core_orb, n_core_orb + n_act_orb + do kk = 1, n_act_orb + i_particle = list_act(kk) if(i_hole .EQ. i_particle) cycle addcfg = -1 call do_single_excitation_cfg_with_type(cfgInp,single,i_hole,i_particle,ex_type,i_ok) if (i_ok) then - call binary_search_cfg(single,addcfg) + call binary_search_cfg(single,addcfg,bit_tmp) if(addcfg .EQ. -1) cycle n_singles = n_singles + 1 do k=1,Nint diff --git a/src/csf/obtain_I_foralpha.irp.f b/src/csf/obtain_I_foralpha.irp.f new file mode 100644 index 00000000..7d7ae09b --- /dev/null +++ b/src/csf/obtain_I_foralpha.irp.f @@ -0,0 +1,397 @@ +subroutine obtain_connected_J_givenI(idxI, givenI, connectedI, idxs_connectedI, nconnectedI,ntotalconnectedI) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for obtain_connected_I_foralpha + ! This function returns all those selected configurations + ! which are connected to the input configuration + ! givenI by a single excitation. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO + ! + ! Order of operators + ! \alpha> = a^\dag_p a_q |I> = E_pq |I> + END_DOC + integer ,intent(in) :: idxI + integer(bit_kind),intent(in) :: givenI(N_int,2) + integer(bit_kind),intent(out) :: connectedI(N_int,2,*) + integer ,intent(out) :: idxs_connectedI(*) + integer,intent(out) :: nconnectedI + integer,intent(out) :: ntotalconnectedI + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: IJsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes + integer :: listholes(mo_num) + integer :: holetype(mo_num) + integer :: end_index + integer :: Nsomo_I + + ! + ! 2 2 1 1 0 0 : 1 1 0 0 0 0 + ! 0 0 1 1 0 0 + ! + ! 2 1 1 1 1 0 : 1 0 0 0 0 0 + ! 0 1 1 1 1 0 + !xorS 0 1 0 0 1 0 : 2 + !xorD 0 1 0 0 0 0 : 1 + !xorSD 0 0 0 0 1 0 : 1 + ! ----- + ! 4 + ! 1 1 1 1 1 1 : 0 0 0 0 0 0 + ! 1 1 1 1 1 1 + ! 1 1 0 0 1 1 : 4 + ! 1 1 0 0 0 0 : 2 + ! 0 0 0 0 1 1 : 2 + ! ----- + ! 8 + ! + + nconnectedI = 0 + ntotalconnectedI = 0 + end_index = N_configuration + + ! Since CFGs are sorted wrt to seniority + ! we don't have to search the full CFG list + Isomo = givenI(1,1) + Idomo = givenI(1,2) + Nsomo_I = POPCNT(Isomo) + end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_I+6,elec_num))-1) + if(end_index .LT. 0) end_index= N_configuration + !end_index = N_configuration + !print *,"Start and End = ",idxI, end_index + + + p = 0 + q = 0 + do i=idxI,end_index + !if(.True.) then + ! nconnectedI += 1 + ! connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + ! idxs_connectedI(nconnectedI)=i + ! cycle + !endif + Isomo = givenI(1,1) + Idomo = givenI(1,2) + Jsomo = psi_configuration(1,1,i) + Jdomo = psi_configuration(1,2,i) + diffSOMO = IEOR(Isomo,Jsomo) + ndiffSOMO = POPCNT(diffSOMO) + diffDOMO = IEOR(Idomo,Jdomo) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + !------- + ! MONO | + !------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((nxordiffSOMODOMO .EQ. 8) .AND. ndiffSOMO .EQ. 4) then + !---------------------------- + ! DOMO -> VMO + DOMO -> VMO | + !---------------------------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((nxordiffSOMODOMO .EQ. 6) .AND. ndiffSOMO .EQ. 2) then + !---------------------------- + ! DOUBLE + !---------------------------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((nxordiffSOMODOMO .EQ. 2) .AND. ndiffSOMO .EQ. 3) then + !----------------- + ! DOUBLE + !----------------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 0) then + !----------------- + ! DOUBLE + !----------------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=i + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)) + else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then + !-------- + ! I = I | + !-------- + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)= i + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + Isomo = psi_configuration(1,1,i) + Idomo = psi_configuration(1,2,i) + do iii = 1,n_act_orb + ii = list_act(iii) + if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do iii = 1,n_act_orb + ii = list_act(iii) + if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 2 + endif + end do + ntotalconnectedI += max(1,(psi_config_data(i,2)-psi_config_data(i,1)+1)*nholes) + endif + end do + +end subroutine obtain_connected_J_givenI + +subroutine obtain_connected_I_foralpha(idxI, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for obtain_connected_I_foralpha + ! This function returns all those selected configurations + ! which are connected to the input configuration + ! Ialpha by a single excitation. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO + ! + ! Order of operators + ! \alpha> = a^\dag_p a_q |I> = E_pq |I> + END_DOC + integer ,intent(in) :: idxI + integer(bit_kind),intent(in) :: Ialpha(N_int,2) + integer(bit_kind),intent(out) :: connectedI(N_int,2,*) + integer ,intent(out) :: idxs_connectedI(*) + integer,intent(out) :: nconnectedI + integer,intent(out) :: excitationIds(2,*) + integer,intent(out) :: excitationTypes(*) + real*8 ,intent(out) :: diagfactors(*) + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: IJsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: iii,ii,i,j,k,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes + integer :: listholes(mo_num) + integer :: holetype(mo_num) + integer :: end_index + integer :: Nsomo_alpha + integer*8 :: MS + MS = elec_alpha_num-elec_beta_num + + nconnectedI = 0 + end_index = N_configuration + + ! Since CFGs are sorted wrt to seniority + ! we don't have to search the full CFG list + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Nsomo_alpha = POPCNT(Isomo) + end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1) + if(end_index .LT. 0) end_index= N_configuration + end_index = N_configuration + + + p = 0 + q = 0 + if (N_int > 1) stop 'obtain_connected_i_foralpha : N_int > 1' + do i=idxI,end_index + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Jsomo = psi_configuration(1,1,i) + Jdomo = psi_configuration(1,2,i) + ! Check for Minimal alpha electrons (MS) + if(POPCNT(Isomo).lt.MS)then + cycle + endif + diffSOMO = IEOR(Isomo,Jsomo) + ndiffSOMO = POPCNT(diffSOMO) + !if(idxI.eq.1)then + ! print *," \t idxI=",i," diffS=",ndiffSOMO," popJs=", POPCNT(Jsomo)," popIs=",POPCNT(Isomo) + !endif + diffDOMO = IEOR(Idomo,Jdomo) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + select case(ndiffDOMO) + case (0) + ! SOMO -> VMO + !print *,"obt SOMO -> VMO" + extyp = 3 + IJsomo = IEOR(Isomo, Jsomo) +!IRP_IF WITHOUT_TRAILZ +! p = (popcnt(ieor( IAND(Isomo,IJsomo) , IAND(Isomo,IJsomo) -1))-1) + 1 +!IRP_ELSE + p = TRAILZ(IAND(Isomo,IJsomo)) + 1 +!IRP_ENDIF + IJsomo = IBCLR(IJsomo,p-1) +!IRP_IF WITHOUT_TRAILZ +! q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1 +!IRP_ELSE + q = TRAILZ(IJsomo) + 1 +!IRP_ENDIF + case (1) + ! DOMO -> VMO + ! or + ! SOMO -> SOMO + nsomoJ = POPCNT(Jsomo) + nsomoalpha = POPCNT(Isomo) + if(nsomoJ .GT. nsomoalpha) then + ! DOMO -> VMO + !print *,"obt DOMO -> VMO" + extyp = 2 +!IRP_IF WITHOUT_TRAILZ +! p = (popcnt(ieor( IEOR(Idomo,Jdomo),IEOR(Idomo,Jdomo) -1))-1) + 1 +!IRP_ELSE + p = TRAILZ(IEOR(Idomo,Jdomo)) + 1 +!IRP_ENDIF + Isomo = IEOR(Isomo, Jsomo) + Isomo = IBCLR(Isomo,p-1) +!IRP_IF WITHOUT_TRAILZ +! q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 +!IRP_ELSE + q = TRAILZ(Isomo) + 1 +!IRP_ENDIF + else + ! SOMO -> SOMO + !print *,"obt SOMO -> SOMO" + extyp = 1 +!IRP_IF WITHOUT_TRAILZ +! q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1 +!IRP_ELSE + q = TRAILZ(IEOR(Idomo,Jdomo)) + 1 +!IRP_ENDIF + Isomo = IEOR(Isomo, Jsomo) + Isomo = IBCLR(Isomo,q-1) +!IRP_IF WITHOUT_TRAILZ +! p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 +!IRP_ELSE + p = TRAILZ(Isomo) + 1 +!IRP_ENDIF + ! Check for Minimal alpha electrons (MS) + !if(POPCNT(Isomo).lt.MS)then + ! cycle + !endif + end if + case (2) + ! DOMO -> SOMO + !print *,"obt DOMO -> SOMO" + extyp = 4 + IJsomo = IEOR(Isomo, Jsomo) +!IRP_IF WITHOUT_TRAILZ +! p = (popcnt(ieor( IAND(Jsomo,IJsomo), IAND(Jsomo,IJsomo)-1))-1) + 1 +!IRP_ELSE + p = TRAILZ(IAND(Jsomo,IJsomo)) + 1 +!IRP_ENDIF + IJsomo = IBCLR(IJsomo,p-1) +!IRP_IF WITHOUT_TRAILZ +! q = (popcnt(ieor( IJsomo , IJsomo -1))-1) + 1 +!IRP_ELSE + q = TRAILZ(IJsomo) + 1 +!IRP_ENDIF + case default + print *,"something went wront in get connectedI" + end select + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + nconnectedI += 1 + do k=1,N_int + connectedI(k,1,nconnectedI) = psi_configuration(k,1,i) + connectedI(k,2,nconnectedI) = psi_configuration(k,2,i) + enddo + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 1.0d0 + else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + Isomo = psi_configuration(1,1,i) + Idomo = psi_configuration(1,2,i) + do iii = 1,n_act_orb + ii = list_act(iii) + if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do iii = 1,n_act_orb + ii = list_act(iii) + if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 2 + endif + end do + + do k=1,nholes + p = listholes(k) + q = p + extyp = 1 + if(holetype(k) .EQ. 1) then + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 1.0d0 + else + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = psi_configuration(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 2.0d0 + endif + enddo + endif + end do + +end subroutine obtain_connected_I_foralpha diff --git a/src/csf/sigma_vector.irp.f b/src/csf/sigma_vector.irp.f index 85ed5f84..833fa7b0 100644 --- a/src/csf/sigma_vector.irp.f +++ b/src/csf/sigma_vector.irp.f @@ -1,10 +1,13 @@ real*8 function logabsgamma(x) implicit none real*8, intent(in) :: x - logabsgamma = log(abs(gamma(x))) + logabsgamma = 1.d32 ! Avoid floating point exception + if (x>0.d0) then + logabsgamma = log(abs(gamma(x))) + endif end function logabsgamma - BEGIN_PROVIDER [ integer, NSOMOMax] + &BEGIN_PROVIDER [ integer, NSOMOMin] &BEGIN_PROVIDER [ integer, NCSFMax] &BEGIN_PROVIDER [ integer*8, NMO] &BEGIN_PROVIDER [ integer, NBFMax] @@ -16,11 +19,19 @@ ! The maximum number of SOMOs for the current calculation. ! required for the calculation of prototype arrays. END_DOC + integer MS, ialpha + MS = elec_alpha_num-elec_beta_num NSOMOMax = min(elec_num, cfg_nsomo_max + 2) + if(AND(cfg_nsomo_min , 1) .eq. 0)then + NSOMOMin = max(0,cfg_nsomo_min-2) + else + NSOMOMin = max(1,cfg_nsomo_min-2) + endif ! Note that here we need NSOMOMax + 2 sizes - NCSFMax = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)-binom(NSOMOMax,((NSOMOMax+1)/2)+1)))) ! TODO: NCSFs for MS=0 + ialpha = (NSOMOMax + MS)/2 + NCSFMax = max(1,nint((binom(NSOMOMax,ialpha)-binom(NSOMOMax,ialpha+1)))) ! TODO: NCSFs for MS=0 (CHECK) NBFMax = NCSFMax - maxDetDimPerBF = max(1,nint((binom(NSOMOMax,(NSOMOMax+1)/2)))) + maxDetDimPerBF = max(1,nint((binom(NSOMOMax,ialpha)))) NMO = n_act_orb integer i,j,k,l integer startdet,enddet @@ -29,63 +40,113 @@ integer dimcsfpercfg integer detDimperBF real*8 :: coeff, binom1, binom2 - integer MS integer ncfgpersomo real*8, external :: logabsgamma detDimperBF = 0 - MS = elec_alpha_num-elec_beta_num ! number of cfgs = number of dets for 0 somos + n_CSF = cfg_seniority_index(NSOMOMin)-1 + ncfgprev = cfg_seniority_index(NSOMOMin) + !do i = 0-iand(MS,1)+2, NSOMOMax,2 + !!print *," i=",0," dimcsf=",1," ncfg=",ncfgprev, " senor=",cfg_seniority_index(0) + !!do i = NSOMOMin+2, NSOMOMax,2 + !! if(cfg_seniority_index(i) .EQ. -1)then + !! ncfgpersomo = N_configuration + 1 + !! else + !! ncfgpersomo = cfg_seniority_index(i) + !! endif + !!ncfg = ncfgpersomo - ncfgprev + !!!detDimperBF = max(1,nint((binom(i,(i+1)/2)))) + !!!dimcsfpercfg = max(1,nint((binom(i-2,(i-2+1)/2)-binom(i-2,((i-2+1)/2)+1)))) + !!n_CSF += ncfg * dimcsfpercfg + !!!if(cfg_seniority_index(i+2) == -1) EXIT + !!!if(detDimperBF > maxDetDimPerBF) maxDetDimPerBF = detDimperBF + !!ncfgprev = cfg_seniority_index(i) + !!print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " senor=",cfg_seniority_index(i) + !!enddo + !!print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration n_CSF = 0 - ncfgprev = cfg_seniority_index(0) - ncfgpersomo = ncfgprev - do i = iand(MS,1), NSOMOMax-2,2 - if(cfg_seniority_index(i) .EQ. -1) then - cycle - endif - if(cfg_seniority_index(i+2) .EQ. -1) then - ncfgpersomo = N_configuration + 1 + !ncfgprev = cfg_seniority_index(0) + !ncfgpersomo = ncfgprev + !do i = iand(MS,1), NSOMOMax-2,2 + ! if(cfg_seniority_index(i) .EQ. -1) then + ! cycle + ! endif + ! if(cfg_seniority_index(i+2) .EQ. -1) then + ! ncfgpersomo = N_configuration + 1 + ! else + ! if(cfg_seniority_index(i+2) > ncfgpersomo) then + ! ncfgpersomo = cfg_seniority_index(i+2) + ! else + ! k = 0 + ! do while(cfg_seniority_index(i+2+k) < ncfgpersomo) + ! k = k + 2 + ! ncfgpersomo = cfg_seniority_index(i+2+k) + ! enddo + ! endif + ! endif + ! ncfg = ncfgpersomo - ncfgprev + ! if(i .EQ. 0 .OR. i .EQ. 1) then + ! dimcsfpercfg = 1 + ! elseif( i .EQ. 3) then + ! dimcsfpercfg = 2 + ! else + ! if(iand(MS,1) .EQ. 0) then + ! dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1)))) + ! else + ! dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2)))) + ! endif + ! endif + ! n_CSF += ncfg * dimcsfpercfg + ! print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " senor=",cfg_seniority_index(i) + ! if(cfg_seniority_index(i+2) > ncfgprev) then + ! ncfgprev = cfg_seniority_index(i+2) + ! else + ! k = 0 + ! do while(cfg_seniority_index(i+2+k) < ncfgprev) + ! k = k + 2 + ! ncfgprev = cfg_seniority_index(i+2+k) + ! enddo + ! endif + !enddo + n_CSF = 0 + !print *," -9(((((((((((((( NSOMOMin=",NSOMOMin + ncfgprev = cfg_seniority_index(NSOMOMin) ! can be -1 + if(ncfgprev.eq.-1)then + ncfgprev=1 + endif + do i=NSOMOMin,NSOMOMax+2,2 + !k=0 + !do while((cfg_seniority_index(i+2+k) .eq. -1) .and. (k.le.NSOMOMax)) + ! k=k+2 + !end do + if(cfg_seniority_index(i).eq.-1)cycle + if(cfg_seniority_index(i+2).eq.-1)then + ncfg = N_configuration - ncfgprev + 1 + if(ncfg .eq. 0)then + ncfg=1 + endif else - if(cfg_seniority_index(i+2) > ncfgpersomo) then - ncfgpersomo = cfg_seniority_index(i+2) + ncfg = cfg_seniority_index(i+2) - ncfgprev + endif + if(i .EQ. 0 .OR. i .EQ. 1) then + dimcsfpercfg = 1 + elseif( i .EQ. 3) then + dimcsfpercfg = 2 + else + if(iand(MS,1) .EQ. 0) then + ialpha = (i + MS)/2 + dimcsfpercfg = max(1,nint((binom(i,ialpha)-binom(i,ialpha+1)))) else - k = 0 - do while(cfg_seniority_index(i+2+k) < ncfgpersomo) - k = k + 2 - ncfgpersomo = cfg_seniority_index(i+2+k) - enddo + ialpha = (i + MS)/2 + dimcsfpercfg = max(1,nint((binom(i,ialpha)-binom(i,ialpha+1)))) endif endif - ncfg = ncfgpersomo - ncfgprev - if(iand(MS,1) .EQ. 0) then - !dimcsfpercfg = max(1,nint((binom(i,i/2)-binom(i,i/2+1)))) - binom1 = dexp(logabsgamma(1.0d0*(i+1)) & - - logabsgamma(1.0d0*((i/2)+1)) & - - logabsgamma(1.0d0*(i-((i/2))+1))); - binom2 = dexp(logabsgamma(1.0d0*(i+1)) & - - logabsgamma(1.0d0*(((i/2)+1)+1)) & - - logabsgamma(1.0d0*(i-((i/2)+1)+1))); - dimcsfpercfg = max(1,nint(binom1 - binom2)) - else - !dimcsfpercfg = max(1,nint((binom(i,(i+1)/2)-binom(i,(i+3)/2)))) - binom1 = dexp(logabsgamma(1.0d0*(i+1)) & - - logabsgamma(1.0d0*(((i+1)/2)+1)) & - - logabsgamma(1.0d0*(i-(((i+1)/2))+1))); - binom2 = dexp(logabsgamma(1.0d0*(i+1)) & - - logabsgamma(1.0d0*((((i+3)/2)+1)+1)) & - - logabsgamma(1.0d0*(i-(((i+3)/2)+1)+1))); - dimcsfpercfg = max(1,nint(binom1 - binom2)) - endif - n_CSF += ncfg * dimcsfpercfg - if(cfg_seniority_index(i+2) > ncfgprev) then - ncfgprev = cfg_seniority_index(i+2) - else - k = 0 - do while(cfg_seniority_index(i+2+k) < ncfgprev) - k = k + 2 - ncfgprev = cfg_seniority_index(i+2+k) - enddo - endif - enddo + n_CSF += ncfg*dimcsfpercfg + !print *," i=",i," dimcsf=",dimcsfpercfg," ncfg=",ncfg, " ncfgprev=",ncfgprev, " senor=",cfg_seniority_index(i) + ncfgprev = cfg_seniority_index(i+2) + end do + !print *," ^^^^^ N_CSF = ",n_CSF," N_CFG=",N_configuration + END_PROVIDER @@ -106,7 +167,7 @@ subroutine get_phase_qp_to_cfg(Ialpha, Ibeta, phaseout) real*8,intent(out) :: phaseout integer(bit_kind) :: mask, deta(N_int), detb(N_int) integer :: nbetas - integer :: k + integer :: count, k ! Initialize deta and detb deta = Ialpha @@ -146,7 +207,117 @@ end subroutine get_phase_qp_to_cfg - BEGIN_PROVIDER [ integer, AIJpqMatrixDimsList, (0:NSOMOMax,0:NSOMOMax,4,NSOMOMax,NSOMOMax,2)] + BEGIN_PROVIDER [ real*8, DetToCSFTransformationMatrix, (0:NSOMOMax,NBFMax,maxDetDimPerBF)] + &BEGIN_PROVIDER [ real*8, psi_coef_config, (n_CSF,1)] + &BEGIN_PROVIDER [ integer, psi_config_data, (N_configuration,2)] + &BEGIN_PROVIDER [ integer, psi_csf_to_config_data, (n_CSF)] + use cfunctions + implicit none + BEGIN_DOC + ! Documentation for DetToCSFTransformationMatrix + ! Provides the matrix of transformatons for the + ! conversion between determinant to CSF basis (in BFs) + END_DOC + integer*8 :: Isomo, Idomo + integer(bit_kind) :: Ialpha(N_int),Ibeta(N_int) + integer :: rows, cols, i, j, k + integer :: startdet, enddet, idx + integer*8 MS, salpha + integer ndetI + integer :: getNSOMO + real*8,dimension(:,:),allocatable :: tempBuffer + real*8,dimension(:),allocatable :: tempCoeff + real*8 :: norm_det1, phasedet + + integer :: nt + + + norm_det1 = 0.d0 + MS = elec_alpha_num - elec_beta_num + ! initialization + psi_coef_config = 0.d0 + DetToCSFTransformationMatrix(0,:,:) = 1.d0 + do i = 2-iand(MS,1_8), NSOMOMax,2 + Isomo = IBSET(0_8, i) - 1_8 + ! rows = Ncsfs + ! cols = Ndets + salpha = (i+MS)/2 + bfIcfg = max(1,nint((binom(i,salpha)-binom(i,salpha+1)))) + ndetI = max(1,nint((binom(i,salpha)))) + !bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1)))) + !ndetI = max(1,nint((binom(i,(i+1)/2)))) + + allocate(tempBuffer(bfIcfg,ndetI)) + call getCSFtoDETTransformationMatrix(Isomo, MS, NBFMax, maxDetDimPerBF, tempBuffer) + DetToCSFTransformationMatrix(i,1:bfIcfg,1:ndetI) = tempBuffer(1:bfIcfg,1:ndetI) + deallocate(tempBuffer) + enddo + + integer s, bfIcfg + integer countcsf + countcsf = 0 + integer countdet + countdet = 0 + integer istate + istate = 1 + psi_csf_to_config_data(1) = 1 + phasedet = 1.0d0 + call omp_set_max_active_levels(1) + !$OMP PARALLEL + !$OMP MASTER + do i = 1,N_configuration + startdet = psi_configuration_to_psi_det(1,i) + enddet = psi_configuration_to_psi_det(2,i) + ndetI = enddet-startdet+1 + + allocate(tempCoeff(ndetI)) + countdet = 1 + do j = startdet, enddet + idx = psi_configuration_to_psi_det_data(j) + Ialpha(:) = psi_det(:,1,idx) + Ibeta(:) = psi_det(:,2,idx) + call get_phase_qp_to_cfg(Ialpha, Ibeta, phasedet) + tempCoeff(countdet) = psi_coef(idx, istate)*phasedet + norm_det1 += tempCoeff(countdet)*tempCoeff(countdet) + countdet += 1 + enddo + + !print *,"dimcoef=",bfIcfg,norm_det1 + !call printMatrix(tempCoeff,ndetI,1) + + s = 0 + do k=1,N_int + if (psi_configuration(k,1,i) == 0_bit_kind) cycle + s = s + popcnt(psi_configuration(k,1,i)) + enddo + salpha = (s+MS)/2 + bfIcfg = max(1,nint((binom(s,salpha)-binom(s,salpha+1)))) + !bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) + + ! perhaps blocking with CFGs of same seniority + ! can be more efficient + allocate(tempBuffer(bfIcfg,ndetI)) + tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI) + + call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1,1), size(psi_coef_config,1)) + !call dgemv('N', NBFMax, maxDetDimPerBF, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, 1, 0.d0, psi_coef_config(countcsf), 1) + + deallocate(tempCoeff) + deallocate(tempBuffer) + psi_config_data(i,1) = countcsf + 1 + do k=1,bfIcfg + psi_csf_to_config_data(countcsf+k) = i + enddo + countcsf += bfIcfg + psi_config_data(i,2) = countcsf + enddo + !$OMP END MASTER + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + END_PROVIDER + + BEGIN_PROVIDER [ integer, AIJpqMatrixDimsList, (NSOMOMin:NSOMOMax,4,NSOMOMax+1,NSOMOMax+1,2)] &BEGIN_PROVIDER [ integer, rowsmax] &BEGIN_PROVIDER [ integer, colsmax] use cfunctions @@ -165,14 +336,18 @@ end subroutine get_phase_qp_to_cfg cols = -1 integer*8 MS MS = elec_alpha_num-elec_beta_num - integer nsomomin nsomomin = elec_alpha_num-elec_beta_num rowsmax = 0 colsmax = 0 + !print *,"NSOMOMax = ",NSOMOMax + !print *,"NSOMOMin = ",NSOMOMin !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2)) ! Type ! 1. SOMO -> SOMO - do i = 2-iand(nsomomin,1), NSOMOMax, 2 + !print *,"Doing SOMO->SOMO" + AIJpqMatrixDimsList(NSOMOMin,1,1,1,1) = 1 + AIJpqMatrixDimsList(NSOMOMin,1,1,1,2) = 1 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i-2,i-2, 2 Jsomo = ISHFT(1_8,j)-1 @@ -199,6 +374,7 @@ end subroutine get_phase_qp_to_cfg MS, & rows, & cols) + !print *, "SOMO->SOMO \t",i,j,k,l,">",Isomo,Jsomo,">",rows, cols if(rowsmax .LT. rows) then rowsmax = rows end if @@ -206,15 +382,18 @@ end subroutine get_phase_qp_to_cfg colsmax = cols end if ! i -> j - AIJpqMatrixDimsList(nsomoi,nsomoj,1,k,l,1) = rows - AIJpqMatrixDimsList(nsomoi,nsomoj,1,k,l,2) = cols + AIJpqMatrixDimsList(nsomoi,1,k,l,1) = rows + AIJpqMatrixDimsList(nsomoi,1,k,l,2) = cols end do end do end do end do ! Type ! 2. DOMO -> VMO - do i = 0+iand(nsomomin,1), NSOMOMax, 2 + !print *,"Doing DOMO->VMO" + AIJpqMatrixDimsList(NSOMOMin,2,1,1,1) = 1 + AIJpqMatrixDimsList(NSOMOMin,2,1,1,2) = 1 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 tmpsomo = ISHFT(1_8,i+2)-1 do j = i+2,i+2, 2 @@ -247,6 +426,7 @@ end subroutine get_phase_qp_to_cfg MS, & rows, & cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols if(rowsmax .LT. rows) then rowsmax = rows end if @@ -254,8 +434,8 @@ end subroutine get_phase_qp_to_cfg colsmax = cols end if ! i -> j - AIJpqMatrixDimsList(nsomoi,nsomoj,2,k,l,1) = rows - AIJpqMatrixDimsList(nsomoi,nsomoj,2,k,l,2) = cols + AIJpqMatrixDimsList(nsomoi,2,k,l,1) = rows + AIJpqMatrixDimsList(nsomoi,2,k,l,2) = cols end do end do end do @@ -263,15 +443,17 @@ end subroutine get_phase_qp_to_cfg ! Type ! 3. SOMO -> VMO !print *,"Doing SOMO->VMO" - do i = 2-iand(nsomomin,1), NSOMOMax, 2 + AIJpqMatrixDimsList(NSOMOMin,3,1,1,1) = 1 + AIJpqMatrixDimsList(NSOMOMin,3,1,1,2) = 1 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i,i, 2 Jsomo = ISHFT(1_8,j)-1 if(j .GT. NSOMOMax .OR. j .LE. 0) then cycle end if - do k = 1,i - do l = 1,i + do k = 1,i+1 + do l = 1,i+1 if(k .NE. l) then Isomo = ISHFT(1_8,i+1)-1 Isomo = IBCLR(Isomo,l-1) @@ -286,6 +468,7 @@ end subroutine get_phase_qp_to_cfg MS, & rows, & cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols if(rowsmax .LT. rows) then rowsmax = rows end if @@ -293,25 +476,27 @@ end subroutine get_phase_qp_to_cfg colsmax = cols end if ! i -> j - AIJpqMatrixDimsList(i,j,3,k,l,1) = rows - AIJpqMatrixDimsList(i,j,3,k,l,2) = cols + AIJpqMatrixDimsList(i,3,k,l,1) = rows + AIJpqMatrixDimsList(i,3,k,l,2) = cols end do end do end do end do ! Type - ! 4. DOMO -> VMO + ! 4. DOMO -> SOMO !print *,"Doing DOMO->SOMO" - do i = 2-iand(nsomomin,1), NSOMOMax, 2 + AIJpqMatrixDimsList(NSOMOMin,4,1,1,1) = 1 + AIJpqMatrixDimsList(NSOMOMin,4,1,1,2) = 1 + do i = NSOMOMin, NSOMOMax, 2 do j = i,i, 2 if(j .GT. NSOMOMax .OR. j .LE. 0) then cycle end if - do k = 1,i - do l = 1,i + do k = 1,i+1 + do l = 1,i+1 if(k .NE. l) then Isomo = ISHFT(1_8,i+1)-1 - Isomo = IBCLR(Isomo,k+1-1) + Isomo = IBCLR(Isomo,k-1) Jsomo = ISHFT(1_8,j+1)-1 Jsomo = IBCLR(Jsomo,l-1) else @@ -323,6 +508,7 @@ end subroutine get_phase_qp_to_cfg MS, & rows, & cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols if(rowsmax .LT. rows) then rowsmax = rows end if @@ -330,15 +516,16 @@ end subroutine get_phase_qp_to_cfg colsmax = cols end if ! i -> j - AIJpqMatrixDimsList(i,j,4,k,l,1) = rows - AIJpqMatrixDimsList(i,j,4,k,l,2) = cols + AIJpqMatrixDimsList(i,4,k,l,1) = rows + AIJpqMatrixDimsList(i,4,k,l,2) = cols end do end do end do end do + !print *,"Rowsmax=",rowsmax," Colsmax=",colsmax END_PROVIDER - BEGIN_PROVIDER [ real*8, AIJpqContainer, (0:NSOMOMax,0:NSOMOMax,4,NSOMOMax,NSOMOMax,NBFMax,NBFMax)] + BEGIN_PROVIDER [ real*8, AIJpqContainer, (NBFMax,NBFmax,NSOMOMax+1,NSOMOMax+1,4,NSOMOMin:NSOMOMax)] use cfunctions implicit none BEGIN_DOC @@ -368,70 +555,79 @@ end subroutine get_phase_qp_to_cfg rows = -1 cols = -1 integer*8 MS - MS = 0 - touch AIJpqMatrixDimsList + MS = elec_alpha_num-elec_beta_num real*8,dimension(:,:),allocatable :: meMatrix integer maxdim - !maxdim = max(rowsmax,colsmax) - ! allocate matrix - !allocate(AIJpqMatrixDimsList(NSOMOMax,NSOMOMax,4,NSOMOMax,NSOMOMax,2)) + ! Type ! 1. SOMO -> SOMO - do i = 2, NSOMOMax, 2 + AIJpqContainer = 0.d0 + AIJpqContainer(1,1,1,1,1,NSOMOMin) = 1.0d0 + integer :: rows_old, cols_old + rows_old = -1 + cols_old = -1 + allocate(meMatrix(1,1)) + do i = NSOMOMin+2, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 - do j = i-2,i-2, 2 - if(j .GT. NSOMOMax .OR. j .LT. 0) cycle - do k = 1,i - do l = 1,i + j=i-2 + if(j .GT. NSOMOMax .OR. j .LT. 0) cycle + nsomoi = i + do k = 1,i + orbp = k + do l = 1,i - ! Define Jsomo - if(k .NE. l) then - Jsomo = IBCLR(Isomo, k-1) - Jsomo = IBCLR(Jsomo, l-1) - nsomoi = i - nsomoj = j - else - Isomo = ISHFT(1_8,i)-1 - Jsomo = ISHFT(1_8,i)-1 - nsomoi = i - nsomoj = i - endif + ! Define Jsomo + if(k .NE. l) then + Jsomo = IBCLR(Isomo, k-1) + Jsomo = IBCLR(Jsomo, l-1) + nsomoj = j + else + Isomo = ISHFT(1_8,i)-1 + Jsomo = ISHFT(1_8,i)-1 + nsomoj = i + endif - AIJpqContainer(nsomoi,nsomoj,1,k,l,:,:) = 0.0d0 - call getApqIJMatrixDims(Isomo, & - Jsomo, & - MS, & - rows, & - cols) + call getApqIJMatrixDims(Isomo, & + Jsomo, & + MS, & + rows, & + cols) - orbp = k - orbq = l - allocate(meMatrix(rows,cols)) - meMatrix = 0.0d0 - ! fill matrix - call getApqIJMatrixDriver(Isomo, & - Jsomo, & - orbp, & - orbq, & - MS, & - NMO, & - meMatrix, & - rows, & - cols) - ! i -> j - do ri = 1,rows - do ci = 1,cols - AIJpqContainer(nsomoi,nsomoj,1,k,l,ri,ci) = meMatrix(ri, ci) - end do + orbq = l + if ((rows /= rows_old).or.(cols /= cols_old)) then + deallocate(meMatrix) + allocate(meMatrix(rows,cols)) + rows_old = rows + cols_old = cols + endif + meMatrix = 0.0d0 + ! fill matrix + call getApqIJMatrixDriver(Isomo, & + Jsomo, & + orbp, & + orbq, & + MS, & + NMO, & + meMatrix, & + rows, & + cols) + ! i -> j + do ri = 1,rows + do ci = 1,cols + AIJpqContainer(ri,ci,k,l,1,nsomoi) = meMatrix(ri, ci) end do - deallocate(meMatrix) end do end do end do end do + deallocate(meMatrix) + ! Type ! 2. DOMO -> VMO - do i = 0, NSOMOMax, 2 + !print *,"Doing DOMO -> VMO" + !AIJpqContainer(NSOMOMin,2,1,1,1,1) = 1.0d0 + AIJpqContainer(1,1,1,1,2,NSOMOMin) = 1.0d0 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 tmpsomo = ISHFT(1_8,i+2)-1 do j = i+2,i+2, 2 @@ -456,7 +652,12 @@ end subroutine get_phase_qp_to_cfg nsomoj = j endif - AIJpqContainer(nsomoi,nsomoj,2,k,l,:,:) = 0.0d0 + !print *,"k,l=",k,l + !call debug_spindet(Jsomo,1) + !call debug_spindet(Isomo,1) + + !AIJpqContainer(nsomoi,2,k,l,:,:) = 0.0d0 + AIJpqContainer(:,:,k,l,2,nsomoi) = 0.0d0 call getApqIJMatrixDims(Isomo, & Jsomo, & MS, & @@ -477,10 +678,13 @@ end subroutine get_phase_qp_to_cfg meMatrix, & rows, & cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax + !call printMatrix(meMatrix,rows,cols) ! i -> j do ri = 1,rows do ci = 1,cols - AIJpqContainer(nsomoi,nsomoj,2,k,l,ri,ci) = meMatrix(ri, ci) + !AIJpqContainer(nsomoi,2,k,l,ri,ci) = meMatrix(ri, ci) + AIJpqContainer(ri,ci,k,l,2,nsomoi) = meMatrix(ri, ci) end do end do deallocate(meMatrix) @@ -490,13 +694,16 @@ end subroutine get_phase_qp_to_cfg end do ! Type ! 3. SOMO -> VMO - do i = 2, NSOMOMax, 2 + !print *,"Doing SOMO -> VMO" + !AIJpqContainer(NSOMOMin,3,1,1,1,1) = 1.0d0 + AIJpqContainer(1,1,1:2,1:2,3,NSOMOMin) = 1.0d0 + do i = NSOMOMin, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i,i, 2 Jsomo = ISHFT(1_8,j)-1 if(j .GT. NSOMOMax .OR. j .LE. 0) cycle - do k = 1,i - do l = 1,i + do k = 1,i+1 + do l = 1,i+1 if(k .NE. l) then Isomo = ISHFT(1_8,i+1)-1 Isomo = IBCLR(Isomo,l-1) @@ -507,7 +714,12 @@ end subroutine get_phase_qp_to_cfg Jsomo = ISHFT(1_8,j)-1 endif - AIJpqContainer(i,j,3,k,l,:,:) = 0.0d0 + !print *,"k,l=",k,l + !call debug_spindet(Jsomo,1) + !call debug_spindet(Isomo,1) + + !AIJpqContainer(i,3,k,l,:,:) = 0.0d0 + AIJpqContainer(:,:,k,l,3,i) = 0.0d0 call getApqIJMatrixDims(Isomo, & Jsomo, & MS, & @@ -528,10 +740,13 @@ end subroutine get_phase_qp_to_cfg meMatrix, & rows, & cols) + !call printMatrix(meMatrix,rows,cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax ! i -> j do ri = 1,rows do ci = 1,cols - AIJpqContainer(i,j,3,k,l,ri,ci) = meMatrix(ri, ci) + !AIJpqContainer(i,3,k,l,ri,ci) = meMatrix(ri, ci) + AIJpqContainer(ri,ci,k,l,3,i) = meMatrix(ri, ci) end do end do deallocate(meMatrix) @@ -541,24 +756,35 @@ end subroutine get_phase_qp_to_cfg end do ! Type ! 4. DOMO -> SOMO - do i = 2, NSOMOMax, 2 + !print *,"Doing DOMO -> SOMO" + !AIJpqContainer(NSOMOMin,4,1,1,1,1) = 1.0d0 + AIJpqContainer(1,1,1,1,4,NSOMOMin) = 1.0d0 + AIJpqContainer(1,1,2,2,4,NSOMOMin) = 1.0d0 + AIJpqContainer(1,1,2,1,4,NSOMOMin) =-1.0d0 + AIJpqContainer(1,1,1,2,4,NSOMOMin) =-1.0d0 + do i = NSOMOMin+2, NSOMOMax, 2 Isomo = ISHFT(1_8,i)-1 do j = i,i, 2 Jsomo = ISHFT(1_8,i)-1 if(j .GT. NSOMOMax .OR. j .LE. 0) cycle - do k = 1,i - do l = 1,i + do k = 1,i+1 + do l = 1,i+1 if(k .NE. l) then Isomo = ISHFT(1_8,i+1)-1 Isomo = IBCLR(Isomo,k-1) Jsomo = ISHFT(1_8,j+1)-1 - Jsomo = IBCLR(Jsomo,l+1-1) + Jsomo = IBCLR(Jsomo,l-1) else Isomo = ISHFT(1_8,i)-1 Jsomo = ISHFT(1_8,j)-1 endif - AIJpqContainer(i,j,4,k,l,:,:) = 0.0d0 + !print *,"k,l=",k,l + !call debug_spindet(Jsomo,1) + !call debug_spindet(Isomo,1) + + !AIJpqContainer(i,4,k,l,:,:) = 0.0d0 + AIJpqContainer(:,:,k,l,4,i) = 0.0d0 call getApqIJMatrixDims(Isomo, & Jsomo, & MS, & @@ -580,10 +806,13 @@ end subroutine get_phase_qp_to_cfg meMatrix, & rows, & cols) + !call printMatrix(meMatrix,rows,cols) + !print *, i,j,k,l,">",Isomo,Jsomo,">",rows, cols,">",rowsmax,colsmax ! i -> j do ri = 1,rows do ci = 1,cols - AIJpqContainer(i,j,4,k,l,ri,ci) = meMatrix(ri, ci) + !AIJpqContainer(i,4,k,l,ri,ci) = meMatrix(ri, ci) + AIJpqContainer(ri,ci,k,l,4,i) = meMatrix(ri, ci) end do end do deallocate(meMatrix) @@ -593,93 +822,1215 @@ end subroutine get_phase_qp_to_cfg end do END_PROVIDER - -!!!!!! - - BEGIN_PROVIDER [ real*8, DetToCSFTransformationMatrix, (0:NSOMOMax,NBFMax,maxDetDimPerBF)] - &BEGIN_PROVIDER [ real*8, psi_coef_config, (n_CSF)] - &BEGIN_PROVIDER [ integer, psi_config_data, (N_configuration,2)] - use cfunctions +subroutine calculate_preconditioner_cfg(diag_energies) + implicit none use bitmasks + BEGIN_DOC + ! Documentation for calculate_preconditioner + ! + ! Calculates the diagonal energies of + ! the configurations in psi_configuration + ! returns : diag_energies : + END_DOC + integer :: i,j,k,kk,l,p,q,noccp,noccq, ii, jj + real*8,intent(out) :: diag_energies(n_CSF) + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer :: NSOMOI + integer :: NSOMOJ + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: starti, endi, cnti, cntj, rows,cols + integer :: extype,pmodel,qmodel + integer(bit_kind) :: Icfg(N_INT,2) + integer(bit_kind) :: Jcfg(N_INT,2) + integer,external :: getNSOMO + real*8, external :: mo_two_e_integral + real*8 :: hpp + real*8 :: meCC + real*8 :: ecore + real*8 :: core_act_contrib + + !PROVIDE h_core_ri + PROVIDE core_fock_operator + PROVIDE h_act_ri + ! initialize energies + diag_energies = 0.d0 + !print *,"Core energy=",core_energy," nucler rep=",nuclear_repulsion, " n_core_orb=",n_core_orb," n_act_orb=",n_act_orb," mo_num=",mo_num + + ! calculate core energy + !call get_core_energy(ecore) + diag_energies = core_energy - nuclear_repulsion + + ! calculate the core energy + !print *,"Core 2energy=",ref_bitmask_energy + + do i=1,N_configuration + + Isomo = psi_configuration(1,1,i) + Idomo = psi_configuration(1,2,i) + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + NSOMOI = getNSOMO(psi_configuration(:,:,i)) + + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + + core_act_contrib = 0.0d0 + + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) + if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 1 + endif + enddo + ! holes in DOMO + !do k = n_core_orb+1,n_core_orb + n_act_orb + !do k = 1+n_core_inact_orb,n_core_orb+n_core_inact_act_orb + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) + if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 2 + endif + enddo + + ! find vmos + listvmos = -1 + vmotype = -1 + nvmos = 0 + !do k = n_core_orb+1,n_core_orb + n_act_orb + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) + !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) + if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 0 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 1 + end if + enddo + !print *,"I=",i + !call debug_spindet(psi_configuration(1,1,i),N_int) + !call debug_spindet(psi_configuration(1,2,i),N_int) + + do k=1,nholes + p = listholes(k) + noccp = holetype(k) + + + ! core-virtual + do l = 1, n_core_orb + jj = list_core(l) + core_act_contrib += noccp * (2.d0 * mo_two_e_integrals_jj(jj,p) - mo_two_e_integrals_jj_exchange(jj,p)) + enddo + + ! Calculate one-electron + ! and two-electron coulomb terms + do l=1,nholes + q = listholes(l) + noccq = holetype(l) + !print *,"--------------- K=",p," L=",q + + ! one-electron term + if(p.EQ.q) then + hpp = noccq * h_act_ri(p,q)!mo_one_e_integrals(q,q) + else + hpp = 0.d0 + endif + + + do j=starti,endi + ! coulomb term + ! (pp,qq) = + if(p.EQ.q) then + diag_energies(j) += hpp !+ 0.5d0 * (noccp * noccq * mo_two_e_integral(p,q,p,q)) + !print *,"hpp=",hpp,"diga= ",diag_energies(j) +! else +! diag_energies(j) += ! 0.5d0 * noccp * noccq * mo_two_e_integral(p,q,p,q) +! print *,"diga= ",diag_energies(j) + endif + enddo + enddo + + enddo + !print *,"I=",i," core_act=",core_act_contrib + do j=starti,endi + diag_energies(j) += core_act_contrib + end do + enddo + +end subroutine calculate_preconditioner_cfg + +subroutine obtain_connected_I_foralpha_fromfilterdlist(idxI, nconnectedJ, idslistconnectedJ, listconnectedJ, Ialpha, connectedI, idxs_connectedI, nconnectedI, excitationIds, excitationTypes, diagfactors) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for obtain_connected_I_foralpha + ! This function returns all those selected configurations + ! which are connected to the input configuration + ! Ialpha by a single excitation. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO + ! + ! Order of operators + ! \alpha> = a^\dag_p a_q |I> = E_pq |I> + END_DOC + integer ,intent(in) :: idxI + integer ,intent(in) :: nconnectedJ + integer(bit_kind),intent(in) :: listconnectedJ(N_int,2,*) + integer(bit_kind),intent(in) :: Ialpha(N_int,2) + integer(bit_kind),intent(out) :: connectedI(N_int,2,*) + integer ,intent(in) :: idslistconnectedJ(*) + integer ,intent(out) :: idxs_connectedI(*) + integer,intent(out) :: nconnectedI + integer,intent(out) :: excitationIds(2,*) + integer,intent(out) :: excitationTypes(*) + real*8 ,intent(out) :: diagfactors(*) + integer*8 :: Idomo + integer*8 :: Isomo + integer*8 :: Jdomo + integer*8 :: Jsomo + integer*8 :: IJsomo + integer*8 :: diffSOMO + integer*8 :: diffDOMO + integer*8 :: xordiffSOMODOMO + integer :: ndiffSOMO + integer :: ndiffDOMO + integer :: nxordiffSOMODOMO + integer :: ii,i,j,k,kk,l,p,q,nsomoJ,nsomoalpha,starti,endi,extyp,nholes, idxJ + integer :: listholes(mo_num) + integer :: holetype(mo_num) + integer :: end_index + integer :: Nsomo_alpha + logical :: isOKlistJ + + PROVIDE DetToCSFTransformationMatrix + + isOKlistJ = .False. + + nconnectedI = 0 + end_index = N_configuration + + ! Since CFGs are sorted wrt to seniority + ! we don't have to search the full CFG list + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Nsomo_alpha = POPCNT(Isomo) + end_index = min(N_configuration,cfg_seniority_index(min(Nsomo_alpha+4,elec_num))-1) + if(end_index .LT. 0) end_index= N_configuration + !end_index = N_configuration + + + p = 0 + q = 0 + do i=1,nconnectedJ + idxJ = idslistconnectedJ(i) + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Jsomo = listconnectedJ(1,1,i) + Jdomo = listconnectedJ(1,2,i) + diffSOMO = IEOR(Isomo,Jsomo) + ndiffSOMO = POPCNT(diffSOMO) + diffDOMO = IEOR(Idomo,Jdomo) + xordiffSOMODOMO = IEOR(diffSOMO,diffDOMO) + ndiffDOMO = POPCNT(diffDOMO) + nxordiffSOMODOMO = POPCNT(xordiffSOMODOMO) + nxordiffSOMODOMO += ndiffSOMO + ndiffDOMO + if((nxordiffSOMODOMO .EQ. 4) .AND. ndiffSOMO .EQ. 2) then + select case(ndiffDOMO) + case (0) + ! SOMO -> VMO + !print *,"obt SOMO -> VMO" + extyp = 3 + IJsomo = IEOR(Isomo, Jsomo) +IRP_IF WITHOUT_TRAILZ + p = (popcnt(ieor( IAND(Isomo,IJsomo), IAND(Isomo,IJsomo)-1)) -1) + 1 +IRP_ELSE + p = TRAILZ(IAND(Isomo,IJsomo)) + 1 +IRP_ENDIF + IJsomo = IBCLR(IJsomo,p-1) +IRP_IF WITHOUT_TRAILZ + q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1 +IRP_ELSE + q = TRAILZ(IJsomo) + 1 +IRP_ENDIF + case (1) + ! DOMO -> VMO + ! or + ! SOMO -> SOMO + nsomoJ = POPCNT(Jsomo) + nsomoalpha = POPCNT(Isomo) + if(nsomoJ .GT. nsomoalpha) then + ! DOMO -> VMO + !print *,"obt DOMO -> VMO" + extyp = 2 +IRP_IF WITHOUT_TRAILZ + p = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1 +IRP_ELSE + p = TRAILZ(IEOR(Idomo,Jdomo)) + 1 +IRP_ENDIF + Isomo = IEOR(Isomo, Jsomo) + Isomo = IBCLR(Isomo,p-1) +IRP_IF WITHOUT_TRAILZ + q = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 +IRP_ELSE + q = TRAILZ(Isomo) + 1 +IRP_ENDIF + else + ! SOMO -> SOMO + !print *,"obt SOMO -> SOMO" + extyp = 1 +IRP_IF WITHOUT_TRAILZ + q = (popcnt(ieor( IEOR(Idomo,Jdomo), IEOR(Idomo,Jdomo)-1))-1) + 1 +IRP_ELSE + q = TRAILZ(IEOR(Idomo,Jdomo)) + 1 +IRP_ENDIF + Isomo = IEOR(Isomo, Jsomo) + Isomo = IBCLR(Isomo,q-1) +IRP_IF WITHOUT_TRAILZ + p = (popcnt(ieor(Isomo,Isomo-1))-1) + 1 +IRP_ELSE + p = TRAILZ(Isomo) + 1 +IRP_ENDIF + end if + case (2) + ! DOMO -> SOMO + !print *,"obt DOMO -> SOMO" + extyp = 4 + IJsomo = IEOR(Isomo, Jsomo) +IRP_IF WITHOUT_TRAILZ + p = (popcnt(ieor(IAND(Jsomo,IJsomo) ,IAND(Jsomo,IJsomo) -1))-1) + 1 +IRP_ELSE + p = TRAILZ(IAND(Jsomo,IJsomo)) + 1 +IRP_ENDIF + IJsomo = IBCLR(IJsomo,p-1) +IRP_IF WITHOUT_TRAILZ + q = (popcnt(ieor(IJsomo,IJsomo-1))-1) + 1 +IRP_ELSE + q = TRAILZ(IJsomo) + 1 +IRP_ENDIF + case default + print *,"something went wront in get connectedI" + end select + starti = psi_config_data(idxJ,1) + endi = psi_config_data(idxJ,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 1.0d0 + else if((ndiffSOMO + ndiffDOMO) .EQ. 0) then + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + Isomo = listconnectedJ(1,1,i) + Idomo = listconnectedJ(1,2,i) + do ii = 1,mo_num + if(POPCNT(IAND(Isomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 1 + endif + end do + ! holes in DOMO + do ii = 1,mo_num + if(POPCNT(IAND(Idomo,IBSET(0_8,ii-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = ii + holetype(nholes) = 2 + endif + end do + + do k=1,nholes + p = listholes(k) + q = p + extyp = 1 + if(holetype(k) .EQ. 1) then + starti = psi_config_data(idxJ,1) + endi = psi_config_data(idxJ,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 1.0d0 + else + starti = psi_config_data(idxJ,1) + endi = psi_config_data(idxJ,2) + nconnectedI += 1 + connectedI(:,:,nconnectedI) = listconnectedJ(:,:,i) + idxs_connectedI(nconnectedI)=starti + excitationIds(1,nconnectedI)=p + excitationIds(2,nconnectedI)=q + excitationTypes(nconnectedI) = extyp + diagfactors(nconnectedI) = 2.0d0 + endif + enddo + endif + end do + +end subroutine obtain_connected_I_foralpha_fromfilterdlist + + +subroutine convertOrbIdsToModelSpaceIds(Ialpha, Jcfg, p, q, extype, pmodel, qmodel) implicit none BEGIN_DOC - ! Documentation for DetToCSFTransformationMatrix - ! Provides the matrix of transformatons for the - ! conversion between determinant to CSF basis (in BFs) + ! This function converts the orbital ids + ! in real space to those used in model space + ! in order to identify the matrices required + ! for the calculation of MEs. + ! + ! The type of excitations are ordered as follows: + ! Type 1 - SOMO -> SOMO + ! Type 2 - DOMO -> VMO + ! Type 3 - SOMO -> VMO + ! Type 4 - DOMO -> SOMO END_DOC - integer(bit_kind) :: mask(N_int), Ialpha(N_int),Ibeta(N_int) - integer :: rows, cols, i, j, k - integer :: startdet, enddet - integer*8 MS, Isomo, Idomo - integer ndetI - integer :: getNSOMO - real*8,dimension(:,:),allocatable :: tempBuffer - real*8,dimension(:),allocatable :: tempCoeff - real*8 :: norm_det1, phasedet - norm_det1 = 0.d0 - MS = elec_alpha_num - elec_beta_num - ! initialization - psi_coef_config = 0.d0 - DetToCSFTransformationMatrix(0,:,:) = 1.d0 - do i = 2-iand(elec_alpha_num-elec_beta_num,1), NSOMOMax,2 - Isomo = IBSET(0_8, i) - 1_8 - ! rows = Ncsfs - ! cols = Ndets - bfIcfg = max(1,nint((binom(i,(i+1)/2)-binom(i,((i+1)/2)+1)))) - ndetI = max(1,nint((binom(i,(i+1)/2)))) + integer(bit_kind),intent(in) :: Ialpha(N_int,2) + integer(bit_kind),intent(in) :: Jcfg(N_int,2) + integer,intent(in) :: p,q + integer,intent(in) :: extype + integer,intent(out) :: pmodel,qmodel + !integer(bit_kind) :: Isomo(N_int) + !integer(bit_kind) :: Idomo(N_int) + !integer(bit_kind) :: Jsomo(N_int) + !integer(bit_kind) :: Jdomo(N_int) + integer*8 :: Isomo + integer*8 :: Idomo + integer*8 :: Jsomo + integer*8 :: Jdomo + integer*8 :: mask + integer :: iint, ipos + !integer(bit_kind) :: Isomotmp(N_int) + !integer(bit_kind) :: Jsomotmp(N_int) + integer*8 :: Isomotmp + integer*8 :: Jsomotmp + integer :: pos0,pos0prev - allocate(tempBuffer(bfIcfg,ndetI)) - call getCSFtoDETTransformationMatrix(Isomo, MS, NBFMax, maxDetDimPerBF, tempBuffer) - DetToCSFTransformationMatrix(i,1:bfIcfg,1:ndetI) = tempBuffer(1:bfIcfg,1:ndetI) - deallocate(tempBuffer) + ! TODO Flag (print) when model space indices is > 64 + Isomo = Ialpha(1,1) + Idomo = Ialpha(1,2) + Jsomo = Jcfg(1,1) + Jdomo = Jcfg(1,2) + pos0prev = 0 + pmodel = p + qmodel = q + + if(p .EQ. q) then + pmodel = 1 + qmodel = 1 + else + select case(extype) + case (1) + ! SOMO -> SOMO + ! remove all domos + !print *,"type -> SOMO -> SOMO" + mask = ISHFT(1_8,p) - 1 + Isomotmp = IAND(Isomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + mask = ISHFT(1_8,q) - 1 + Isomotmp = IAND(Isomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Isomotmp,mask)) + case (2) + ! DOMO -> VMO + ! remove all domos except one at p + !print *,"type -> DOMO -> VMO" + mask = ISHFT(1_8,p) - 1 + Jsomotmp = IAND(Jsomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + mask = ISHFT(1_8,q) - 1 + Jsomotmp = IAND(Jsomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Jsomotmp,mask)) + case (3) + ! SOMO -> VMO + !print *,"type -> SOMO -> VMO" + !Isomo = IEOR(Isomo,Jsomo) + if(p.LT.q) then + mask = ISHFT(1_8,p) - 1 + Isomo = IAND(Isomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + mask = ISHFT(1_8,q) - 1 + Jsomo = IAND(Jsomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1 + else + mask = ISHFT(1_8,p) - 1 + Isomo = IAND(Isomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1 + mask = ISHFT(1_8,q) - 1 + Jsomo = IAND(Jsomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + endif + case (4) + ! DOMO -> SOMO + ! remove all domos except one at p + !print *,"type -> DOMO -> SOMO" + !Isomo = IEOR(Isomo,Jsomo) + if(p.LT.q) then + mask = ISHFT(1_8,p) - 1 + Jsomo = IAND(Jsomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + mask = ISHFT(1_8,q) - 1 + Isomo = IAND(Isomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + 1 + else + mask = ISHFT(1_8,p) - 1 + Jsomo = IAND(Jsomo,mask) + pmodel = POPCNT(mask) - POPCNT(XOR(Jsomo,mask)) + 1 + mask = ISHFT(1_8,q) - 1 + Isomo = IAND(Isomo,mask) + qmodel = POPCNT(mask) - POPCNT(XOR(Isomo,mask)) + endif + case default + print *,"something is wrong in convertOrbIdsToModelSpaceIds" + end select + endif + !print *,p,q,"model ids=",pmodel,qmodel +end subroutine convertOrbIdsToModelSpaceIds + +subroutine calculate_sigma_vector_cfg_nst_naive_store(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep) + implicit none + use bitmasks + use omp_lib + BEGIN_DOC + ! Documentation for sigma-vector calculation + ! + ! Calculates the result of the + ! application of the hamiltonian to the + ! wavefunction in CFG basis once + ! TODO : Things prepare outside this routine + ! 1. Touch the providers for + ! a. ApqIJ containers + ! b. DET to CSF transformation matrices + ! 2. DET to CSF transcormation + ! 2. CSF to DET back transcormation + ! returns : psi_coef_out_det : + END_DOC + integer,intent(in) :: sze, istart,iend, istep, ishift, n_st + real*8,intent(in) :: psi_in(n_st,sze) + real*8,intent(out) :: psi_out(n_st,sze) + integer(bit_kind) :: Icfg(N_INT,2) + integer :: i,j,k,l,p,q,noccp,noccq, m, n, idxI, nocck,orbk + integer :: ii,jj,kk,ll,pp,qq + integer(bit_kind),dimension(:,:,:),allocatable :: listconnectedJ + integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg + integer(bit_kind),dimension(:,:,:),allocatable :: singlesI + integer(bit_kind),dimension(:,:,:),allocatable :: connectedI_alpha + integer,dimension(:),allocatable :: idxs_singlesI + integer,dimension(:),allocatable :: idxs_connectedI_alpha + integer,dimension(:,:),allocatable :: excitationIds_single + integer,dimension(:),allocatable :: excitationTypes_single + integer,dimension(:,:),allocatable :: excitationIds + integer,dimension(:),allocatable :: excitationTypes + integer,dimension(:),allocatable :: idslistconnectedJ + real*8,dimension(:),allocatable :: diagfactors + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer :: Nalphas_Icfg, nconnectedI, rowsikpq, colsikpq, nsinglesI + integer :: extype,NSOMOalpha,NSOMOI,NSOMOJ,pmodel,qmodel + integer :: getNSOMO + integer :: totcolsTKI + integer :: rowsTKI + integer :: noccpp + integer :: istart_cfg, iend_cfg, num_threads_max + integer :: nconnectedJ,nconnectedtotalmax,nconnectedmaxJ,maxnalphas,ntotJ + integer*8 :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta + integer :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk + real*8 :: norm_coef_cfg, fac2eints + real*8 :: norm_coef_det + real*8 :: meCC1, meCC2, diagfac + real*8,dimension(:,:,:),allocatable :: TKI + real*8,dimension(:,:),allocatable :: GIJpqrs + real*8,dimension(:,:,:),allocatable :: TKIGIJ + real*8,dimension(:),allocatable :: psi_out_tmp + real*8,dimension(:,:),allocatable :: CCmattmp + real*8, external :: mo_two_e_integral + real*8, external :: get_two_e_integral + real*8,dimension(:),allocatable:: diag_energies + real*8 :: tmpvar, tmptot + real*8 :: core_act_contrib + + integer(omp_lock_kind), allocatable :: lock(:) + call omp_set_max_active_levels(1) + + !print *," sze = ",sze + allocate(lock(sze)) + do i=1,sze + call omp_init_lock(lock(i)) + enddo + !do i=1,size(psi_config_data,1) + ! print *,"i=",i," psi_cfg_data_1=",psi_config_data(i,1)," psi_cfg_data_2=",psi_config_data(i,2) + !end do + + allocate(diag_energies(n_CSF)) + call calculate_preconditioner_cfg(diag_energies) + !print *," diag energy =",diag_energies(1) + + MS = 0 + norm_coef_cfg=0.d0 + + psi_out=0.d0 + + istart_cfg = psi_csf_to_config_data(istart) + iend_cfg = psi_csf_to_config_data(iend) + + !nconnectedtotalmax = 1000 + !nconnectedmaxJ = 1000 + maxnalphas = elec_num*mo_num + Icfg(1,1) = psi_configuration(1,1,1) + Icfg(1,2) = psi_configuration(1,2,1) + allocate(listconnectedJ(N_INT,2,max(sze,10000))) + allocate(idslistconnectedJ(max(sze,10000))) + call obtain_connected_J_givenI(1, Icfg, listconnectedJ, idslistconnectedJ, nconnectedmaxJ, nconnectedtotalmax) + deallocate(listconnectedJ) + deallocate(idslistconnectedJ) + + integer*8, allocatable :: bit_tmp(:) + integer*8, external :: configuration_search_key + double precision :: diagfactors_0 + allocate( bit_tmp(0:N_configuration+1)) + do j=1,N_configuration + bit_tmp(j) = configuration_search_key(psi_configuration(1,1,j),N_int) enddo - integer s, bfIcfg - integer countcsf - countcsf = 0 - integer countdet - countdet = 0 - integer idx - integer istate - istate = 1 - phasedet = 1.0d0 - do i = 1,N_configuration - startdet = psi_configuration_to_psi_det(1,i) - enddet = psi_configuration_to_psi_det(2,i) - ndetI = enddet-startdet+1 + call omp_set_max_active_levels(1) + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP private(i,icfg, isomo, idomo, NSOMOI, NSOMOJ, nholes, k, listholes,& + !$OMP holetype, vmotype, nvmos, listvmos, starti, endi, & + !$OMP nsinglesI, singlesI,idxs_singlesI,excitationIds_single,& + !$OMP excitationTypes_single, idxI, p, q, extype, pmodel, qmodel,& + !$OMP Jsomo, Jdomo, startj, endj, kk, jj, ii, cnti, cntj, meCC1,& + !$OMP nconnectedJ,listconnectedJ,idslistconnectedJ,ntotJ, & + !$OMP Nalphas_Icfg,alphas_Icfg,connectedI_alpha, & + !$OMP idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors,& + !$OMP totcolsTKI,rowsTKI,NSOMOalpha,rowsikpq, & + !$OMP colsikpq, GIJpqrs,TKIGIJ,j,l,m,TKI,CCmattmp, moi, moj, mok, mol,& + !$OMP diagfac, tmpvar, diagfactors_0) & + !$OMP shared(istart_cfg, iend_cfg, psi_configuration, mo_num, psi_config_data,& + !$OMP N_int, N_st, psi_out, psi_in, h_core_ri, core_energy, h_act_ri, AIJpqContainer,& + !$OMP pp, sze, NalphaIcfg_list,alphasIcfg_list, bit_tmp, & + !$OMP AIJpqMatrixDimsList, diag_energies, n_CSF, lock, NBFmax,nconnectedtotalmax, nconnectedmaxJ,maxnalphas,& + !$OMP n_core_orb, n_act_orb, list_act, n, list_core, list_core_is_built,core_act_contrib, num_threads_max,& + !$OMP n_core_orb_is_built, mo_integrals_map, mo_integrals_map_is_built) - allocate(tempCoeff(ndetI)) - countdet = 1 - do j = startdet, enddet - idx = psi_configuration_to_psi_det_data(j) - Ialpha(:) = psi_det(:,1,idx) - Ibeta(:) = psi_det(:,2,idx) - call get_phase_qp_to_cfg(Ialpha, Ibeta, phasedet) - tempCoeff(countdet) = psi_coef(idx, istate)*phasedet - norm_det1 += tempCoeff(countdet)*tempCoeff(countdet) - countdet += 1 + allocate(singlesI(N_INT,2,max(sze,10000))) + allocate(idxs_singlesI(max(sze,10000))) + allocate(excitationIds_single(2,max(sze,10000))) + allocate(excitationTypes_single(max(sze,10000))) +! + + !!!====================!!! + !!! Single Excitations !!! + !!!====================!!! + + !$OMP DO SCHEDULE(dynamic,16) + do i=istart_cfg,iend_cfg + + ! if Seniority_range > 8 then + ! continue + ! else + ! cycle + + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + Isomo = Icfg(1,1) + Idomo = Icfg(1,2) + NSOMOI = getNSOMO(Icfg) + + ! find out all pq holes possible + nholes = 0 + ! holes in SOMO + ! list_act + ! list_core + ! list_core_inact + ! bitmasks + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) + if(POPCNT(IAND(Isomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 1 + endif + enddo + ! holes in DOMO + !do k = 1,mo_num + do kk = 1,n_act_orb + k = list_act(kk) + if(POPCNT(IAND(Idomo,IBSET(0_8,k-1))) .EQ. 1) then + nholes += 1 + listholes(nholes) = k + holetype(nholes) = 2 + endif + enddo + + ! find vmos + listvmos = -1 + vmotype = -1 + nvmos = 0 + do kk = 1,n_act_orb + k = list_act(kk) + !print *,i,IBSET(0,i-1),POPCNT(IAND(Isomo,(IBSET(0_8,i-1)))), POPCNT(IAND(Idomo,(IBSET(0_8,i-1)))) + if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 0 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 0 + else if(POPCNT(IAND(Isomo,(IBSET(0_8,k-1)))) .EQ. 1 .AND. POPCNT(IAND(Idomo,(IBSET(0_8,k-1)))) .EQ. 0 ) then + nvmos += 1 + listvmos(nvmos) = k + vmotype(nvmos) = 1 + end if + enddo + + + ! Icsf ids + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + NSOMOI = getNSOMO(Icfg) + + call generate_all_singles_cfg_with_type(bit_tmp,Icfg,singlesI,idxs_singlesI,excitationIds_single,& + excitationTypes_single,nsinglesI,N_int) + + do j = 1,nsinglesI + idxI = idxs_singlesI(j) + NSOMOJ = getNSOMO(singlesI(1,1,j)) + p = excitationIds_single(1,j) + q = excitationIds_single(2,j) + extype = excitationTypes_single(j) + ! Off diagonal terms + call convertOrbIdsToModelSpaceIds(Icfg, singlesI(1,1,j), p, q, extype, pmodel, qmodel) + Jsomo = singlesI(1,1,j) + Jdomo = singlesI(1,2,j) + + ! Add the hole on J + if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes += 1 + listholes(nholes) = q + holetype(nholes) = 1 + endif + if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes += 1 + listholes(nholes) = q + holetype(nholes) = 2 + endif + + startj = psi_config_data(idxI,1) + endj = psi_config_data(idxI,2) + !print *,"i=",i," idxI=",idxI," startj=",startj," endj=",endj," sze=",sze + + !!! One-electron contribution !!! + do ii = starti, endi + cnti = ii-starti+1 + do jj = startj, endj + cntj = jj-startj+1 + !meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* h_core_ri(p,q) + core_act_contrib = 0.0d0 + if(p.ne.q)then + do pp=1,n_core_orb + n=list_core(pp) + core_act_contrib += 2.d0 * get_two_e_integral(p,n,q,n,mo_integrals_map) - get_two_e_integral(p,n,n,q,mo_integrals_map) + end do + endif + meCC1 = AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI)* (h_act_ri(p,q) + core_act_contrib) + !if(jj.eq.1.and.ii.eq.1)then + ! print *,"CC=",AIJpqContainer(cnti,cntj,pmodel,qmodel,extype,NSOMOI), " p=",p," q=",q + !endif + call omp_set_lock(lock(jj)) + do kk = 1,n_st + psi_out(kk,jj) = psi_out(kk,jj) + meCC1 * psi_in(kk,ii) + enddo + call omp_unset_lock(lock(jj)) + enddo enddo - s = 0 - do k=1,N_int - if (psi_configuration(k,1,i) == 0_bit_kind) cycle - s = s + popcnt(psi_configuration(k,1,i)) - enddo - bfIcfg = max(1,nint((binom(s,(s+1)/2)-binom(s,((s+1)/2)+1)))) - - ! perhaps blocking with CFGs of same seniority - ! can be more efficient - allocate(tempBuffer(bfIcfg,ndetI)) - tempBuffer = DetToCSFTransformationMatrix(s,:bfIcfg,:ndetI) - - call dgemm('N','N', bfIcfg, 1, ndetI, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, size(tempCoeff,1), 0.d0, psi_coef_config(countcsf+1), size(psi_coef_config,1)) - !call dgemv('N', NBFMax, maxDetDimPerBF, 1.d0, tempBuffer, size(tempBuffer,1), tempCoeff, 1, 0.d0, psi_coef_config(countcsf), 1) - - deallocate(tempCoeff) - deallocate(tempBuffer) - psi_config_data(i,1) = countcsf + 1 - countcsf += bfIcfg - psi_config_data(i,2) = countcsf + ! Undo setting in listholes + if(POPCNT(IAND(Jsomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes -= 1 + endif + if((POPCNT(IAND(Jdomo,IBSET(0_8,q-1))) .EQ. 1 .AND. POPCNT(IAND(Idomo,IBSET(0_8,q-1))) .EQ. 0) .AND. POPCNT(IAND(Isomo,IBSET(0_8,q-1))) .EQ. 0) then + nholes -= 1 + endif + enddo enddo + !$OMP END DO + deallocate(singlesI) + deallocate(idxs_singlesI) + deallocate(excitationIds_single) + deallocate(excitationTypes_single) - END_PROVIDER + !print *," singles part psi(1,1)=",psi_out(1,1) + + allocate(listconnectedJ(N_INT,2,max(sze,10000))) + allocate(alphas_Icfg(N_INT,2,max(sze,10000))) + allocate(connectedI_alpha(N_INT,2,max(sze,10000))) + allocate(idxs_connectedI_alpha(max(sze,10000))) + allocate(excitationIds(2,max(sze,10000))) + allocate(excitationTypes(max(sze,10000))) + allocate(diagfactors(max(sze,10000))) + allocate(idslistconnectedJ(max(sze,10000))) + allocate(CCmattmp(n_st,NBFmax)) + + !!!====================!!! + !!! Double Excitations !!! + !!!====================!!! + + ! Loop over all selected configurations + !$OMP DO SCHEDULE(static) + do i = istart_cfg,iend_cfg + + ! if Seniority_range > 8 then + ! continue + ! else + ! cycle + + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + + ! Returns all unique (checking the past) singly excited cfgs connected to I + Nalphas_Icfg = 0 + ! TODO: + ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate + + Nalphas_Icfg = NalphaIcfg_list(i) + alphas_Icfg(1:n_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg) + if(Nalphas_Icfg .GT. maxnalphas) then + print *,"Nalpha > maxnalpha" + endif + + call obtain_connected_J_givenI(i, Icfg, listconnectedJ, idslistconnectedJ, nconnectedJ, ntotJ) + + ! TODO : remove doubly excited for return + !print *,"I=",i," isomo=",psi_configuration(1,1,i)," idomo=",psi_configuration(1,2,i), " psiout=",psi_out(1,5) + do k = 1,Nalphas_Icfg + ! Now generate all singly excited with respect to a given alpha CFG + + !call obtain_connected_I_foralpha_fromfilterdlist(i,nconnectedJ, idslistconnectedJ, & + ! listconnectedJ, alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI, & + ! excitationIds,excitationTypes,diagfactors) + + call obtain_connected_I_foralpha(i, alphas_Icfg(1,1,k), connectedI_alpha, idxs_connectedI_alpha, & + nconnectedI, excitationIds, excitationTypes, diagfactors) + + + if(nconnectedI .EQ. 0) then + cycle + endif + + !if(i .EQ. 1) then + ! print *,'k=',k,' kcfgSOMO=',alphas_Icfg(1,1,k),' ',POPCNT(alphas_Icfg(1,1,k)),' kcfgDOMO=',alphas_Icfg(1,2,k),' ',POPCNT(alphas_Icfg(1,2,k)) + !endif + + ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. + totcolsTKI = 0 + rowsTKI = -1 + NSOMOalpha = getNSOMO(alphas_Icfg(:,:,k)) + do j = 1,nconnectedI + NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + ! for E_pp E_rs and E_ppE_rr case + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + !print *,"j=",j," Nsomo=",NSOMOalpha," rowsikpq=",rowsikpq," colsikpq=",colsikpq, " p=",pmodel," q=",qmodel, " extyp=",extype + totcolsTKI += colsikpq + rowsTKI = rowsikpq + enddo + + allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF + ! Initialize the integral container + ! dims : (totcolsTKI, nconnectedI) + allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs + allocate(TKIGIJ(n_st,rowsTKI,nconnectedI)) ! TKI * gpqrs + !print *,"\t---rowsTKI=",rowsTKI," totCols=",totcolsTKI + TKI = 0.d0 + GIJpqrs = 0.d0 + TKIGIJ = 0.d0 + + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOI = getNSOMO(connectedI_alpha(:,:,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + rowsTKI = rowsikpq + !if(i.eq.1) then + ! print *,rowsTKI,colsikpq," | ",pmodel,qmodel,extype,NSOMOalpha + !endif + do m = 1,colsikpq + do l = 1,rowsTKI + do kk = 1,n_st + TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) & + * psi_in(kk,idxs_connectedI_alpha(j)+m-1) + enddo + !if(i.eq.1) then + ! print *,AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) + !endif + enddo + enddo + + diagfactors_0 = diagfactors(j)*0.5d0 + moi = excitationIds(1,j) ! p + mok = excitationIds(2,j) ! q + do l=1,nconnectedI + moj = excitationIds(2,l) ! s + mol = excitationIds(1,l) ! r + diagfac = diagfactors_0 * diagfactors(l)* mo_two_e_integral(mok,mol,moi,moj)! g(pq,sr) = + !print *,"p=",mok,"q=",mol,"r=",moi,"s=",moj + do m = 1,colsikpq + ! = (ik|jl) + GIJpqrs(totcolsTKI+m,l) = diagfac + enddo + enddo + totcolsTKI += colsikpq + enddo + + + ! Do big BLAS + call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0, & + TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0, & + TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) ) + + + ! Collect the result + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + rowsTKI = rowsikpq + CCmattmp = 0.d0 + + call dgemm('N','N', n_st, colsikpq, rowsTKI, 1.d0, & + TKIGIJ(1,1,j), size(TKIGIJ,1), & + AIJpqContainer(1,1,pmodel,qmodel,extype,NSOMOalpha), & + size(AIJpqContainer,1), 0.d0, & + CCmattmp, size(CCmattmp,1) ) + + do m = 1,colsikpq + call omp_set_lock(lock(idxs_connectedI_alpha(j)+m-1)) + do kk = 1,n_st + psi_out(kk,idxs_connectedI_alpha(j)+m-1) += CCmattmp(kk,m) + enddo + call omp_unset_lock(lock(idxs_connectedI_alpha(j)+m-1)) + enddo + totcolsTKI += colsikpq + enddo + + deallocate(TKI) ! coefficients of CSF + deallocate(GIJpqrs) ! gpqrs + deallocate(TKIGIJ) ! gpqrs + + enddo ! loop over alphas + enddo ! loop over I + !$OMP END DO + call omp_set_max_active_levels(4) + deallocate(CCmattmp) + deallocate(connectedI_alpha) + deallocate(idxs_connectedI_alpha) + deallocate(excitationIds) + deallocate(excitationTypes) + deallocate(diagfactors) + + !print *," psi(1,823)=",psi_out(1,823), " g(1 8, 3 15)=",mo_two_e_integral(1,8,3,15), " ncore=",n_core_orb + !print *," psi(1,1)=",psi_out(1,1) + + ! Add the diagonal contribution + !$OMP DO + do i = 1,n_CSF + do kk=1,n_st + psi_out(kk,i) += diag_energies(i)*psi_in(kk,i) + enddo + enddo + !$OMP END DO + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + deallocate(diag_energies) + deallocate(bit_tmp) + +end subroutine calculate_sigma_vector_cfg_nst_naive_store + + + + +subroutine calculate_sigma_vector_cfg_nst(psi_out, psi_in, n_st, sze, istart, iend, ishift, istep) + implicit none + use bitmasks + BEGIN_DOC + ! Documentation for sigma-vector calculation + ! + ! Calculates the result of the + ! application of the hamiltonian to the + ! wavefunction in CFG basis once + ! TODO : Things prepare outside this routine + ! 1. Touch the providers for + ! a. ApqIJ containers + ! b. DET to CSF transformation matrices + ! 2. DET to CSF transcormation + ! 2. CSF to DET back transcormation + ! returns : psi_coef_out_det : + END_DOC + integer,intent(in) :: sze, istart,iend, istep, ishift, n_st + real*8,intent(in) :: psi_in(sze,n_st) + real*8,intent(out) :: psi_out(sze,n_st) + integer(bit_kind) :: Icfg(N_INT,2) + integer :: i,j,k,l,p,q,noccp,noccq, ii, jj, m, n, idxI, kk, nocck,orbk + integer(bit_kind),dimension(:,:,:),allocatable :: alphas_Icfg + integer(bit_kind),dimension(:,:,:),allocatable :: singlesI + integer(bit_kind),dimension(:,:,:),allocatable :: connectedI_alpha + integer,dimension(:),allocatable :: idxs_singlesI + integer,dimension(:),allocatable :: idxs_connectedI_alpha + integer,dimension(:,:),allocatable :: excitationIds_single + integer,dimension(:),allocatable :: excitationTypes_single + integer,dimension(:,:),allocatable :: excitationIds + integer,dimension(:),allocatable :: excitationTypes + real*8,dimension(:),allocatable :: diagfactors + integer :: nholes + integer :: nvmos + integer :: listvmos(mo_num) + integer :: vmotype(mo_num) ! 1 -> VMO 2 -> SOMO + integer :: listholes(mo_num) + integer :: holetype(mo_num) ! 1-> SOMO 2->DOMO + integer :: Nalphas_Icfg, nconnectedI, rowsikpq, colsikpq, nsinglesI + integer :: extype,NSOMOalpha,NSOMOI,NSOMOJ,pmodel,qmodel + integer :: getNSOMO + integer :: totcolsTKI + integer :: rowsTKI + integer :: noccpp + integer :: istart_cfg, iend_cfg + integer*8 :: MS, Isomo, Idomo, Jsomo, Jdomo, Ialpha, Ibeta + integer :: moi, moj, mok, mol, starti, endi, startj, endj, cnti, cntj, cntk + real*8 :: norm_coef_cfg, fac2eints + real*8 :: norm_coef_det + real*8 :: meCC1, meCC2, diagfac + real*8,dimension(:,:,:),allocatable :: TKI + real*8,dimension(:,:),allocatable :: GIJpqrs + real*8,dimension(:,:,:),allocatable :: TKIGIJ + real*8, external :: mo_two_e_integral + real*8, external :: get_two_e_integral + real*8 :: diag_energies(n_CSF) + + ! allocate + allocate(alphas_Icfg(N_INT,2,max(sze/2,100))) + allocate(singlesI(N_INT,2,max(sze/2,100))) + allocate(connectedI_alpha(N_INT,2,max(sze/2,100))) + allocate(idxs_singlesI(max(sze/2,100))) + allocate(idxs_connectedI_alpha(max(sze/2,100))) + allocate(excitationIds_single(2,max(sze/2,100))) + allocate(excitationTypes_single(max(sze/2,100))) + allocate(excitationIds(2,max(sze/2,100))) + allocate(excitationTypes(max(sze/2,100))) + allocate(diagfactors(max(sze/2,100))) + + + !print *," sze = ",sze + call calculate_preconditioner_cfg(diag_energies) + + MS = 0 + norm_coef_cfg=0.d0 + + psi_out=0.d0 + + istart_cfg = psi_csf_to_config_data(istart) + iend_cfg = psi_csf_to_config_data(iend) + + + !!! Single Excitations !!! + do i=istart_cfg,iend_cfg + print *,"I=",i + + ! if Seniority_range > 8 then + ! continue + ! else + ! cycle + + Icfg(1,1) = psi_configuration(1,1,i) + Icfg(1,2) = psi_configuration(1,2,i) + starti = psi_config_data(i,1) + endi = psi_config_data(i,2) + + ! Returns all unique (checking the past) singly excited cfgs connected to I + Nalphas_Icfg = 0 + ! TODO: + ! test if size(alphas_Icfg,1) < Nmo**2) then deallocate + allocate + !call obtain_associated_alphaI(i, Icfg, alphas_Icfg, Nalphas_Icfg) + Nalphas_Icfg = NalphaIcfg_list(i) + alphas_Icfg(1:N_int,1:2,1:Nalphas_Icfg) = alphasIcfg_list(1:n_int,1:2,i,1:Nalphas_Icfg) + + ! TODO : remove doubly excited for return + ! Here we do 2x the loop. One to count for the size of the matrix, then we compute. + do k = 1,Nalphas_Icfg + ! Now generate all singly excited with respect to a given alpha CFG + call obtain_connected_I_foralpha(i,alphas_Icfg(1,1,k),connectedI_alpha,idxs_connectedI_alpha,nconnectedI,excitationIds,excitationTypes,diagfactors) + + totcolsTKI = 0 + rowsTKI = -1 + do j = 1,nconnectedI + NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k)) + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + ! for E_pp E_rs and E_ppE_rr case + if(p.EQ.q) then + NSOMOalpha = NSOMOI + endif + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + totcolsTKI += colsikpq +! if(rowsTKI .LT. rowsikpq .AND. rowsTKI .NE. -1) then +! print *,">",j,"Something is wrong in sigma-vector", rowsTKI, rowsikpq, "(p,q)=",pmodel,qmodel,"ex=",extype,"na=",NSOMOalpha," nI=",NSOMOI +! !rowsTKI = rowsikpq +! else + rowsTKI = rowsikpq +! endif + enddo + + allocate(TKI(n_st,rowsTKI,totcolsTKI)) ! coefficients of CSF + ! Initialize the inegral container + ! dims : (totcolsTKI, nconnectedI) + allocate(GIJpqrs(totcolsTKI,nconnectedI)) ! gpqrs + allocate(TKIGIJ(n_st,rowsTKI,nconnectedI)) ! TKI * gpqrs + + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k)) + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(1,1,k), connectedI_alpha(1,1,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + do m = 1,colsikpq + do l = 1,rowsTKI + do kk = 1,n_st + TKI(kk,l,totcolsTKI+m) = AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * psi_in(kk,idxs_connectedI_alpha(j)+m-1) + enddo + enddo + enddo + do m = 1,colsikpq + do l = 1,nconnectedI + ! = (ik|jl) + moi = excitationIds(1,j) ! p + mok = excitationIds(2,j) ! q + moj = excitationIds(2,l) ! s + mol = excitationIds(1,l) ! r + if(moi.EQ.mok .AND. moj.EQ.mol)then + diagfac = diagfactors(j) + diagfac *= diagfactors(l) + !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac + GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = + else + diagfac = diagfactors(j)*diagfactors(l) + !print *,"integrals (",totcolsTKI+m,l,")",mok,moi,mol,moj, "|", diagfac + GIJpqrs(totcolsTKI+m,l) = diagfac*0.5d0*mo_two_e_integral(mok,mol,moi,moj) ! g(pq,sr) = + !endif + endif + enddo + enddo + totcolsTKI += colsikpq + enddo + + + + ! Do big BLAS + ! TODO TKI, size(TKI,1)*size(TKI,2) + call dgemm('N','N', rowsTKI*n_st, nconnectedI, totcolsTKI, 1.d0,& + TKI, size(TKI,1)*size(TKI,2), GIJpqrs, size(GIJpqrs,1), 0.d0,& + TKIGIJ , size(TKIGIJ,1)*size(TKIGIJ,2) ) + + + ! Collect the result + totcolsTKI = 0 + do j = 1,nconnectedI + NSOMOalpha = getNSOMO(alphas_Icfg(1,1,k)) + NSOMOI = getNSOMO(connectedI_alpha(1,1,j)) + p = excitationIds(1,j) + q = excitationIds(2,j) + extype = excitationTypes(j) + call convertOrbIdsToModelSpaceIds(alphas_Icfg(:,:,k), connectedI_alpha(:,:,j), p, q, extype, pmodel, qmodel) + rowsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,1) + colsikpq = AIJpqMatrixDimsList(NSOMOalpha,extype,pmodel,qmodel,2) + do m = 1,colsikpq + do l = 1,rowsTKI + do kk = 1,n_st + psi_out(kk,idxs_connectedI_alpha(j)+m-1) = psi_out(kk,idxs_connectedI_alpha(j)+m-1) + & + AIJpqContainer(l,m,pmodel,qmodel,extype,NSOMOalpha) * TKIGIJ(kk,l,j) + enddo + enddo + enddo + totcolsTKI += colsikpq + enddo + + deallocate(TKI) ! coefficients of CSF + ! Initialize the inegral container + ! dims : (totcolsTKI, nconnectedI) + deallocate(GIJpqrs) ! gpqrs + deallocate(TKIGIJ) ! gpqrs + + enddo ! loop over alphas + enddo ! loop over I + deallocate(connectedI_alpha) + deallocate(idxs_connectedI_alpha) + deallocate(excitationIds) + deallocate(excitationTypes) + deallocate(diagfactors) + + + ! Add the diagonal contribution + do i = 1,n_CSF + do kk=1,n_st + psi_out(kk,i) += diag_energies(i)*psi_in(kk,i) + enddo + enddo + call omp_set_max_active_levels(4) + +end subroutine calculate_sigma_vector_cfg_nst_naive_store diff --git a/src/csf/tree_utils.c b/src/csf/tree_utils.c index 1266b890..91bae8b2 100644 --- a/src/csf/tree_utils.c +++ b/src/csf/tree_utils.c @@ -1,3 +1,4 @@ +#include #include "tree_utils.h" void buildTree(Tree *bftree, @@ -52,6 +53,7 @@ void buildTreeDriver(Tree *bftree, int NSOMO, int MS, int *NBF){ int icpl = 0; // keep track of the ith ms (cannot be -ve) int addr = 0; // Counts the total BF's + assert(bftree->rootNode->addr == 0); buildTree(bftree, &(bftree->rootNode), isomo, izeros, icpl, NSOMO, MS); *NBF = bftree->rootNode->addr; @@ -264,6 +266,8 @@ void genDetBasis(Tree *dettree, int Isomo, int MS, int *ndets){ int NSOMO=0; getSetBits(Isomo, &NSOMO); genDetsDriver(dettree, NSOMO, MS, ndets); + // Closed shell case + if(NSOMO==0) (*ndets) = 1; } @@ -311,3 +315,13 @@ void callBlasMatxMat(double *A, int rowA, int colA, double *B, int rowB, int col break; } } + +void printRealMatrix(double *orthoMatrix, int rows, int cols){ + int i,j; + for(i=0;i += Dress_jj(i) * - call dressing_diag_uv(W(1,shift+1),U(1,shift+1),Dress_jj,N_st_diag_in,sze) + call dressing_diag_uv(W(:,shift+1),U(:,shift+1),Dress_jj,N_st_diag_in,sze) else ! Already computed in update below continue @@ -303,9 +303,9 @@ subroutine davidson_general_ext_rout(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_d ! -------------------------------------------------- call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1)) call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1)) ! Compute residual vector and davidson step ! ----------------------------------------- @@ -319,7 +319,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,Dress_jj,energies,sze,N_st,N_st_d enddo if (k <= N_st) then - residual_norm(k) = u_dot_u(U(1,shift2+k),sze) + residual_norm(k) = u_dot_u(U(:,shift2+k),sze) to_print(1,k) = lambda(k) to_print(2,k) = residual_norm(k) endif diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f new file mode 100644 index 00000000..670b2395 --- /dev/null +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f @@ -0,0 +1,500 @@ + +! --- + +subroutine davidson_general_diag_dressed_ext_rout_nonsym_b1space(u_in, H_jj, Dress_jj,energies, sze, N_st, N_st_diag_in, converged, hcalc) + + use mmap_module + + BEGIN_DOC + ! Generic modified-Davidson diagonalization + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! u_in : guess coefficients on the various states. Overwritten on exit by right eigenvectors + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > N_st + ! + ! Initial guess vectors are not necessarily orthonormal + ! + ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output) + END_DOC + + implicit none + + integer, intent(in) :: sze, N_st, N_st_diag_in + double precision, intent(in) :: H_jj(sze),Dress_jj(sze) + logical, intent(inout) :: converged + double precision, intent(inout) :: u_in(sze,N_st_diag_in) + double precision, intent(out) :: energies(N_st) + external hcalc + + character*(16384) :: write_buffer + integer :: iter, N_st_diag + integer :: i, j, k, l, m + integer :: iter2, itertot + logical :: disk_based + integer :: shift, shift2, itermax + integer :: nproc_target + integer :: order(N_st_diag_in) + double precision :: to_print(2,N_st) + double precision :: r1, r2, alpha + double precision :: cpu, wall + double precision :: cmax + double precision :: energy_shift(N_st_diag_in*davidson_sze_max) + double precision, allocatable :: U(:,:) + double precision, allocatable :: y(:,:), h(:,:), lambda(:) + double precision, allocatable :: residual_norm(:) + + double precision :: lambda_tmp + integer, allocatable :: i_omax(:) + double precision, allocatable :: U_tmp(:), overlap(:) + + double precision, allocatable :: W(:,:) + !double precision, pointer :: W(:,:) + double precision, external :: u_dot_v, u_dot_u + + + include 'constants.include.F' + + N_st_diag = N_st_diag_in +! print*,'trial vector' + do i = 1, sze + if(isnan(u_in(i,1)))then + print*,'pb in input vector of davidson_general_ext_rout_nonsym_b1space' + print*,i,u_in(i,1) + stop + else if (dabs(u_in(i,1)).lt.1.d-16)then + u_in(i,1) = 0.d0 + endif + enddo + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda + if(N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1 + + provide threshold_nonsym_davidson + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + double precision :: rss + integer :: maxab + maxab = sze + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag*itermax) &! U + + 1.d0*dble(sze*m)*(N_st_diag*itermax) &! W + + 2.d0*(N_st_diag*itermax)**2 &! h,y + + 2.d0*(N_st_diag*itermax) &! s2,lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_S2_u_0_nstates_zmq + + 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on collector + + 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on slave + + 0.5d0*maxab &! idx0 in H_S2_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if(nproc_target == 0) then + call check_mem(r1, irp_here) + nproc_target = 1 + exit + endif + + if(r1+rss < qp_max_mem) then + exit + endif + + if(itermax > 4) then + itermax = itermax - 1 + else if (m==1.and.disk_based_davidson) then + m = 0 + disk_based = .True. + itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + + call write_int(6, N_st, 'Number of states') + call write_int(6, N_st_diag, 'Number of states in diagonalization') + call write_int(6, sze, 'Number of basis functions') + call write_int(6, nproc_target, 'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if(disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + ! --- + + + allocate( W(sze,N_st_diag*itermax) ) + + allocate( & + ! Large + U(sze,N_st_diag*itermax), & + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + lambda(N_st_diag*itermax), & + residual_norm(N_st_diag), & + i_omax(N_st) & + ) + + U = 0.d0 + h = 0.d0 + y = 0.d0 + lambda = 0.d0 + residual_norm = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + + ! Davidson iterations + ! =================== + + converged = .False. + + ! Initialize from N_st to N_st_diag with gaussian random numbers + ! to be sure to have overlap with any eigenvectors + do k = N_st+1, N_st_diag + u_in(k,k) = 10.d0 + do i = 1, sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) + enddo + enddo + ! Normalize all states + do k = 1, N_st_diag + call normalize(u_in(1,k), sze) + enddo + + ! Copy from the guess input "u_in" to the working vectors "U" + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + ! --- + + itertot = 0 + + do while (.not.converged) + + itertot = itertot + 1 + if(itertot == 8) then + exit + endif + + do iter = 1, itermax-1 + + shift = N_st_diag * (iter-1) + shift2 = N_st_diag * iter + + if( (iter > 1) .or. (itertot == 1) ) then + + ! Gram-Schmidt to orthogonalize all new guess with the previous vectors + call ortho_qr(U, size(U, 1), sze, shift2) + call ortho_qr(U, size(U, 1), sze, shift2) + + ! W = H U + call hcalc(W(1,shift+1), U(1,shift+1), N_st_diag, sze) + call dress_calc(W(1,shift+1), Dress_jj, U(1,shift+1), N_st_diag, sze) + + else + + ! Already computed in update below + continue + endif + + ! Compute h_kl = = + ! ------------------------------------------- + call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 & + , U, size(U, 1), W, size(W, 1) & + , 0.d0, h, size(h, 1) ) + + + ! Diagonalize h y = lambda y + ! --------------------------- + call diag_nonsym_right(shift2, h(1,1), size(h, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1)) + + + ! Express eigenvectors of h in the determinant basis: + ! --------------------------------------------------- + + ! y(:,k) = rk + ! U(:,k) = Bk + ! U(:,shift2+k) = Rk = Bk x rk + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, U(1,shift2+1), size(U, 1) ) + + do k = 1, N_st_diag + call normalize(U(1,shift2+k), sze) + enddo + + ! --- + ! select the max overlap + + ! + ! start test ------------------------------------------------------------------------ + ! + !double precision, allocatable :: Utest(:,:), Otest(:) + !allocate( Utest(sze,shift2), Otest(shift2) ) + + !call dgemm( 'N', 'N', sze, shift2, shift2, 1.d0 & + ! , U, size(U, 1), y, size(y, 1), 0.d0, Utest(1,1), size(Utest, 1) ) + !do k = 1, shift2 + ! call normalize(Utest(1,k), sze) + !enddo + !do j = 1, sze + ! write(455, '(100(1X, F16.10))') (Utest(j,k), k=1,shift2) + !enddo + + !do k = 1, shift2 + ! Otest(k) = 0.d0 + ! do i = 1, sze + ! Otest(k) += Utest(i,k) * u_in(i,1) + ! enddo + ! Otest(k) = dabs(Otest(k)) + ! print *, ' Otest =', k, Otest(k), lambda(k) + !enddo + + !deallocate(Utest, Otest) + ! + ! end test ------------------------------------------------------------------------ + ! + + ! TODO + ! state_following is more efficient + do l = 1, N_st + + allocate( overlap(N_st_diag) ) + + do k = 1, N_st_diag + overlap(k) = 0.d0 + do i = 1, sze + overlap(k) = overlap(k) + U(i,shift2+k) * u_in(i,l) + enddo + overlap(k) = dabs(overlap(k)) + !print *, ' overlap =', k, overlap(k) + enddo + + lambda_tmp = 0.d0 + do k = 1, N_st_diag + if(overlap(k) .gt. lambda_tmp) then + i_omax(l) = k + lambda_tmp = overlap(k) + endif + enddo + + deallocate(overlap) + + if(lambda_tmp .lt. 0.7d0) then + print *, ' very small overlap ...', l, i_omax(l) + print *, ' max overlap = ', lambda_tmp + stop + endif + + if(i_omax(l) .ne. l) then + print *, ' !!! WARNONG !!!' + print *, ' index of state', l, i_omax(l) + endif + enddo + + ! y(:,k) = rk + ! W(:,k) = H x Bk + ! W(:,shift2+k) = H x Bk x rk + ! = Wk + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, W(1,shift2+1), size(W, 1) ) + + ! --- + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k = 1, N_st_diag + do i = 1, sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2) + enddo + if(k <= N_st) then + l = k + residual_norm(k) = u_dot_u(U(1,shift2+l), sze) + to_print(1,k) = lambda(l) + to_print(2,k) = residual_norm(l) + endif + enddo + !$OMP END PARALLEL DO + !residual_norm(1) = u_dot_u(U(1,shift2+1), sze) + !to_print(1,1) = lambda(1) + !to_print(2,1) = residual_norm(1) + + + if( (itertot > 1) .and. (iter == 1) ) then + !don't print + continue + else + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, F16.10, 1X, F16.10))') iter-1, to_print(1:2,1:N_st) + endif + + ! Check convergence + if(iter > 1) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson + endif + + do k = 1, N_st + if(residual_norm(k) > 1.e8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if(converged) then + exit + endif + + logical, external :: qp_stop + if(qp_stop()) then + converged = .True. + exit + endif + + enddo ! loop over iter + + + ! Re-contract U and update W + ! -------------------------------- + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1) ) + do k = 1, N_st_diag + do i = 1, sze + W(i,k) = u_in(i,k) + enddo + enddo + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1) ) + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + call ortho_qr(U, size(U, 1), sze, N_st_diag) + call ortho_qr(U, size(U, 1), sze, N_st_diag) + do j = 1, N_st_diag + k = 1 + do while( (k < sze) .and. (U(k,j) == 0.d0) ) + k = k+1 + enddo + if(U(k,j) * u_in(k,j) < 0.d0) then + do i = 1, sze + W(i,j) = -W(i,j) + enddo + endif + enddo + + enddo ! loop over while + + ! --- + + do k = 1, N_st + energies(k) = lambda(k) + enddo + write_buffer = '=====' + do i = 1, N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') trim(write_buffer) + write(6,'(A)') '' + call write_time(6) + + deallocate(W) + deallocate(U, h, y, lambda, residual_norm, i_omax) + + FREE nthreads_davidson + +end subroutine davidson_general_ext_rout_nonsym_b1space + +! --- + +subroutine dress_calc(v,dress,u,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Routine that computed the action of the diagonal dressing dress + ! + ! WARNING :: v is not initialiazed !!! + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u(sze,N_st),dress(sze) + double precision, intent(inout) :: v(sze,N_st) + integer :: i,istate + + do istate = 1, N_st + do i = 1, sze + v(i,istate) += dress(i) * u(i,istate) + enddo + enddo +end + + + + + + diff --git a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f index 884fd672..e59d21d1 100644 --- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f +++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f @@ -31,7 +31,8 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies double precision, intent(inout) :: u_in(sze,N_st_diag) double precision, intent(out) :: energies(N_st_diag) logical, intent(out) :: converged - external hcalc + + external :: hcalc double precision, allocatable :: H_jj_tmp(:) ASSERT (N_st > 0) @@ -224,7 +225,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies u_in(k,k) = u_in(k,k) + 10.d0 enddo do k=1,N_st_diag_in - call normalize(u_in(1,k),sze) + call normalize(u_in(:,k),sze) enddo do k=1,N_st_diag_in @@ -248,10 +249,10 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies if ((iter > 1).or.(itertot == 1)) then ! Compute |W_k> = \sum_i |i> ! ----------------------------------- - call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag_in,sze) + call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag_in,sze) ! Compute then the DIAGONAL PART OF THE DRESSING ! += Dress_jj(i) * - call dressing_diag_uv(W(1,shift+1),U(1,shift+1),Dress_jj,N_st_diag_in,sze) + call dressing_diag_uv(W(:,shift+1),U(:,shift+1),Dress_jj,N_st_diag_in,sze) else ! Already computed in update below continue @@ -275,20 +276,20 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies ! ! call dgemm('T','N', N_st, N_st_diag_in, sze, 1.d0, & ! psi_coef, size(psi_coef,1), & -! U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) +! U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) ! ! call dgemm('N','N', sze, N_st_diag_in, N_st, 1.0d0, & ! Dressing_vec, size(Dressing_vec,1), s_tmp, size(s_tmp,1), & -! 1.d0, W(1,shift+1), size(W,1)) +! 1.d0, W(:,shift+1), size(W,1)) ! ! ! call dgemm('T','N', N_st, N_st_diag_in, sze, 1.d0, & ! Dressing_vec, size(Dressing_vec,1), & -! U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) +! U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) ! ! call dgemm('N','N', sze, N_st_diag_in, N_st, 1.0d0, & ! psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), & -! 1.d0, W(1,shift+1), size(W,1)) +! 1.d0, W(:,shift+1), size(W,1)) ! endif @@ -376,9 +377,9 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies ! -------------------------------------------------- call dgemm('N','N', sze, N_st_diag_in, shift2, & - 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1)) call dgemm('N','N', sze, N_st_diag_in, shift2, & - 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1)) ! Compute residual vector and davidson step ! ----------------------------------------- @@ -392,7 +393,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies enddo if (k <= N_st) then - residual_norm(k) = u_dot_u(U(1,shift2+k),sze) + residual_norm(k) = u_dot_u(U(:,shift2+k),sze) to_print(1,k) = lambda(k) to_print(2,k) = residual_norm(k) endif diff --git a/src/dav_general_mat/dav_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_dressed_ext_rout.irp.f index c3bfe91a..c045aa1a 100644 --- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f @@ -214,7 +214,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di enddo ! Normalize all states do k=1,N_st_diag - call normalize(u_in(1,k),sze) + call normalize(u_in(:,k),sze) enddo ! Copy from the guess input "u_in" to the working vectors "U" @@ -244,7 +244,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di call ortho_qr(U,size(U,1),sze,shift2) ! it does W = H U with W(sze,N_st_diag),U(sze,N_st_diag) ! where sze is the size of the vector, N_st_diag is the number of states - call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag,sze) + call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag,sze) else ! Already computed in update below continue @@ -268,20 +268,20 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di stop ! call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & ! psi_coef, size(psi_coef,1), & -! U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) +! U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) ! ! call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & ! dressing_vec, size(dressing_vec,1), s_tmp, size(s_tmp,1), & -! 1.d0, W(1,shift+1), size(W,1)) +! 1.d0, W(:,shift+1), size(W,1)) ! ! ! call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & ! dressing_vec, size(dressing_vec,1), & -! U(1,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) +! U(:,shift+1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) ! ! call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & ! psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), & -! 1.d0, W(1,shift+1), size(W,1)) +! 1.d0, W(:,shift+1), size(W,1)) endif endif @@ -370,9 +370,9 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di ! -------------------------------------------------- call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1)) call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1)) ! Compute residual vector and davidson step ! ----------------------------------------- @@ -386,7 +386,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di enddo if (k <= N_st) then - residual_norm(k) = u_dot_u(U(1,shift2+k),sze) + residual_norm(k) = u_dot_u(U(:,shift2+k),sze) to_print(1,k) = lambda(k) to_print(2,k) = residual_norm(k) endif diff --git a/src/dav_general_mat/dav_ext_rout.irp.f b/src/dav_general_mat/dav_ext_rout.irp.f index aee4ba09..a4c47c27 100644 --- a/src/dav_general_mat/dav_ext_rout.irp.f +++ b/src/dav_general_mat/dav_ext_rout.irp.f @@ -196,7 +196,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co enddo ! Normalize all states do k=1,N_st_diag - call normalize(u_in(1,k),sze) + call normalize(u_in(:,k),sze) enddo ! Copy from the guess input "u_in" to the working vectors "U" @@ -226,7 +226,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co call ortho_qr(U,size(U,1),sze,shift2) ! it does W = H U with W(sze,N_st_diag),U(sze,N_st_diag) ! where sze is the size of the vector, N_st_diag is the number of states - call hcalc(W(1,shift+1),U(1,shift+1),N_st_diag,sze) + call hcalc(W(:,shift+1),U(:,shift+1),N_st_diag,sze) else ! Already computed in update below continue @@ -288,9 +288,9 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co ! -------------------------------------------------- call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1)) call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1)) ! Compute residual vector and davidson step ! ----------------------------------------- @@ -304,7 +304,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co enddo if (k <= N_st) then - residual_norm(k) = u_dot_u(U(1,shift2+k),sze) + residual_norm(k) = u_dot_u(U(:,shift2+k),sze) to_print(1,k) = lambda(k) to_print(2,k) = residual_norm(k) endif diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f index 39cb68bb..96775c50 100644 --- a/src/dav_general_mat/dav_general.irp.f +++ b/src/dav_general_mat/dav_general.irp.f @@ -206,7 +206,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv enddo ! Normalize all states do k=1,N_st_diag - call normalize(u_in(1,k),sze) + call normalize(u_in(:,k),sze) enddo ! Copy from the guess input "u_in" to the working vectors "U" @@ -236,8 +236,8 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv call ortho_qr(U,size(U,1),sze,shift2) call ortho_qr(U,size(U,1),sze,shift2) -! call H_S2_u_0_nstates_openmp(W(1,shift+1),U(1,shift+1),N_st_diag,sze) - call hpsi(W(1,shift+1),U(1,shift+1),N_st_diag,sze,h_mat) +! call H_S2_u_0_nstates_openmp(W(:,shift+1),U(:,shift+1),N_st_diag,sze) + call hpsi(W(:,shift+1),U(:,shift+1),N_st_diag,sze,h_mat) else ! Already computed in update below continue @@ -299,9 +299,9 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv ! -------------------------------------------------- call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1)) + 1.d0, U, size(U,1), y, size(y,1), 0.d0, U(:,shift2+1), size(U,1)) call dgemm('N','N', sze, N_st_diag, shift2, & - 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1)) + 1.d0, W, size(W,1), y, size(y,1), 0.d0, W(:,shift2+1), size(W,1)) ! Compute residual vector and davidson step ! ----------------------------------------- @@ -315,7 +315,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv enddo if (k <= N_st) then - residual_norm(k) = u_dot_u(U(1,shift2+k),sze) + residual_norm(k) = u_dot_u(U(:,shift2+k),sze) to_print(1,k) = lambda(k) to_print(2,k) = residual_norm(k) endif diff --git a/src/davidson/diagonalization_hcfg.irp.f b/src/davidson/diagonalization_hcfg.irp.f new file mode 100644 index 00000000..659602a1 --- /dev/null +++ b/src/davidson/diagonalization_hcfg.irp.f @@ -0,0 +1,624 @@ +subroutine davidson_diag_h_cfg(dets_in,u_in,dim_in,energies,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged) + use bitmasks + implicit none + BEGIN_DOC + ! Davidson diagonalization. + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + END_DOC + integer, intent(in) :: dim_in, sze, sze_csf, N_st, N_st_diag, Nint + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(inout) :: u_in(dim_in,N_st_diag) + double precision, intent(out) :: energies(N_st_diag) + integer, intent(in) :: dressing_state + logical, intent(out) :: converged + double precision, allocatable :: H_jj(:) + + double precision, external :: diag_H_mat_elem, diag_S_mat_elem + integer :: i,k + ASSERT (N_st > 0) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + PROVIDE mo_two_e_integrals_in_map + allocate(H_jj(sze)) + + H_jj(1) = diag_h_mat_elem(dets_in(1,1,1),Nint) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(sze,H_jj, dets_in,Nint) & + !$OMP PRIVATE(i) + !$OMP DO SCHEDULE(static) + do i=2,sze + H_jj(i) = diag_H_mat_elem(dets_in(1,1,i),Nint) + enddo + !$OMP END DO + !$OMP END PARALLEL + + if (dressing_state > 0) then + do k=1,N_st + do i=1,sze + H_jj(i) += u_in(i,k) * dressing_column_h(i,k) + enddo + enddo + endif + + call davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag,Nint,dressing_state,converged) + deallocate(H_jj) +end + + +subroutine davidson_diag_cfg_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N_st,N_st_diag_in,Nint,dressing_state,converged) + use bitmasks + use mmap_module + implicit none + BEGIN_DOC + ! Davidson diagonalization with specific diagonal elements of the H matrix + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten + ! on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > sze + ! + END_DOC + integer, intent(in) :: dim_in, sze, sze_csf, N_st, N_st_diag_in, Nint + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(in) :: H_jj(sze) + integer, intent(in) :: dressing_state + double precision, intent(inout) :: u_in(dim_in,N_st_diag_in) + double precision, intent(out) :: energies(N_st_diag_in) + + integer :: iter, N_st_diag + integer :: i,j,k,l,m,kk,ii,ll + logical, intent(inout) :: converged + + double precision, external :: u_dot_v, u_dot_u + + integer :: k_pairs, kl + + integer :: iter2, itertot + double precision, allocatable :: y(:,:), h(:,:), lambda(:) + double precision, allocatable :: s_tmp(:,:) + double precision :: diag_h_mat_elem + double precision, allocatable :: residual_norm(:) + character*(16384) :: write_buffer + double precision :: to_print(2,N_st) + double precision :: cpu, wall + integer :: shift, shift2, itermax, istate + double precision :: r1, r2, alpha + logical :: state_ok(N_st_diag_in*davidson_sze_max) + integer :: nproc_target + integer :: order(N_st_diag_in) + double precision :: cmax + double precision, allocatable :: U(:,:), U_csf(:,:), overlap(:,:) + double precision, allocatable :: tmpU(:,:), tmpW(:,:) + double precision, pointer :: W(:,:), W_csf(:,:) + logical :: disk_based + double precision :: energy_shift(N_st_diag_in*davidson_sze_max) + + include 'constants.include.F' + + N_st_diag = N_st_diag_in + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda + if (N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2,min(davidson_sze_max, sze/N_st_diag))+1 + itertot = 0 + + if (state_following) then + allocate(overlap(N_st_diag*itermax, N_st_diag*itermax)) + else + allocate(overlap(1,1)) ! avoid 'if' for deallocate + endif + overlap = 0.d0 + + PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2 threshold_davidson_from_pt2 + + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + double precision :: rss + integer :: maxab + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag) &! U + + dble(sze_csf)*(N_st_diag*itermax) &! U_csf + + dble(sze)*(N_st_diag) &! W + + dble(sze_csf)*(N_st_diag*itermax) &! W_csf + + 3.0d0*(N_st_diag*itermax)**2 &! h,y,s_tmp + + 1.d0*(N_st_diag*itermax) &! lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_u_0_nstates_zmq + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on collector + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on slave + + 0.5d0*maxab &! idx0 in H_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if (nproc_target == 0) then + call check_mem(r1,irp_here) + nproc_target = 1 + exit + endif + + if (r1+rss < qp_max_mem) then + exit + endif + + if (itermax > 4) then + itermax = itermax - 1 + else if (m==1.and.disk_based_davidson) then + m=0 + disk_based = .True. + itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + call write_int(6,N_st,'Number of states') + call write_int(6,N_st_diag,'Number of states in diagonalization') + call write_int(6,sze,'Number of determinants') + call write_int(6,sze_csf,'Number of CSFs') + call write_int(6,nproc_target,'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if (disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + + if (disk_based) then + ! Create memory-mapped files for W and S + type(c_ptr) :: ptr_w, ptr_s + integer :: fd_s, fd_w + call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& + 8, fd_w, .False., ptr_w) + call c_f_pointer(ptr_w, W_csf, (/sze_csf,N_st_diag*itermax/)) + else + allocate(W(sze,N_st_diag),W_csf(sze_csf,N_st_diag*itermax)) + endif + + allocate( & + ! Large + U(sze,N_st_diag), & + U_csf(sze_csf,N_st_diag*itermax), & + + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + residual_norm(N_st_diag), & + lambda(N_st_diag*itermax)) + + + h = 0.d0 + U = 0.d0 + y = 0.d0 + s_tmp = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + + ! Davidson iterations + ! =================== + + converged = .False. + call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),U_csf(1,1)) + do k=N_st+1,N_st_diag + do i=1,sze_csf + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + U_csf(i,k) = r1*dcos(r2) * u_csf(i,k-N_st) + enddo + U_csf(k,k) = u_csf(k,k) + 10.d0 + enddo + do k=1,N_st_diag + call normalize(U_csf(1,k),sze_csf) + enddo + call convertWFfromCSFtoDET(N_st_diag,U_csf(1,1),U(1,1)) + + do while (.not.converged) + itertot = itertot+1 + if (itertot == 8) then + exit + endif + + do iter=1,itermax-1 + + shift = N_st_diag*(iter-1) + shift2 = N_st_diag*iter + +! if ((iter > 1).or.(itertot == 1)) then + ! Compute |W_k> = \sum_i |i> + ! ----------------------------------- + + !call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift+1),U) + PROVIDE mo_two_e_integrals_in_map mo_integrals_map big_array_exchange_integrals + if ((sze > 100000).and.distributed_davidson) then + !call H_u_0_nstates_zmq (W,U,N_st_diag,sze) + allocate(tmpW(N_st_diag,sze_csf)) + allocate(tmpU(N_st_diag,sze_csf)) + do kk=1,N_st_diag + do ii=1,sze_csf + tmpU(kk,ii) = U_csf(ii,shift+kk) + enddo + enddo + call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) + do kk=1,N_st_diag + do ii=1,sze_csf + W_csf(ii,shift+kk)=tmpW(kk,ii) + enddo + enddo + deallocate(tmpW) + deallocate(tmpU) + else + !call H_u_0_nstates_openmp(W,U,N_st_diag,sze) + allocate(tmpW(N_st_diag,sze_csf)) + allocate(tmpU(N_st_diag,sze_csf)) + do kk=1,N_st_diag + do ii=1,sze_csf + tmpU(kk,ii) = U_csf(ii,shift+kk) + enddo + enddo + !tmpU =0.0d0 + !tmpU(1,2)=1.0d0 + double precision :: irp_rdtsc + double precision :: ticks_0, ticks_1 + integer*8 :: irp_imax + irp_imax = 1 + !ticks_0 = irp_rdtsc() + call calculate_sigma_vector_cfg_nst_naive_store(tmpW,tmpU,N_st_diag,sze_csf,1,sze_csf,0,1) + !ticks_1 = irp_rdtsc() + !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" + do kk=1,N_st_diag + do ii=1,sze_csf + W_csf(ii,shift+kk)=tmpW(kk,ii) + enddo + enddo + + !U_csf = 0.0d0 + !U_csf(1,1) = 1.0d0 + !u_in = 0.0d0 + !call convertWFfromCSFtoDET(N_st_diag,tmpU,U2) + !call H_u_0_nstates_openmp(u_in,U2,N_st_diag,sze) + !call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),W_csf2(1,1)) + !do i=1,sze_csf + ! print *,"I=",i," qp=",W_csf2(i,1)," my=",W_csf(i,1)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1)) + ! if(dabs(dabs(W_csf2(i,1))-dabs(W_csf(i,1))) .gt. 1.0e-10)then + ! print *,"somo=",psi_configuration(1,1,i)," domo=",psi_configuration(1,2,i)," diff=",dabs(W_csf2(i,1))-dabs(W_csf(i,1)) + ! endif + !end do + !stop + deallocate(tmpW) + deallocate(tmpU) + endif +! else +! ! Already computed in update below +! continue +! endif + + if (dressing_state > 0) then + + if (N_st == 1) then + + l = dressed_column_idx(1) + double precision :: f + f = 1.0d0/psi_coef(l,1) + do istate=1,N_st_diag + do i=1,sze + W(i,istate) += dressing_column_h(i,1) *f * U(l,istate) + W(l,istate) += dressing_column_h(i,1) *f * U(i,istate) + enddo + + enddo + + else + + call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & + psi_coef, size(psi_coef,1), & + U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & + dressing_column_h, size(dressing_column_h,1), s_tmp, size(s_tmp,1), & + 1.d0, W(1,1), size(W,1)) + + + call dgemm('T','N', N_st, N_st_diag, sze, 1.d0, & + dressing_column_h, size(dressing_column_h,1), & + U(1,1), size(U,1), 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('N','N', sze, N_st_diag, N_st, 1.0d0, & + psi_coef, size(psi_coef,1), s_tmp, size(s_tmp,1), & + 1.d0, W(1,1), size(W,1)) + + endif + endif + + !call convertWFfromDETtoCSF(N_st_diag,W,W_csf(1,shift+1)) + + ! Compute h_kl = = + ! ------------------------------------------- + + call dgemm('T','N', shift2, shift2, sze_csf, & + 1.d0, U_csf, size(U_csf,1), W_csf, size(W_csf,1), & + 0.d0, h, size(h,1)) + call dgemm('T','N', shift2, shift2, sze_csf, & + 1.d0, U_csf, size(U_csf,1), U_csf, size(U_csf,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + ! Diagonalize h + ! --------------- + + integer :: lwork, info + double precision, allocatable :: work(:) + + y = h + lwork = -1 + allocate(work(1)) + call dsygv(1,'V','U',shift2,y,size(y,1), & + s_tmp,size(s_tmp,1), lambda, work,lwork,info) + lwork = int(work(1)) + deallocate(work) + allocate(work(lwork)) + call dsygv(1,'V','U',shift2,y,size(y,1), & + s_tmp,size(s_tmp,1), lambda, work,lwork,info) + deallocate(work) + if (info /= 0) then + stop 'DSYGV Diagonalization failed' + endif + + ! Compute Energy for each eigenvector + ! ----------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, h, size(h,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, h, size(h,1)) + + do k=1,shift2 + lambda(k) = h(k,k) + enddo + + if (state_following) then + + overlap = -1.d0 + do i=1,shift2 + do k=1,shift2 + overlap(k,i) = dabs(y(k,i)) + enddo + enddo + do k=1,N_st + cmax = -1.d0 + do i=1,N_st + if (overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i=1,N_st_diag + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k=1,N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) + endif + enddo + do k=1,N_st + overlap(k,1) = lambda(k) + enddo + + endif + + + ! Express eigenvectors of h in the csf basis + ! ------------------------------------------ + + call dgemm('N','N', sze_csf, N_st_diag, shift2, & + 1.d0, U_csf, size(U_csf,1), y, size(y,1), 0.d0, U_csf(1,shift2+1), size(U_csf,1)) + call convertWFfromCSFtoDET(N_st_diag,U_csf(1,shift2+1),U) + + call dgemm('N','N', sze_csf, N_st_diag, shift2, & + 1.d0, W_csf, size(W_csf,1), y, size(y,1), 0.d0, W_csf(1,shift2+1), size(W_csf,1)) + call convertWFfromCSFtoDET(N_st_diag,W_csf(1,shift2+1),W) + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + !if (without_diagonal) then + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k=1,N_st_diag + do i=1,sze + U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) & + /max(H_jj(i) - lambda (k),1.d-2) + enddo + enddo + !$OMP END PARALLEL DO + !else + ! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + ! do k=1,N_st_diag + ! do i=1,sze + ! U(i,k) = (lambda(k) * U(i,k) - W(i,k) ) + ! enddo + ! enddo + ! !$OMP END PARALLEL DO + !endif + + do k=1,N_st + residual_norm(k) = u_dot_u(U(1,k),sze) + to_print(1,k) = lambda(k) + nuclear_repulsion + to_print(2,k) = residual_norm(k) + enddo + call convertWFfromDETtoCSF(N_st_diag,U,U_csf(1,shift2+1)) + + if ((itertot>1).and.(iter == 1)) then + !don't print + continue + else + write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + endif + + ! Check convergence + if (iter > 1) then + if (threshold_davidson_from_pt2) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2 + else + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson + endif + endif + + do k=1,N_st + if (residual_norm(k) > 1.d8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if (converged) then + exit + endif + + logical, external :: qp_stop + if (qp_stop()) then + converged = .True. + exit + endif + + + enddo + + ! Re-contract U + ! ------------- + + call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0, & + W_csf, size(W_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + do k=1,N_st_diag + do i=1,sze_csf + W_csf(i,k) = u_in(i,k) + enddo + enddo + call convertWFfromCSFtoDET(N_st_diag,W_csf,W) + + call dgemm('N','N', sze_csf, N_st_diag, shift2, 1.d0, & + U_csf, size(U_csf,1), y, size(y,1), 0.d0, u_in, size(u_in,1)) + do k=1,N_st_diag + do i=1,sze_csf + U_csf(i,k) = u_in(i,k) + enddo + enddo + call convertWFfromCSFtoDET(N_st_diag,U_csf,U) + + enddo + + + call nullify_small_elements(sze,N_st_diag,U,size(U,1),threshold_davidson_pt2) + do k=1,N_st_diag + do i=1,sze + u_in(i,k) = U(i,k) + enddo + enddo + + do k=1,N_st_diag + energies(k) = lambda(k) + enddo + write_buffer = '======' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') trim(write_buffer) + write(6,'(A)') '' + call write_time(6) + + if (disk_based)then + ! Remove temp files + integer, external :: getUnitAndOpen + call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 8, fd_w, ptr_w ) + fd_w = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_w','r') + close(fd_w,status='delete') + else + deallocate(W, W_csf) + endif + + deallocate ( & + residual_norm, & + U, U_csf, overlap, & + h, y, s_tmp, & + lambda & + ) + FREE nthreads_davidson +end + + + + + + + diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index 89e18d65..3020ecd8 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -89,7 +89,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N double precision, intent(out) :: energies(N_st_diag_in) integer :: iter, N_st_diag - integer :: i,j,k,l,m + integer :: i,j,k,l,m,kk logical, intent(inout) :: converged double precision, external :: u_dot_v, u_dot_u diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 68f3420d..0fd9f091 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -154,7 +154,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ character*(16384) :: write_buffer double precision :: to_print(3,N_st) double precision :: cpu, wall - integer :: shift, shift2, itermax, istate + integer :: shift, shift2, itermax, istate, ii double precision :: r1, r2, alpha logical :: state_ok(N_st_diag_in*davidson_sze_max) integer :: nproc_target @@ -361,7 +361,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ if ((sze > 100000).and.distributed_davidson) then call H_S2_u_0_nstates_zmq (W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) else + double precision :: irp_rdtsc + double precision :: ticks_0, ticks_1 + integer*8 :: irp_imax + irp_imax = 1 + !ticks_0 = irp_rdtsc() call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) + !ticks_1 = irp_rdtsc() + !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----" endif S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) else diff --git a/src/davidson/diagonalize_ci.irp.f b/src/davidson/diagonalize_ci.irp.f index 42f4fcdb..a34637a0 100644 --- a/src/davidson/diagonalize_ci.irp.f +++ b/src/davidson/diagonalize_ci.irp.f @@ -1,9 +1,20 @@ +BEGIN_PROVIDER [ character*(3), sigma_vector_algorithm ] + implicit none + BEGIN_DOC + ! If 'det', use in Davidson + ! + ! If 'cfg', use in Davidson + END_DOC + sigma_vector_algorithm = 'det' + !sigma_vector_algorithm = 'cfg' +END_PROVIDER BEGIN_PROVIDER [ double precision, CI_energy, (N_states_diag) ] implicit none BEGIN_DOC ! :c:data:`n_states` lowest eigenvalues of the |CI| matrix END_DOC + PROVIDE distributed_davidson integer :: j character*(8) :: st @@ -61,7 +72,7 @@ END_PROVIDER if (diag_algorithm == 'Davidson') then if (do_csf) then -! if (sigma_vector_algorithm == 'det') then + if (sigma_vector_algorithm == 'det') then call davidson_diag_H_csf (psi_det, & CI_eigenvectors, & size(CI_eigenvectors,1), & @@ -73,14 +84,14 @@ END_PROVIDER N_int, & 0, & converged) -! else if (sigma_vector_algorithm == 'cfg') then -! call davidson_diag_H_csf(psi_det,CI_eigenvectors, & -! size(CI_eigenvectors,1),CI_electronic_energy, & -! N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged) -! else -! print *, irp_here -! stop 'bug' -! endif + else if (sigma_vector_algorithm == 'cfg') then + call davidson_diag_H_cfg(psi_det,CI_eigenvectors, & + size(CI_eigenvectors,1),CI_electronic_energy, & + N_det,N_csf,min(N_det,N_states),min(N_det,N_states_diag),N_int,0,converged) + else + print *, irp_here + stop 'bug' + endif else call davidson_diag_HS2(psi_det, & CI_eigenvectors, & diff --git a/src/determinants/EZFIO.cfg b/src/determinants/EZFIO.cfg index b9c736a9..c6323cd0 100644 --- a/src/determinants/EZFIO.cfg +++ b/src/determinants/EZFIO.cfg @@ -136,9 +136,8 @@ doc: If |true|, discard any Slater determinants with an interaction smaller than interface: ezfio,provider,ocaml default: False - -[thresh_save_wf] +[save_threshold] type: Threshold -doc: Thresholds to save wave function +doc: Cut-off to apply to the CI coefficients when the wave function is stored interface: ezfio,provider,ocaml -default: 1.e-15 +default: 1.e-14 diff --git a/src/dft_utils_func/ecmd_pbe_on_top.irp.f b/src/dft_utils_func/ecmd_pbe_on_top.irp.f index fd60f267..5370e0da 100644 --- a/src/dft_utils_func/ecmd_pbe_on_top.irp.f +++ b/src/dft_utils_func/ecmd_pbe_on_top.irp.f @@ -67,6 +67,7 @@ subroutine ec_md_on_top_PBE_mu_corrected(mu,r,two_dm,eps_c_md_on_top_PBE) double precision :: pi pi = 4.d0 * datan(1.d0) mu_correction_of_on_top = on_top / ( 1.d0 + 2.d0/(dsqrt(pi)*mu) ) +! mu_correction_of_on_top = on_top * dexp(-1.d0/(dsqrt(pi)*mu)) mu_correction_of_on_top = max(mu_correction_of_on_top ,1.d-15) end diff --git a/src/mo_localization/EZFIO.cfg b/src/mo_localization/EZFIO.cfg new file mode 100644 index 00000000..129fb2ea --- /dev/null +++ b/src/mo_localization/EZFIO.cfg @@ -0,0 +1,54 @@ +[localization_method] +type: character*(32) +doc: Method for the orbital localization. boys : Foster-Boys, pipek : Pipek-Mezey. +interface: ezfio,provider,ocaml +default: boys + +[localization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital localization. +interface: ezfio,provider,ocaml +default: 1000 + +[localization_use_hessian] +type: logical +doc: If true, it uses the trust region algorithm with the gradient and the diagonal of the hessian. Else it computes the rotation between each pair of MOs that should be applied to maximize/minimize the localization criterion. The last option requieres a way smaller amount of memory but is not easy to converge. +interface: ezfio,provider,ocaml +default: true + +[security_mo_class] +type: logical +doc: If true, call abort if the number of active orbital or the number of core + active orbitals is equal to the number of molecular orbitals, else uses the actual mo_class. It is a security if you forget to set the mo_class before the localization. +interface: ezfio,provider,ocaml +default: true + +[thresh_loc_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the localization exits when the largest element in the gradient is smaller than thresh_localization_max_elem_grad. +interface: ezfio,provider,ocaml +default: 1.e-6 + +[kick_in_mos] +type: logical +doc: If True, it applies a rotation of an angle angle_pre_rot between the MOs of a same mo_class before the localization. +interface: ezfio,provider,ocaml +default: true + +[angle_pre_rot] +type: double precision +doc: To define the angle for the rotation of the MOs before the localization (in rad). +interface: ezfio,provider,ocaml +default: 0.1 + +[sort_mos_by_e] +type: logical +doc: If True, the MOs are sorted using the diagonal elements of the Fock matrix. +interface: ezfio,provider,ocaml +default: false + +[debug_hf] +type: logical +doc: If True, prints the HF energy before/after the different steps of the localization. Only for debugging. +interface: ezfio,provider,ocaml +default: false + diff --git a/src/mo_localization/NEED b/src/mo_localization/NEED new file mode 100644 index 00000000..e9d51654 --- /dev/null +++ b/src/mo_localization/NEED @@ -0,0 +1,2 @@ +hartree_fock +utils_trust_region diff --git a/src/mo_localization/README.md b/src/mo_localization/README.md new file mode 100644 index 00000000..a142ec20 --- /dev/null +++ b/src/mo_localization/README.md @@ -0,0 +1,108 @@ +# mo_localization + +Some parameters can be changed with qp edit in the mo_localization section +(cf below). Similarly for the trust region parameters in the +utils_trust_region section. The localization without the trust region +is not available for the moment. + +The irf.f files can be generated from the org ones using emacs. +If you modify the .org files, don't forget to do (you need emacs): +``` +./TANGLE_org_mode.sh +ninja +``` + +# Orbital localisation +To localize the MOs: +``` +qp run localization +``` +After that the ezfio directory contains the localized MOs + +But to do so the mo_class must be defined before, run +``` +qp set_mo_class -q +``` +for more information or +``` +qp set_mo_class -c [] -a [] -v [] -i [] -d [] +``` +to set the mo classes. We don't care about the name of the +mo classes. The algorithm just localizes all the MOs of +a given class between them, for all the classes, except the deleted MOs. + +If you just on kind of mo class to localize all the MOs between them +you have to put: +``` +qp set mo_localization security_mo_class false +``` + +Before the localization, a kick is done for each mo class +(except the deleted ones) to break the MOs. This is done by +doing a rotation between the MOs. +This feature can be removed by setting: +``` +qp set mo_localization kick_in_mos false +``` +and the default angle for the rotation can be changed with: +``` +qp set mo_localization angle_pre_rot 1e-3 # or something else +``` + +After the localization, the MOs of each class (except the deleted ones) +can be sorted between them using the diagonal elements of +the fock matrix with: +``` +qp set mo_localization sort_mos_by_e true +``` + +You can check the Hartree-Fock energy before/during/after the localization +by putting (only for debugging): +``` +qp set mo_localization debug_hf true +``` + +## Foster-Boys & Pipek-Mezey +Foster-Boys: +``` +qp set mo_localization localization_method boys +``` + +Pipek-Mezey: +``` +qp set mo_localization localization_method pipek +``` + +# Break the spatial symmetry of the MOs +To break the spatial symmetry of the MOs: +``` +qp run break_spatial_sym +``` +The default angle for the rotations is too big for this kind of +application, a value between 1e-3 and 1e-6 should break the spatial +symmetry with just a small change in the energy: +``` +qp set mo_localization angle_pre_rot 1e-3 +``` + +# With or without hessian + trust region +With hessian + trust region +``` +qp set mo_localization localisation_use_hessian true +``` +It uses the trust region algorithm with the diagonal of the hessian of the +localization criterion with respect to the MO rotations. + +Without the hessian and the trust region +``` +qp set mo_localization localisation_use_hessian false +``` +By doing so it does not require to store the hessian but the +convergence is not easy, in particular for virtual MOs. +It seems that it not possible to converge with Pipek-Mezey +localization with this approach. + +# Further improvements: +- Cleaner repo +- Correction of the errors in the documentations +- option with/without trust region diff --git a/src/mo_localization/TANGLE_org_mode.sh b/src/mo_localization/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_localization/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/mo_localization/break_spatial_sym.irp.f b/src/mo_localization/break_spatial_sym.irp.f new file mode 100644 index 00000000..6a1003df --- /dev/null +++ b/src/mo_localization/break_spatial_sym.irp.f @@ -0,0 +1,42 @@ +! ! A small program to break the spatial symmetry of the MOs. + +! ! You have to defined your MO classes or set security_mo_class to false +! ! with: +! ! qp set orbital_optimization security_mo_class false + +! ! The default angle for the rotations is too big for this kind of +! ! application, a value between 1e-3 and 1e-6 should break the spatial +! ! symmetry with just a small change in the energy. + + +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (dim_list_act_orb == mo_num .or. & + dim_list_core_orb + dim_list_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to kick all the orbitals:' + print*, 'qp set orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + + call apply_pre_rotation + +end diff --git a/src/mo_localization/break_spatial_sym.org b/src/mo_localization/break_spatial_sym.org new file mode 100644 index 00000000..5995b138 --- /dev/null +++ b/src/mo_localization/break_spatial_sym.org @@ -0,0 +1,43 @@ +! A small program to break the spatial symmetry of the MOs. + +! You have to defined your MO classes or set security_mo_class to false +! with: +! qp set orbital_optimization security_mo_class false + +! The default angle for the rotations is too big for this kind of +! application, a value between 1e-3 and 1e-6 should break the spatial +! symmetry with just a small change in the energy. + +#+BEGIN_SRC f90 :comments org :tangle break_spatial_sym.irp.f +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (dim_list_act_orb == mo_num .or. & + dim_list_core_orb + dim_list_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to kick all the orbitals:' + print*, 'qp set orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + + call apply_pre_rotation + +end +#+END_SRC diff --git a/src/mo_localization/debug_gradient_loc.irp.f b/src/mo_localization/debug_gradient_loc.irp.f new file mode 100644 index 00000000..a0e5432d --- /dev/null +++ b/src/mo_localization/debug_gradient_loc.irp.f @@ -0,0 +1,62 @@ +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list = list_act + list_size = dim_list_act_orb + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end diff --git a/src/mo_localization/debug_gradient_loc.org b/src/mo_localization/debug_gradient_loc.org new file mode 100644 index 00000000..1fcb7fca --- /dev/null +++ b/src/mo_localization/debug_gradient_loc.org @@ -0,0 +1,64 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_gradient_loc.irp.f +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list = list_act + list_size = dim_list_act_orb + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end +#+END_SRC diff --git a/src/mo_localization/debug_hessian_loc.irp.f b/src/mo_localization/debug_hessian_loc.irp.f new file mode 100644 index 00000000..bfb1cbbc --- /dev/null +++ b/src/mo_localization/debug_hessian_loc.irp.f @@ -0,0 +1,62 @@ +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:,:), H2(:,:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list = list_act + list_size = dim_list_act_orb + + n = list_size*(list_size-1)/2 + + allocate(H(n,n),H2(n,n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i,i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i,i)) > threshold) then + print*,H(i,i) + nb_error = nb_error + 1 + if (dabs(H(i,i)) > max_elem) then + max_elem = H(i,i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end diff --git a/src/mo_localization/debug_hessian_loc.org b/src/mo_localization/debug_hessian_loc.org new file mode 100644 index 00000000..bc23818c --- /dev/null +++ b/src/mo_localization/debug_hessian_loc.org @@ -0,0 +1,64 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_hessian_loc.irp.f +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:,:), H2(:,:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list = list_act + list_size = dim_list_act_orb + + n = list_size*(list_size-1)/2 + + allocate(H(n,n),H2(n,n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i,i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i,i)) > threshold) then + print*,H(i,i) + nb_error = nb_error + 1 + if (dabs(H(i,i)) > max_elem) then + max_elem = H(i,i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end +#+END_SRC diff --git a/src/mo_localization/kick_the_mos.irp.f b/src/mo_localization/kick_the_mos.irp.f new file mode 100644 index 00000000..3ca90fb5 --- /dev/null +++ b/src/mo_localization/kick_the_mos.irp.f @@ -0,0 +1,31 @@ +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (dim_list_act_orb == mo_num .or. & + dim_list_core_orb + dim_list_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to kick all the orbital:' + print*, 'qp set Orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + + call apply_pre_rotation + +end diff --git a/src/mo_localization/kick_the_mos.org b/src/mo_localization/kick_the_mos.org new file mode 100644 index 00000000..3d57da37 --- /dev/null +++ b/src/mo_localization/kick_the_mos.org @@ -0,0 +1,33 @@ +#+BEGIN_SRC f90 :comments org :tangle kick_the_mos.irp.f +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (dim_list_act_orb == mo_num .or. & + dim_list_core_orb + dim_list_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to kick all the orbital:' + print*, 'qp set Orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + + call apply_pre_rotation + +end +#+END_SRC diff --git a/src/mo_localization/localization.irp.f b/src/mo_localization/localization.irp.f new file mode 100644 index 00000000..6d5cc876 --- /dev/null +++ b/src/mo_localization/localization.irp.f @@ -0,0 +1,531 @@ +program localization + call run_localization +end + + + + +! Variables: +! | pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +! | R(mo_num,mo_num) | double precision | Rotation matrix | +! | tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +! | prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +! | spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +! | criterion | double precision | Localization criterion | +! | prev_criterion | double precision | Previous criterion | +! | criterion_model | double precision | Estimated next criterion | +! | rho | double precision | Ratio to measure the agreement between the model | +! | | | and the reality | +! | delta | double precision | Radisu of the trust region | +! | norm_grad | double precision | Norm of the gradient | +! | info | integer | for dsyev from Lapack | +! | max_elem | double precision | maximal element in the gradient | +! | v_grad(:) | double precision | Gradient | +! | H(:,:) | double precision | Hessian (diagonal) | +! | e_val(:) | double precision | Eigenvalues of the hessian | +! | W(:,:) | double precision | Eigenvectors of the hessian | +! | tmp_x(:) | double precision | Step in 1D (in a subaspace) | +! | tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +! | tmp_list(:) | double precision | List of MOs in a mo_class | +! | i,j,k | integer | Indexes in the full MO space | +! | tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +! | l | integer | Index for the mo_class | +! | key(:) | integer | Key to sort the eigenvalues of the hessian | +! | nb_iter | integer | Number of iterations | +! | must_exit | logical | To exit the trust region loop | +! | cancel_step | logical | To cancel a step | +! | not_*converged | logical | To localize the different mo classes | +! | t* | double precision | To measure the time | +! | n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +! | tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +! | | | Number of dimension in the subspace | + +! Variables in qp_edit for the localization: +! | localization_method | +! | localization_max_nb_iter | +! | default_mo_class | +! | thresh_loc_max_elem_grad | +! | kick_in_mos | +! | angle_pre_rot | + +! + all the variables for the trust region + +! Cf. qp_edit orbital optimization + + +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:,:), tmp_m_x(:,:), tmp_x(:),W(:,:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method (with qp_edit) + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (n_act_orb == mo_num .or. & + n_core_orb + n_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to localize all the orbitals:' + print*, 'qp set Orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif + +! Loc + + ! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + j = key(i) + W(j,i) = 1d0 + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end diff --git a/src/mo_localization/localization.org b/src/mo_localization/localization.org new file mode 100644 index 00000000..ad42ef74 --- /dev/null +++ b/src/mo_localization/localization.org @@ -0,0 +1,2860 @@ +* Orbital localization + +Molecular orbitals localization + +** Doc + +The program localizes the orbitals in function of their mo_class: +- core MOs +- inactive MOs +- active MOs +- virtual MOs +- deleted MOs -> no orbital localization + +Core MOs are localized with core MOs, inactives MOs are localized with +inactives MOs and so on. But deleted orbitals are not localized. + +WARNING: +- The user MUST SPECIFY THE MO CLASSES, otherwise if default mo class + is false the localization will be done for all the orbitals between + them, so the occupied and virtual MOs will be combined together + which is clearly not what we want to do. If default lpmo class is true + the localization will be done for the core, occupied and virtual + orbitals, but pay attention the mo_class are not deleted after... +- The mo class is not important (except "deleted") because it is not + link to the kind of MOs for CASSCF or CIPSI. It is just a way to + separate the MOs in order to localize them separetely, for example + to separate the core MOs, the occupied MOs and the virtuals MOs. +- The user MUST CHANGE THE MO CLASSES AFTER THE LOCALIZATION in order + to have the right mo class for his next calculation... + +For more information on the mo_class: +qp set_mo_class -h + +*** Foster-Boys localization +Boys, S. F., 1960, Rev. Mod. Phys. 32, 296. +DOI:https://doi.org/10.1103/RevModPhys.32.300 +Boys, S. F., 1966, in Quantum Theory of Atoms, Molecules, +and the Solid State, edited by P.-O. Löwdin (Academic +Press, New York), p. 253. +Daniel A. Kleier, Thomas A. Halgren, John H. Hall Jr., and William +N. Lipscomb, J. Chem. Phys. 61, 3905 (1974) +doi: 10.1063/1.1681683 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Comput. Chem. 2013, 34, +1456– 1462. DOI: 10.1002/jcc.23281 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Theory +Comput. 2012, 8, 9, 3137–3146 +DOI: https://doi.org/10.1021/ct300473g +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Phys. 137, 224114 +(2012) +DOI: https://doi.org/10.1063/1.4769866 +Nicola Marzari, Arash A. Mostofi, Jonathan R. Yates, Ivo Souza, and David Vanderbilt +Rev. Mod. Phys. 84, 1419 +https://doi.org/10.1103/RevModPhys.84.1419 + +The Foster-Boys localization is a method to generate localized MOs +(LMOs) by minimizing the Foster-Boys criterion: +$$ C_{FB} = \sum_{i=1}^N \left[ < \phi_i | r^2 | \phi_i > - < \phi_i | r | +\phi_i >^2 \right] $$. +In fact it is equivalent to maximise +$$ C_2 = \sum_{i>j, \ i=1}^N \left[ < \phi_i | r | \phi_i > - < +\phi_j | r | \phi_j > \left]^2$$ +or +$$ C_3 = \sum_{i=1}^N \left[ < \phi_i | r | \phi_i > \right]^2.$$ + +Locality of the orbitals: +\begin{align*} +\sigma_i &= \sqrt{ - ^2} \\ +&= \sqrt{ - ^2 + - ^2 + - ^2} +\end{align*} + + +*** Pipek-Mezey localization +J. Pipek, P. G. Mezey, J. Chem. Phys. 90, 4916 (1989) +DOI: 10.1063/1.456588 + +Foster-Boys localization does not preserve the $\sigma - \pi$ separation of the +MOs, it leads to "banana" orbitals. The Pipek-Mezey localization +normally preserves this separation. + +** Localization procedure + +Localization procedure: + +To do the localization we compute the gradient and the +diagonal hessian of the Foster-Boys criterion with respect to the MO +rotations and we minimize it with the Newton method. + +In order to avoid the problem of starting on a saddle point, the +localization procedure starts by giving a little kick in the MOs, by +putting "kick in mos" true, in order to break the symmetry and escape +from a possible saddle point. + +In order to speed up the iteration we compute the gradient, the +diagonal hessian and the step in temporary matrices of the size +(number MOs in mo class by number MOs in mo class) + +** Remarks + +Variables: + +The indexes i and j refere to the positions of the elements in +the "full space", i.e., the arrays containing elements for all the MOs, +but the indexes tmp_i and tmp_j to the positions of the elements in +the "reduced space/subspace", i.e., the arrays containing elements for +a restricted number of MOs. +Example: +The gradient for the localization of the core MOs can be expressed +as a vector of length mo_num*(mo_num-1)/2 with only +n_core_orb*(n_core_orb-1)/2 non zero elements, so it is more relevant +to use a vector of size n_act_orb*(n_core_orb-1)/2. +So here the gradient is a vector of size +tmp_list_size*(tmp_list_size)/2 where tmp_list_size is the number of +MOs is the corresponding mo class. +The same thing happened for the hessian, the matrix containing the +step and the rotation matrix, which are tmp_list_size by tmp_list_size +matrices. + +Ex gradient for 4 core orbitales: +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +0 \\ +\vdots \\ +0 \\ +\end{pmatrix} +\end{align*} + +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The same thing can be done if indexes of the orbitales are not +consecutives since it's done with lists of MOs: + +\begin{align*} +\begin{pmatrix} +0 & -a & 0 & -b & -d & \hdots & 0 \\ +a & 0 & 0 & -c & -e & \hdots & 0 \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +b & c & 0 & 0 & -f & \hdots & 0 \\ +d & e & 0 & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The dipoles are updated using the "ao to mo" subroutine without the +"restore symmetry" which is actually in N^4 but can be rewrite in N^2 +log(N^2). +The bottleneck of the program is normally N^3 with the matrix +multiplications/diagonalizations. The use of the full hessian can be +an improvement but it will scale in N^4... + +** Program + +#+BEGIN_SRC f90 org :tangle localization.irp.f +program localization + call run_localization +end +#+END_SRC + + +Variables: +| pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +| R(mo_num,mo_num) | double precision | Rotation matrix | +| tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +| prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +| spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +| criterion | double precision | Localization criterion | +| prev_criterion | double precision | Previous criterion | +| criterion_model | double precision | Estimated next criterion | +| rho | double precision | Ratio to measure the agreement between the model | +| | | and the reality | +| delta | double precision | Radisu of the trust region | +| norm_grad | double precision | Norm of the gradient | +| info | integer | for dsyev from Lapack | +| max_elem | double precision | maximal element in the gradient | +| v_grad(:) | double precision | Gradient | +| H(:,:) | double precision | Hessian (diagonal) | +| e_val(:) | double precision | Eigenvalues of the hessian | +| W(:,:) | double precision | Eigenvectors of the hessian | +| tmp_x(:) | double precision | Step in 1D (in a subaspace) | +| tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +| tmp_list(:) | integer | List of MOs in a mo_class | +| i,j,k | integer | Indexes in the full MO space | +| tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +| l | integer | Index for the mo_class | +| key(:) | integer | Key to sort the eigenvalues of the hessian | +| nb_iter | integer | Number of iterations | +| must_exit | logical | To exit the trust region loop | +| cancel_step | logical | To cancel a step | +| not_*converged | logical | To localize the different mo classes | +| t* | double precision | To measure the time | +| n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +| tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +| | | Number of dimension in the subspace | + +Variables in qp_edit for the localization: +| localization_method | +| localization_max_nb_iter | +| default_mo_class | +| thresh_loc_max_elem_grad | +| kick_in_mos | +| angle_pre_rot | + ++ all the variables for the trust region + +Cf. qp_edit orbital optimization + +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:,:), tmp_m_x(:,:), tmp_x(:),W(:,:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method (with qp_edit) + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*, 'Security mo_class:', security_mo_class + + ! The default mo_classes are setted only if the MOs to localize are not specified + if (security_mo_class .and. (n_act_orb == mo_num .or. & + n_core_orb + n_act_orb == mo_num)) then + + print*, 'WARNING' + print*, 'You must set different mo_class with qp set_mo_class' + print*, 'If you want to localize all the orbitals:' + print*, 'qp set Orbital_optimization security_mo_class false' + print*, '' + print*, 'abort' + + call abort + + endif +#+END_SRC + +** Loc +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f + ! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n, tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n,tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + j = key(i) + W(j,i) = 1d0 + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end +#+END_SRC + +** Gathering +Gradient/hessian/criterion for the localization: +They are chosen in function of the localization method + +Gradient: + +qp_edit : +| localization_method | method for the localization | + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end +#+END_SRC + +Hessian: + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Criterion: + +Output: +| criterion | double precision | Criterion for the orbital localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Subroutine to update the datas needed for the localization +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end +#+END_SRC + +Angles: + +Output: +| tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +| max_elem | double precision | Maximal angle | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +** Foster-Boys +*** Gradient +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'' + print*,'---End gradient_FB---' + print*,'' + +end subroutine +#+END_SRC + +*** Gradient (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'' + print*,'---End gradient_FB_omp---' + print*,'' + +end subroutine +#+END_SRC + +*** Hessian + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +Internal: +| beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + print*,'' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Min elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + ! Max elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + ! Near 0 + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'' + print*,'---End hessian_FB---' + print*,'' + +end subroutine +#+END_SRC + +*** Hessian (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do j = 1, tmp_n + do i = 1, tmp_n + H(i,j) = 0d0 + enddo + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Min elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + ! Max elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + ! Near 0 + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'' + print*,'---End hessian_FB_omp---' + print*,'' + +end subroutine +#+END_SRC + +** Pipek-Mezey +*** Gradient v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek +#+END_SRC + +*** Gradient + +The gradient is + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} +with +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +\end{align*} + +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +| tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +| tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +| | | product and compute tmp_int | +| CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +| tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the mo_class | +| tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the nuclei | +| tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +| | | values depending of the nuclei | +| a | | index to loop over the nuclei | +| b | | index to loop over the AOs which belongs to the nuclei a | +| mu | | index to refer to an AO which belongs to the nuclei a | +| rho | | index to loop over all the AOs | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'' + print*,'---End gradient_PM---' + print*,'' + +end +#+END_SRC + +*** Hessian v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + +! max_elem = 0d0 +! do tmp_k = 1, tmp_n +! if (H(tmp_k,tmp_k) < max_elem) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Min elem H:', max_elem +! +! max_elem = 0d0 +! do tmp_k = 1, tmp_n +! if (H(tmp_k,tmp_k) > max_elem) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Max elem H:', max_elem +! +! max_elem = 1d10 +! do tmp_k = 1, tmp_n +! if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta,tmp_int) + +end +#+END_SRC + +*** Hessian + +The hessian is +\begin{align*} +\left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +\end{align*} + +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'' + print*,'---End hessian_PM---' + print*,'' + +end + +#+END_SRC + +** Criterion +*** Criterion PM (old) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end +#+END_SRC + +*** Criterion PM + +The criterion is computed as +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +\end{align*} +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + print*,'' + +end +#+END_SRC + +*** Criterion PM v3 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + print*,'' + +end +#+END_SRC + +*** Criterion FB (old) + +The criterion is just computed as + +\begin{align*} +C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +\end{align*} + +The minus sign is here in order to minimize this criterion + +Output: +| criterion | double precision | criterion for the Foster-Boys localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +*** Criterion FB +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +** Theta + +In: +| n | integer | number of MOs in the considered MO class | +| l | integer | list of MOs of the considered class | + +Out: +| m_x(n,n) | double precision | Matrix containing the rotation angle between all the different | +| | | pairs of MOs to apply the rotations (need a minus sign) | +| max_elem | double precision | Maximal angle in absolute value | + +$$\cos(4 \theta) = \frac{-A{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\sin(4 \theta) = \frac{B{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\tan(4 \theta) = \frac{\sin(4 \theta)}{\cos(4 \theta)}$$ +where $\theta$ is in fact $\theta_{ij}$ + +For Foster-Boys localization: +$$A_{ij} = ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = ( - )$$ + + +For Pipek-Mezey localization: +$$A_{ij} = \sum_A ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = \sum_A ( - )$$ +with +$$ = \frac{1}{2} \sum_\rho \sum_{\mu \in A} ( c_\rho^{i*} S_{\rho +\mu} c_\mu^j + c_\mu^{i*} S_{\mu \rho} c_\rho^j)$$ +$i,j$ MOs +$\mu, \rho$ AOs +$A$ nucleus +$S$ overlap matrix +$c$ MO coefficient +$r$ position operator + +#+begin_src f90 :tangle localization_sub.irp.f +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end +#+end_src + +#+begin_src f90 :comments org :tangle localization_sub.irp.f +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end +#+end_src + +** Spatial extent + +The spatial extent of an orbital $i$ is computed as +\begin{align*} +\sum_{\lambda=x,y,z}\sqrt{ - ^2} +\end{align*} + +From that we can also compute the average and the standard deviation + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end +#+END_SRC + +** Utils + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + integer, allocatable :: tmp_list(:) + +! allocate(iorder(mo_num), fock_energies_tmp(mo_num), new_mo_coef(ao_num, mo_num)) +! +! do i = 1, mo_num +! fock_energies_tmp(i) = Fock_matrix_diag_mo(i) +! print*,'fock_energies_tmp(i) = ',fock_energies_tmp(i) +! iorder(i) = i +! enddo +! +! print*,'' +! print*,'Sorting by Fock energies' +! print*,'' +! +! call dsort(fock_energies_tmp, iorder, mo_num) +! +! do i = 1, mo_num +! k = iorder(i) +! print*,'fock_energies_new(i) = ',fock_energies_tmp(i) +! do j = 1, ao_num +! new_mo_coef(j,i) = mo_coef(j,k) +! enddo +! enddo + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +#+END_SRC + diff --git a/src/mo_localization/localization_sub.irp.f b/src/mo_localization/localization_sub.irp.f new file mode 100644 index 00000000..39442a12 --- /dev/null +++ b/src/mo_localization/localization_sub.irp.f @@ -0,0 +1,2055 @@ +! Gathering +! Gradient/hessian/criterion for the localization: +! They are chosen in function of the localization method + +! Gradient: + +! qp_edit : +! | localization_method | method for the localization | + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + + +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end + + + +! Hessian: + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Criterion: + +! Output: +! | criterion | double precision | Criterion for the orbital localization | + + +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Subroutine to update the datas needed for the localization + +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end + + + +! Angles: + +! Output: +! | tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +! | max_elem | double precision | Maximal angle | + + + +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + +! Gradient +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'' + print*,'---End gradient_FB---' + print*,'' + +end subroutine + +! Gradient (OMP) + +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'' + print*,'---End gradient_FB_omp---' + print*,'' + +end subroutine + +! Hessian + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! Internal: +! | beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + print*,'' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Min elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + ! Max elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + ! Near 0 + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'' + print*,'---End hessian_FB---' + print*,'' + +end subroutine + +! Hessian (OMP) + +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do j = 1, tmp_n + do i = 1, tmp_n + H(i,j) = 0d0 + enddo + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Min elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + ! Max elem + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + ! Near 0 + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'' + print*,'---End hessian_FB_omp---' + print*,'' + +end subroutine + +! Gradient v1 + +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek + +! Gradient + +! The gradient is + +! \begin{align*} +! \left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +! \end{align*} +! with +! \begin{align*} +! \gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +! \end{align*} + +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +! | tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +! | tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +! | | | product and compute tmp_int | +! | CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +! | tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the mo_class | +! | tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the nuclei | +! | tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +! | | | values depending of the nuclei | +! | a | | index to loop over the nuclei | +! | b | | index to loop over the AOs which belongs to the nuclei a | +! | mu | | index to refer to an AO which belongs to the nuclei a | +! | rho | | index to loop over all the AOs | + + +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'' + print*,'---End gradient_PM---' + print*,'' + +end + +! Hessian v1 + +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + +! max_elem = 0d0 +! do tmp_k = 1, tmp_n +! if (H(tmp_k,tmp_k) < max_elem) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Min elem H:', max_elem +! +! max_elem = 0d0 +! do tmp_k = 1, tmp_n +! if (H(tmp_k,tmp_k) > max_elem) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Max elem H:', max_elem +! +! max_elem = 1d10 +! do tmp_k = 1, tmp_n +! if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then +! max_elem = H(tmp_k,tmp_k) +! endif +! enddo +! print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta,tmp_int) + +end + +! Hessian + +! The hessian is +! \begin{align*} +! \left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +! \end{align*} +! \begin{align*} +! \beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +! \end{align*} + +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + + +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n, tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + print*,'' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k,tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Min elem H:', max_elem + + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) > max_elem) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Max elem H:', max_elem + + max_elem = 1d10 + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) < ABS(max_elem)) then + max_elem = H(tmp_k,tmp_k) + endif + enddo + print*, 'Near 0 elem H:', max_elem + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'' + print*,'---End hessian_PM---' + print*,'' + +end + +! Criterion PM (old) + +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end + +! Criterion PM + +! The criterion is computed as +! \begin{align*} +! \mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +! \end{align*} +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} + + +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + print*,'' + +end + +! Criterion PM v3 + +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + print*,'' + +end + +! Criterion FB (old) + +! The criterion is just computed as + +! \begin{align*} +! C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +! \end{align*} + +! The minus sign is here in order to minimize this criterion + +! Output: +! | criterion | double precision | criterion for the Foster-Boys localization | + + +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +! Criterion FB + +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end + +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end + +! Spatial extent + +! The spatial extent of an orbital $i$ is computed as +! \begin{align*} +! \sum_{\lambda=x,y,z}\sqrt{ - ^2} +! \end{align*} + +! From that we can also compute the average and the standard deviation + + +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end + +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end + +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end + +! Utils + + +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end + +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine + +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end + +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + integer, allocatable :: tmp_list(:) + +! allocate(iorder(mo_num), fock_energies_tmp(mo_num), new_mo_coef(ao_num, mo_num)) +! +! do i = 1, mo_num +! fock_energies_tmp(i) = Fock_matrix_diag_mo(i) +! print*,'fock_energies_tmp(i) = ',fock_energies_tmp(i) +! iorder(i) = i +! enddo +! +! print*,'' +! print*,'Sorting by Fock energies' +! print*,'' +! +! call dsort(fock_energies_tmp, iorder, mo_num) +! +! do i = 1, mo_num +! k = iorder(i) +! print*,'fock_energies_new(i) = ',fock_energies_tmp(i) +! do j = 1, ao_num +! new_mo_coef(j,i) = mo_coef(j,k) +! enddo +! enddo + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ', trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end diff --git a/src/mo_two_e_ints/core_quantities.irp.f b/src/mo_two_e_ints/core_quantities.irp.f index 3642365e..2afdcf89 100644 --- a/src/mo_two_e_ints/core_quantities.irp.f +++ b/src/mo_two_e_ints/core_quantities.irp.f @@ -59,3 +59,45 @@ BEGIN_PROVIDER [ double precision, h_core_ri, (mo_num, mo_num) ] enddo END_PROVIDER + +BEGIN_PROVIDER [ double precision, h_act_ri, (mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Active Hamiltonian with 3-index exchange integrals: + ! + ! $\tilde{h}{pq} = h_{pq} - \frac{1}{2}\sum_{k} g(pk,kq)$ + END_DOC + + integer :: i,j, k + integer :: p,q, r + ! core-core contribution + h_act_ri = core_fock_operator + !print *,' Bef----hact(1,14)=',h_act_ri(4,14) + ! act-act contribution + do p=1,n_act_orb + j=list_act(p) + do q=1,n_act_orb + i=list_act(q) + h_act_ri(i,j) = mo_one_e_integrals(i,j) + enddo + do r=1,n_act_orb + k=list_act(r) + do q=1,n_act_orb + i=list_act(q) + h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j) + enddo + enddo + enddo + ! core-act contribution + !do p=1,n_act_orb + ! j=list_core(p) + ! do k=1,n_core_orb + ! do q=1,n_act_orb + ! i=list_act(q) + ! h_act_ri(i,j) = h_act_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j) + ! enddo + ! enddo + !enddo + !print *,' Aft----hact(1,14)=',h_act_ri(4,14), mo_one_e_integrals(4,14) +END_PROVIDER + diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e.irp.f index 0d5f8542..4bfb2da3 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e.irp.f @@ -49,6 +49,8 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) if(Ne(1)+Ne(2).ge.3)then !! ! alpha/alpha/beta three-body + double precision :: accu + accu = 0.d0 do i = 1, Ne(1) ii = occ(i,1) do j = i+1, Ne(1) @@ -60,11 +62,14 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR hthree += direct_int - exchange_int + accu += direct_int - exchange_int enddo enddo enddo + print*,'aab = ',accu ! beta/beta/alpha three-body + accu = 0.d0 do i = 1, Ne(2) ii = occ(i,2) do j = i+1, Ne(2) @@ -74,11 +79,14 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) hthree += direct_int - exchange_int + accu += direct_int - exchange_int enddo enddo enddo + print*,'abb = ',accu ! alpha/alpha/alpha three-body + accu = 0.d0 do i = 1, Ne(1) ii = occ(i,1) ! 1 do j = i+1, Ne(1) @@ -87,11 +95,14 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) mm = occ(m,1) ! 3 ! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS + accu += three_e_diag_parrallel_spin(mm,jj,ii) enddo enddo enddo + print*,'aaa = ',accu ! beta/beta/beta three-body + accu = 0.d0 do i = 1, Ne(2) ii = occ(i,2) ! 1 do j = i+1, Ne(2) @@ -100,9 +111,11 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) mm = occ(m,2) ! 3 ! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS + accu += three_e_diag_parrallel_spin(mm,jj,ii) enddo enddo enddo + print*,'bbb = ',accu endif end diff --git a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f index 406ee9e3..4ae44148 100644 --- a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f +++ b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f @@ -2,44 +2,79 @@ &BEGIN_PROVIDER [ double precision, leigvec_tc_cisd_sc2_bi_ortho, (N_det,N_states)] &BEGIN_PROVIDER [ double precision, eigval_tc_cisd_sc2_bi_ortho, (N_states)] implicit none - integer :: it,n_real,degree,i - double precision :: e_before, e_current,thr, hmono,htwoe,hthree + integer :: it,n_real,degree,i,istate + double precision :: e_before, e_current,thr, hmono,htwoe,hthree,accu double precision, allocatable :: e_corr_dets(:),h0j(:), h_sc2(:,:), dressing_dets(:) double precision, allocatable :: leigvec_tc_bi_orth_tmp(:,:),reigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) allocate(leigvec_tc_bi_orth_tmp(N_det,N_det),reigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det)) allocate(e_corr_dets(N_det),h0j(N_det),h_sc2(N_det,N_det),dressing_dets(N_det)) + allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),eigval_tmp(N_states)) + dressing_dets = 0.d0 do i = 1, N_det + call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) endif enddo + reigvec_tc_bi_orth_tmp = 0.d0 do i = 1, N_det - e_corr_dets(i) = reigvec_tc_bi_orth(i,1) * h0j(i)/reigvec_tc_bi_orth(1,1) + reigvec_tc_bi_orth_tmp(i,1) = psi_r_coef_bi_ortho(i,1) enddo - print*,'Starting from ',eigval_right_tc_bi_orth(1) + vec_tmp = 0.d0 + do istate = 1, N_states + vec_tmp(:,istate) = reigvec_tc_bi_orth_tmp(:,istate) + enddo + do istate = N_states+1, n_states_diag + vec_tmp(istate,istate) = 1.d0 + enddo + print*,'Diagonalizing the TC CISD ' + call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + do i = 1, N_det + e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) + enddo + E_before = eigval_tmp(1) + print*,'Starting from ',E_before - e_before = 0.d0 e_current = 10.d0 thr = 1.d-5 it = 0 dressing_dets = 0.d0 + double precision, allocatable :: H_jj(:),vec_tmp(:,:),eigval_tmp(:) + external htc_bi_ortho_calc_tdav + external htcdag_bi_ortho_calc_tdav + logical :: converged do while (dabs(E_before-E_current).gt.thr) it += 1 E_before = E_current - h_sc2 = htilde_matrix_elmt_bi_ortho +! h_sc2 = htilde_matrix_elmt_bi_ortho call get_cisd_sc2_dressing(psi_det,e_corr_dets,N_det,dressing_dets) do i = 1, N_det - print*,'dressing_dets(i) = ',dressing_dets(i) +! print*,'dressing_dets(i) = ',dressing_dets(i) h_sc2(i,i) += dressing_dets(i) enddo - call non_hrmt_real_diag(N_det,h_sc2,& - leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& - n_real,eigval_right_tmp) + print*,'********************' + print*,'iteration ',it +! call non_hrmt_real_diag(N_det,h_sc2,& +! leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& +! n_real,eigval_right_tmp) +! print*,'eigval_right_tmp(1)',eigval_right_tmp(1) + vec_tmp = 0.d0 + do istate = 1, N_states + vec_tmp(:,istate) = reigvec_tc_bi_orth_tmp(:,istate) + enddo + do istate = N_states+1, n_states_diag + vec_tmp(istate,istate) = 1.d0 + enddo + call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + print*,'outside Davidson' + print*,'eigval_tmp(1) = ',eigval_tmp(1) do i = 1, N_det + reigvec_tc_bi_orth_tmp(i,1) = vec_tmp(i,1) e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) enddo - E_current = eigval_right_tmp(1) +! E_current = eigval_right_tmp(1) + E_current = eigval_tmp(1) print*,'it, E(SC)^2 = ',it,E_current enddo eigval_tc_cisd_sc2_bi_ortho(1:N_states) = eigval_right_tmp(1:N_states) diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f index d585ce6c..a49a5958 100644 --- a/src/tc_bi_ortho/test_tc_fock.irp.f +++ b/src/tc_bi_ortho/test_tc_fock.irp.f @@ -13,105 +13,44 @@ program test_tc_fock !call routine_1 !call routine_2 - call routine_3() +! call routine_3() + call test_3e end ! --- -subroutine routine_0 +subroutine test_3e implicit none - use bitmasks ! you need to include the bitmasks_module.f90 features - integer :: i,a,j,m,i_ok - integer :: exc(0:2,2,2),h1,p1,s1,h2,p2,s2,degree - - integer(bit_kind), allocatable :: det_i(:,:) - double precision :: hmono,htwoe,hthree,htilde_ij,phase - double precision :: same, op, tot, accu - allocate(det_i(N_int,2)) - s1 = 1 - accu = 0.d0 - do i = 1, elec_alpha_num ! occupied - do a = elec_alpha_num+1, mo_num ! virtual - det_i = ref_bitmask - call do_single_excitation(det_i,i,a,s1,i_ok) - if(i_ok == -1)then - print*,'PB !!' - print*,i,a - stop - endif -! call debug_det(det_i,N_int) - call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - op = fock_3_mat_a_op_sh_bi_orth(a,i) - same = fock_3_mat_a_sa_sh_bi_orth(a,i) -! same = 0.d0 - tot = same + op - if(dabs(tot - phase*hthree).gt.1.d-10)then - print*,'------' - print*,i,a,phase - print*,'hthree = ',phase*hthree - print*,'fock = ',tot - print*,'same,op= ',same,op - print*,dabs(tot - phase*hthree) - stop - endif - accu += dabs(tot - phase*hthree) - enddo - enddo + double precision :: integral_aaa,integral_aab,integral_abb,integral_bbb,accu + double precision :: hmono, htwoe, hthree, htot + call htilde_mu_mat_bi_ortho(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) +! call diag_htilde_three_body_ints_bi_ort(N_int, ref_bitmask, hthree) + print*,'hmono = ',hmono + print*,'htwoe = ',htwoe + print*,'hthree= ',hthree + print*,'htot = ',htot + print*,'' + print*,'' + print*,'TC_one= ',TC_HF_one_electron_energy + print*,'TC_two= ',TC_HF_two_e_energy + print*,'TC_3e = ',diag_three_elem_hf + print*,'TC_tot= ',TC_HF_energy + print*,'' + print*,'' + call give_aaa_contrib(integral_aaa) + print*,'integral_aaa = ',integral_aaa + call give_aab_contrib(integral_aab) + print*,'integral_aab = ',integral_aab + call give_abb_contrib(integral_abb) + print*,'integral_abb = ',integral_abb + call give_bbb_contrib(integral_bbb) + print*,'integral_bbb = ',integral_bbb + accu = integral_aaa + integral_aab + integral_abb + integral_bbb print*,'accu = ',accu + print*,'delta = ',hthree - accu -end subroutine routine_0 - -! --- - -subroutine routine_1 - - implicit none - integer :: i, a - double precision :: accu - - accu = 0.d0 - do i = 1, mo_num - do a = 1, mo_num - accu += dabs( fock_3_mat_a_op_sh_bi_orth_old(a,i) - fock_3_mat_a_op_sh_bi_orth(a,i) ) - !if(dabs( fock_3_mat_a_op_sh_bi_orth_old(a,i) - fock_3_mat_a_op_sh_bi_orth(a,i) ) .gt. 1.d-10)then - print*, i, a - print*, dabs( fock_3_mat_a_op_sh_bi_orth_old(a,i) - fock_3_mat_a_op_sh_bi_orth(a,i) ) & - , fock_3_mat_a_op_sh_bi_orth_old(a,i), fock_3_mat_a_op_sh_bi_orth(a,i) - !endif - enddo - enddo - - print *, 'accu = ', accu - -end subroutine routine_1 - -! --- - -subroutine routine_2 - - implicit none - integer :: i, a - double precision :: accu - - accu = 0.d0 - do i = 1, mo_num - do a = 1, mo_num - accu += dabs( fock_3_mat_a_sa_sh_bi_orth_old(a,i) - fock_3_mat_a_sa_sh_bi_orth(a,i) ) - !if(dabs( fock_3_mat_a_sa_sh_bi_orth_old(a,i) - fock_3_mat_a_sa_sh_bi_orth(a,i) ) .gt. 1.d-10)then - print*, i, a - print*, dabs( fock_3_mat_a_sa_sh_bi_orth_old(a,i) - fock_3_mat_a_sa_sh_bi_orth(a,i) ) & - , fock_3_mat_a_sa_sh_bi_orth_old(a,i), fock_3_mat_a_sa_sh_bi_orth(a,i) - !endif - enddo - enddo - - print *, 'accu = ', accu - -end subroutine routine_2 - -! --- +end subroutine routine_3() diff --git a/src/tc_scf/fock_three.irp.f b/src/tc_scf/fock_three.irp.f index e9ad4ce5..f73a5049 100644 --- a/src/tc_scf/fock_three.irp.f +++ b/src/tc_scf/fock_three.irp.f @@ -74,39 +74,46 @@ BEGIN_PROVIDER [double precision, diag_three_elem_hf] implicit none integer :: i,j,k,ipoint,mm double precision :: contrib,weight,four_third,one_third,two_third,exchange_int_231 - if(.not.bi_ortho)then - if(three_body_h_tc)then - one_third = 1.d0/3.d0 - two_third = 2.d0/3.d0 - four_third = 4.d0/3.d0 + print*,'providing diag_three_elem_hf' + if(.not.three_body_h_tc)then diag_three_elem_hf = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do k = 1, elec_beta_num - call give_integrals_3_body(k,j,i,j,i,k,exchange_int_231) - diag_three_elem_hf += two_third * exchange_int_231 + else + if(.not.bi_ortho)then + one_third = 1.d0/3.d0 + two_third = 2.d0/3.d0 + four_third = 4.d0/3.d0 + diag_three_elem_hf = 0.d0 + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do k = 1, elec_beta_num + call give_integrals_3_body(k,j,i,j,i,k,exchange_int_231) + diag_three_elem_hf += two_third * exchange_int_231 + enddo enddo enddo - enddo - do mm = 1, 3 - do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & - -2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm) & - -1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) - contrib *= four_third - contrib += -two_third * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) & - - four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm) - diag_three_elem_hf += weight * contrib + do mm = 1, 3 + do ipoint = 1, n_points_final_grid + weight = final_weight_at_r_vector(ipoint) + contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & + -2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm) & + -1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) + contrib *= four_third + contrib += -two_third * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm) & + - four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm) + diag_three_elem_hf += weight * contrib + enddo enddo - enddo - diag_three_elem_hf = - diag_three_elem_hf - else - diag_three_elem_hf = 0.D0 + diag_three_elem_hf = - diag_three_elem_hf + else + double precision :: integral_aaa,hthree, integral_aab,integral_abb,integral_bbb + provide mo_l_coef mo_r_coef + call give_aaa_contrib(integral_aaa) + call give_aab_contrib(integral_aab) + call give_abb_contrib(integral_abb) + call give_bbb_contrib(integral_bbb) + diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb endif - else - diag_three_elem_hf = 0.D0 - endif + endif END_PROVIDER diff --git a/src/tc_scf/fock_three_bi_ortho.irp.f b/src/tc_scf/fock_three_bi_ortho.irp.f index 6960ebc2..279670b8 100644 --- a/src/tc_scf/fock_three_bi_ortho.irp.f +++ b/src/tc_scf/fock_three_bi_ortho.irp.f @@ -1,160 +1,178 @@ - -! --- - -BEGIN_PROVIDER [ double precision, fock_3_mat_a_op_sh_bi_orth_old, (mo_num, mo_num)] - - BEGIN_DOC - ! Fock matrix for opposite spin contribution for bi ortho - END_DOC - - implicit none - integer :: j, m, i, a - double precision :: direct_int, exch_int - - fock_3_mat_a_op_sh_bi_orth_old = 0.d0 - - do i = 1, mo_num ! alpha single excitation - do a = 1, mo_num ! alpha single excitation - - ! --- - - do j = 1, elec_beta_num - do m = 1, elec_beta_num - call give_integrals_3_body_bi_ort(a, m, j, i, m, j, direct_int) - fock_3_mat_a_op_sh_bi_orth_old(a,i) += 1.d0 * direct_int - call give_integrals_3_body_bi_ort(a, m, j, j, m, i, exch_int) - fock_3_mat_a_op_sh_bi_orth_old(a,i) += -1.d0 * exch_int - enddo - enddo - - ! --- - - do j = 1, elec_beta_num ! beta - do m = j+1, elec_beta_num ! beta - call give_integrals_3_body_bi_ort(a, m, j, i, m, j, direct_int) - fock_3_mat_a_op_sh_bi_orth_old(a,i) += 1.d0 * direct_int - call give_integrals_3_body_bi_ort(a, m, j, i, j, m, exch_int) - fock_3_mat_a_op_sh_bi_orth_old(a,i) += -1.d0 * exch_int - enddo - enddo - - ! --- - +BEGIN_PROVIDER [ double precision, fock_a_abb_3e_bi_orth_old, (mo_num, mo_num)] + implicit none + BEGIN_DOC +! fock_a_abb_3e_bi_orth_old(a,i) = bi-ortho 3-e Fock matrix for alpha electrons from alpha,beta,beta contribution + END_DOC + fock_a_abb_3e_bi_orth_old = 0.d0 + integer :: i,a,j,k + double precision :: direct_int, exch_23_int + do i = 1, mo_num + do a = 1, mo_num + + do j = 1, elec_beta_num + do k = j+1, elec_beta_num + ! see contrib_3e_soo + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j > + call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23 + fock_a_abb_3e_bi_orth_old(a,i) += direct_int - exch_23_int + enddo enddo - enddo - - fock_3_mat_a_op_sh_bi_orth_old = - fock_3_mat_a_op_sh_bi_orth_old + enddo + enddo + fock_a_abb_3e_bi_orth_old = - fock_a_abb_3e_bi_orth_old END_PROVIDER -! --- - -BEGIN_PROVIDER [ double precision, fock_3_mat_a_op_sh_bi_orth, (mo_num, mo_num)] - - BEGIN_DOC - ! Fock matrix for opposite spin contribution for bi ortho - END_DOC - - implicit none - integer :: i, a - double precision :: integral1, integral2, integral3 - - fock_3_mat_a_op_sh_bi_orth = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, a, integral1, integral2, integral3) & - !$OMP SHARED (mo_num, fock_3_mat_a_op_sh_bi_orth) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num ! alpha single excitation - do a = 1, mo_num ! alpha single excitation - - call direct_term_imj_bi_ortho(a, i, integral1) - call exch_term_jmi_bi_ortho (a, i, integral2) - call exch_term_ijm_bi_ortho (a, i, integral3) - - fock_3_mat_a_op_sh_bi_orth(a,i) += 1.5d0 * integral1 - integral2 - 0.5d0 * integral3 +BEGIN_PROVIDER [ double precision, fock_a_aba_3e_bi_orth_old, (mo_num, mo_num)] + implicit none + BEGIN_DOC +! fock_a_aba_3e_bi_orth_old(a,i) = bi-ortho 3-e Fock matrix for alpha electrons from alpha,alpha,beta contribution + END_DOC + fock_a_aba_3e_bi_orth_old = 0.d0 + integer :: i,a,j,k + double precision :: direct_int, exch_13_int + do i = 1, mo_num + do a = 1, mo_num + + do j = 1, elec_alpha_num ! a + do k = 1, elec_beta_num ! b + ! a b a a b a + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j > + call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 + fock_a_aba_3e_bi_orth_old(a,i) += direct_int - exch_13_int enddo + enddo + enddo - !$OMP END DO - !$OMP END PARALLEL - - fock_3_mat_a_op_sh_bi_orth = - fock_3_mat_a_op_sh_bi_orth - + enddo + fock_a_aba_3e_bi_orth_old = - fock_a_aba_3e_bi_orth_old END_PROVIDER -! --- - -BEGIN_PROVIDER [ double precision, fock_3_mat_a_sa_sh_bi_orth_old, (mo_num, mo_num)] - - BEGIN_DOC - ! Fock matrix for same spin contribution for bi ortho - END_DOC - - implicit none - integer :: j, m, i, a - double precision :: direct_int, cyclic_1, cyclic_2, non_cyclic_1, non_cyclic_2, non_cyclic_3 - - fock_3_mat_a_sa_sh_bi_orth_old = 0.d0 - - do i = 1, mo_num - do a = 1, mo_num - do j = 1, elec_beta_num - do m = j+1, elec_beta_num - - call give_integrals_3_body_bi_ort(a, m, j, i, m, j, direct_int) - call give_integrals_3_body_bi_ort(a, m, j, j, i, m, cyclic_1) - call give_integrals_3_body_bi_ort(a, m, j, m, j, i, cyclic_2) - fock_3_mat_a_sa_sh_bi_orth_old(a,i) += direct_int + cyclic_1 + cyclic_2 - - call give_integrals_3_body_bi_ort(a, m, j, j, m, i, non_cyclic_1) - call give_integrals_3_body_bi_ort(a, m, j, i, j, m, non_cyclic_2) - call give_integrals_3_body_bi_ort(a, m, j, m, i, j, non_cyclic_3) - fock_3_mat_a_sa_sh_bi_orth_old(a,i) += -1.d0 * (non_cyclic_1 + non_cyclic_2 + non_cyclic_3) - - enddo - enddo +BEGIN_PROVIDER [ double precision, fock_a_aaa_3e_bi_orth_old, (mo_num, mo_num)] + implicit none + BEGIN_DOC +! fock_a_aaa_3e_bi_orth_old(a,i) = bi-ortho 3-e Fock matrix for alpha electrons from alpha,alpha,alpha contribution + END_DOC + fock_a_aaa_3e_bi_orth_old = 0.d0 + integer :: i,a,j,k + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int + do i = 1, mo_num + do a = 1, mo_num + + do j = 1, elec_alpha_num + do k = j+1, elec_alpha_num + ! positive terms :: cycle contrib + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j > + call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int) ! < a k j | j i k > + call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i > + fock_a_aaa_3e_bi_orth_old(a,i) += direct_int + c_3_int + c_minus_3_int + ! negative terms :: exchange contrib + call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 + call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 + call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12 + fock_a_aaa_3e_bi_orth_old(a,i) += - exch_13_int - exch_23_int - exch_12_int enddo - enddo + enddo - fock_3_mat_a_sa_sh_bi_orth_old = -fock_3_mat_a_sa_sh_bi_orth_old + enddo + enddo + fock_a_aaa_3e_bi_orth_old = - fock_a_aaa_3e_bi_orth_old +END_PROVIDER + +BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth_old, (mo_num, mo_num)] + implicit none + BEGIN_DOC + ! fock_a_tot_3e_bi_orth_old = bi-ortho 3-e Fock matrix for alpha electrons from all possible spin contributions + END_DOC + fock_a_tot_3e_bi_orth_old = fock_a_abb_3e_bi_orth_old + fock_a_aba_3e_bi_orth_old + fock_a_aaa_3e_bi_orth_old END_PROVIDER -! --- - -BEGIN_PROVIDER [ double precision, fock_3_mat_a_sa_sh_bi_orth, (mo_num, mo_num)] - - BEGIN_DOC - ! Fock matrix for same spin contribution for bi ortho - END_DOC - - implicit none - integer :: j, m, i, a - double precision :: integral1, integral2, integral3, integral4 - - fock_3_mat_a_sa_sh_bi_orth = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, a, integral1, integral2, integral3, integral4) & - !$OMP SHARED (mo_num, fock_3_mat_a_sa_sh_bi_orth) - !$OMP DO SCHEDULE (dynamic) - do i = 1, mo_num - do a = 1, mo_num - call direct_term_imj_bi_ortho(a, i, integral1) - call cyclic_term_jim_bi_ortho(a, i, integral2) - call exch_term_jmi_bi_ortho (a, i, integral3) - call exch_term_ijm_bi_ortho (a, i, integral4) - fock_3_mat_a_sa_sh_bi_orth(a,i) += 0.5d0 * (integral1 - integral4) + integral2 - integral3 +BEGIN_PROVIDER [ double precision, fock_b_baa_3e_bi_orth_old, (mo_num, mo_num)] + implicit none + BEGIN_DOC +! fock_b_baa_3e_bi_orth_old(a,i) = bi-ortho 3-e Fock matrix for beta electrons from beta,alpha,alpha contribution + END_DOC + fock_b_baa_3e_bi_orth_old = 0.d0 + integer :: i,a,j,k + double precision :: direct_int, exch_23_int + do i = 1, mo_num + do a = 1, mo_num + + do j = 1, elec_alpha_num + do k = j+1, elec_alpha_num + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j > + call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23 + fock_b_baa_3e_bi_orth_old(a,i) += direct_int - exch_23_int enddo + enddo + enddo - !$OMP END DO - !$OMP END PARALLEL - - fock_3_mat_a_sa_sh_bi_orth = -fock_3_mat_a_sa_sh_bi_orth - + enddo + fock_b_baa_3e_bi_orth_old = - fock_b_baa_3e_bi_orth_old END_PROVIDER -! --- +BEGIN_PROVIDER [ double precision, fock_b_bab_3e_bi_orth_old, (mo_num, mo_num)] + implicit none + BEGIN_DOC +! fock_b_bab_3e_bi_orth_old(a,i) = bi-ortho 3-e Fock matrix for beta electrons from beta,alpha,beta contribution + END_DOC + fock_b_bab_3e_bi_orth_old = 0.d0 + integer :: i,a,j,k + double precision :: direct_int, exch_13_int + do i = 1, mo_num + do a = 1, mo_num + + do j = 1, elec_beta_num + do k = 1, elec_alpha_num + ! b a b b a b + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j > + call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 + fock_b_bab_3e_bi_orth_old(a,i) += direct_int - exch_13_int + enddo + enddo + enddo + enddo + fock_b_bab_3e_bi_orth_old = - fock_b_bab_3e_bi_orth_old +END_PROVIDER + +BEGIN_PROVIDER [ double precision, fock_b_bbb_3e_bi_orth_old, (mo_num, mo_num)] + implicit none + BEGIN_DOC +! fock_b_bbb_3e_bi_orth_old(a,i) = bi-ortho 3-e Fock matrix for alpha electrons from alpha,alpha,alpha contribution + END_DOC + fock_b_bbb_3e_bi_orth_old = 0.d0 + integer :: i,a,j,k + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int + do i = 1, mo_num + do a = 1, mo_num + + do j = 1, elec_beta_num + do k = j+1, elec_beta_num + ! positive terms :: cycle contrib + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j > + call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int) ! < a k j | j i k > + call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i > + fock_b_bbb_3e_bi_orth_old(a,i) += direct_int + c_3_int + c_minus_3_int + ! negative terms :: exchange contrib + call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 + call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 + call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12 + fock_b_bbb_3e_bi_orth_old(a,i) += - exch_13_int - exch_23_int - exch_12_int + enddo + enddo + + enddo + enddo + fock_b_bbb_3e_bi_orth_old = - fock_b_bbb_3e_bi_orth_old +END_PROVIDER + +BEGIN_PROVIDER [ double precision, fock_b_tot_3e_bi_orth_old, (mo_num, mo_num)] + implicit none + BEGIN_DOC + ! fock_b_tot_3e_bi_orth_old = bi-ortho 3-e Fock matrix for alpha electrons from all possible spin contributions + END_DOC + fock_b_tot_3e_bi_orth_old = fock_b_bbb_3e_bi_orth_old + fock_b_bab_3e_bi_orth_old + fock_b_baa_3e_bi_orth_old + +END_PROVIDER diff --git a/src/tc_scf/fock_three_bi_ortho_new.irp.f b/src/tc_scf/fock_three_bi_ortho_new.irp.f deleted file mode 100644 index 004a2aa4..00000000 --- a/src/tc_scf/fock_three_bi_ortho_new.irp.f +++ /dev/null @@ -1,177 +0,0 @@ -BEGIN_PROVIDER [ double precision, fock_a_abb_3e_bi_orth, (mo_num, mo_num)] - implicit none - BEGIN_DOC -! fock_a_abb_3e_bi_orth(a,i) = bi-ortho 3-e Fock matrix for alpha electrons from alpha,beta,beta contribution - END_DOC - fock_a_abb_3e_bi_orth = 0.d0 - integer :: i,a,j,k - double precision :: direct_int, exch_23_int - do i = 1, mo_num - do a = 1, mo_num - - do j = 1, elec_beta_num - do k = j+1, elec_beta_num - call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j > - call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23 - fock_a_abb_3e_bi_orth(a,i) += direct_int - exch_23_int - enddo - enddo - - enddo - enddo - fock_a_abb_3e_bi_orth = - fock_a_abb_3e_bi_orth -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_a_aba_3e_bi_orth, (mo_num, mo_num)] - implicit none - BEGIN_DOC -! fock_a_aba_3e_bi_orth(a,i) = bi-ortho 3-e Fock matrix for alpha electrons from alpha,alpha,beta contribution - END_DOC - fock_a_aba_3e_bi_orth = 0.d0 - integer :: i,a,j,k - double precision :: direct_int, exch_13_int - do i = 1, mo_num - do a = 1, mo_num - - do j = 1, elec_alpha_num ! a - do k = 1, elec_beta_num ! b - ! a b a a b a - call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j > - call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 - fock_a_aba_3e_bi_orth(a,i) += direct_int - exch_13_int - enddo - enddo - - enddo - enddo - fock_a_aba_3e_bi_orth = - fock_a_aba_3e_bi_orth -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_a_aaa_3e_bi_orth, (mo_num, mo_num)] - implicit none - BEGIN_DOC -! fock_a_aaa_3e_bi_orth(a,i) = bi-ortho 3-e Fock matrix for alpha electrons from alpha,alpha,alpha contribution - END_DOC - fock_a_aaa_3e_bi_orth = 0.d0 - integer :: i,a,j,k - double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int - do i = 1, mo_num - do a = 1, mo_num - - do j = 1, elec_alpha_num - do k = j+1, elec_alpha_num - ! positive terms :: cycle contrib - call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j > - call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int) ! < a k j | j i k > - call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i > - fock_a_aaa_3e_bi_orth(a,i) += direct_int + c_3_int + c_minus_3_int - ! negative terms :: exchange contrib - call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 - call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 - call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12 - fock_a_aaa_3e_bi_orth(a,i) += - exch_13_int - exch_23_int - exch_12_int - enddo - enddo - - enddo - enddo - fock_a_aaa_3e_bi_orth = - fock_a_aaa_3e_bi_orth -END_PROVIDER - -BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)] - implicit none - BEGIN_DOC - ! fock_a_tot_3e_bi_orth = bi-ortho 3-e Fock matrix for alpha electrons from all possible spin contributions - END_DOC - fock_a_tot_3e_bi_orth = fock_a_abb_3e_bi_orth + fock_a_aba_3e_bi_orth + fock_a_aaa_3e_bi_orth - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_b_baa_3e_bi_orth, (mo_num, mo_num)] - implicit none - BEGIN_DOC -! fock_b_baa_3e_bi_orth(a,i) = bi-ortho 3-e Fock matrix for beta electrons from beta,alpha,alpha contribution - END_DOC - fock_b_baa_3e_bi_orth = 0.d0 - integer :: i,a,j,k - double precision :: direct_int, exch_23_int - do i = 1, mo_num - do a = 1, mo_num - - do j = 1, elec_alpha_num - do k = j+1, elec_alpha_num - call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j > - call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23 - fock_b_baa_3e_bi_orth(a,i) += direct_int - exch_23_int - enddo - enddo - - enddo - enddo - fock_b_baa_3e_bi_orth = - fock_b_baa_3e_bi_orth -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_b_bab_3e_bi_orth, (mo_num, mo_num)] - implicit none - BEGIN_DOC -! fock_b_bab_3e_bi_orth(a,i) = bi-ortho 3-e Fock matrix for beta electrons from beta,alpha,beta contribution - END_DOC - fock_b_bab_3e_bi_orth = 0.d0 - integer :: i,a,j,k - double precision :: direct_int, exch_13_int - do i = 1, mo_num - do a = 1, mo_num - - do j = 1, elec_beta_num - do k = 1, elec_alpha_num - ! b a b b a b - call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j > - call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 - fock_b_bab_3e_bi_orth(a,i) += direct_int - exch_13_int - enddo - enddo - - enddo - enddo - fock_b_bab_3e_bi_orth = - fock_b_bab_3e_bi_orth -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_b_bbb_3e_bi_orth, (mo_num, mo_num)] - implicit none - BEGIN_DOC -! fock_b_bbb_3e_bi_orth(a,i) = bi-ortho 3-e Fock matrix for alpha electrons from alpha,alpha,alpha contribution - END_DOC - fock_b_bbb_3e_bi_orth = 0.d0 - integer :: i,a,j,k - double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int - do i = 1, mo_num - do a = 1, mo_num - - do j = 1, elec_beta_num - do k = j+1, elec_beta_num - ! positive terms :: cycle contrib - call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j > - call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int) ! < a k j | j i k > - call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i > - fock_b_bbb_3e_bi_orth(a,i) += direct_int + c_3_int + c_minus_3_int - ! negative terms :: exchange contrib - call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 - call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 - call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12 - fock_b_bbb_3e_bi_orth(a,i) += - exch_13_int - exch_23_int - exch_12_int - enddo - enddo - - enddo - enddo - fock_b_bbb_3e_bi_orth = - fock_b_bbb_3e_bi_orth -END_PROVIDER - -BEGIN_PROVIDER [ double precision, fock_b_tot_3e_bi_orth, (mo_num, mo_num)] - implicit none - BEGIN_DOC - ! fock_b_tot_3e_bi_orth = bi-ortho 3-e Fock matrix for alpha electrons from all possible spin contributions - END_DOC - fock_b_tot_3e_bi_orth = fock_b_bbb_3e_bi_orth + fock_b_bab_3e_bi_orth + fock_b_baa_3e_bi_orth - -END_PROVIDER diff --git a/src/tc_scf/fock_three_bi_ortho_new_new.irp.f b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f new file mode 100644 index 00000000..b0345957 --- /dev/null +++ b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f @@ -0,0 +1,202 @@ + +BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)] + implicit none + integer :: i,a,j,k + double precision :: contrib_sss, contrib_sos, contrib_soo,contrib + fock_a_tot_3e_bi_orth = 0.d0 + do i = 1, mo_num + do a = 1, mo_num + fock_a_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth(a,i) + fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp1_bi_ortho(a,i) + fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp2_bi_ortho(a,i) + enddo + enddo +END_PROVIDER + +BEGIN_PROVIDER [double precision, fock_b_tot_3e_bi_orth, (mo_num, mo_num)] + implicit none + integer :: i,a,j,k + double precision :: contrib_sss, contrib_sos, contrib_soo,contrib + fock_b_tot_3e_bi_orth = 0.d0 + do i = 1, mo_num + do a = 1, mo_num + fock_b_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth(a,i) + fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp2_bi_ortho(a,i) + fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp1_bi_ortho(a,i) + enddo + enddo +END_PROVIDER + +BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)] + implicit none + integer :: i,a,j,k + double precision :: contrib_sss, contrib_sos, contrib_soo, contrib + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int + double precision :: new + fock_cs_3e_bi_orth = 0.d0 + do i = 1, mo_num + do a = 1, mo_num + + do j = 1, elec_beta_num + do k = 1, elec_beta_num +! call contrib_3e_sss(a,i,j,k,contrib_sss) +! call contrib_3e_soo(a,i,j,k,contrib_soo) +! call contrib_3e_sos(a,i,j,k,contrib_sos) +! contrib = 0.5d0 * (contrib_sss + contrib_soo) + contrib_sos + + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j > + call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int) ! < a k j | j i k > + call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i > + ! negative terms :: exchange contrib + call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 + call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 + call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12 + new = 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) & + -1.5d0 * exch_13_int - exch_23_int + fock_cs_3e_bi_orth(a,i) += new + enddo + enddo + + enddo + enddo + fock_cs_3e_bi_orth = - fock_cs_3e_bi_orth + +END_PROVIDER + + +BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)] + implicit none + integer :: i,a,j,k + double precision :: contrib_sss, contrib_sos, contrib_soo, contrib + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int + double precision :: new + fock_a_tmp1_bi_ortho = 0.d0 + do i = 1, mo_num + do a = 1, mo_num + + do j = elec_beta_num + 1, elec_alpha_num + do k = 1, elec_beta_num + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j > + call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int) ! < a k j | j i k > + call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i > + call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 + call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 + call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12 + fock_a_tmp1_bi_ortho(a,i) += 1.5d0 * (direct_int - exch_13_int) & + + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int) + enddo + enddo + + enddo + enddo + fock_a_tmp1_bi_ortho = - fock_a_tmp1_bi_ortho +END_PROVIDER + +BEGIN_PROVIDER [double precision, fock_a_tmp2_bi_ortho, (mo_num, mo_num)] + implicit none + integer :: i,a,j,k + double precision :: contrib_sss + fock_a_tmp2_bi_ortho = 0.d0 + do i = 1, mo_num + do a = 1, mo_num + do j = 1, elec_alpha_num + do k = elec_beta_num+1, elec_alpha_num + call contrib_3e_sss(a,i,j,k,contrib_sss) + fock_a_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_sss + enddo + enddo + enddo + enddo +END_PROVIDER + + + + + +BEGIN_PROVIDER [double precision, fock_b_tmp1_bi_ortho, (mo_num, mo_num)] + implicit none + integer :: i,a,j,k + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int + double precision :: new + fock_b_tmp1_bi_ortho = 0.d0 + do i = 1, mo_num + do a = 1, mo_num + + do j = 1, elec_beta_num + do k = elec_beta_num+1, elec_alpha_num + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j > + call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 + call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 + fock_b_tmp1_bi_ortho(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int + enddo + enddo + + enddo + enddo + fock_b_tmp1_bi_ortho = - fock_b_tmp1_bi_ortho +END_PROVIDER + +BEGIN_PROVIDER [double precision, fock_b_tmp2_bi_ortho, (mo_num, mo_num)] + implicit none + integer :: i,a,j,k + double precision :: contrib_soo + fock_b_tmp2_bi_ortho = 0.d0 + do i = 1, mo_num + do a = 1, mo_num + do j = elec_beta_num + 1, elec_alpha_num + do k = 1, elec_alpha_num + call contrib_3e_soo(a,i,j,k,contrib_soo) + fock_b_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_soo + enddo + enddo + enddo + enddo +END_PROVIDER + +subroutine contrib_3e_sss(a,i,j,k,integral) + implicit none + integer, intent(in) :: a,i,j,k + BEGIN_DOC + ! returns the pure same spin contribution to F(a,i) from two orbitals j,k + END_DOC + double precision, intent(out) :: integral + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j > + call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int) ! < a k j | j i k > + call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i > + integral = direct_int + c_3_int + c_minus_3_int + ! negative terms :: exchange contrib + call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 + call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 + call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12 + integral += - exch_13_int - exch_23_int - exch_12_int + integral = -integral +end + +subroutine contrib_3e_soo(a,i,j,k,integral) + implicit none + integer, intent(in) :: a,i,j,k + BEGIN_DOC + ! returns the same spin / opposite spin / opposite spin contribution to F(a,i) from two orbitals j,k + END_DOC + double precision, intent(out) :: integral + double precision :: direct_int, exch_23_int + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j > + call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23 + integral = direct_int - exch_23_int + integral = -integral +end + +subroutine contrib_3e_sos(a,i,j,k,integral) + implicit none + integer, intent(in) :: a,i,j,k + BEGIN_DOC + ! returns the same spin / opposite spin / same spin contribution to F(a,i) from two orbitals j,k + END_DOC + double precision, intent(out) :: integral + double precision :: direct_int, exch_13_int + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j > + call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 + integral = direct_int - exch_13_int + integral = -integral +end diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index bb382c7c..4a875b59 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -81,8 +81,8 @@ subroutine routine_scf() print*,'TC HF total energy = ', TC_HF_energy print*,'TC HF 1 e energy = ', TC_HF_one_electron_energy print*,'TC HF 2 e energy = ', TC_HF_two_e_energy - if(.not. bi_ortho)then - print*,'TC HF 3 body = ', diag_three_elem_hf + if(three_body_h_tc)then + print*,'TC HF 3 body = ', diag_three_elem_hf endif print*,'***' e_delta = 10.d0 @@ -124,6 +124,9 @@ subroutine routine_scf() print*,'TC HF total energy = ', TC_HF_energy print*,'TC HF 1 e energy = ', TC_HF_one_electron_energy print*,'TC HF 2 non hermit = ', TC_HF_two_e_energy + if(three_body_h_tc)then + print*,'TC HF 3 body = ', diag_three_elem_hf + endif print*,'***' e_delta = dabs( TC_HF_energy - e_save ) print*, 'it, delta E = ', it, e_delta diff --git a/src/tc_scf/three_e_energy_bi_ortho.irp.f b/src/tc_scf/three_e_energy_bi_ortho.irp.f new file mode 100644 index 00000000..64212da8 --- /dev/null +++ b/src/tc_scf/three_e_energy_bi_ortho.irp.f @@ -0,0 +1,174 @@ + +subroutine contrib_3e_diag_sss(i,j,k,integral) + implicit none + integer, intent(in) :: i,j,k + BEGIN_DOC + ! returns the pure same spin contribution to diagonal matrix element of 3e term + END_DOC + double precision, intent(out) :: integral + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int + call give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int )!!! < i k j | i k j > + call give_integrals_3_body_bi_ort(i, k, j, j, i, k, c_3_int) ! < i k j | j i k > + call give_integrals_3_body_bi_ort(i, k, j, k, j, i, c_minus_3_int)! < i k j | k j i > + integral = direct_int + c_3_int + c_minus_3_int + ! negative terms :: exchange contrib + call give_integrals_3_body_bi_ort(i, k, j, j, k, i, exch_13_int)!!! < i k j | j k i > : E_13 + call give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)!!! < i k j | i j k > : E_23 + call give_integrals_3_body_bi_ort(i, k, j, k, i, j, exch_12_int)!!! < i k j | k i j > : E_12 + integral += - exch_13_int - exch_23_int - exch_12_int + integral = -integral +end + +subroutine contrib_3e_diag_soo(i,j,k,integral) + implicit none + integer, intent(in) :: i,j,k + BEGIN_DOC + ! returns the pure same spin contribution to diagonal matrix element of 3e term + END_DOC + double precision, intent(out) :: integral + double precision :: direct_int, exch_23_int + call give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int) ! < i k j | i k j > + call give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)! < i k j | i j k > : E_23 + integral = direct_int - exch_23_int + integral = -integral +end + + +subroutine give_aaa_contrib_bis(integral_aaa) + implicit none + double precision, intent(out) :: integral_aaa + double precision :: integral + integer :: i,j,k + integral_aaa = 0.d0 + do i = 1, elec_alpha_num + do j = i+1, elec_alpha_num + do k = j+1, elec_alpha_num + call contrib_3e_diag_sss(i,j,k,integral) + integral_aaa += integral + enddo + enddo + enddo + +end + +subroutine give_aaa_contrib(integral_aaa) + implicit none + double precision, intent(out) :: integral_aaa + double precision :: integral + integer :: i,j,k + integral_aaa = 0.d0 + do i = 1, elec_alpha_num + do j = 1, elec_alpha_num + do k = 1, elec_alpha_num + call contrib_3e_diag_sss(i,j,k,integral) + integral_aaa += integral + enddo + enddo + enddo + integral_aaa *= 1.d0/6.d0 +end + + +subroutine give_aab_contrib(integral_aab) + implicit none + double precision, intent(out) :: integral_aab + double precision :: integral + integer :: i,j,k + integral_aab = 0.d0 + do i = 1, elec_beta_num + do j = 1, elec_alpha_num + do k = 1, elec_alpha_num + call contrib_3e_diag_soo(i,j,k,integral) + integral_aab += integral + enddo + enddo + enddo + integral_aab *= 0.5d0 +end + + +subroutine give_aab_contrib_bis(integral_aab) + implicit none + double precision, intent(out) :: integral_aab + double precision :: integral + integer :: i,j,k + integral_aab = 0.d0 + do i = 1, elec_beta_num + do j = 1, elec_alpha_num + do k = j+1, elec_alpha_num + call contrib_3e_diag_soo(i,j,k,integral) + integral_aab += integral + enddo + enddo + enddo +end + + +subroutine give_abb_contrib(integral_abb) + implicit none + double precision, intent(out) :: integral_abb + double precision :: integral + integer :: i,j,k + integral_abb = 0.d0 + do i = 1, elec_alpha_num + do j = 1, elec_beta_num + do k = 1, elec_beta_num + call contrib_3e_diag_soo(i,j,k,integral) + integral_abb += integral + enddo + enddo + enddo + integral_abb *= 0.5d0 +end + +subroutine give_abb_contrib_bis(integral_abb) + implicit none + double precision, intent(out) :: integral_abb + double precision :: integral + integer :: i,j,k + integral_abb = 0.d0 + do i = 1, elec_alpha_num + do j = 1, elec_beta_num + do k = j+1, elec_beta_num + call contrib_3e_diag_soo(i,j,k,integral) + integral_abb += integral + enddo + enddo + enddo +end + +subroutine give_bbb_contrib_bis(integral_bbb) + implicit none + double precision, intent(out) :: integral_bbb + double precision :: integral + integer :: i,j,k + integral_bbb = 0.d0 + do i = 1, elec_beta_num + do j = i+1, elec_beta_num + do k = j+1, elec_beta_num + call contrib_3e_diag_sss(i,j,k,integral) + integral_bbb += integral + enddo + enddo + enddo + +end + +subroutine give_bbb_contrib(integral_bbb) + implicit none + double precision, intent(out) :: integral_bbb + double precision :: integral + integer :: i,j,k + integral_bbb = 0.d0 + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do k = 1, elec_beta_num + call contrib_3e_diag_sss(i,j,k,integral) + integral_bbb += integral + enddo + enddo + enddo + integral_bbb *= 1.d0/6.d0 +end + + diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index c2bff2e8..f68465c7 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -56,7 +56,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) ! - ! WARNING ::: IF fact_k is too smal then: + ! WARNING ::: IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC @@ -86,13 +86,11 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, !DIR$ FORCEINLINE call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center) if (fact_k < thresh) then - ! IF fact_k is too smal then: + ! IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef P_center = 0.d0 p = 1.d+15 - P_new = 0.d0 - iorder = 0 fact_k = 0.d0 return endif @@ -129,6 +127,91 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, end +!--- + +subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorder,alpha,beta,a,b,A_center,B_center,n_points) + BEGIN_DOC + ! Transforms the product of + ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) + ! into + ! fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 ) + ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) + ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) + ! + ! WARNING :: : IF fact_k is too smal then: + ! returns a "s" function centered in zero + ! with an inifinite exponent and a zero polynom coef + END_DOC + implicit none + include 'constants.include.F' + integer, intent(in) :: n_points, ldp + integer, intent(in) :: a(3),b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) + double precision, intent(in) :: alpha, beta ! exponents + double precision, intent(in) :: A_center(n_points,3) ! A center + double precision, intent(in) :: B_center (3) ! B center + double precision, intent(out) :: P_center(n_points,3) ! new center + double precision, intent(out) :: p ! new exponent + double precision, intent(out) :: fact_k(n_points) ! constant factor + double precision, intent(out) :: P_new(n_points,0:ldp,3)! polynomial + integer, intent(out) :: iorder(3) ! i_order(i) = order of the polynomials + + double precision, allocatable :: P_a(:,:,:), P_b(:,:,:) + + integer :: n_new,i,j, ipoint, lda, ldb, xyz + + call gaussian_product_v(alpha,A_center,beta,B_center,fact_k,p,P_center,n_points) + + if ( ior(ior(b(1),b(2)),b(3)) == 0 ) then ! b == (0,0,0) + + lda = maxval(a) + ldb = 0 + allocate(P_a(n_points,0:lda,3), P_b(n_points,0:0,3)) + + call recentered_poly2_v0(P_a,lda,A_center,P_center,a,P_b,B_center,P_center,n_points) + + iorder(1:3) = a(1:3) + do ipoint=1,n_points + do xyz=1,3 + P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz) + do i=1,a(xyz) + P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz) + enddo + enddo + enddo + + return + + endif + + lda = maxval(a) + ldb = maxval(b) + allocate(P_a(n_points,0:lda,3), P_b(n_points,0:ldb,3)) + + call recentered_poly2_v(P_a,lda,A_center,P_center,a,P_b,ldb,B_center,P_center,b,n_points) + + iorder(1:3) = a(1:3) + b(1:3) + + do xyz=1,3 + if (b(xyz) == 0) then + do ipoint=1,n_points + P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz) + do i=1,a(xyz) + P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz) + enddo + enddo + else + do i=0,iorder(xyz) + do ipoint=1,n_points + P_new(ipoint,i,xyz) = 0.d0 + enddo + enddo + call multiply_poly_v(P_a(1,0,xyz), a(xyz),P_b(1,0,xyz),b(xyz),P_new(1,0,xyz),ldp,n_points) + endif + enddo + +end + +!- subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim) BEGIN_DOC @@ -231,6 +314,59 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp) xp(3) = (a*xa(3)+b*xb(3))*p_inv end subroutine +!--- +subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) + implicit none + BEGIN_DOC + ! Gaussian product in 1D. + ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} + ! Using multiple A centers + END_DOC + + integer, intent(in) :: n_points + double precision, intent(in) :: a,b ! Exponents + double precision, intent(in) :: xa(n_points,3),xb(3) ! Centers + double precision, intent(out) :: p ! New exponent + double precision, intent(out) :: xp(n_points,3) ! New center + double precision, intent(out) :: k(n_points) ! Constant + + double precision :: p_inv + + integer :: ipoint + ASSERT (a>0.) + ASSERT (b>0.) + + double precision :: xab(3), ab, ap, bp, bpxb(3) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab + + p = a+b + p_inv = 1.d0/(a+b) + ab = a*b*p_inv + ap = a*p_inv + bp = b*p_inv + bpxb(1) = bp*xb(1) + bpxb(2) = bp*xb(2) + bpxb(3) = bp*xb(3) + + do ipoint=1,n_points + xab(1) = xa(ipoint,1)-xb(1) + xab(2) = xa(ipoint,2)-xb(2) + xab(3) = xa(ipoint,3)-xb(3) + k(ipoint) = ab*(xab(1)*xab(1)+xab(2)*xab(2)+xab(3)*xab(3)) + if (k(ipoint) > 40.d0) then + k(ipoint)=0.d0 + xp(ipoint,1) = 0.d0 + xp(ipoint,2) = 0.d0 + xp(ipoint,3) = 0.d0 + else + k(ipoint) = dexp(-k(ipoint)) + xp(ipoint,1) = ap*xa(ipoint,1)+bpxb(1) + xp(ipoint,2) = ap*xa(ipoint,2)+bpxb(2) + xp(ipoint,3) = ap*xa(ipoint,3)+bpxb(3) + endif + enddo +end subroutine + @@ -269,6 +405,46 @@ subroutine gaussian_product_x(a,xa,b,xb,k,p,xp) end subroutine +!- + +subroutine gaussian_product_x_v(a,xa,b,xb,k,p,xp,n_points) + implicit none + BEGIN_DOC + ! Gaussian product in 1D with multiple xa + ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} + END_DOC + + integer, intent(in) :: n_points + double precision , intent(in) :: a,b ! Exponents + double precision , intent(in) :: xa(n_points),xb ! Centers + double precision , intent(out) :: p(n_points) ! New exponent + double precision , intent(out) :: xp(n_points) ! New center + double precision , intent(out) :: k(n_points) ! Constant + + double precision :: p_inv + integer :: ipoint + + ASSERT (a>0.) + ASSERT (b>0.) + + double precision :: xab, ab + + p = a+b + p_inv = 1.d0/(a+b) + ab = a*b*p_inv + do ipoint = 1, n_points + xab = xa(ipoint)-xb + k(ipoint) = ab*xab*xab + if (k(ipoint) > 40.d0) then + k(ipoint)=0.d0 + cycle + endif + k(ipoint) = exp(-k(ipoint)) + xp(ipoint) = (a*xa(ipoint)+b*xb)*p_inv + enddo +end subroutine + + @@ -313,6 +489,45 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) end +subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points) + implicit none + BEGIN_DOC + ! Multiply pairs of polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb, nc, n_points + integer, intent(in) :: nd + double precision, intent(in) :: b(n_points,0:nb), c(n_points,0:nc) + double precision, intent(inout) :: d(n_points,0:nd) + + integer :: ib, ic, id, k, ipoint + if (nd < nb+nc) then + print *, nd, nb, nc + print *, irp_here, ': nd < nb+nc' + stop 1 + endif + + do ic = 0,nc + do ipoint=1, n_points + d(ipoint,ic) = d(ipoint,ic) + c(ipoint,ic) * b(ipoint,0) + enddo + enddo + + do ib=1,nb + do ipoint=1, n_points + d(ipoint, ib) = d(ipoint, ib) + c(ipoint,0) * b(ipoint, ib) + enddo + do ic = 1,nc + do ipoint=1, n_points + d(ipoint, ib+ic) = d(ipoint, ib+ic) + c(ipoint,ic) * b(ipoint, ib) + enddo + enddo + enddo + +end + + subroutine add_poly(b,nb,c,nc,d,nd) implicit none BEGIN_DOC @@ -404,22 +619,152 @@ subroutine recentered_poly2(P_new,x_A,x_P,a,P_new2,x_B,x_Q,b) do i = minab+1,min(b,20) P_new2(i) = binom_transp(b-i,b) * pows_b(b-i) enddo - do i = 101,a + do i = 21,a P_new(i) = binom_func(a,a-i) * pows_a(a-i) enddo - do i = 101,b + do i = 21,b P_new2(i) = binom_func(b,b-i) * pows_b(b-i) enddo end -subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) +!- +subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) + implicit none + BEGIN_DOC + ! Recenter two polynomials + END_DOC + integer, intent(in) :: a(3),b(3), n_points, lda, ldb + double precision, intent(in) :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3) + double precision, intent(out) :: P_new(n_points,0:lda,3),P_new2(n_points,0:ldb,3) + double precision :: binom_func + integer :: i,j,k,l, minab(3), maxab(3),ipoint, xyz + double precision, allocatable :: pows_a(:,:), pows_b(:,:) + double precision :: fa, fb + + maxab(1:3) = max(a(1:3),b(1:3)) + minab(1:3) = max(min(a(1:3),b(1:3)),(/0,0,0/)) + + allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) ) + + + do xyz=1,3 + if ((a(xyz)<0).or.(b(xyz)<0) ) cycle + do ipoint=1,n_points + pows_a(ipoint,0) = 1.d0 + pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz)) + pows_b(ipoint,0) = 1.d0 + pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz)) + enddo + do i = 2,maxab(xyz) + do ipoint=1,n_points + pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1) + pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1) + enddo + enddo + do ipoint=1,n_points + P_new (ipoint,0,xyz) = pows_a(ipoint,a(xyz)) + P_new2(ipoint,0,xyz) = pows_b(ipoint,b(xyz)) + enddo + do i = 1,min(minab(xyz),20) + fa = binom_transp(a(xyz)-i,a(xyz)) + fb = binom_transp(b(xyz)-i,b(xyz)) + do ipoint=1,n_points + P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) + P_new2(ipoint,i,xyz) = fb * pows_b(ipoint,b(xyz)-i) + enddo + enddo + do i = minab(xyz)+1,min(a(xyz),20) + fa = binom_transp(a(xyz)-i,a(xyz)) + do ipoint=1,n_points + P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) + enddo + enddo + do i = minab(xyz)+1,min(b(xyz),20) + fb = binom_transp(b(xyz)-i,b(xyz)) + do ipoint=1,n_points + P_new2(ipoint,i,xyz) = fb * pows_b(ipoint,b(xyz)-i) + enddo + enddo + do i = 21,a(xyz) + fa = binom_func(a(xyz),a(xyz)-i) + do ipoint=1,n_points + P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) + enddo + enddo + do i = 21,b(xyz) + fb = binom_func(b(xyz),b(xyz)-i) + do ipoint=1,n_points + P_new2(ipoint,i,xyz) = fb * pows_b(ipoint,b(xyz)-i) + enddo + enddo + enddo +end + + +subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points) + implicit none + BEGIN_DOC + ! Recenter two polynomials. Special case for b=(0,0,0) + END_DOC + integer, intent(in) :: a(3), n_points, lda + double precision, intent(in) :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3) + double precision, intent(out) :: P_new(n_points,0:lda,3),P_new2(n_points,3) + double precision :: binom_func + integer :: i,j,k,l, xyz, ipoint, maxab(3) + double precision, allocatable :: pows_a(:,:), pows_b(:,:) + double precision :: fa + + maxab(1:3) = max(a(1:3),(/0,0,0/)) + + allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) ) + + do xyz=1,3 + if (a(xyz)<0) cycle + do ipoint=1,n_points + pows_a(ipoint,0) = 1.d0 + pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz)) + pows_b(ipoint,0) = 1.d0 + pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz)) + enddo + do i = 2,maxab(xyz) + do ipoint=1,n_points + pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1) + pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1) + enddo + enddo + do ipoint=1,n_points + P_new (ipoint,0,xyz) = pows_a(ipoint,a(xyz)) + P_new2(ipoint,xyz) = pows_b(ipoint,0) + enddo + do i = 1,min(a(xyz),20) + fa = binom_transp(a(xyz)-i,a(xyz)) + do ipoint=1,n_points + P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) + enddo + enddo + do i = 21,a(xyz) + fa = binom_func(a(xyz),a(xyz)-i) + do ipoint=1,n_points + P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) + enddo + enddo + + enddo !xyz + + deallocate(pows_a, pows_b) +end + +!-- +!-- + +subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) BEGIN_DOC - ! + ! ! Transform the pol centerd on A: - ! [ \sum_i ax_i (x-x_A)^i ] [ \sum_j ay_j (y-y_A)^j ] [ \sum_k az_k (z-z_A)^k ] + ! [ \sum_i ax_i (x-x_A)^i ] [ \sum_j ay_j (y-y_A)^j ] [ \sum_k az_k (z-z_A)^k ] ! to a pol centered on B - ! [ \sum_i bx_i (x-x_B)^i ] [ \sum_j by_j (y-y_B)^j ] [ \sum_k bz_k (z-z_B)^k ] + ! [ \sum_i bx_i (x-x_B)^i ] [ \sum_j by_j (y-y_B)^j ] [ \sum_k bz_k (z-z_B)^k ] ! END_DOC @@ -437,7 +782,7 @@ subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) do i = 1, 3 Lmax = iorder(i) - call pol_modif_center_x( A_center(i), B_center(i), Lmax, A_pol(0:Lmax, i), B_pol(0:Lmax, i) ) + call pol_modif_center_x( A_center(i), B_center(i), Lmax, A_pol(0:Lmax, i), B_pol(0:Lmax, i) ) enddo return @@ -445,14 +790,14 @@ end subroutine pol_modif_center -subroutine pol_modif_center_x(A_center, B_center, iorder, A_pol, B_pol) +subroutine pol_modif_center_x(A_center, B_center, iorder, A_pol, B_pol) BEGIN_DOC - ! + ! ! Transform the pol centerd on A: - ! [ \sum_i ax_i (x-x_A)^i ] + ! [ \sum_i ax_i (x-x_A)^i ] ! to a pol centered on B - ! [ \sum_i bx_i (x-x_B)^i ] + ! [ \sum_i bx_i (x-x_B)^i ] ! ! bx_i = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) j! / [ i! (j-i)! ] ! = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) binom_func(j,i) @@ -591,7 +936,7 @@ double precision function rint_sum(n_pt_out,rho,d1) u_inv=1.d0/dsqrt(rho) u=rho*u_inv rint_sum=0.5d0*u_inv*sqpi*derf(u) *d1(0) -! print *, 0, d1(0), 0.5d0*u_inv*sqpi*derf(u) +! print *, 0, d1(0), 0.5d0*u_inv*sqpi*derf(u) endif do i=2,n_pt_out,2 diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 809f594b..2f84e753 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1136,6 +1136,104 @@ subroutine ortho_svd(A,LDA,m,n) end +! QR to orthonormalize CSFs does not work :-( +!subroutine ortho_qr_withB(A,LDA,B,m,n) +! implicit none +! BEGIN_DOC +! ! Orthogonalization using Q.R factorization +! ! +! ! A : Overlap Matrix +! ! +! ! LDA : leftmost dimension of A +! ! +! ! m : Number of rows of A +! ! +! ! n : Number of columns of A +! ! +! ! B : Output orthogonal basis +! ! +! END_DOC +! integer, intent(in) :: m,n, LDA +! double precision, intent(inout) :: A(LDA,n) +! double precision, intent(inout) :: B(LDA,n) +! +! integer :: LWORK, INFO +! integer, allocatable :: jpvt(:) +! double precision, allocatable :: TAU(:), WORK(:) +! double precision, allocatable :: C(:,:) +! double precision :: norm +! integer :: i,j +! +! allocate (TAU(min(m,n)), WORK(1)) +! allocate (jpvt(n)) +! !print *," In function ortho" +! B = A +! +! jpvt(1:n)=1 +! +! LWORK=-1 +! call dgeqp3( m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO ) +! +! ! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 +! LWORK=max(n,int(WORK(1))) +! +! deallocate(WORK) +! allocate(WORK(LWORK)) +! call dgeqp3(m, n, A, LDA, jpvt, TAU, WORK, LWORK, INFO ) +! print *,A +! print *,jpvt +! deallocate(WORK,TAU) +! !stop +! +! !LWORK=-1 +! !call dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO ) +! !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 +! !LWORK=max(n,int(WORK(1))) +! +! !deallocate(WORK) +! !allocate(WORK(LWORK)) +! !call dgeqrf(m, n, A, LDA, TAU, WORK, LWORK, INFO ) +! +! !LWORK=-1 +! !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO) +! !! /!\ int(WORK(1)) becomes negative when WORK(1) > 2147483648 +! !LWORK=max(n,int(WORK(1))) +! +! !deallocate(WORK) +! !allocate(WORK(LWORK)) +! !call dorgqr(m, n, n, A, LDA, TAU, WORK, LWORK, INFO) +! ! +! !allocate(C(LDA,n)) +! !call dgemm('N','N',m,n,n,1.0d0,B,LDA,A,LDA,0.0d0,C,LDA) +! !norm = 0.0d0 +! !B = 0.0d0 +! !!print *,C +! !do i=1,m +! ! norm = 0.0d0 +! ! do j=1,n +! ! norm = norm + C(j,i)*C(j,i) +! ! end do +! ! norm = 1.0d0/dsqrt(norm) +! ! do j=1,n +! ! B(j,i) = C(j,i) +! ! end do +! !end do +! !print *,B +! +! +! !deallocate(WORK,TAU) +!end + +!subroutine ortho_qr_csf(A, LDA, B, m, n) bind(C, name="ortho_qr_csf") +! use iso_c_binding +! integer(c_int32_t), value :: LDA +! integer(c_int32_t), value :: m +! integer(c_int32_t), value :: n +! integer(c_int16_t) :: A(LDA,n) +! integer(c_int16_t) :: B(LDA,n) +! call ortho_qr_withB(A,LDA,B,m,n) +!end subroutine ortho_qr_csf + subroutine ortho_qr(A,LDA,m,n) implicit none BEGIN_DOC diff --git a/src/utils/loc.f b/src/utils/loc.f index 6e5d7345..02693281 100644 --- a/src/utils/loc.f +++ b/src/utils/loc.f @@ -1,6 +1,6 @@ c************************************************************************ subroutine maxovl(n,m,s,t,w) -C +C C This subprogram contains an iterative procedure to find the C unitary transformation of a set of n vectors which maximizes C the sum of their square overlaps with a set of m reference @@ -10,7 +10,7 @@ C S: overlap matrix C T: rotation matrix C W: new overlap matrix C -C +C implicit real*8(a-h,o-y),logical*1(z) ! parameter (id1=700) ! dimension s(id1,id1),t(id1,id1),w(id1,id1) @@ -29,23 +29,26 @@ C conv=1.d-6 * 5x,'following the principle of maximum overlap with a set of', * i3,' reference vectors'/5x,'required convergence on rotation ', * 'angle =',f13.10///5x,'Starting overlap matrix'/) - do 6 i=1,m - write (6,145) i - 6 write (6,150) (s(i,j),j=1,n) + do i=1,m + write (6,145) i + write (6,150) (s(i,j),j=1,n) + end do 8 mm=m-1 if (m.lt.n) mm=m iter=0 - do 20 j=1,n - do 16 i=1,n - t(i,j)=0.d0 - 16 continue - do 18 i=1,m - 18 w(i,j)=s(i,j) - 20 t(j,j)=1.d0 + do j=1,n + do i=1,n + t(i,j)=0.d0 + end do + do i=1,m + w(i,j)=s(i,j) + enddo + t(j,j)=1.d0 + enddo sum=0.d0 - do 10 i=1,m - sum=sum+s(i,i)*s(i,i) - 10 continue + do i=1,m + sum=sum+s(i,i)*s(i,i) + end do sum=sum/m if (zprt) write (6,12) sum 12 format (//5x,'Average square overlap =',f10.6) @@ -54,18 +57,18 @@ C conv=1.d-6 j=1 21 if (j.ge.last) goto 30 sum=0.d0 - - do 22 i=1,n - 22 sum=sum+s(i,j)*s(i,j) + do i=1,n + sum=sum+s(i,j)*s(i,j) + enddo if (sum.gt.small) goto 28 - do 24 i=1,n - sij=s(i,j) - s(i,j)=-s(i,last) - s(i,last)=sij - tij=t(i,j) - t(i,j)=-t(i,last) - t(i,last)=tij - 24 continue + do i=1,n + sij=s(i,j) + s(i,j)=-s(i,last) + s(i,last)=sij + tij=t(i,j) + t(i,j)=-t(i,last) + t(i,last)=tij + end do last=last-1 goto 21 28 j=j+1 @@ -101,17 +104,18 @@ C conv=1.d-6 sine=1.d0 34 delta=sine*(a*sine+b*cosine) if (zprt.and.delta.lt.0.d0) write (6,71) i,j,a,b,sine,cosine,delta - do 35 k=1,m - p=s(k,i)*cosine-s(k,j)*sine - q=s(k,i)*sine+s(k,j)*cosine - s(k,i)=p - 35 s(k,j)=q - do 40 k=1,n - p=t(k,i)*cosine-t(k,j)*sine - q=t(k,i)*sine+t(k,j)*cosine - t(k,i)=p - t(k,j)=q - 40 continue + do k=1,m + p=s(k,i)*cosine-s(k,j)*sine + q=s(k,i)*sine+s(k,j)*cosine + s(k,i)=p + s(k,j)=q + enddo + do k=1,n + p=t(k,i)*cosine-t(k,j)*sine + q=t(k,i)*sine+t(k,j)*cosine + t(k,i)=p + t(k,j)=q + enddo 45 d=dabs(sine) if (d.le.amax) goto 50 imax=i @@ -132,43 +136,50 @@ C conv=1.d-6 * 'in subroutine maxovl ***'//) stop 100 continue - do 120 j=1,n - if (s(j,j).gt.0.d0) goto 120 - do 105 i=1,m - 105 s(i,j)=-s(i,j) - do 110 i=1,n - 110 t(i,j)=-t(i,j) - 120 continue + do j=1,n + if (s(j,j).gt.0.d0) cycle + do i=1,m + s(i,j)=-s(i,j) + enddo + do i=1,n + t(i,j)=-t(i,j) + enddo + enddo sum=0.d0 - do 125 i=1,m - 125 sum=sum+s(i,i)*s(i,i) + do i=1,m + sum=sum+s(i,i)*s(i,i) + enddo sum=sum/m - do 122 i=1,m - do 122 j=1,n - sw=s(i,j) - s(i,j)=w(i,j) - 122 w(i,j)=sw + do i=1,m + do j=1,n + sw=s(i,j) + s(i,j)=w(i,j) + w(i,j)=sw + enddo + enddo if (.not.zprt) return write (6,12) sum write (6,130) 130 format (//5x,'transformation matrix') - do 140 i=1,n - write (6,145) i - 140 write (6,150) (t(i,j),j=1,n) + do i=1,n + write (6,145) i + write (6,150) (t(i,j),j=1,n) + enddo 145 format (i8) 150 format (2x,10f12.8) write (6,160) 160 format (//5x,'new overlap matrix'/) - do 170 i=1,m - write (6,145) i - 170 write (6,150) (w(i,j),j=1,n) + do i=1,m + write (6,145) i + write (6,150) (w(i,j),j=1,n) + enddo return end c************************************************************************ subroutine maxovl_no_print(n,m,s,t,w) -C +C C This subprogram contains an iterative procedure to find the C unitary transformation of a set of n vectors which maximizes C the sum of their square overlaps with a set of m reference @@ -178,7 +189,7 @@ C S: overlap matrix C T: rotation matrix C W: new overlap matrix C -C +C implicit real*8(a-h,o-y),logical*1(z) parameter (id1=300) dimension s(id1,id1),t(id1,id1),w(id1,id1) @@ -193,17 +204,19 @@ C conv=1.d-6 8 mm=m-1 if (m.lt.n) mm=m iter=0 - do 20 j=1,n - do 16 i=1,n - t(i,j)=0.d0 - 16 continue - do 18 i=1,m - 18 w(i,j)=s(i,j) - 20 t(j,j)=1.d0 + do j=1,n + do i=1,n + t(i,j)=0.d0 + enddo + do i=1,m + w(i,j)=s(i,j) + enddo + t(j,j)=1.d0 + enddo sum=0.d0 - do 10 i=1,m - sum=sum+s(i,i)*s(i,i) - 10 continue + do i=1,m + sum=sum+s(i,i)*s(i,i) + enddo sum=sum/m 12 format (//5x,'Average square overlap =',f10.6) if (n.eq.1) goto 100 @@ -211,18 +224,19 @@ C conv=1.d-6 j=1 21 if (j.ge.last) goto 30 sum=0.d0 - - do 22 i=1,n - 22 sum=sum+s(i,j)*s(i,j) + + do i=1,n + sum=sum+s(i,j)*s(i,j) + enddo if (sum.gt.small) goto 28 - do 24 i=1,n - sij=s(i,j) - s(i,j)=-s(i,last) - s(i,last)=sij - tij=t(i,j) - t(i,j)=-t(i,last) - t(i,last)=tij - 24 continue + do i=1,n + sij=s(i,j) + s(i,j)=-s(i,last) + s(i,last)=sij + tij=t(i,j) + t(i,j)=-t(i,last) + t(i,last)=tij + end do last=last-1 goto 21 28 j=j+1 @@ -232,50 +246,52 @@ C conv=1.d-6 jmax=0 dmax=0.d0 amax=0.d0 - do 60 i=1,mm - ip=i+1 - do 50 j=ip,n - a=s(i,j)*s(i,j)-s(i,i)*s(i,i) - b=-s(i,i)*s(i,j) - if (j.gt.m) goto 31 - a=a+s(j,i)*s(j,i)-s(j,j)*s(j,j) - b=b+s(j,i)*s(j,j) - 31 b=b+b - if (a.eq.0.d0) goto 32 - ba=b/a - if (dabs(ba).gt.small) goto 32 - if (a.gt.0.d0) goto 33 - tang=-0.5d0*ba - cosine=1.d0/dsqrt(1.d0+tang*tang) - sine=tang*cosine - goto 34 - 32 tang=0.d0 - if (b.ne.0.d0) tang=(a+dsqrt(a*a+b*b))/b - cosine=1.d0/dsqrt(1.d0+tang*tang) - sine=tang*cosine - goto 34 - 33 cosine=0.d0 - sine=1.d0 - 34 delta=sine*(a*sine+b*cosine) - do 35 k=1,m - p=s(k,i)*cosine-s(k,j)*sine - q=s(k,i)*sine+s(k,j)*cosine - s(k,i)=p - 35 s(k,j)=q - do 40 k=1,n - p=t(k,i)*cosine-t(k,j)*sine - q=t(k,i)*sine+t(k,j)*cosine - t(k,i)=p - t(k,j)=q - 40 continue - 45 d=dabs(sine) - if (d.le.amax) goto 50 - imax=i - jmax=j - amax=d - dmax=delta - 50 continue - 60 continue + do i=1,mm + ip=i+1 + do j=ip,n + a=s(i,j)*s(i,j)-s(i,i)*s(i,i) + b=-s(i,i)*s(i,j) + if (j.gt.m) goto 31 + a=a+s(j,i)*s(j,i)-s(j,j)*s(j,j) + b=b+s(j,i)*s(j,j) + 31 b=b+b + if (a.eq.0.d0) goto 32 + ba=b/a + if (dabs(ba).gt.small) goto 32 + if (a.gt.0.d0) goto 33 + tang=-0.5d0*ba + cosine=1.d0/dsqrt(1.d0+tang*tang) + sine=tang*cosine + goto 34 + 32 tang=0.d0 + if (b.ne.0.d0) tang=(a+dsqrt(a*a+b*b))/b + cosine=1.d0/dsqrt(1.d0+tang*tang) + sine=tang*cosine + goto 34 + 33 cosine=0.d0 + sine=1.d0 + 34 delta=sine*(a*sine+b*cosine) + do k=1,m + p=s(k,i)*cosine-s(k,j)*sine + q=s(k,i)*sine+s(k,j)*cosine + s(k,i)=p + s(k,j)=q + enddo + do k=1,n + p=t(k,i)*cosine-t(k,j)*sine + q=t(k,i)*sine+t(k,j)*cosine + t(k,i)=p + t(k,j)=q + enddo + 45 d=dabs(sine) + if (d.le.amax) goto 50 + imax=i + jmax=j + amax=d + dmax=delta + 50 continue + end do + end do 70 format (' iter=',i4,' largest rotation=',f12.8, * ', vectors',i3,' and',i3,', incr. of diag. squares=',g12.5) 71 format (' i,j,a,b,sin,cos,delta =',2i3,5f10.5) @@ -285,22 +301,27 @@ C conv=1.d-6 * 'in subroutine maxovl ***'//) stop 100 continue - do 120 j=1,n - if (s(j,j).gt.0.d0) goto 120 - do 105 i=1,m - 105 s(i,j)=-s(i,j) - do 110 i=1,n - 110 t(i,j)=-t(i,j) - 120 continue + do j=1,n + if (s(j,j).gt.0.d0) cycle + do i=1,m + s(i,j)=-s(i,j) + enddo + do i=1,n + t(i,j)=-t(i,j) + enddo + enddo sum=0.d0 - do 125 i=1,m - 125 sum=sum+s(i,i)*s(i,i) + do i=1,m + sum=sum+s(i,i)*s(i,i) + enddo sum=sum/m - do 122 i=1,m - do 122 j=1,n - sw=s(i,j) - s(i,j)=w(i,j) - 122 w(i,j)=sw + do i=1,m + do j=1,n + sw=s(i,j) + s(i,j)=w(i,j) + w(i,j)=sw + enddo + enddo return end diff --git a/src/utils/map_module.f90 b/src/utils/map_module.f90 index 98e73470..ceaec874 100644 --- a/src/utils/map_module.f90 +++ b/src/utils/map_module.f90 @@ -238,11 +238,11 @@ subroutine cache_map_sort(map) iorder(i) = i enddo if (cache_key_kind == 2) then - call i2radix_sort(map%key,iorder,map%n_elements,-1) + call i2sort(map%key,iorder,map%n_elements,-1) else if (cache_key_kind == 4) then - call iradix_sort(map%key,iorder,map%n_elements,-1) + call isort(map%key,iorder,map%n_elements,-1) else if (cache_key_kind == 8) then - call i8radix_sort(map%key,iorder,map%n_elements,-1) + call i8sort(map%key,iorder,map%n_elements,-1) endif if (integral_kind == 4) then call set_order(map%value,iorder,map%n_elements) diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index 9c1d2445..a62c657e 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -92,9 +92,9 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& overlap = overlap_x * overlap_y * overlap_z end - + ! --- - + subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, lower_exp_val, dx, nx) BEGIN_DOC @@ -151,4 +151,71 @@ subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, over end +! --- +subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& + power_B,overlap,dim, n_points) + implicit none + BEGIN_DOC + !.. math:: + ! + ! S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx \\ + ! S = S_x S_y S_z + ! + END_DOC + include 'constants.include.F' + integer,intent(in) :: dim, n_points + double precision,intent(in) :: A_center(n_points,3),B_center(3) ! center of the x1 functions + double precision, intent(in) :: alpha,beta + integer,intent(in) :: power_A(3), power_B(3) ! power of the x1 functions + double precision, intent(out) :: overlap(n_points) + double precision :: F_integral_tab(0:max_dim) + double precision :: p, overlap_x, overlap_y, overlap_z + double precision, allocatable :: P_new(:,:,:),P_center(:,:),fact_p(:), fact_pp(:), pp(:) + integer :: iorder_p(3), ipoint, ldp + integer :: nmax + double precision :: F_integral + + ldp = maxval( power_A(1:3) + power_B(1:3) ) + allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points), & + fact_pp(n_points), pp(n_points)) + + call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,n_points) + + nmax = maxval(iorder_p) + do i=0, nmax + F_integral_tab(i) = F_integral(i,p) + enddo + + integer :: i + + call gaussian_product_v(alpha,A_center,beta,B_center,fact_pp,pp,P_center,n_points) + + do ipoint=1,n_points + if(fact_p(ipoint).lt.1d-20)then + overlap(ipoint) = 1.d-10 + cycle + endif + + overlap_x = P_new(ipoint,0,1) * F_integral_tab(0) + do i = 1,iorder_p(1) + overlap_x = overlap_x + P_new(ipoint,i,1) * F_integral_tab(i) + enddo + + overlap_y = P_new(ipoint,0,2) * F_integral_tab(0) + do i = 1,iorder_p(2) + overlap_y = overlap_y + P_new(ipoint,i,2) * F_integral_tab(i) + enddo + + overlap_z = P_new(ipoint,0,3) * F_integral_tab(0) + do i = 1,iorder_p(3) + overlap_z = overlap_z + P_new(ipoint,i,3) * F_integral_tab(i) + enddo + + overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_pp(ipoint) + enddo + + deallocate(P_new, P_center, fact_p, pp, fact_pp) +end + +! --- diff --git a/src/utils/qsort.c b/src/utils/qsort.c new file mode 100644 index 00000000..c011b35a --- /dev/null +++ b/src/utils/qsort.c @@ -0,0 +1,373 @@ +/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */ +#include +#include + +struct int16_t_comp { + int16_t x; + int32_t i; +}; + +int compare_int16_t( const void * l, const void * r ) +{ + const int16_t * restrict _l= l; + const int16_t * restrict _r= r; + if( *_l > *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct double_comp* A = malloc(isize * sizeof(struct double_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct float_comp* A = malloc(isize * sizeof(struct float_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i> +""" +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("TYPE", typ).replace("_big", "") ) + print( data.replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +#+NAME: replaced_f +#+begin_src python :results output :noweb yes +data = """ +<> +""" +c1 = { + "int16_t": "i2", + "int32_t": "i", + "int64_t": "i8", + "double": "d", + "float": "" +} +c2 = { + "int16_t": "integer", + "int32_t": "integer", + "int64_t": "integer", + "double": "real", + "float": "real" +} + +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +#+NAME: replaced_f2 +#+begin_src python :results output :noweb yes +data = """ +<> +""" +c1 = { + "int16_t": "i2", + "int32_t": "i", + "int64_t": "i8", + "double": "d", + "float": "" +} +c2 = { + "int16_t": "integer", + "int32_t": "integer", + "int64_t": "integer", + "double": "real", + "float": "real" +} + +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +* Generated C file + +#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes +#include +#include +<> +#+END_SRC + +* Generated Fortran file + +#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes +module qsort_module + use iso_c_binding + + interface + <> + end interface + +end module qsort_module + +<> + +#+END_SRC + diff --git a/src/utils/qsort_module.f90 b/src/utils/qsort_module.f90 new file mode 100644 index 00000000..a72a4f9e --- /dev/null +++ b/src/utils/qsort_module.f90 @@ -0,0 +1,347 @@ +module qsort_module + use iso_c_binding + + interface + + subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + end subroutine i2sort_c + + subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int16_t) :: A(isize) + end subroutine i2sort_noidx_c + + + + subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + end subroutine i2sort_big_c + + subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int16_t) :: A(isize) + end subroutine i2sort_noidx_big_c + + + + subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + end subroutine isort_c + + subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int32_t) :: A(isize) + end subroutine isort_noidx_c + + + + subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + end subroutine isort_big_c + + subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int32_t) :: A(isize) + end subroutine isort_noidx_big_c + + + + subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + end subroutine i8sort_c + + subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int64_t) :: A(isize) + end subroutine i8sort_noidx_c + + + + subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + end subroutine i8sort_big_c + + subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int64_t) :: A(isize) + end subroutine i8sort_noidx_big_c + + + + subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + real (c_double) :: A(isize) + end subroutine dsort_c + + subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + real (c_double) :: A(isize) + end subroutine dsort_noidx_c + + + + subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + real (c_double) :: A(isize) + end subroutine dsort_big_c + + subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + real (c_double) :: A(isize) + end subroutine dsort_noidx_big_c + + + + subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + real (c_float) :: A(isize) + end subroutine sort_c + + subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + real (c_float) :: A(isize) + end subroutine sort_noidx_c + + + + subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + real (c_float) :: A(isize) + end subroutine sort_big_c + + subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + real (c_float) :: A(isize) + end subroutine sort_noidx_big_c + + + + end interface + +end module qsort_module + + +subroutine i2sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + call i2sort_c(A, iorder, isize) +end subroutine i2sort + +subroutine i2sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int16_t) :: A(isize) + call i2sort_noidx_c(A, isize) +end subroutine i2sort_noidx + + + +subroutine i2sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + call i2sort_big_c(A, iorder, isize) +end subroutine i2sort_big + +subroutine i2sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int16_t) :: A(isize) + call i2sort_noidx_big_c(A, isize) +end subroutine i2sort_noidx_big + + + +subroutine isort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + call isort_c(A, iorder, isize) +end subroutine isort + +subroutine isort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int32_t) :: A(isize) + call isort_noidx_c(A, isize) +end subroutine isort_noidx + + + +subroutine isort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + call isort_big_c(A, iorder, isize) +end subroutine isort_big + +subroutine isort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int32_t) :: A(isize) + call isort_noidx_big_c(A, isize) +end subroutine isort_noidx_big + + + +subroutine i8sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + call i8sort_c(A, iorder, isize) +end subroutine i8sort + +subroutine i8sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int64_t) :: A(isize) + call i8sort_noidx_c(A, isize) +end subroutine i8sort_noidx + + + +subroutine i8sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + call i8sort_big_c(A, iorder, isize) +end subroutine i8sort_big + +subroutine i8sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int64_t) :: A(isize) + call i8sort_noidx_big_c(A, isize) +end subroutine i8sort_noidx_big + + + +subroutine dsort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + real (c_double) :: A(isize) + call dsort_c(A, iorder, isize) +end subroutine dsort + +subroutine dsort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + real (c_double) :: A(isize) + call dsort_noidx_c(A, isize) +end subroutine dsort_noidx + + + +subroutine dsort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + real (c_double) :: A(isize) + call dsort_big_c(A, iorder, isize) +end subroutine dsort_big + +subroutine dsort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + real (c_double) :: A(isize) + call dsort_noidx_big_c(A, isize) +end subroutine dsort_noidx_big + + + +subroutine sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + real (c_float) :: A(isize) + call sort_c(A, iorder, isize) +end subroutine sort + +subroutine sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + real (c_float) :: A(isize) + call sort_noidx_c(A, isize) +end subroutine sort_noidx + + + +subroutine sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + real (c_float) :: A(isize) + call sort_big_c(A, iorder, isize) +end subroutine sort_big + +subroutine sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + real (c_float) :: A(isize) + call sort_noidx_big_c(A, isize) +end subroutine sort_noidx_big diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index a63eb4a3..089c3871 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -1,222 +1,4 @@ BEGIN_TEMPLATE - subroutine insertion_$Xsort (x,iorder,isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the insertion sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - $type :: xtmp - integer :: i, i0, j, jmax - - do i=2,isize - xtmp = x(i) - i0 = iorder(i) - j=i-1 - do while (j>0) - if ((x(j) <= xtmp)) exit - x(j+1) = x(j) - iorder(j+1) = iorder(j) - j=j-1 - enddo - x(j+1) = xtmp - iorder(j+1) = i0 - enddo - end subroutine insertion_$Xsort - - subroutine quick_$Xsort(x, iorder, isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the quicksort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - integer, external :: omp_get_num_threads - call rec_$X_quicksort(x,iorder,isize,1,isize,nproc) - end - - recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level) - implicit none - integer, intent(in) :: isize, first, last, level - integer,intent(inout) :: iorder(isize) - $type, intent(inout) :: x(isize) - $type :: c, tmp - integer :: itmp - integer :: i, j - - if(isize<2)return - - c = x( shiftr(first+last,1) ) - i = first - j = last - do - do while (x(i) < c) - i=i+1 - end do - do while (c < x(j)) - j=j-1 - end do - if (i >= j) exit - tmp = x(i) - x(i) = x(j) - x(j) = tmp - itmp = iorder(i) - iorder(i) = iorder(j) - iorder(j) = itmp - i=i+1 - j=j-1 - enddo - if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then - if (first < i-1) then - call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) - endif - if (j+1 < last) then - call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) - endif - else - if (first < i-1) then - call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) - endif - if (j+1 < last) then - call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) - endif - endif - end - - subroutine heap_$Xsort(x,iorder,isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the heap sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - - integer :: i, k, j, l, i0 - $type :: xtemp - - l = isize/2+1 - k = isize - do while (.True.) - if (l>1) then - l=l-1 - xtemp = x(l) - i0 = iorder(l) - else - xtemp = x(k) - i0 = iorder(k) - x(k) = x(1) - iorder(k) = iorder(1) - k = k-1 - if (k == 1) then - x(1) = xtemp - iorder(1) = i0 - exit - endif - endif - i=l - j = shiftl(l,1) - do while (j1) then - l=l-1 - xtemp = x(l) - i0 = iorder(l) - else - xtemp = x(k) - i0 = iorder(k) - x(k) = x(1) - iorder(k) = iorder(1) - k = k-1 - if (k == 1) then - x(1) = xtemp - iorder(1) = i0 - exit - endif - endif - i=l - j = shiftl(l,1) - do while (j0_8) - if (x(j)<=xtmp) exit - x(j+1_8) = x(j) - iorder(j+1_8) = iorder(j) - j = j-1_8 - enddo - x(j+1_8) = xtmp - iorder(j+1_8) = i0 - enddo - - end subroutine insertion_$Xsort_big - subroutine $Xset_order_big(x,iorder,isize) implicit none BEGIN_DOC @@ -563,223 +90,3 @@ SUBST [ X, type ] END_TEMPLATE -BEGIN_TEMPLATE - -recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) - implicit none - - BEGIN_DOC - ! Sort integer array x(isize) using the radix sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - ! iradix should be -1 in input. - END_DOC - integer*$int_type, intent(in) :: isize - integer*$int_type, intent(inout) :: iorder(isize) - integer*$type, intent(inout) :: x(isize) - integer, intent(in) :: iradix - integer :: iradix_new - integer*$type, allocatable :: x2(:), x1(:) - integer*$type :: i4 ! data type - integer*$int_type, allocatable :: iorder1(:),iorder2(:) - integer*$int_type :: i0, i1, i2, i3, i ! index type - integer*$type :: mask - integer :: err - !DIR$ ATTRIBUTES ALIGN : 128 :: iorder1,iorder2, x2, x1 - - if (isize < 2) then - return - endif - - if (iradix == -1) then ! Sort Positive and negative - - allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays' - stop - endif - - i1=1_$int_type - i2=1_$int_type - do i=1_$int_type,isize - if (x(i) < 0_$type) then - iorder1(i1) = iorder(i) - x1(i1) = -x(i) - i1 = i1+1_$int_type - else - iorder2(i2) = iorder(i) - x2(i2) = x(i) - i2 = i2+1_$int_type - endif - enddo - i1=i1-1_$int_type - i2=i2-1_$int_type - - do i=1_$int_type,i2 - iorder(i1+i) = iorder2(i) - x(i1+i) = x2(i) - enddo - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x2, iorder2' - stop - endif - - - if (i1 > 1_$int_type) then - call $Xradix_sort$big(x1,iorder1,i1,-2) - do i=1_$int_type,i1 - x(i) = -x1(1_$int_type+i1-i) - iorder(i) = iorder1(1_$int_type+i1-i) - enddo - endif - - if (i2>1_$int_type) then - call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2) - endif - - deallocate(x1,iorder1,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x1, iorder1' - stop - endif - return - - else if (iradix == -2) then ! Positive - - ! Find most significant bit - - i0 = 0_$int_type - i4 = maxval(x) - - iradix_new = max($integer_size-1-leadz(i4),1) - mask = ibset(0_$type,iradix_new) - - allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays' - stop - endif - - i1=1_$int_type - i2=1_$int_type - - do i=1_$int_type,isize - if (iand(mask,x(i)) == 0_$type) then - iorder1(i1) = iorder(i) - x1(i1) = x(i) - i1 = i1+1_$int_type - else - iorder2(i2) = iorder(i) - x2(i2) = x(i) - i2 = i2+1_$int_type - endif - enddo - i1=i1-1_$int_type - i2=i2-1_$int_type - - do i=1_$int_type,i1 - iorder(i0+i) = iorder1(i) - x(i0+i) = x1(i) - enddo - i0 = i0+i1 - i3 = i0 - deallocate(x1,iorder1,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x1, iorder1' - stop - endif - - - do i=1_$int_type,i2 - iorder(i0+i) = iorder2(i) - x(i0+i) = x2(i) - enddo - i0 = i0+i2 - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x2, iorder2' - stop - endif - - - if (i3>1_$int_type) then - call $Xradix_sort$big(x,iorder,i3,iradix_new-1) - endif - - if (isize-i3>1_$int_type) then - call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1) - endif - - return - endif - - ASSERT (iradix >= 0) - - if (isize < 48) then - call insertion_$Xsort$big(x,iorder,isize) - return - endif - - - allocate(x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays x1, iorder1' - stop - endif - - - mask = ibset(0_$type,iradix) - i0=1_$int_type - i1=1_$int_type - - do i=1_$int_type,isize - if (iand(mask,x(i)) == 0_$type) then - iorder(i0) = iorder(i) - x(i0) = x(i) - i0 = i0+1_$int_type - else - iorder2(i1) = iorder(i) - x2(i1) = x(i) - i1 = i1+1_$int_type - endif - enddo - i0=i0-1_$int_type - i1=i1-1_$int_type - - do i=1_$int_type,i1 - iorder(i0+i) = iorder2(i) - x(i0+i) = x2(i) - enddo - - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays x2, iorder2' - stop - endif - - - if (iradix == 0) then - return - endif - - - if (i1>1_$int_type) then - call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1) - endif - if (i0>1) then - call $Xradix_sort$big(x,iorder,i0,iradix-1) - endif - - end - -SUBST [ X, type, integer_size, is_big, big, int_type ] - i ; 4 ; 32 ; .False. ; ; 4 ;; - i8 ; 8 ; 64 ; .False. ; ; 4 ;; - i2 ; 2 ; 16 ; .False. ; ; 4 ;; - i ; 4 ; 32 ; .True. ; _big ; 8 ;; - i8 ; 8 ; 64 ; .True. ; _big ; 8 ;; -END_TEMPLATE - - - diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index 184d8052..c9e35ee1 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -430,3 +430,28 @@ subroutine lowercase(txt,n) enddo end +subroutine v2_over_x(v,x,res) + + !BEGIN_DOC + ! Two by two diagonalization to avoid the divergence in v^2/x when x goes to 0 + !END_DOC + + implicit none + + double precision, intent(in) :: v, x + double precision, intent(out) :: res + + double precision :: delta_E, tmp, val + + res = 0d0 + delta_E = x + if (v == 0.d0) return + + val = 2d0 * v + tmp = dsqrt(delta_E * delta_E + val * val) + if (delta_E < 0.d0) then + tmp = -tmp + endif + res = 0.5d0 * (tmp - delta_E) + +end diff --git a/src/utils_trust_region/EZFIO.cfg b/src/utils_trust_region/EZFIO.cfg new file mode 100644 index 00000000..9c9f6248 --- /dev/null +++ b/src/utils_trust_region/EZFIO.cfg @@ -0,0 +1,89 @@ +[thresh_delta] +type: double precision +doc: Threshold to stop the optimization if the radius of the trust region delta < thresh_delta +interface: ezfio,provider,ocaml +default: 1.e-10 + +[thresh_rho] +type: double precision +doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. thresh_rho) +interface: ezfio,provider,ocaml +default: 0.1 + +[thresh_eig] +type: double precision +doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[thresh_model] +type: double precision +doc: If if ABS(criterion - criterion_model) < thresh_model, the program exit the trust region algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[absolute_eig] +type: logical +doc: If True, the algorithm replace the eigenvalues of the hessian by their absolute value to compute the step (in the trust region) +interface: ezfio,provider,ocaml +default: false + +[thresh_wtg] +type: double precision +doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several order of magnitude to avoid numerical problem. If the research of the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_wtg2] +type: double precision +doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. There is no particular reason to put a different value that thresh_wtg, but it can be useful one day +interface: ezfio,provider,ocaml +default: 1.e-6 + +[avoid_saddle] +type: logical +doc: Test to avoid saddle point, active if true +interface: ezfio,provider,ocaml +default: false + +[version_avoid_saddle] +type: integer +doc: cf. trust region, not stable +interface: ezfio,provider,ocaml +default: 3 + +[thresh_rho_2] +type: double precision +doc: Threshold for the step acceptance for the research of lambda in the trust region algorithm, if (rho_2 .geq. thresh_rho_2) the step is accepted, else the step is rejected +interface: ezfio,provider,ocaml +default: 0.1 + +[thresh_cc] +type: double precision +doc: Threshold to stop the research of the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc) +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_model_2] +type: double precision +doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the research of the optimal lambda in the trust region algorithm it prints a warning +interface: ezfio,provider,ocaml +default: 1.e-12 + +[version_lambda_search] +type: integer +doc: Research of the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 -> 1/||x||^2 - 1/delta^2 .eq. 0 +interface: ezfio,provider,ocaml +default: 2 + +[nb_it_max_lambda] +type: integer +doc: Maximal number of iterations for the research of the optimal lambda in the trust region algorithm +interface: ezfio,provider,ocaml +default: 100 + +[nb_it_max_pre_search] +type: integer +doc: Maximal number of iterations for the pre-research of the optimal lambda in the trust region algorithm +interface: ezfio,provider,ocaml +default: 40 diff --git a/src/utils_trust_region/NEED b/src/utils_trust_region/NEED new file mode 100644 index 00000000..1a65ce38 --- /dev/null +++ b/src/utils_trust_region/NEED @@ -0,0 +1 @@ +hartree_fock diff --git a/src/utils_trust_region/README.rst b/src/utils_trust_region/README.rst new file mode 100644 index 00000000..6a0689b6 --- /dev/null +++ b/src/utils_trust_region/README.rst @@ -0,0 +1,5 @@ +============ +trust_region +============ + +The documentation can be found in the org files. diff --git a/src/utils_trust_region/TANGLE_org_mode.sh b/src/utils_trust_region/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/utils_trust_region/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f new file mode 100644 index 00000000..eac17275 --- /dev/null +++ b/src/utils_trust_region/algo_trust.irp.f @@ -0,0 +1,248 @@ +! Algorithm for the trust region + +! step_in_trust_region: +! Computes the step in the trust region (delta) +! (automatically sets at the iteration 0 and which evolves during the +! process in function of the evolution of rho). The step is computing by +! constraining its norm with a lagrange multiplier. +! Since the calculation of the step is based on the Newton method, an +! estimation of the gain in energy is given using the Taylors series +! truncated at the second order (criterion_model). +! If (DABS(criterion-criterion_model) < 1d-12) then +! must_exit = .True. +! else +! must_exit = .False. + +! This estimation of the gain in energy is used by +! is_step_cancel_trust_region to say if the step is accepted or cancelled. + +! If the step must be cancelled, the calculation restart from the same +! hessian and gradient and recomputes the step but in a smaller trust +! region and so on until the step is accepted. If the step is accepted +! the hessian and the gradient are recomputed to produce a new step. + +! Example: + + +! !### Initialization ### +! delta = 0d0 +! nb_iter = 0 ! Must start at 0 !!! +! rho = 0.5d0 +! not_converged = .True. +! +! ! ### TODO ### +! ! Compute the criterion before the loop +! call #your_criterion(prev_criterion) +! +! do while (not_converged) +! ! ### TODO ## +! ! Call your gradient +! ! Call you hessian +! call #your_gradient(v_grad) (1D array) +! call #your_hessian(H) (2D array) +! +! ! ### TODO ### +! ! Diagonalization of the hessian +! call diagonalization_hessian(n,H,e_val,w) +! +! cancel_step = .True. ! To enter in the loop just after +! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho +! do while (cancel_step) +! +! ! Hessian,gradient,Criterion -> x +! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) +! +! if (must_exit) then +! ! ### Message ### +! ! if step_in_trust_region sets must_exit on true for numerical reasons +! print*,'algo_trust1 sends the message : Exit' +! !### exit ### +! endif +! +! !### TODO ### +! ! Compute x -> m_x +! ! Compute m_x -> R +! ! Apply R and keep the previous MOs... +! ! Update/touch +! ! Compute the new criterion/energy -> criterion +! +! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) +! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) +! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) +! +! TOUCH #your_variables +! +! call #your_criterion(criterion) +! +! ! Criterion -> step accepted or rejected +! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) +! +! ! ### TODO ### +! !if (cancel_step) then +! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) +! !endif +! #if (cancel_step) then +! #mo_coef = prev_mos +! #endif +! +! enddo +! +! !call save_mos() !### depend of the time for 1 iteration +! +! ! To exit the external loop if must_exit = .True. +! if (must_exit) then +! !### exit ### +! endif +! +! ! Step accepted, nb iteration + 1 +! nb_iter = nb_iter + 1 +! +! ! ### TODO ### +! !if (###Conditions###) then +! ! no_converged = .False. +! !endif +! #if (#your_conditions) then +! # not_converged = .False. +! #endif +! +! enddo + + + +! Variables: + +! Input: +! | n | integer | m*(m-1)/2 | +! | m | integer | number of mo in the mo_class | +! | H(n,n) | double precision | Hessian | +! | v_grad(n) | double precision | Gradient | +! | W(n,n) | double precision | Eigenvectors of the hessian | +! | e_val(n) | double precision | Eigenvalues of the hessian | +! | criterion | double precision | Actual criterion | +! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | +! | rho | double precision | Given by is_step_cancel_trus_region | +! | | | Agreement between the real function and the Taylor series (2nd order) | +! | nb_iter | integer | Actual number of iterations | + +! Input/output: +! | delta | double precision | Radius of the trust region | + +! Output: +! | criterion_model | double precision | Predicted criterion after the rotation | +! | x(n) | double precision | Step | +! | must_exit | logical | If the program must exit the loop | + + +subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) + + include 'pi.h' + + BEGIN_DOC + ! Compute the step and the expected criterion/energy after the step + END_DOC + + implicit none + + ! in + integer, intent(in) :: n, nb_iter + double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) + double precision, intent(in) :: rho, prev_criterion + + ! inout + double precision, intent(inout) :: delta, e_val(n) + + ! out + double precision, intent(out) :: criterion_model, x(n) + logical, intent(out) :: must_exit + + ! internal + integer :: info + + must_exit = .False. + + call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) + + call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) + + ! exit if DABS(prev_criterion - criterion_model) < 1d-12 + if (DABS(prev_criterion - criterion_model) < thresh_model) then + print*,'' + print*,'###############################################################################' + print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' + print*,'###############################################################################' + print*,'' + must_exit = .True. + endif + + if (delta < thresh_delta) then + print*,'' + print*,'##############################################' + print*,'Delta <', thresh_delta, 'stop the trust region' + print*,'##############################################' + print*,'' + must_exit = .True. + endif + + ! Add after the call to this subroutine, a statement: + ! "if (must_exit) then + ! exit + ! endif" + ! in order to exit the optimization loop + +end subroutine + + + +! Variables: + +! Input: +! | nb_iter | integer | actual number of iterations | +! | prev_criterion | double precision | criterion before the application of the step x | +! | criterion | double precision | criterion after the application of the step x | +! | criterion_model | double precision | predicted criterion after the application of x | + +! Output: +! | rho | double precision | Agreement between the predicted criterion and the real new criterion | +! | cancel_step | logical | If the step must be cancelled | + + +subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) + + include 'pi.h' + + BEGIN_DOC + ! Compute if the step should be cancelled + END_DOC + + implicit none + + ! in + double precision, intent(in) :: prev_criterion, criterion, criterion_model + + ! inout + integer, intent(inout) :: nb_iter + + ! out + logical, intent(out) :: cancel_step + double precision, intent(out) :: rho + + ! Computes rho + call trust_region_rho(prev_criterion,criterion,criterion_model,rho) + + if (nb_iter == 0) then + nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled + endif + + ! If rho < thresh_rho -> give something in output to cancel the step + if (rho >= thresh_rho) then !0.1d0) then + ! The step is accepted + cancel_step = .False. + else + ! The step is rejected + cancel_step = .True. + print*, '***********************' + print*, 'Step cancel : rho <', thresh_rho + print*, '***********************' + endif + +end subroutine diff --git a/src/utils_trust_region/algo_trust.org b/src/utils_trust_region/algo_trust.org new file mode 100644 index 00000000..aa836f98 --- /dev/null +++ b/src/utils_trust_region/algo_trust.org @@ -0,0 +1,593 @@ +* Algorithm for the trust region + +step_in_trust_region: +Computes the step in the trust region (delta) +(automatically sets at the iteration 0 and which evolves during the +process in function of the evolution of rho). The step is computing by +constraining its norm with a lagrange multiplier. +Since the calculation of the step is based on the Newton method, an +estimation of the gain in energy is given using the Taylors series +truncated at the second order (criterion_model). +If (DABS(criterion-criterion_model) < 1d-12) then + must_exit = .True. +else + must_exit = .False. + +This estimation of the gain in energy is used by +is_step_cancel_trust_region to say if the step is accepted or cancelled. + +If the step must be cancelled, the calculation restart from the same +hessian and gradient and recomputes the step but in a smaller trust +region and so on until the step is accepted. If the step is accepted +the hessian and the gradient are recomputed to produce a new step. + +Example: + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +! !### Initialization ### +! delta = 0d0 +! nb_iter = 0 ! Must start at 0 !!! +! rho = 0.5d0 +! not_converged = .True. +! +! ! ### TODO ### +! ! Compute the criterion before the loop +! call #your_criterion(prev_criterion) +! +! do while (not_converged) +! ! ### TODO ## +! ! Call your gradient +! ! Call you hessian +! call #your_gradient(v_grad) (1D array) +! call #your_hessian(H) (2D array) +! +! ! ### TODO ### +! ! Diagonalization of the hessian +! call diagonalization_hessian(n,H,e_val,w) +! +! cancel_step = .True. ! To enter in the loop just after +! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho +! do while (cancel_step) +! +! ! Hessian,gradient,Criterion -> x +! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) +! +! if (must_exit) then +! ! ### Message ### +! ! if step_in_trust_region sets must_exit on true for numerical reasons +! print*,'algo_trust1 sends the message : Exit' +! !### exit ### +! endif +! +! !### TODO ### +! ! Compute x -> m_x +! ! Compute m_x -> R +! ! Apply R and keep the previous MOs... +! ! Update/touch +! ! Compute the new criterion/energy -> criterion +! +! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) +! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) +! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) +! +! TOUCH #your_variables +! +! call #your_criterion(criterion) +! +! ! Criterion -> step accepted or rejected +! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) +! +! ! ### TODO ### +! !if (cancel_step) then +! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) +! !endif +! #if (cancel_step) then +! #mo_coef = prev_mos +! #endif +! +! enddo +! +! !call save_mos() !### depend of the time for 1 iteration +! +! ! To exit the external loop if must_exit = .True. +! if (must_exit) then +! !### exit ### +! endif +! +! ! Step accepted, nb iteration + 1 +! nb_iter = nb_iter + 1 +! +! ! ### TODO ### +! !if (###Conditions###) then +! ! no_converged = .False. +! !endif +! #if (#your_conditions) then +! # not_converged = .False. +! #endif +! +! enddo +#+END_SRC + +Variables: + +Input: +| n | integer | m*(m-1)/2 | +| m | integer | number of mo in the mo_class | +| H(n,n) | double precision | Hessian | +| v_grad(n) | double precision | Gradient | +| W(n,n) | double precision | Eigenvectors of the hessian | +| e_val(n) | double precision | Eigenvalues of the hessian | +| criterion | double precision | Actual criterion | +| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | +| rho | double precision | Given by is_step_cancel_trus_region | +| | | Agreement between the real function and the Taylor series (2nd order) | +| nb_iter | integer | Actual number of iterations | + +Input/output: +| delta | double precision | Radius of the trust region | + +Output: +| criterion_model | double precision | Predicted criterion after the rotation | +| x(n) | double precision | Step | +| must_exit | logical | If the program must exit the loop | + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) + + include 'pi.h' + + BEGIN_DOC + ! Compute the step and the expected criterion/energy after the step + END_DOC + + implicit none + + ! in + integer, intent(in) :: n, nb_iter + double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) + double precision, intent(in) :: rho, prev_criterion + + ! inout + double precision, intent(inout) :: delta, e_val(n) + + ! out + double precision, intent(out) :: criterion_model, x(n) + logical, intent(out) :: must_exit + + ! internal + integer :: info + + must_exit = .False. + + call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) + + call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) + + ! exit if DABS(prev_criterion - criterion_model) < 1d-12 + if (DABS(prev_criterion - criterion_model) < thresh_model) then + print*,'' + print*,'###############################################################################' + print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' + print*,'###############################################################################' + print*,'' + must_exit = .True. + endif + + if (delta < thresh_delta) then + print*,'' + print*,'##############################################' + print*,'Delta <', thresh_delta, 'stop the trust region' + print*,'##############################################' + print*,'' + must_exit = .True. + endif + + ! Add after the call to this subroutine, a statement: + ! "if (must_exit) then + ! exit + ! endif" + ! in order to exit the optimization loop + +end subroutine +#+END_SRC + +Variables: + +Input: +| nb_iter | integer | actual number of iterations | +| prev_criterion | double precision | criterion before the application of the step x | +| criterion | double precision | criterion after the application of the step x | +| criterion_model | double precision | predicted criterion after the application of x | + +Output: +| rho | double precision | Agreement between the predicted criterion and the real new criterion | +| cancel_step | logical | If the step must be cancelled | + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) + + include 'pi.h' + + BEGIN_DOC + ! Compute if the step should be cancelled + END_DOC + + implicit none + + ! in + double precision, intent(in) :: prev_criterion, criterion, criterion_model + + ! inout + integer, intent(inout) :: nb_iter + + ! out + logical, intent(out) :: cancel_step + double precision, intent(out) :: rho + + ! Computes rho + call trust_region_rho(prev_criterion,criterion,criterion_model,rho) + + if (nb_iter == 0) then + nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled + endif + + ! If rho < thresh_rho -> give something in output to cancel the step + if (rho >= thresh_rho) then !0.1d0) then + ! The step is accepted + cancel_step = .False. + else + ! The step is rejected + cancel_step = .True. + print*, '***********************' + print*, 'Step cancel : rho <', thresh_rho + print*, '***********************' + endif + +end subroutine +#+END_SRC + +** Template for MOs +#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt +subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list) + + implicit none + + ! Variables + + ! In + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + + ! Out + ! Rien ou un truc pour savoir si ça c'est bien passé + + ! Internal + double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:) + double precision, allocatable :: prev_mos(:,:) + double precision :: criterion, prev_criterion, criterion_model + double precision :: delta, rho + logical :: not_converged, cancel_step, must_exit, enforce_step_cancellation + integer :: nb_iter, info, nb_sub_iter + integer :: i,j,tmp_i,tmp_j + + allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Provide the criterion, but unnecessary because it's done + ! automatically + PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! Initialization + delta = 0d0 + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must start at 0.5 + not_converged = .True. ! Must be true + + ! Compute the criterion before the loop + prev_criterion = C_PROVIDER + + do while (not_converged) + + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! The new hessian and gradient are computed at the end of the previous iteration + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + cancel_step = .True. ! To enter in the loop just after + nb_sub_iter = 0 + + ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + + print*,'-----------------------------' + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + if (must_exit) then + ! if step_in_trust_region sets must_exit on true for numerical reasons + print*,'trust_region_step_w_expected_e sent the message : Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Forces the step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! touch mo_coef + call clear_mo_map ! Only if you are using the bi-electronic integrals + ! mo_coef becomes valid + ! And avoid the recomputation of the providers which depend of mo_coef + TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! To update the other parameters if needed + call #update_parameters() + + ! To enforce the program to provide new criterion after the update + ! of the parameters + FREE C_PROVIDER + PROVIDE C_PROVIDER + criterion = C_PROVIDER + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step ? + if (cancel_step) then + ! Replacement by the previous MOs + mo_coef = prev_mos + ! call save_mos() ! depends of the time for 1 iteration + + ! No need to clear_mo_map since we don't recompute the gradient and the hessian + ! mo_coef becomes valid + ! Avoid the recomputation of the providers which depend of mo_coef + TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER + else + ! The step is accepted: + ! criterion -> prev criterion + + ! The replacement "criterion -> prev criterion" is already done + ! in trust_region_rho, so if the criterion does not have a reason + ! to change, it will change nothing for the criterion and will + ! force the program to provide the new hessian, gradient and + ! convergence criterion for the next iteration. + ! But in the case of orbital optimization we diagonalize the CI + ! matrix after the "FREE" statement, so the criterion will change + + FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + prev_criterion = C_PROVIDER + + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + + ! call save_mos() ! depends of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! Provide the convergence criterion + ! Provide the gradient and the hessian for the next iteration + PROVIDE cc_PROVIDER + + ! To exit + if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then + not_converged = .False. + endif + + if (nb_iter > optimization_max_nb_iter) then + not_converged = .False. + endif + + if (delta < thresh_delta) then + not_converged = .False. + endif + + enddo + + ! Save the final MOs + call save_mos() + + ! Diagonalization of the hessian + ! (To see the eigenvalues at the end of the optimization) + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos) + +end +#+END_SRC + +** Cartesian version +#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt +subroutine algo_trust_cartesian_template(tmp_n) + + implicit none + + ! Variables + + ! In + integer, intent(in) :: tmp_n + + ! Out + ! Rien ou un truc pour savoir si ça c'est bien passé + + ! Internal + double precision, allocatable :: e_val(:), W(:,:), tmp_x(:) + double precision :: criterion, prev_criterion, criterion_model + double precision :: delta, rho + logical :: not_converged, cancel_step, must_exit + integer :: nb_iter, nb_sub_iter + integer :: i,j + + allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n)) + + PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER + + ! Initialization + delta = 0d0 + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must start at 0.5 + not_converged = .True. ! Must be true + + ! Compute the criterion before the loop + prev_criterion = C_PROVIDER + + do while (not_converged) + + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + if (nb_iter > 0) then + PROVIDE H_PROVIDER g_PROVIDER + endif + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + cancel_step = .True. ! To enter in the loop just after + nb_sub_iter = 0 + + ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + + print*,'-----------------------------' + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + if (must_exit) then + ! if step_in_trust_region sets must_exit on true for numerical reasons + print*,'trust_region_step_w_expected_e sent the message : Exit' + exit + endif + + ! New coordinates, check the sign + X_PROVIDER = X_PROVIDER - tmp_x + + ! touch X_PROVIDER + TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! To update the other parameters if needed + call #update_parameters() + + ! New criterion + PROVIDE C_PROVIDER ! Unnecessary + criterion = C_PROVIDER + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancel the previous step + if (cancel_step) then + ! Replacement by the previous coordinates, check the sign + X_PROVIDER = X_PROVIDER + tmp_x + + ! Avoid the recomputation of the hessian and the gradient + TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + PROVIDE cc_PROVIDER + + ! To exit + if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then + not_converged = .False. + endif + + if (nb_iter > optimization_max_nb_iter) then + not_converged = .False. + endif + + if (delta < thresh_delta) then + not_converged = .False. + endif + + enddo + + deallocate(e_val, W, tmp_x) + +end +#+END_SRC + +** Script template +#+BEGIN_SRC bash :tangle script_template_mos.sh +#!/bin/bash + +your_file= + +your_C_PROVIDER= +your_H_PROVIDER= +your_g_PROVIDER= +your_cc_PROVIDER= + +sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file +sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file +sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file +sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file +#+END_SRC + +#+BEGIN_SRC bash :tangle script_template_xyz.sh +#!/bin/bash + +your_file= + +your_C_PROVIDER= +your_X_PROVIDER= +your_H_PROVIDER= +your_g_PROVIDER= +your_cc_PROVIDER= + +sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file +sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file +sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file +sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file +sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file +#+END_SRC + diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f new file mode 100644 index 00000000..e274ec11 --- /dev/null +++ b/src/utils_trust_region/apply_mo_rotation.irp.f @@ -0,0 +1,85 @@ +! Apply MO rotation +! Subroutine to apply the rotation matrix to the coefficients of the +! MOs. + +! New MOs = Old MOs . Rotation matrix + +! *Compute the new MOs with the previous MOs and a rotation matrix* + +! Provided: +! | mo_num | integer | number of MOs | +! | ao_num | integer | number of AOs | +! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | + +! Intent in: +! | R(mo_num,mo_num) | double precision | rotation matrix | + +! Intent out: +! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | + +! Internal: +! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation | +! | i,j | integer | indexes | + +subroutine apply_mo_rotation(R,prev_mos) + + include 'pi.h' + + BEGIN_DOC + ! Compute the new MOs knowing the rotation matrix + END_DOC + + implicit none + + ! Variables + + ! in + double precision, intent(in) :: R(mo_num,mo_num) + + ! out + double precision, intent(out) :: prev_mos(ao_num,mo_num) + + ! internal + double precision, allocatable :: new_mos(:,:) + integer :: i,j + double precision :: t1,t2,t3 + + print*,'' + print*,'---apply_mo_rotation---' + + call wall_time(t1) + + ! Allocation + allocate(new_mos(ao_num,mo_num)) + + ! Calculation + + ! Product of old MOs (mo_coef) by Rotation matrix (R) + call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) + + prev_mos = mo_coef + mo_coef = new_mos + + !if (debug) then + ! print*,'New mo_coef : ' + ! do i = 1, mo_num + ! write(*,'(100(F10.5))') mo_coef(i,:) + ! enddo + !endif + + ! Save the new MOs and change the label + mo_label = 'MCSCF' + !call save_mos + call ezfio_set_determinants_mo_label(mo_label) + + !print*,'Done, MOs saved' + + ! Deallocation, end + deallocate(new_mos) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in apply mo rotation:', t3 + print*,'---End apply_mo_rotation---' + +end subroutine diff --git a/src/utils_trust_region/apply_mo_rotation.org b/src/utils_trust_region/apply_mo_rotation.org new file mode 100644 index 00000000..918581b7 --- /dev/null +++ b/src/utils_trust_region/apply_mo_rotation.org @@ -0,0 +1,86 @@ +* Apply MO rotation +Subroutine to apply the rotation matrix to the coefficients of the +MOs. + +New MOs = Old MOs . Rotation matrix + +*Compute the new MOs with the previous MOs and a rotation matrix* + +Provided: +| mo_num | integer | number of MOs | +| ao_num | integer | number of AOs | +| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | + +Intent in: +| R(mo_num,mo_num) | double precision | rotation matrix | + +Intent out: +| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | + +Internal: +| new_mos(ao_num,mo_num) | double precision | MOs after the rotation | +| i,j | integer | indexes | +#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f +subroutine apply_mo_rotation(R,prev_mos) + + include 'pi.h' + + BEGIN_DOC + ! Compute the new MOs knowing the rotation matrix + END_DOC + + implicit none + + ! Variables + + ! in + double precision, intent(in) :: R(mo_num,mo_num) + + ! out + double precision, intent(out) :: prev_mos(ao_num,mo_num) + + ! internal + double precision, allocatable :: new_mos(:,:) + integer :: i,j + double precision :: t1,t2,t3 + + print*,'' + print*,'---apply_mo_rotation---' + + call wall_time(t1) + + ! Allocation + allocate(new_mos(ao_num,mo_num)) + + ! Calculation + + ! Product of old MOs (mo_coef) by Rotation matrix (R) + call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) + + prev_mos = mo_coef + mo_coef = new_mos + + !if (debug) then + ! print*,'New mo_coef : ' + ! do i = 1, mo_num + ! write(*,'(100(F10.5))') mo_coef(i,:) + ! enddo + !endif + + ! Save the new MOs and change the label + mo_label = 'MCSCF' + !call save_mos + call ezfio_set_determinants_mo_label(mo_label) + + !print*,'Done, MOs saved' + + ! Deallocation, end + deallocate(new_mos) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in apply mo rotation:', t3 + print*,'---End apply_mo_rotation---' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/mat_to_vec_index.irp.f b/src/utils_trust_region/mat_to_vec_index.irp.f new file mode 100644 index 00000000..35e12232 --- /dev/null +++ b/src/utils_trust_region/mat_to_vec_index.irp.f @@ -0,0 +1,61 @@ +! Matrix to vector index + +! *Compute the index i of a vector element from the indexes p,q of a +! matrix element* + +! Lower diagonal matrix (p,q), p > q -> vector (i) + +! If a matrix is antisymmetric it can be reshaped as a vector. And the +! vector can be reshaped as an antisymmetric matrix + +! \begin{align*} +! \begin{pmatrix} +! 0 & -1 & -2 & -4 \\ +! 1 & 0 & -3 & -5 \\ +! 2 & 3 & 0 & -6 \\ +! 4 & 5 & 6 & 0 +! \end{pmatrix} +! \Leftrightarrow +! \begin{pmatrix} +! 1 & 2 & 3 & 4 & 5 & 6 +! \end{pmatrix} +! \end{align*} + +! !!! Here the algorithm only work for the lower diagonal !!! + +! Input: +! | p,q | integer | indexes of a matrix element in the lower diagonal | +! | | | p > q, q -> column | +! | | | p -> row, | +! | | | q -> column | + +! Input: +! | i | integer | corresponding index in the vector | + + +subroutine mat_to_vec_index(p,q,i) + + include 'pi.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: p,q + + ! out + integer, intent(out) :: i + + ! internal + integer :: a,b + double precision :: da + + ! Calculation + + a = p-1 + b = a*(a-1)/2 + + i = q+b + +end subroutine diff --git a/src/utils_trust_region/mat_to_vec_index.org b/src/utils_trust_region/mat_to_vec_index.org new file mode 100644 index 00000000..50840584 --- /dev/null +++ b/src/utils_trust_region/mat_to_vec_index.org @@ -0,0 +1,63 @@ +* Matrix to vector index + +*Compute the index i of a vector element from the indexes p,q of a +matrix element* + +Lower diagonal matrix (p,q), p > q -> vector (i) + +If a matrix is antisymmetric it can be reshaped as a vector. And the +vector can be reshaped as an antisymmetric matrix + +\begin{align*} +\begin{pmatrix} +0 & -1 & -2 & -4 \\ +1 & 0 & -3 & -5 \\ +2 & 3 & 0 & -6 \\ +4 & 5 & 6 & 0 +\end{pmatrix} +\Leftrightarrow +\begin{pmatrix} +1 & 2 & 3 & 4 & 5 & 6 +\end{pmatrix} +\end{align*} + +!!! Here the algorithm only work for the lower diagonal !!! + +Input: +| p,q | integer | indexes of a matrix element in the lower diagonal | +| | | p > q, q -> column | +| | | p -> row, | +| | | q -> column | + +Input: +| i | integer | corresponding index in the vector | + +#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f +subroutine mat_to_vec_index(p,q,i) + + include 'pi.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: p,q + + ! out + integer, intent(out) :: i + + ! internal + integer :: a,b + double precision :: da + + ! Calculation + + a = p-1 + b = a*(a-1)/2 + + i = q+b + +end subroutine +#+END_SRC + diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h new file mode 100644 index 00000000..bbfabfec --- /dev/null +++ b/src/utils_trust_region/pi.h @@ -0,0 +1,2 @@ + !logical, parameter :: debug=.False. + double precision, parameter :: pi = 3.1415926535897932d0 diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f new file mode 100644 index 00000000..4738fd67 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix.irp.f @@ -0,0 +1,443 @@ +! Rotation matrix + +! *Build a rotation matrix from an antisymmetric matrix* + +! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : +! $$ +! \textbf{R}=\exp(\textbf{A}) +! $$ + +! So : +! \begin{align*} +! \textbf{R}=& \exp(\textbf{A}) \\ +! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ +! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} +! \end{align*} + +! With : +! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$ +! $\tau$ : $\sqrt{-x}$ +! $x$ : eigenvalues of $\textbf{A}^2$ + +! Input: +! | A(n,n) | double precision | antisymmetric matrix | +! | n | integer | number of columns of the A matrix | +! | LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | +! | LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | + +! Output: +! | R(n,n) | double precision | Rotation matrix | +! | info | integer | if info = 0, the execution is successful | +! | | | if info = k, the k-th parameter has an illegal value | +! | | | if info = -k, the algorithm failed | + +! Internal: +! | B(n,n) | double precision | B = A.A | +! | work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | +! | lwork | integer | dimension of the syev work array >= max(1, 3n-1) | +! | W(n,n) | double precision | eigenvectors of B | +! | e_val(n) | double precision | eigenvalues of B | +! | m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | +! | cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | +! | sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | +! | tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | +! | part_1(n,n) | double precision | matrix W.cos_tau.W^t | +! | part_1a(n,n) | double precision | matrix cos_tau.W^t | +! | part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | +! | part_2a(n,n) | double precision | matrix W^t.A | +! | part_2b(n,n) | double precision | matrix sin_tau.W^t.A | +! | part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | +! | RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | +! | norm | integer | norm of R.R^t-1, must be equal to 0 | +! | i,j | integer | indexes | + +! Functions: +! | dnrm2 | double precision | Lapack function, compute the norm of a matrix | +! | disnan | logical | Lapack function, check if an element is NaN | + + + +subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) + + implicit none + + BEGIN_DOC + ! Rotation matrix to rotate the molecular orbitals. + ! If the rotation is too large the transformation is not unitary and must be cancelled. + END_DOC + + include 'pi.h' + + ! Variables + + ! in + integer, intent(in) :: n,LDA,LDR + double precision, intent(inout) :: A(LDA,n) + + ! out + double precision, intent(out) :: R(LDR,n) + integer, intent(out) :: info + logical, intent(out) :: enforce_step_cancellation + + ! internal + double precision, allocatable :: B(:,:) + double precision, allocatable :: work(:,:) + double precision, allocatable :: W(:,:), e_val(:) + double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) + double precision, allocatable :: part_1(:,:),part_1a(:,:) + double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) + double precision, allocatable :: RR_t(:,:) + integer :: i,j + integer :: info2, lwork ! for dsyev + double precision :: norm, max_elem, max_elem_A, t1,t2,t3 + + ! function + double precision :: dnrm2 + logical :: disnan + + print*,'' + print*,'---rotation_matrix---' + + call wall_time(t1) + + ! Allocation + allocate(B(n,n)) + allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) + allocate(W(n,n),e_val(n)) + allocate(part_1(n,n),part_1a(n,n)) + allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) + allocate(RR_t(n,n)) + +! Pre-conditions + +! Initialization +info=0 +enforce_step_cancellation = .False. + +! Size of matrix A must be at least 1 by 1 +if (n<1) then + info = 3 + print*, 'WARNING: invalid parameter 5' + print*, 'n<1' + return +endif + +! Leading dimension of A must be >= n +if (LDA < n) then + info = 25 + print*, 'WARNING: invalid parameter 2 or 5' + print*, 'LDA < n' + return +endif + +! Leading dimension of A must be >= n +if (LDR < n) then + info = 4 + print*, 'WARNING: invalid parameter 4' + print*, 'LDR < n' + return +endif + +! Matrix elements of A must by non-NaN +do j = 1, n + do i = 1, n + if (disnan(A(i,j))) then + info=1 + print*, 'WARNING: invalid parameter 1' + print*, 'NaN element in A matrix' + return + endif + enddo +enddo + +do i = 1, n + if (A(i,i) /= 0d0) then + print*, 'WARNING: matrix A is not antisymmetric' + print*, 'Non 0 element on the diagonal', i, A(i,i) + call ABORT + endif +enddo + +do j = 1, n + do i = 1, n + if (A(i,j)+A(j,i)>1d-16) then + print*, 'WANRING: matrix A is not antisymmetric' + print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) + print*, 'diff:', A(i,j)+A(j,i) + call ABORT + endif + enddo +enddo + +! Fix for too big elements ! bad idea better to cancel if the error is too big +!do j = 1, n +! do i = 1, n +! A(i,j) = mod(A(i,j),2d0*pi) +! if (dabs(A(i,j)) > pi) then +! A(i,j) = 0d0 +! endif +! enddo +!enddo + +max_elem_A = 0d0 +do j = 1, n + do i = 1, n + if (ABS(A(i,j)) > ABS(max_elem_A)) then + max_elem_A = A(i,j) + endif + enddo +enddo +print*,'max element in A', max_elem_A + +if (ABS(max_elem_A) > 2 * pi) then + print*,'' + print*,'WARNING: ABS(max_elem_A) > 2 pi ' + print*,'' +endif + +! B=A.A +! - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ +! - Diagonalization of $\textbf{B}$ +! W, the eigenvectors +! e_val, the eigenvalues + + +! Compute B=A.A + +call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) + +! Copy B in W, diagonalization will put the eigenvectors in W +W=B + +! Diagonalization of B +! Eigenvalues -> e_val +! Eigenvectors -> W +lwork = 3*n-1 +allocate(work(lwork,n)) + +print*,'Starting diagonalization ...' + +call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) + +deallocate(work) + +if (info2 == 0) then + print*, 'Diagonalization : Done' +elseif (info2 < 0) then + print*, 'WARNING: error in the diagonalization' + print*, 'Illegal value of the ', info2,'-th parameter' +else + print*, "WARNING: Diagonalization failed to converge" +endif + +! Tau^-1, cos(tau), sin(tau) +! $$\tau = \sqrt{-x}$$ +! - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ +! - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ +! - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ +! These matrices are diagonals + +! Diagonal matrix m_diag +do j = 1, n + if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems + e_val(j) = 0.d0 + else + e_val(j) = - e_val(j) + endif +enddo + +m_diag = 0.d0 +do i = 1, n + m_diag(i,i) = e_val(i) +enddo + +! cos_tau +do j = 1, n + do i = 1, n + if (i==j) then + cos_tau(i,j) = dcos(dsqrt(e_val(i))) + else + cos_tau(i,j) = 0d0 + endif + enddo +enddo + +! sin_tau +do j = 1, n + do i = 1, n + if (i==j) then + sin_tau(i,j) = dsin(dsqrt(e_val(i))) + else + sin_tau(i,j) = 0d0 + endif + enddo +enddo + +! Debug, display the cos_tau and sin_tau matrix +!if (debug) then +! print*, 'cos_tau' +! do i = 1, n +! print*, cos_tau(i,:) +! enddo +! print*, 'sin_tau' +! do i = 1, n +! print*, sin_tau(i,:) +! enddo +!endif + +! tau^-1 +do j = 1, n + do i = 1, n + if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small + tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) + else + tau_m1(i,j) = 0d0 + endif + enddo +enddo + +max_elem = 0d0 +do i = 1, n + if (ABS(tau_m1(i,i)) > ABS(max_elem)) then + max_elem = tau_m1(i,i) + endif +enddo +print*,'max elem tau^-1:', max_elem + +! Debug +!print*,'eigenvalues:' +!do i = 1, n +! print*, e_val(i) +!enddo + +!Debug, display tau^-1 +!if (debug) then +! print*, 'tau^-1' +! do i = 1, n +! print*,tau_m1(i,:) +! enddo +!endif + +! Rotation matrix +! \begin{align*} +! \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} +! \end{align*} +! \begin{align*} +! \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} +! \end{align*} +! \begin{align*} +! \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} +! \end{align*} + +! First: +! part_1 = dgemm(W, dgemm(cos_tau, W^t)) +! part_1a = dgemm(cos_tau, W^t) +! part_1 = dgemm(W, part_1a) +! And: +! part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) +! part_2a = dgemm(W^t, A) +! part_2b = dgemm(sin_tau, part_2a) +! part_2c = dgemm(tau_m1, part_2b) +! part_2 = dgemm(W, part_2c) +! Finally: +! Rotation matrix, R = part_1+part_2 + +! If $R$ is a rotation matrix: +! $R.R^T=R^T.R=\textbf{1}$ + +! part_1 +call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) +call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) + +! part_2 +call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) +call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) +call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) +call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) + +! Rotation matrix R +R = part_1 + part_2 + +! Matrix check +! R.R^t and R^t.R must be equal to identity matrix +do j = 1, n + do i=1,n + if (i==j) then + RR_t(i,j) = 1d0 + else + RR_t(i,j) = 0d0 + endif + enddo +enddo + +call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) + +norm = dnrm2(n*n,RR_t,1) +print*, 'Rotation matrix check, norm R.R^T = ', norm + +! Debug +!if (debug) then +! print*, 'RR_t' +! do i = 1, n +! print*, RR_t(i,:) +! enddo +!endif + +! Post conditions + +! Check if R.R^T=1 +max_elem = 0d0 +do j = 1, n + do i = 1, n + if (ABS(RR_t(i,j)) > ABS(max_elem)) then + max_elem = RR_t(i,j) + endif + enddo +enddo + +print*, 'Max error in R.R^T:', max_elem +print*, 'e_val(1):', e_val(1) +print*, 'e_val(n):', e_val(n) +print*, 'max elem in A:', max_elem_A + +if (ABS(max_elem) > 1d-12) then + print*, 'WARNING: max error in R.R^T > 1d-12' + print*, 'Enforce the step cancellation' + enforce_step_cancellation = .True. +endif + +! Matrix elements of R must by non-NaN +do j = 1,n + do i = 1,LDR + if (disnan(R(i,j))) then + info = 666 + print*, 'NaN in rotation matrix' + call ABORT + endif + enddo +enddo + +! Display +!if (debug) then +! print*,'Rotation matrix :' +! do i = 1, n +! write(*,'(100(F10.5))') R(i,:) +! enddo +!endif + +! Deallocation, end + +deallocate(B) + deallocate(m_diag,cos_tau,sin_tau,tau_m1) + deallocate(W,e_val) + deallocate(part_1,part_1a) + deallocate(part_2,part_2a,part_2b,part_2c) + deallocate(RR_t) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in rotation matrix:', t3 + + print*,'---End rotation_matrix---' + +end subroutine diff --git a/src/utils_trust_region/rotation_matrix.org b/src/utils_trust_region/rotation_matrix.org new file mode 100644 index 00000000..73ba0298 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix.org @@ -0,0 +1,454 @@ +* Rotation matrix + +*Build a rotation matrix from an antisymmetric matrix* + +Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : +$$ +\textbf{R}=\exp(\textbf{A}) +$$ + +So : +\begin{align*} +\textbf{R}=& \exp(\textbf{A}) \\ +=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ +=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} +\end{align*} + +With : +$\textbf{W}$ : eigenvectors of $\textbf{A}^2$ +$\tau$ : $\sqrt{-x}$ +$x$ : eigenvalues of $\textbf{A}^2$ + +Input: +| A(n,n) | double precision | antisymmetric matrix | +| n | integer | number of columns of the A matrix | +| LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | +| LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | + +Output: +| R(n,n) | double precision | Rotation matrix | +| info | integer | if info = 0, the execution is successful | +| | | if info = k, the k-th parameter has an illegal value | +| | | if info = -k, the algorithm failed | + +Internal: +| B(n,n) | double precision | B = A.A | +| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | +| lwork | integer | dimension of the syev work array >= max(1, 3n-1) | +| W(n,n) | double precision | eigenvectors of B | +| e_val(n) | double precision | eigenvalues of B | +| m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | +| cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | +| sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | +| tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | +| part_1(n,n) | double precision | matrix W.cos_tau.W^t | +| part_1a(n,n) | double precision | matrix cos_tau.W^t | +| part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | +| part_2a(n,n) | double precision | matrix W^t.A | +| part_2b(n,n) | double precision | matrix sin_tau.W^t.A | +| part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | +| RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | +| norm | integer | norm of R.R^t-1, must be equal to 0 | +| i,j | integer | indexes | + +Functions: +| dnrm2 | double precision | Lapack function, compute the norm of a matrix | +| disnan | logical | Lapack function, check if an element is NaN | + + +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f +subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) + + implicit none + + BEGIN_DOC + ! Rotation matrix to rotate the molecular orbitals. + ! If the rotation is too large the transformation is not unitary and must be cancelled. + END_DOC + + include 'pi.h' + + ! Variables + + ! in + integer, intent(in) :: n,LDA,LDR + double precision, intent(inout) :: A(LDA,n) + + ! out + double precision, intent(out) :: R(LDR,n) + integer, intent(out) :: info + logical, intent(out) :: enforce_step_cancellation + + ! internal + double precision, allocatable :: B(:,:) + double precision, allocatable :: work(:,:) + double precision, allocatable :: W(:,:), e_val(:) + double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) + double precision, allocatable :: part_1(:,:),part_1a(:,:) + double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) + double precision, allocatable :: RR_t(:,:) + integer :: i,j + integer :: info2, lwork ! for dsyev + double precision :: norm, max_elem, max_elem_A, t1,t2,t3 + + ! function + double precision :: dnrm2 + logical :: disnan + + print*,'' + print*,'---rotation_matrix---' + + call wall_time(t1) + + ! Allocation + allocate(B(n,n)) + allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) + allocate(W(n,n),e_val(n)) + allocate(part_1(n,n),part_1a(n,n)) + allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) + allocate(RR_t(n,n)) +#+END_SRC + +** Pre-conditions +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Initialization + info=0 + enforce_step_cancellation = .False. + + ! Size of matrix A must be at least 1 by 1 + if (n<1) then + info = 3 + print*, 'WARNING: invalid parameter 5' + print*, 'n<1' + return + endif + + ! Leading dimension of A must be >= n + if (LDA < n) then + info = 25 + print*, 'WARNING: invalid parameter 2 or 5' + print*, 'LDA < n' + return + endif + + ! Leading dimension of A must be >= n + if (LDR < n) then + info = 4 + print*, 'WARNING: invalid parameter 4' + print*, 'LDR < n' + return + endif + + ! Matrix elements of A must by non-NaN + do j = 1, n + do i = 1, n + if (disnan(A(i,j))) then + info=1 + print*, 'WARNING: invalid parameter 1' + print*, 'NaN element in A matrix' + return + endif + enddo + enddo + + do i = 1, n + if (A(i,i) /= 0d0) then + print*, 'WARNING: matrix A is not antisymmetric' + print*, 'Non 0 element on the diagonal', i, A(i,i) + call ABORT + endif + enddo + + do j = 1, n + do i = 1, n + if (A(i,j)+A(j,i)>1d-16) then + print*, 'WANRING: matrix A is not antisymmetric' + print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) + print*, 'diff:', A(i,j)+A(j,i) + call ABORT + endif + enddo + enddo + + ! Fix for too big elements ! bad idea better to cancel if the error is too big + !do j = 1, n + ! do i = 1, n + ! A(i,j) = mod(A(i,j),2d0*pi) + ! if (dabs(A(i,j)) > pi) then + ! A(i,j) = 0d0 + ! endif + ! enddo + !enddo + + max_elem_A = 0d0 + do j = 1, n + do i = 1, n + if (ABS(A(i,j)) > ABS(max_elem_A)) then + max_elem_A = A(i,j) + endif + enddo + enddo + print*,'max element in A', max_elem_A + + if (ABS(max_elem_A) > 2 * pi) then + print*,'' + print*,'WARNING: ABS(max_elem_A) > 2 pi ' + print*,'' + endif + +#+END_SRC + +** Calculations + +*** B=A.A + - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ + - Diagonalization of $\textbf{B}$ + W, the eigenvectors + e_val, the eigenvalues + + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Compute B=A.A + + call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) + + ! Copy B in W, diagonalization will put the eigenvectors in W + W=B + + ! Diagonalization of B + ! Eigenvalues -> e_val + ! Eigenvectors -> W + lwork = 3*n-1 + allocate(work(lwork,n)) + + print*,'Starting diagonalization ...' + + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) + + deallocate(work) + + if (info2 == 0) then + print*, 'Diagonalization : Done' + elseif (info2 < 0) then + print*, 'WARNING: error in the diagonalization' + print*, 'Illegal value of the ', info2,'-th parameter' + else + print*, "WARNING: Diagonalization failed to converge" + endif + #+END_SRC + +*** Tau^-1, cos(tau), sin(tau) + $$\tau = \sqrt{-x}$$ + - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ + - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ + - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ + These matrices are diagonals + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Diagonal matrix m_diag + do j = 1, n + if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems + e_val(j) = 0.d0 + else + e_val(j) = - e_val(j) + endif + enddo + + m_diag = 0.d0 + do i = 1, n + m_diag(i,i) = e_val(i) + enddo + + ! cos_tau + do j = 1, n + do i = 1, n + if (i==j) then + cos_tau(i,j) = dcos(dsqrt(e_val(i))) + else + cos_tau(i,j) = 0d0 + endif + enddo + enddo + + ! sin_tau + do j = 1, n + do i = 1, n + if (i==j) then + sin_tau(i,j) = dsin(dsqrt(e_val(i))) + else + sin_tau(i,j) = 0d0 + endif + enddo + enddo + + ! Debug, display the cos_tau and sin_tau matrix + !if (debug) then + ! print*, 'cos_tau' + ! do i = 1, n + ! print*, cos_tau(i,:) + ! enddo + ! print*, 'sin_tau' + ! do i = 1, n + ! print*, sin_tau(i,:) + ! enddo + !endif + + ! tau^-1 + do j = 1, n + do i = 1, n + if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small + tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) + else + tau_m1(i,j) = 0d0 + endif + enddo + enddo + + max_elem = 0d0 + do i = 1, n + if (ABS(tau_m1(i,i)) > ABS(max_elem)) then + max_elem = tau_m1(i,i) + endif + enddo + print*,'max elem tau^-1:', max_elem + + ! Debug + !print*,'eigenvalues:' + !do i = 1, n + ! print*, e_val(i) + !enddo + + !Debug, display tau^-1 + !if (debug) then + ! print*, 'tau^-1' + ! do i = 1, n + ! print*,tau_m1(i,:) + ! enddo + !endif + #+END_SRC + +*** Rotation matrix + \begin{align*} + \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} + \end{align*} + \begin{align*} + \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \end{align*} + \begin{align*} + \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} + \end{align*} + + First: + part_1 = dgemm(W, dgemm(cos_tau, W^t)) + part_1a = dgemm(cos_tau, W^t) + part_1 = dgemm(W, part_1a) + And: + part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) + part_2a = dgemm(W^t, A) + part_2b = dgemm(sin_tau, part_2a) + part_2c = dgemm(tau_m1, part_2b) + part_2 = dgemm(W, part_2c) + Finally: + Rotation matrix, R = part_1+part_2 + + If $R$ is a rotation matrix: + $R.R^T=R^T.R=\textbf{1}$ + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! part_1 + call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) + call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) + + ! part_2 + call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) + call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) + call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) + call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) + + ! Rotation matrix R + R = part_1 + part_2 + + ! Matrix check + ! R.R^t and R^t.R must be equal to identity matrix + do j = 1, n + do i=1,n + if (i==j) then + RR_t(i,j) = 1d0 + else + RR_t(i,j) = 0d0 + endif + enddo + enddo + + call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) + + norm = dnrm2(n*n,RR_t,1) + print*, 'Rotation matrix check, norm R.R^T = ', norm + + ! Debug + !if (debug) then + ! print*, 'RR_t' + ! do i = 1, n + ! print*, RR_t(i,:) + ! enddo + !endif + #+END_SRC + +*** Post conditions + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Check if R.R^T=1 + max_elem = 0d0 + do j = 1, n + do i = 1, n + if (ABS(RR_t(i,j)) > ABS(max_elem)) then + max_elem = RR_t(i,j) + endif + enddo + enddo + + print*, 'Max error in R.R^T:', max_elem + print*, 'e_val(1):', e_val(1) + print*, 'e_val(n):', e_val(n) + print*, 'max elem in A:', max_elem_A + + if (ABS(max_elem) > 1d-12) then + print*, 'WARNING: max error in R.R^T > 1d-12' + print*, 'Enforce the step cancellation' + enforce_step_cancellation = .True. + endif + + ! Matrix elements of R must by non-NaN + do j = 1,n + do i = 1,LDR + if (disnan(R(i,j))) then + info = 666 + print*, 'NaN in rotation matrix' + call ABORT + endif + enddo + enddo + + ! Display + !if (debug) then + ! print*,'Rotation matrix :' + ! do i = 1, n + ! write(*,'(100(F10.5))') R(i,:) + ! enddo + !endif + #+END_SRC + +** Deallocation, end + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + deallocate(B) + deallocate(m_diag,cos_tau,sin_tau,tau_m1) + deallocate(W,e_val) + deallocate(part_1,part_1a) + deallocate(part_2,part_2a,part_2b,part_2c) + deallocate(RR_t) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in rotation matrix:', t3 + + print*,'---End rotation_matrix---' + +end subroutine + #+END_SRC + diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f new file mode 100644 index 00000000..bdd1f6ba --- /dev/null +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f @@ -0,0 +1,64 @@ +! Rotation matrix in a subspace to rotation matrix in the full space + +! Usually, we are using a list of MOs, for exemple the active ones. When +! we compute a rotation matrix to rotate the MOs, we just compute a +! rotation matrix for these MOs in order to reduce the size of the +! matrix which has to be computed. Since the computation of a rotation +! matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to +! reuce the number of MOs involved. +! After that we replace the rotation matrix in the full space by +! building the elements of the rotation matrix in the full space from +! the elements of the rotation matrix in the subspace and adding some 0 +! on the extradiagonal elements and some 1 on the diagonal elements, +! for the MOs that are not involved in the rotation. + +! Provided: +! | mo_num | integer | Number of MOs | + +! Input: +! | m | integer | Size of tmp_list, m <= mo_num | +! | tmp_list(m) | integer | List of MOs | +! | tmp_R(m,m) | double precision | Rotation matrix in the space of | +! | | | the MOs containing by tmp_list | + +! Output: +! | R(mo_num,mo_num | double precision | Rotation matrix in the space | +! | | | of all the MOs | + +! Internal: +! | i,j | integer | indexes in the full space | +! | tmp_i,tmp_j | integer | indexes in the subspace | + + +subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) + + BEGIN_DOC + ! Compute the full rotation matrix from a smaller one + END_DOC + + implicit none + + ! in + integer, intent(in) :: m, tmp_list(m) + double precision, intent(in) :: tmp_R(m,m) + + ! out + double precision, intent(out) :: R(mo_num,mo_num) + + ! internal + integer :: i,j,tmp_i,tmp_j + + ! tmp_R to R, subspace to full space + R = 0d0 + do i = 1, mo_num + R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital + enddo + do tmp_j = 1, m + j = tmp_list(tmp_j) + do tmp_i = 1, m + i = tmp_list(tmp_i) + R(i,j) = tmp_R(tmp_i,tmp_j) + enddo + enddo + +end diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.org b/src/utils_trust_region/sub_to_full_rotation_matrix.org new file mode 100644 index 00000000..16434dc8 --- /dev/null +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.org @@ -0,0 +1,65 @@ +* Rotation matrix in a subspace to rotation matrix in the full space + +Usually, we are using a list of MOs, for exemple the active ones. When +we compute a rotation matrix to rotate the MOs, we just compute a +rotation matrix for these MOs in order to reduce the size of the +matrix which has to be computed. Since the computation of a rotation +matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to +reuce the number of MOs involved. +After that we replace the rotation matrix in the full space by +building the elements of the rotation matrix in the full space from +the elements of the rotation matrix in the subspace and adding some 0 +on the extradiagonal elements and some 1 on the diagonal elements, +for the MOs that are not involved in the rotation. + +Provided: +| mo_num | integer | Number of MOs | + +Input: +| m | integer | Size of tmp_list, m <= mo_num | +| tmp_list(m) | integer | List of MOs | +| tmp_R(m,m) | double precision | Rotation matrix in the space of | +| | | the MOs containing by tmp_list | + +Output: +| R(mo_num,mo_num | double precision | Rotation matrix in the space | +| | | of all the MOs | + +Internal: +| i,j | integer | indexes in the full space | +| tmp_i,tmp_j | integer | indexes in the subspace | + +#+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f +subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) + + BEGIN_DOC + ! Compute the full rotation matrix from a smaller one + END_DOC + + implicit none + + ! in + integer, intent(in) :: m, tmp_list(m) + double precision, intent(in) :: tmp_R(m,m) + + ! out + double precision, intent(out) :: R(mo_num,mo_num) + + ! internal + integer :: i,j,tmp_i,tmp_j + + ! tmp_R to R, subspace to full space + R = 0d0 + do i = 1, mo_num + R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital + enddo + do tmp_j = 1, m + j = tmp_list(tmp_j) + do tmp_i = 1, m + i = tmp_list(tmp_i) + R(i,j) = tmp_R(tmp_i,tmp_j) + enddo + enddo + +end +#+END_SRC diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f new file mode 100644 index 00000000..b7d849d1 --- /dev/null +++ b/src/utils_trust_region/trust_region_expected_e.irp.f @@ -0,0 +1,119 @@ +! Predicted energy : e_model + +! *Compute the energy predicted by the Taylor series* + +! The energy is predicted using a Taylor expansion truncated at te 2nd +! order : + +! \begin{align*} +! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) +! \end{align*} + +! Input: +! | n | integer | m*(m-1)/2 | +! | v_grad(n) | double precision | gradient | +! | H(n,n) | double precision | hessian | +! | x(n) | double precision | Step in the trust region | +! | prev_energy | double precision | previous energy | + +! Output: +! | e_model | double precision | predicted energy after the rotation of the MOs | + +! Internal: +! | part_1 | double precision | v_grad^T.x | +! | part_2 | double precision | 1/2 . x^T.H.x | +! | part_2a | double precision | H.x | +! | i,j | integer | indexes | + +! Function: +! | ddot | double precision | dot product (Lapack) | + + +subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) + + include 'pi.h' + + BEGIN_DOC + ! Compute the expected criterion/energy after the application of the step x + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: v_grad(n),H(n,n),x(n) + double precision, intent(in) :: prev_energy + + ! out + double precision, intent(out) :: e_model + + ! internal + double precision :: part_1, part_2, t1,t2,t3 + double precision, allocatable :: part_2a(:) + + integer :: i,j + + !Function + double precision :: ddot + + print*,'' + print*,'---Trust_e_model---' + + call wall_time(t1) + + ! Allocation + allocate(part_2a(n)) + +! Calculations + +! part_1 corresponds to the product g.x +! part_2a corresponds to the product H.x +! part_2 corresponds to the product 0.5*(x^T.H.x) + +! TODO: remove the dot products + + +! Product v_grad.x + part_1 = ddot(n,v_grad,1,x,1) + + !if (debug) then + print*,'g.x : ', part_1 + !endif + + ! Product H.x + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + + ! Product 1/2 . x^T.H.x + part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) + + !if (debug) then + print*,'1/2*x^T.H.x : ', part_2 + !endif + + print*,'prev_energy', prev_energy + + ! Sum + e_model = prev_energy + part_1 + part_2 + + ! Writing the predicted energy + print*, 'Predicted energy after the rotation : ', e_model + print*, 'Previous energy - predicted energy:', prev_energy - e_model + + ! Can be deleted, already in another subroutine + if (DABS(prev_energy - e_model) < 1d-12 ) then + print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' + endif + + ! Deallocation + deallocate(part_2a) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust e model:', t3 + + print*,'---End trust_e_model---' + print*,'' + +end subroutine diff --git a/src/utils_trust_region/trust_region_expected_e.org b/src/utils_trust_region/trust_region_expected_e.org new file mode 100644 index 00000000..58c8f804 --- /dev/null +++ b/src/utils_trust_region/trust_region_expected_e.org @@ -0,0 +1,121 @@ +* Predicted energy : e_model + +*Compute the energy predicted by the Taylor series* + +The energy is predicted using a Taylor expansion truncated at te 2nd +order : + +\begin{align*} +E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) +\end{align*} + +Input: +| n | integer | m*(m-1)/2 | +| v_grad(n) | double precision | gradient | +| H(n,n) | double precision | hessian | +| x(n) | double precision | Step in the trust region | +| prev_energy | double precision | previous energy | + +Output: +| e_model | double precision | predicted energy after the rotation of the MOs | + +Internal: +| part_1 | double precision | v_grad^T.x | +| part_2 | double precision | 1/2 . x^T.H.x | +| part_2a | double precision | H.x | +| i,j | integer | indexes | + +Function: +| ddot | double precision | dot product (Lapack) | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f +subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) + + include 'pi.h' + + BEGIN_DOC + ! Compute the expected criterion/energy after the application of the step x + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: v_grad(n),H(n,n),x(n) + double precision, intent(in) :: prev_energy + + ! out + double precision, intent(out) :: e_model + + ! internal + double precision :: part_1, part_2, t1,t2,t3 + double precision, allocatable :: part_2a(:) + + integer :: i,j + + !Function + double precision :: ddot + + print*,'' + print*,'---Trust_e_model---' + + call wall_time(t1) + + ! Allocation + allocate(part_2a(n)) +#+END_SRC + +** Calculations + +part_1 corresponds to the product g.x +part_2a corresponds to the product H.x +part_2 corresponds to the product 0.5*(x^T.H.x) + +TODO: remove the dot products + +#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f + ! Product v_grad.x + part_1 = ddot(n,v_grad,1,x,1) + + !if (debug) then + print*,'g.x : ', part_1 + !endif + + ! Product H.x + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + + ! Product 1/2 . x^T.H.x + part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) + + !if (debug) then + print*,'1/2*x^T.H.x : ', part_2 + !endif + + print*,'prev_energy', prev_energy + + ! Sum + e_model = prev_energy + part_1 + part_2 + + ! Writing the predicted energy + print*, 'Predicted energy after the rotation : ', e_model + print*, 'Previous energy - predicted energy:', prev_energy - e_model + + ! Can be deleted, already in another subroutine + if (DABS(prev_energy - e_model) < 1d-12 ) then + print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' + endif + + ! Deallocation + deallocate(part_2a) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust e model:', t3 + + print*,'---End trust_e_model---' + print*,'' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f new file mode 100644 index 00000000..85e7bf2a --- /dev/null +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -0,0 +1,1655 @@ +! Newton's method to find the optimal lambda + +! *Compute the lambda value for the trust region* + +! This subroutine uses the Newton method in order to find the optimal +! lambda. This constant is added on the diagonal of the hessian to shift +! the eiganvalues. It has a double role: +! - ensure that the resulting hessian is positive definite for the +! Newton method +! - constrain the step in the trust region, i.e., +! $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius +! of the trust region. +! We search $\lambda$ which minimizes +! \begin{align*} +! f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 +! \end{align*} +! or +! \begin{align*} +! \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 +! \end{align*} +! and gives obviously 0 in both cases. \newline + +! There are several cases: +! - If $\textbf{H}$ is positive definite the interval containing the +! solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). +! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot +! \textbf{g} \neq 0$ then the interval containing +! the solution is $\lambda \in (-h_1, \infty)$. +! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ then the interval containing the solution is +! $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < +! 10^{-12}$ are not computed, so the term where $i = 1$ is +! automatically removed and this case becomes similar to the previous one. + +! So to avoid numerical problems (cf. trust_region) we start the +! algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, +! with $\epsilon$ a little constant. +! The research must be restricted to the interval containing the +! solution. For that reason a little trust region in 1D is used. + +! The Newton method to find the optimal $\lambda$ is : +! \begin{align*} +! \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ +! \end{align*} +! $f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to +! $\lambda$ at the l-th iteration, +! $f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to +! $\lambda$ at the l-th iteration.\newline + +! Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} +! f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as +! \begin{align*} +! y \leq \alpha +! \end{align*} +! with $\alpha$ a scalar representing the trust length (trust region in +! 1D) where the function $f$ or $\tilde{f}$ is correctly describe by the +! Taylor series truncated at the second order. Thus, if $y > \alpha$, +! the constraint is applied as +! \begin{align*} +! y^* = \alpha \frac{y}{|y|} +! \end{align*} +! with $y^*$ the solution in the trust region. + +! The size of the trust region evolves in function of $\rho$ as for the +! trust region seen previously cf. trust_region, rho_model. +! The prediction of the value of $f$ or $\tilde{f}$ is done using the +! Taylor series truncated at the second order cf. "trust_region", +! "trust_e_model". + +! The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - +! \Delta^2)^2$ with respect to $\lambda$ are: +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{align*} + +! The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - +! 1/\Delta^2)^2$ with respect to $\lambda$ are: +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} +! {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + +! Provided in qp_edit: +! | thresh_rho_2 | +! | thresh_cc | +! | nb_it_max_lambda | +! | version_lambda_search | +! | nb_it_max_pre_search | +! see qp_edit for more details + +! Input: +! | n | integer | m*(m-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | tmp_wtg(n) | double precision | w_i^T.v_grad(i) | +! | delta | double precision | delta for the trust region | + +! Output: +! | lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | +! | | | lambda > 0 | + +! Internal: +! | d1_N | double precision | value of d1_norm_trust_region | +! | d2_N | double precision | value of d2_norm_trust_region | +! | f_N | double precision | value of f_norm_trust_region | +! | prev_f_N | double precision | previous value of f_norm_trust_region | +! | f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | +! | prev_f_R | double precision | previous value of f_R | +! | model | double precision | predicted value of f_R from prev_f_R and y | +! | d_1 | double precision | value of the first derivative | +! | d_2 | double precision | value of the second derivative | +! | y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | +! | prev_lambda | double precision | previous value of lambda | +! | t1,t2,t3 | double precision | wall time | +! | i | integer | index | +! | epsilon | double precision | little constant to avoid numerical problem | +! | rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | +! | version | integer | version of the root finding method | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +! | d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +! | f_norm_trust_region | double precision | value of norm(x)^2 | + + + +subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + + include 'pi.h' + + BEGIN_DOC + ! Research the optimal lambda to constrain the step size in the trust region + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(inout) :: e_val(n) + double precision, intent(in) :: delta + double precision, intent(in) :: tmp_wtg(n) + + ! out + double precision, intent(out) :: lambda + + ! Internal + double precision :: d1_N, d2_N, f_N, prev_f_N + double precision :: prev_f_R, f_R + double precision :: model + double precision :: d_1, d_2 + double precision :: t1,t2,t3 + integer :: i + double precision :: epsilon + double precision :: y + double precision :: prev_lambda + double precision :: rho_2 + double precision :: alpha + integer :: version + + ! Functions + double precision :: d1_norm_trust_region,d1_norm_trust_region_omp + double precision :: d2_norm_trust_region, d2_norm_trust_region_omp + double precision :: f_norm_trust_region, f_norm_trust_region_omp + double precision :: d1_norm_inverse_trust_region + double precision :: d2_norm_inverse_trust_region + double precision :: d1_norm_inverse_trust_region_omp + double precision :: d2_norm_inverse_trust_region_omp + + print*,'' + print*,'---Trust_newton---' + print*,'' + + call wall_time(t1) + + ! version_lambda_search + ! 1 -> ||x||^2 - delta^2 = 0, + ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) + if (version_lambda_search == 1) then + print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + else + print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + endif + ! Version 2 is normally better + + + +! Resolution with the Newton method: + + + ! Initialization + epsilon = 1d-4 + lambda =MAX(0d0, -e_val(1)) + + ! Pre research of lambda to start near the optimal lambda + ! by adding a constant epsilon and changing the constant to + ! have ||x(lambda + epsilon)|| ~ delta, before setting + ! lambda = lambda + epsilon + print*, 'Pre research of lambda:' + print*,'Initial lambda =', lambda + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + i = 1 + + ! To increase lambda + if (f_N > delta**2) then + print*,'Increasing lambda...' + do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 2d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N < f_N) then + print*,'WARNING, error: prev_f_N < f_N, exit' + epsilon = epsilon * 0.5d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + + ! To reduce lambda + else + print*,'Reducing lambda...' + do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 0.5d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N > f_N) then + print*,'WARNING, error: prev_f_N > f_N, exit' + epsilon = epsilon * 2d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + endif + + print*,'End of the pre research of lambda' + + ! New value of lambda + lambda = lambda + epsilon + + print*, 'e_val(1):', e_val(1) + print*, 'Staring point, lambda =', lambda + + ! thresh_cc, threshold for the research of the optimal lambda + ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc + ! thresh_rho_2, threshold to cancel the step in the research + ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 + print*,'Threshold for the CC:', thresh_cc + print*,'Threshold for rho_2:', thresh_rho_2 + + print*, 'w_1^T . g =', tmp_wtg(1) + + ! Debug + !if (debug) then + ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + !endif + + ! Initialization + i = 1 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 + model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 + prev_f_N = 0d0 ! previous value of ||x||^2 + prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 + f_R = 0d0 ! value of (||x||^2 - delta^2)^2 + rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) + y = 0d0 ! step size + prev_lambda = 0d0 ! previous lambda + + ! Derivatives + if (version_lambda_search == 1) then + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + + ! Trust length + alpha = DABS((1d0/d_2)*d_1) + + ! Newton's method + do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) + print*,'--------------------------------------' + print*,'Research of lambda, iteration:', i + print*,'--------------------------------------' + + ! Update of f_N, f_R and the derivatives + prev_f_N = f_N + if (version_lambda_search == 1) then + prev_f_R = (prev_f_N - delta**2)**2 + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + + ! Newton's step + y = -(1d0/DABS(d_2))*d_1 + + ! Constraint on y (the newton step) + if (DABS(y) > alpha) then + y = alpha * (y/DABS(y)) ! preservation of the sign of y + endif + write(*,'(a,E12.5)') ' Step length: ', y + + ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series + model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 + + ! Updates lambda + prev_lambda = lambda + lambda = prev_lambda + y + print*,'prev lambda:', prev_lambda + print*,'new lambda:', lambda + + ! Checks if lambda is in (-h_1, \infty) + if (lambda > MAX(0d0, -e_val(1))) then + ! New value of ||x(lambda)||^2 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + ! New f_R + if (version_lambda_search == 1) then + f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 + else + f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 + endif + + if (version_lambda_search == 1) then + print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + else + print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + endif + + print*,'previous - actual:', prev_f_R - f_R + print*,'previous - model:', prev_f_R - model + + ! Check the gain + if (DABS(prev_f_R - model) < thresh_model_2) then + print*,'' + print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' + print*,'' + endif + + ! Will be deleted + !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then + ! print*,'' + ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' + ! print*,'' + ! exit + !endif + + ! Computes rho_2 + rho_2 = (prev_f_R - f_R)/(prev_f_R - model) + print*,'rho_2:', rho_2 + else + rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) + print*,'lambda < -e_val(1) ===> rho_2 = 0' + endif + + ! Evolution of the trust length, alpha + if (rho_2 >= 0.75d0) then + alpha = 2d0 * alpha + elseif (rho_2 >= 0.5d0) then + alpha = alpha + elseif (rho_2 >= 0.25d0) then + alpha = 0.5d0 * alpha + else + alpha = 0.25d0 * alpha + endif + write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + + ! cancellaion of the step if rho < 0.1 + if (rho_2 < thresh_rho_2) then !0.1d0) then + lambda = prev_lambda + f_N = prev_f_N + print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + endif + + print*,'' + print*,'lambda, ||x||, delta:' + print*, lambda, dsqrt(f_N), delta + print*,'CC:', DABS(1d0 - f_N/delta**2) + print*,'' + + i = i + 1 + enddo + + ! if trust newton failed + if (i > nb_it_max_lambda) then + print*,'' + print*,'######################################################' + print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' + print*,'The research of the optimal lambda has failed' + print*,'######################################################' + print*,'' + endif + + print*,'Number of iterations :', i + print*,'Value of lambda :', lambda + print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 + print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + + ! Time + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust_newton:', t3 + + print*,'' + print*,'---End trust_newton---' + print*,'' + +end subroutine + +! OMP: First derivative of (||x||^2 - Delta^2)^2 + +! *Function to compute the first derivative of (||x||^2 - Delta^2)^2* + +! This function computes the first derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. + +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | mo_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + + +function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg,accu1,accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) + + deallocate(tmp_accu1, tmp_accu2) + +end function + +! OMP: Second derivative of (||x||^2 - Delta^2)^2 + +! *Function to compute the second derivative of (||x||^2 - Delta^2)^2* + +! This function computes the second derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. +! \begin{align*} +! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | accu3 | double precision | third sum of the formula | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_accu2 | double precision | temporary array for the third sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + + +function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region_omp + double precision :: ddot + + ! Internal + double precision :: accu1,accu2,accu3 + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + integer :: i, j + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + + ! accu1 + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + ! accu2 + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + ! accu3 + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) + + deallocate(tmp_accu1, tmp_accu2, tmp_accu3) + +end function + +! OMP: Function value of ||x||^2 + +! *Compute the value of ||x||^2* + +! This function computes the value of ||x(lambda)||^2 + +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | + +! Internal: +! | tmp_wtg(n) | double precision | temporary array for W^T.v_grad | +! | tmp_fN | double precision | temporary array for the function | +! | i,j | integer | indexes | + + +function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + use omp_lib + + include 'pi.h' + + BEGIN_DOC + ! Compute ||x(lambda)||^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! functions + double precision :: f_norm_trust_region_omp + + ! internal + double precision, allocatable :: tmp_fN(:) + integer :: i,j + + ! Allocation + allocate(tmp_fN(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + f_norm_trust_region_omp = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_fN(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + deallocate(tmp_fN) + +end function + +! First derivative of (||x||^2 - Delta^2)^2 +! Version without omp + +! *Function to compute the first derivative of ||x||^2 - Delta* + +! This function computes the first derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. + +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | ddot | double precision | blas dot product | + + +function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i, j + + ! Functions + double precision :: d1_norm_trust_region + double precision :: ddot + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) + +end function + +! Second derivative of (||x||^2 - Delta^2)^2 +! Version without OMP + +! *Function to compute the second derivative of ||x||^2 - Delta* + + +! \begin{equation} +! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{equation} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | accu3 | double precision | third sum of the formula | +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | +! | ddot | double precision | blas dot product | + + +function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region + double precision :: ddot + + ! Internal + double precision :: wtg,accu1,accu2,accu3 + integer :: i, j + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 + endif + enddo + + d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) + +end function + +! Function value of ||x||^2 +! Version without OMP + +! *Compute the value of ||x||^2* + +! This function computes the value of ||x(lambda)||^2 + +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | f_norm_trust_region | double precision | value of norm(x)^2 | +! | ddot | double precision | blas dot product | + + + +function f_norm_trust_region(n,e_val,tmp_wtg,lambda) + + include 'pi.h' + + BEGIN_DOC + ! Compute ||x(lambda)||^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! function + double precision :: f_norm_trust_region + double precision :: ddot + + ! internal + integer :: i,j + + ! Initialization + f_norm_trust_region = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + +end function + +! OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version with OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + + deallocate(tmp_accu1, tmp_accu2) + +end + +! OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version with OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2, accu3 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + + ! Functions + double precision :: d2_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + + deallocate(tmp_accu1,tmp_accu2,tmp_accu3) + +end + +! First derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version without OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i,j + + ! Functions + double precision :: d1_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + +end + +! Second derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version without OMP + +! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2, accu3 + integer :: i,j + + ! Functions + double precision :: d2_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 + endif + enddo + + d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + +end diff --git a/src/utils_trust_region/trust_region_optimal_lambda.org b/src/utils_trust_region/trust_region_optimal_lambda.org new file mode 100644 index 00000000..b39c9a10 --- /dev/null +++ b/src/utils_trust_region/trust_region_optimal_lambda.org @@ -0,0 +1,1665 @@ +* Newton's method to find the optimal lambda + +*Compute the lambda value for the trust region* + +This subroutine uses the Newton method in order to find the optimal +lambda. This constant is added on the diagonal of the hessian to shift +the eiganvalues. It has a double role: +- ensure that the resulting hessian is positive definite for the + Newton method +- constrain the step in the trust region, i.e., + $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius + of the trust region. +We search $\lambda$ which minimizes +\begin{align*} + f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 +\end{align*} +or +\begin{align*} + \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 +\end{align*} +and gives obviously 0 in both cases. \newline + +There are several cases: +- If $\textbf{H}$ is positive definite the interval containing the + solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). +- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot + \textbf{g} \neq 0$ then the interval containing + the solution is $\lambda \in (-h_1, \infty)$. +- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot + \textbf{g} = 0$ then the interval containing the solution is + $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < + 10^{-12}$ are not computed, so the term where $i = 1$ is + automatically removed and this case becomes similar to the previous one. + +So to avoid numerical problems (cf. trust_region) we start the +algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, +with $\epsilon$ a little constant. +The research must be restricted to the interval containing the +solution. For that reason a little trust region in 1D is used. + +The Newton method to find the optimal $\lambda$ is : +\begin{align*} + \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ +\end{align*} +$f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to +$\lambda$ at the l-th iteration, +$f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to +$\lambda$ at the l-th iteration.\newline + +Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} +f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as +\begin{align*} + y \leq \alpha +\end{align*} +with $\alpha$ a scalar representing the trust length (trust region in +1D) where the function $f$ or $\tilde{f}$ is correctly describe by the +Taylor series truncated at the second order. Thus, if $y > \alpha$, +the constraint is applied as +\begin{align*} + y^* = \alpha \frac{y}{|y|} +\end{align*} +with $y^*$ the solution in the trust region. + +The size of the trust region evolves in function of $\rho$ as for the +trust region seen previously cf. trust_region, rho_model. +The prediction of the value of $f$ or $\tilde{f}$ is done using the +Taylor series truncated at the second order cf. "trust_region", +"trust_e_model". + +The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - +\Delta^2)^2$ with respect to $\lambda$ are: +\begin{align*} + \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 + = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) + \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} +\begin{align*} +\frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{align*} + +The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - +1/\Delta^2)^2$ with respect to $\lambda$ are: +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} + {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + +Provided in qp_edit: +| thresh_rho_2 | +| thresh_cc | +| nb_it_max_lambda | +| version_lambda_search | +| nb_it_max_pre_search | +see qp_edit for more details + +Input: +| n | integer | m*(m-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| tmp_wtg(n) | double precision | w_i^T.v_grad(i) | +| delta | double precision | delta for the trust region | + +Output: +| lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | +| | | lambda > 0 | + +Internal: +| d1_N | double precision | value of d1_norm_trust_region | +| d2_N | double precision | value of d2_norm_trust_region | +| f_N | double precision | value of f_norm_trust_region | +| prev_f_N | double precision | previous value of f_norm_trust_region | +| f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | +| prev_f_R | double precision | previous value of f_R | +| model | double precision | predicted value of f_R from prev_f_R and y | +| d_1 | double precision | value of the first derivative | +| d_2 | double precision | value of the second derivative | +| y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | +| prev_lambda | double precision | previous value of lambda | +| t1,t2,t3 | double precision | wall time | +| i | integer | index | +| epsilon | double precision | little constant to avoid numerical problem | +| rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | +| version | integer | version of the root finding method | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +| d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +| f_norm_trust_region | double precision | value of norm(x)^2 | + + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + + include 'pi.h' + + BEGIN_DOC + ! Research the optimal lambda to constrain the step size in the trust region + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(inout) :: e_val(n) + double precision, intent(in) :: delta + double precision, intent(in) :: tmp_wtg(n) + + ! out + double precision, intent(out) :: lambda + + ! Internal + double precision :: d1_N, d2_N, f_N, prev_f_N + double precision :: prev_f_R, f_R + double precision :: model + double precision :: d_1, d_2 + double precision :: t1,t2,t3 + integer :: i + double precision :: epsilon + double precision :: y + double precision :: prev_lambda + double precision :: rho_2 + double precision :: alpha + integer :: version + + ! Functions + double precision :: d1_norm_trust_region,d1_norm_trust_region_omp + double precision :: d2_norm_trust_region, d2_norm_trust_region_omp + double precision :: f_norm_trust_region, f_norm_trust_region_omp + double precision :: d1_norm_inverse_trust_region + double precision :: d2_norm_inverse_trust_region + double precision :: d1_norm_inverse_trust_region_omp + double precision :: d2_norm_inverse_trust_region_omp + + print*,'' + print*,'---Trust_newton---' + print*,'' + + call wall_time(t1) + + ! version_lambda_search + ! 1 -> ||x||^2 - delta^2 = 0, + ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) + if (version_lambda_search == 1) then + print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + else + print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + endif + ! Version 2 is normally better +#+END_SRC + +Resolution with the Newton method: + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f + ! Initialization + epsilon = 1d-4 + lambda =MAX(0d0, -e_val(1)) + + ! Pre research of lambda to start near the optimal lambda + ! by adding a constant epsilon and changing the constant to + ! have ||x(lambda + epsilon)|| ~ delta, before setting + ! lambda = lambda + epsilon + print*, 'Pre research of lambda:' + print*,'Initial lambda =', lambda + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + i = 1 + + ! To increase lambda + if (f_N > delta**2) then + print*,'Increasing lambda...' + do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 2d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N < f_N) then + print*,'WARNING, error: prev_f_N < f_N, exit' + epsilon = epsilon * 0.5d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + + ! To reduce lambda + else + print*,'Reducing lambda...' + do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 0.5d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N > f_N) then + print*,'WARNING, error: prev_f_N > f_N, exit' + epsilon = epsilon * 2d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + endif + + print*,'End of the pre research of lambda' + + ! New value of lambda + lambda = lambda + epsilon + + print*, 'e_val(1):', e_val(1) + print*, 'Staring point, lambda =', lambda + + ! thresh_cc, threshold for the research of the optimal lambda + ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc + ! thresh_rho_2, threshold to cancel the step in the research + ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 + print*,'Threshold for the CC:', thresh_cc + print*,'Threshold for rho_2:', thresh_rho_2 + + print*, 'w_1^T . g =', tmp_wtg(1) + + ! Debug + !if (debug) then + ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + !endif + + ! Initialization + i = 1 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 + model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 + prev_f_N = 0d0 ! previous value of ||x||^2 + prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 + f_R = 0d0 ! value of (||x||^2 - delta^2)^2 + rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) + y = 0d0 ! step size + prev_lambda = 0d0 ! previous lambda + + ! Derivatives + if (version_lambda_search == 1) then + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + + ! Trust length + alpha = DABS((1d0/d_2)*d_1) + + ! Newton's method + do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) + print*,'--------------------------------------' + print*,'Research of lambda, iteration:', i + print*,'--------------------------------------' + + ! Update of f_N, f_R and the derivatives + prev_f_N = f_N + if (version_lambda_search == 1) then + prev_f_R = (prev_f_N - delta**2)**2 + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + + ! Newton's step + y = -(1d0/DABS(d_2))*d_1 + + ! Constraint on y (the newton step) + if (DABS(y) > alpha) then + y = alpha * (y/DABS(y)) ! preservation of the sign of y + endif + write(*,'(a,E12.5)') ' Step length: ', y + + ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series + model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 + + ! Updates lambda + prev_lambda = lambda + lambda = prev_lambda + y + print*,'prev lambda:', prev_lambda + print*,'new lambda:', lambda + + ! Checks if lambda is in (-h_1, \infty) + if (lambda > MAX(0d0, -e_val(1))) then + ! New value of ||x(lambda)||^2 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + ! New f_R + if (version_lambda_search == 1) then + f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 + else + f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 + endif + + if (version_lambda_search == 1) then + print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + else + print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + endif + + print*,'previous - actual:', prev_f_R - f_R + print*,'previous - model:', prev_f_R - model + + ! Check the gain + if (DABS(prev_f_R - model) < thresh_model_2) then + print*,'' + print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' + print*,'' + endif + + ! Will be deleted + !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then + ! print*,'' + ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' + ! print*,'' + ! exit + !endif + + ! Computes rho_2 + rho_2 = (prev_f_R - f_R)/(prev_f_R - model) + print*,'rho_2:', rho_2 + else + rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) + print*,'lambda < -e_val(1) ===> rho_2 = 0' + endif + + ! Evolution of the trust length, alpha + if (rho_2 >= 0.75d0) then + alpha = 2d0 * alpha + elseif (rho_2 >= 0.5d0) then + alpha = alpha + elseif (rho_2 >= 0.25d0) then + alpha = 0.5d0 * alpha + else + alpha = 0.25d0 * alpha + endif + write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + + ! cancellaion of the step if rho < 0.1 + if (rho_2 < thresh_rho_2) then !0.1d0) then + lambda = prev_lambda + f_N = prev_f_N + print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + endif + + print*,'' + print*,'lambda, ||x||, delta:' + print*, lambda, dsqrt(f_N), delta + print*,'CC:', DABS(1d0 - f_N/delta**2) + print*,'' + + i = i + 1 + enddo + + ! if trust newton failed + if (i > nb_it_max_lambda) then + print*,'' + print*,'######################################################' + print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' + print*,'The research of the optimal lambda has failed' + print*,'######################################################' + print*,'' + endif + + print*,'Number of iterations :', i + print*,'Value of lambda :', lambda + print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 + print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + + ! Time + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust_newton:', t3 + + print*,'' + print*,'---End trust_newton---' + print*,'' + +end subroutine +#+END_SRC + +* OMP: First derivative of (||x||^2 - Delta^2)^2 + +*Function to compute the first derivative of (||x||^2 - Delta^2)^2* + +This function computes the first derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. + +\begin{align*} +\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) +\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} + +\begin{align*} + \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ + \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| mo_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg,accu1,accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) + + deallocate(tmp_accu1, tmp_accu2) + +end function +#+END_SRC + +* OMP: Second derivative of (||x||^2 - Delta^2)^2 + +*Function to compute the second derivative of (||x||^2 - Delta^2)^2* + +This function computes the second derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. +\begin{align*} +\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{align*} + +\begin{align*} + \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ + \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ + \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| accu3 | double precision | third sum of the formula | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_accu2 | double precision | temporary array for the third sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region_omp + double precision :: ddot + + ! Internal + double precision :: accu1,accu2,accu3 + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + integer :: i, j + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + + ! accu1 + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + ! accu2 + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + ! accu3 + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) + + deallocate(tmp_accu1, tmp_accu2, tmp_accu3) + +end function +#+END_SRC + +* OMP: Function value of ||x||^2 + +*Compute the value of ||x||^2* + +This function computes the value of ||x(lambda)||^2 + +\begin{align*} +||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | + +Internal: +| tmp_wtg(n) | double precision | temporary array for W^T.v_grad | +| tmp_fN | double precision | temporary array for the function | +| i,j | integer | indexes | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + use omp_lib + + include 'pi.h' + + BEGIN_DOC + ! Compute ||x(lambda)||^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! functions + double precision :: f_norm_trust_region_omp + + ! internal + double precision, allocatable :: tmp_fN(:) + integer :: i,j + + ! Allocation + allocate(tmp_fN(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + f_norm_trust_region_omp = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_fN(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + deallocate(tmp_fN) + +end function +#+END_SRC + +* First derivative of (||x||^2 - Delta^2)^2 +Version without omp + +*Function to compute the first derivative of ||x||^2 - Delta* + +This function computes the first derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. + +\begin{align*} +\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| ddot | double precision | blas dot product | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i, j + + ! Functions + double precision :: d1_norm_trust_region + double precision :: ddot + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) + +end function +#+END_SRC + +* Second derivative of (||x||^2 - Delta^2)^2 +Version without OMP + +*Function to compute the second derivative of ||x||^2 - Delta* + + +\begin{equation} +\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{equation} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| accu3 | double precision | third sum of the formula | +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | +| ddot | double precision | blas dot product | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region + double precision :: ddot + + ! Internal + double precision :: wtg,accu1,accu2,accu3 + integer :: i, j + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 + endif + enddo + + d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) + +end function +#+END_SRC + +* Function value of ||x||^2 +Version without OMP + +*Compute the value of ||x||^2* + +This function computes the value of ||x(lambda)||^2 + +\begin{align*} +||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| f_norm_trust_region | double precision | value of norm(x)^2 | +| ddot | double precision | blas dot product | + + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function f_norm_trust_region(n,e_val,tmp_wtg,lambda) + + include 'pi.h' + + BEGIN_DOC + ! Compute ||x(lambda)||^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! function + double precision :: f_norm_trust_region + double precision :: ddot + + ! internal + integer :: i,j + + ! Initialization + f_norm_trust_region = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + +end function +#+END_SRC + +* OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 +Version with OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + + deallocate(tmp_accu1, tmp_accu2) + +end +#+END_SRC + +* OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 +Version with OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2, accu3 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + + ! Functions + double precision :: d2_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + + deallocate(tmp_accu1,tmp_accu2,tmp_accu3) + +end +#+END_SRC + +* First derivative of (1/||x||^2 - 1/Delta^2)^2 +Version without OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i,j + + ! Functions + double precision :: d1_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + +end +#+END_SRC + +* Second derivative of (1/||x||^2 - 1/Delta^2)^2 +Version without OMP + +*Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2, accu3 + integer :: i,j + + ! Functions + double precision :: d2_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 + endif + enddo + + d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + +end +#+END_SRC diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f new file mode 100644 index 00000000..45738736 --- /dev/null +++ b/src/utils_trust_region/trust_region_rho.irp.f @@ -0,0 +1,121 @@ +! Agreement with the model: Rho + +! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* + +! Rho represents the agreement between the model (the predicted energy +! by the Taylor expansion truncated at the 2nd order) and the real +! energy : + +! \begin{equation} +! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +! \end{equation} +! With : +! $E^{k}$ the energy at the previous iteration +! $E^{k+1}$ the energy at the actual iteration +! $m^{k+1}$ the predicted energy for the actual iteration +! (cf. trust_e_model) + +! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. +! If $\rho \leq 0$ the previous energy is lower than the actual +! energy. We have to cancel the last step and use a smaller trust +! region. +! Here we cancel the last step if $\rho < 0.1$, because even if +! the energy decreases, the agreement is bad, i.e., the Taylor expansion +! truncated at the second order doesn't represent correctly the energy +! landscape. So it's better to cancel the step and restart with a +! smaller trust region. + +! Provided in qp_edit: +! | thresh_rho | + +! Input: +! | prev_energy | double precision | previous energy (energy before the rotation) | +! | e_model | double precision | predicted energy after the rotation | + +! Output: +! | rho | double precision | the agreement between the model (predicted) and the real energy | +! | prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | +! | | | else the previous energy doesn't change | + +! Internal: +! | energy | double precision | energy (real) after the rotation | +! | i | integer | index | +! | t* | double precision | time | + + +subroutine trust_region_rho(prev_energy, energy,e_model,rho) + + include 'pi.h' + + BEGIN_DOC + ! Compute rho, the agreement between the predicted criterion/energy and the real one + END_DOC + + implicit none + + ! Variables + + ! In + double precision, intent(inout) :: prev_energy + double precision, intent(in) :: e_model, energy + + ! Out + double precision, intent(out) :: rho + + ! Internal + double precision :: t1, t2, t3 + integer :: i + + print*,'' + print*,'---Rho_model---' + + call wall_time(t1) + +! Rho +! \begin{equation} +! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +! \end{equation} + +! In function of $\rho$ th step can be accepted or cancelled. + +! If we cancel the last step (k+1), the previous energy (k) doesn't +! change! +! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) + + +! Already done in an other subroutine + !if (ABS(prev_energy - e_model) < 1d-12) then + ! print*,'WARNING: prev_energy - e_model < 1d-12' + ! print*,'=> rho will tend toward infinity' + ! print*,'Check you convergence criterion !' + !endif + + rho = (prev_energy - energy) / (prev_energy - e_model) + + print*, 'previous energy, prev_energy :', prev_energy + print*, 'predicted energy, e_model :', e_model + print*, 'real energy, energy :', energy + print*, 'prev_energy - energy :', prev_energy - energy + print*, 'prev_energy - e_model :', prev_energy - e_model + print*, 'Rho :', rho + print*, 'Threshold for rho:', thresh_rho + + ! Modification of prev_energy in function of rho + if (rho < thresh_rho) then !0.1) then + ! the step is cancelled + print*, 'Rho <', thresh_rho,', the previous energy does not changed' + print*, 'prev_energy :', prev_energy + else + ! the step is accepted + prev_energy = energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rho model:', t3 + + print*,'---End rho_model---' + print*,'' + +end subroutine diff --git a/src/utils_trust_region/trust_region_rho.org b/src/utils_trust_region/trust_region_rho.org new file mode 100644 index 00000000..9b25ee29 --- /dev/null +++ b/src/utils_trust_region/trust_region_rho.org @@ -0,0 +1,123 @@ +* Agreement with the model: Rho + +*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* + +Rho represents the agreement between the model (the predicted energy +by the Taylor expansion truncated at the 2nd order) and the real +energy : + +\begin{equation} +\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +\end{equation} +With : +$E^{k}$ the energy at the previous iteration +$E^{k+1}$ the energy at the actual iteration +$m^{k+1}$ the predicted energy for the actual iteration +(cf. trust_e_model) + +If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. +If $\rho \leq 0$ the previous energy is lower than the actual +energy. We have to cancel the last step and use a smaller trust +region. +Here we cancel the last step if $\rho < 0.1$, because even if +the energy decreases, the agreement is bad, i.e., the Taylor expansion +truncated at the second order doesn't represent correctly the energy +landscape. So it's better to cancel the step and restart with a +smaller trust region. + +Provided in qp_edit: +| thresh_rho | + +Input: +| prev_energy | double precision | previous energy (energy before the rotation) | +| e_model | double precision | predicted energy after the rotation | + +Output: +| rho | double precision | the agreement between the model (predicted) and the real energy | +| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | +| | | else the previous energy doesn't change | + +Internal: +| energy | double precision | energy (real) after the rotation | +| i | integer | index | +| t* | double precision | time | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f +subroutine trust_region_rho(prev_energy, energy,e_model,rho) + + include 'pi.h' + + BEGIN_DOC + ! Compute rho, the agreement between the predicted criterion/energy and the real one + END_DOC + + implicit none + + ! Variables + + ! In + double precision, intent(inout) :: prev_energy + double precision, intent(in) :: e_model, energy + + ! Out + double precision, intent(out) :: rho + + ! Internal + double precision :: t1, t2, t3 + integer :: i + + print*,'' + print*,'---Rho_model---' + + call wall_time(t1) +#+END_SRC + +** Rho +\begin{equation} +\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +\end{equation} + +In function of $\rho$ th step can be accepted or cancelled. + +If we cancel the last step (k+1), the previous energy (k) doesn't +change! +If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) + +#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f + ! Already done in an other subroutine + !if (ABS(prev_energy - e_model) < 1d-12) then + ! print*,'WARNING: prev_energy - e_model < 1d-12' + ! print*,'=> rho will tend toward infinity' + ! print*,'Check you convergence criterion !' + !endif + + rho = (prev_energy - energy) / (prev_energy - e_model) + + print*, 'previous energy, prev_energy :', prev_energy + print*, 'predicted energy, e_model :', e_model + print*, 'real energy, energy :', energy + print*, 'prev_energy - energy :', prev_energy - energy + print*, 'prev_energy - e_model :', prev_energy - e_model + print*, 'Rho :', rho + print*, 'Threshold for rho:', thresh_rho + + ! Modification of prev_energy in function of rho + if (rho < thresh_rho) then !0.1) then + ! the step is cancelled + print*, 'Rho <', thresh_rho,', the previous energy does not changed' + print*, 'prev_energy :', prev_energy + else + ! the step is accepted + prev_energy = energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rho model:', t3 + + print*,'---End rho_model---' + print*,'' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f new file mode 100644 index 00000000..42aa6ed4 --- /dev/null +++ b/src/utils_trust_region/trust_region_step.irp.f @@ -0,0 +1,716 @@ +! Trust region + +! *Compute the next step with the trust region algorithm* + +! The Newton method is an iterative method to find a minimum of a given +! function. It uses a Taylor series truncated at the second order of the +! targeted function and gives its minimizer. The minimizer is taken as +! the new position and the same thing is done. And by doing so +! iteratively the method find a minimum, a local or global one depending +! of the starting point and the convexity/nonconvexity of the targeted +! function. + +! The goal of the trust region is to constrain the step size of the +! Newton method in a certain area around the actual position, where the +! Taylor series is a good approximation of the targeted function. This +! area is called the "trust region". + +! In addition, in function of the agreement between the Taylor +! development of the energy and the real energy, the size of the trust +! region will be updated at each iteration. By doing so, the step sizes +! are not too larges. In addition, since we add a criterion to cancel the +! step if the energy increases (more precisely if rho < 0.1), so it's +! impossible to diverge. \newline + +! References: \newline +! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline +! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline +! ISBN: 978-0-387-40065-5 \newline + +! By using the first and the second derivatives, the Newton method gives +! a step: +! \begin{align*} +! \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot +! \textbf{g}_{(k)} +! \end{align*} +! which leads to the minimizer of the Taylor series. +! !!! Warning: the Newton method gives the minimizer if and only if +! $\textbf{H}$ is positive definite, else it leads to a saddle point !!! +! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm: +! \begin{align*} +! ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} +! \end{align*} +! which is equivalent to +! \begin{align*} +! \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 +! \end{align*} + +! with: \newline +! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of +! size n) \newline +! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n +! matrix) \newline +! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of +! size n) \newline +! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration +! \newline + +! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a +! hypersphere of radius $\Delta_{(k+1)}$.\newline + +! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and +! $\textbf{H}$ is positive definite, the +! solution is the step given by the Newton method +! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. +! Else we have to constrain the step size. For simplicity we will remove +! the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have +! to put a constraint on $\textbf{x}$ with a Lagrange multiplier. +! Starting from the Taylor series of a function E (here, the energy) +! truncated at the 2nd order, we have: +! \begin{align*} +! E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} +! \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + +! \mathcal{O}(\textbf{x}^2) +! \end{align*} + +! With the constraint on the norm of $\textbf{x}$ we can write the +! Lagrangian +! \begin{align*} +! \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} +! + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +! + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) +! \end{align*} +! Where: \newline +! $\lambda$ is the Lagrange multiplier \newline +! $E$ is the energy at the k-th iteration $\Leftrightarrow +! E(\textbf{x} = \textbf{0})$ \newline + +! To solve this equation, we search a stationary point where the first +! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 +! \end{align*} + +! The derivative is: +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} +! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +! \end{align*} + +! So, we search $\textbf{x}$ such as: +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} +! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 +! \end{align*} + +! We can rewrite that as: +! \begin{align*} +! \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +! = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 +! \end{align*} +! with $\textbf{I}$ is the identity matrix. + +! By doing so, the solution is: +! \begin{align*} +! (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} +! \end{align*} +! \begin{align*} +! \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +! \end{align*} +! with $\textbf{x}^T \textbf{x} = \Delta^2$. + +! We have to solve this previous equation to find this $\textbf{x}$ in the +! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is +! just a one dimension problem because we can express $\textbf{x}$ as a +! function of $\lambda$: +! \begin{align*} +! \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +! \end{align*} + +! We start from the fact that the hessian is diagonalizable. So we have: +! \begin{align*} +! \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T +! \end{align*} +! with: \newline +! $\textbf{H}$, the hessian matrix \newline +! $\textbf{W}$, the matrix containing the eigenvectors \newline +! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline +! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline +! $h_i$, the i-th eigenvalue in ascending order \newline + +! Now we use the fact that adding a constant on the diagonal just shifts +! the eigenvalues: +! \begin{align*} +! \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} +! +\textbf{I} \lambda) \cdot \textbf{W}^T +! \end{align*} + +! By doing so we can express $\textbf{x}$ as a function of $\lambda$ +! \begin{align*} +! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} +! with $\lambda \neq - h_i$. + +! An interesting thing in our case is the norm of $\textbf{x}$, +! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of +! the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have: +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot +! \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. +! And if we study the properties of this function we see that: +! \begin{align*} +! \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 +! \end{align*} +! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: +! \begin{align*} +! \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty +! \end{align*} + +! From these limits and knowing that $h_1$ is the lowest eigenvalue, we +! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and +! strictly decreasing function on the interval $\lambda \in +! (-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which +! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one +! solution. + +! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot +! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, +! $\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the +! Newton method is only defined for a positive definite hessian matrix, +! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive +! definite. Consequently, in the case where $\textbf{H}$ is not positive +! definite, to ensure the positive definiteness, $\lambda$ must be +! greater than $- h_1$. +! \begin{align*} +! \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 +! \end{align*} + +! From that there are five cases: +! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ +! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot +! \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} +! \lambda)$ +! must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing +! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be +! positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) +! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing +! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be +! positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is +! similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = +! 0)|| \leq \Delta$ +! but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ +! time a constant to ensure the condition $||\textbf{x}(\lambda = +! -h_1)|| = \Delta$ and escape from the saddle point + +! Thus to find the solution, we can write: +! \begin{align*} +! ||\textbf{x}(\lambda)|| = \Delta +! \end{align*} +! \begin{align*} +! ||\textbf{x}(\lambda)|| - \Delta = 0 +! \end{align*} + +! Taking the square of this equation +! \begin{align*} +! (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 +! \end{align*} +! we have a function with one minimum for the optimal $\lambda$. +! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve +! \begin{align*} +! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +! \end{align*} + +! But in practice, it is more effective to solve: +! \begin{align*} +! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +! \end{align*} + +! To do that, we just use the Newton method with "trust_newton" using +! first and second derivative of $(||\textbf{x}(\lambda)||^2 - +! \Delta^2)^2$ with respect to $\textbf{x}$. +! This will give the optimal $\lambda$ to compute the +! solution $\textbf{x}$ with the formula seen previously: +! \begin{align*} +! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} + +! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our +! step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. + + + + +! Evolution of the trust region + +! We initialize the trust region at the first iteration using a radius +! \begin{align*} +! \Delta = ||\textbf{x}(\lambda=0)|| +! \end{align*} + +! And for the next iteration the trust region will evolves depending of +! the agreement of the energy prediction based on the Taylor series +! truncated at the 2nd order and the real energy. If the Taylor series +! truncated at the 2nd order represents correctly the energy landscape +! the trust region will be extent else it will be reduced. In order to +! mesure this agreement we use the ratio rho cf. "rho_model" and +! "trust_e_model". From that we use the following values: +! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, +! - if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, +! - if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, +! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. + +! In addition, if $\rho < 0.1$ the iteration is cancelled, so it +! restarts with a smaller trust region until the energy decreases. + + + + +! Summary + +! To summarize, knowing the hessian (eigenvectors and eigenvalues), the +! gradient and the radius of the trust region we can compute the norm of +! the Newton step +! \begin{align*} +! ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n +! \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 +! \end{align*} + +! - if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and +! $\textbf{x}(\lambda=0)$ is in the trust region and it is not +! necessary to put a constraint on $\textbf{x}$, the solution is the +! unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. +! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and +! $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in +! the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = +! -h_1)$, similarly to the previous case. +! But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ +! time a constant to ensure the condition $||\textbf{x}(\lambda = +! -h_1)|| = \Delta$ and escape from the saddle point +! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we +! have to search $\lambda \in (-h_1, \infty)$ such as +! $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method +! \begin{align*} +! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +! \end{align*} +! or +! \begin{align*} +! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +! \end{align*} +! which is numerically more stable. And finally compute +! \begin{align*} +! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} +! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we +! do exactly the same thing that the previous case but we search +! $\lambda \in (0, \infty)$ +! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and +! $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the +! sum), again we do exactly the same thing that the previous case +! searching $\lambda \in (-h_1, \infty)$. + + +! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not +! necessary in fact to remove the $j = 1$ in the sum since the term +! where $h_i - \lambda < 10^{-6}$ are not computed. + +! After that, we take this vector $\textbf{x}^*$, called "x", and we do +! the transformation to an antisymmetric matrix $\textbf{X}$, called +! m_x. This matrix $\textbf{X}$ will be used to compute a rotation +! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". + +! NB: +! An improvement can be done using a elleptical trust region. + + + + +! Code + +! Provided: +! | mo_num | integer | number of MOs | + +! Cf. qp_edit in orbital optimization section, for some constants/thresholds + +! Input: +! | m | integer | number of MOs | +! | n | integer | m*(m-1)/2 | +! | H(n, n) | double precision | hessian | +! | v_grad(n) | double precision | gradient | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n, n) | double precision | eigenvectors of the hessian | +! | rho | double precision | agreement between the model and the reality, | +! | | | represents the quality of the energy prediction | +! | nb_iter | integer | number of iteration | + +! Input/Ouput: +! | delta | double precision | radius of the trust region | + +! Output: +! | x(n) | double precision | vector containing the step | + +! Internal: +! | accu | double precision | temporary variable to compute the step | +! | lambda | double precision | lagrange multiplier | +! | trust_radius2 | double precision | square of the radius of the trust region | +! | norm2_x | double precision | norm^2 of the vector x | +! | norm2_g | double precision | norm^2 of the vector containing the gradient | +! | tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | +! | i, j, k | integer | indexes | + +! Function: +! | dnrm2 | double precision | Blas function computing the norm | +! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | + + +subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compuet the step in the trust region + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: v_grad(n), rho + integer, intent(inout) :: nb_iter + double precision, intent(in) :: e_val(n), w(n,n) + + ! inout + double precision, intent(inout) :: delta + + ! out + double precision, intent(out) :: x(n) + + ! Internal + double precision :: accu, lambda, trust_radius2 + double precision :: norm2_x, norm2_g + double precision, allocatable :: tmp_wtg(:) + integer :: i,j,k + double precision :: t1,t2,t3 + integer :: n_neg_eval + + + ! Functions + double precision :: ddot, dnrm2 + double precision :: f_norm_trust_region_omp + + print*,'' + print*,'==================' + print*,'---Trust_region---' + print*,'==================' + + call wall_time(t1) + + ! Allocation + allocate(tmp_wtg(n)) + +! Initialization and norm + +! The norm of the step size will be useful for the trust region +! algorithm. We start from a first guess and the radius of the trust +! region will evolve during the optimization. + +! avoid_saddle is actually a test to avoid saddle points + + +! Initialization of the Lagrange multiplier +lambda = 0d0 + +! List of w^T.g, to avoid the recomputation +tmp_wtg = 0d0 +do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo +enddo + +! Replacement of the small tmp_wtg corresponding to a negative eigenvalue +! in the case of avoid_saddle +if (avoid_saddle .and. e_val(1) < - thresh_eig) then + i = 2 + ! Number of negative eigenvalues + do while (e_val(i) < - thresh_eig) + if (tmp_wtg(i) < thresh_wtg2) then + if (version_avoid_saddle == 1) then + tmp_wtg(i) = 1d0 + elseif (version_avoid_saddle == 2) then + tmp_wtg(i) = DABS(e_val(i)) + elseif (version_avoid_saddle == 3) then + tmp_wtg(i) = dsqrt(DABS(e_val(i))) + else + tmp_wtg(i) = thresh_wtg2 + endif + endif + i = i + 1 + enddo + + ! For the fist one it's a little bit different + if (tmp_wtg(1) < thresh_wtg2) then + tmp_wtg(1) = 0d0 + endif + +endif + +! Norm^2 of x, ||x||^2 +norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) +! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta +! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm +! Anyway if the step is too big it will be reduced +print*,'||x||^2 :', norm2_x + +! Norm^2 of the gradient, ||v_grad||^2 +norm2_g = (dnrm2(n,v_grad,1))**2 +print*,'||grad||^2 :', norm2_g + +! Trust radius initialization + +! At the first iteration (nb_iter = 0) we initialize the trust region +! with the norm of the step generate by the Newton's method ($\textbf{x}_1 = +! (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, +! we compute this norm using f_norm_trust_region_omp as explain just +! below) + + +! trust radius +if (nb_iter == 0) then + trust_radius2 = norm2_x + ! To avoid infinite loop of cancellation of this first step + ! without changing delta + nb_iter = 1 + + ! Compute delta, delta = sqrt(trust_radius) + delta = dsqrt(trust_radius2) +endif + +! Modification of the trust radius + +! In function of rho (which represents the agreement between the model +! and the reality, cf. rho_model) the trust region evolves. We update +! delta (the radius of the trust region). + +! To avoid too big trust region we put a maximum size. + + +! Modification of the trust radius in function of rho +if (rho >= 0.75d0) then + delta = 2d0 * delta +elseif (rho >= 0.5d0) then + delta = delta +elseif (rho >= 0.25d0) then + delta = 0.5d0 * delta +else + delta = 0.25d0 * delta +endif + +! Maximum size of the trust region +!if (delta > 0.5d0 * n * pi) then +! delta = 0.5d0 * n * pi +! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' +!endif + +if (delta > 1d10) then + delta = 1d10 +endif + +print*, 'Delta :', delta + +! Calculation of the optimal lambda + +! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ +! - If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant +! $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ +! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the +! unconstrained one, $\lambda = 0$ + +! You will find more details at the beginning + + +! By giving delta, we search (||x||^2 - delta^2)^2 = 0 +! and not (||x||^2 - delta)^2 = 0 + +! Research of lambda to solve ||x(lambda)|| = Delta + +! Display +print*, 'e_val(1) = ', e_val(1) +print*, 'w_1^T.g =', tmp_wtg(1) + +! H positive definite +if (e_val(1) > - thresh_eig) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + print*, '||x(0)||=', dsqrt(norm2_x) + print*, 'Delta=', delta + + ! H positive definite, ||x(lambda = 0)|| <= Delta + if (dsqrt(norm2_x) <= delta) then + print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + print*, 'lambda = 0, no lambda optimization' + lambda = 0d0 + + ! H positive definite, ||x(lambda = 0)|| > Delta + else + ! Constraint solution + print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + print*,'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + +! H indefinite +else + if (DABS(tmp_wtg(1)) < thresh_wtg) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) + print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + endif + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta + if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then + ! Add e_val(1) in order to have (H - e_val(1) I) positive definite + print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + print*, 'lambda = -e_val(1), no lambda optimization' + lambda = - e_val(1) + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta + ! and + ! H indefinite, w_1^T.g =/= 0 + else + ! Constraint solution/ add lambda + if (DABS(tmp_wtg(1)) < thresh_wtg) then + print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + else + print*, 'H indefinite, w_1^T.g =/= 0' + endif + print*, 'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + +endif + +! Recomputation of the norm^2 of the step x +norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) +print*,'' +print*,'Summary after the trust region:' +print*,'lambda:', lambda +print*,'||x||:', dsqrt(norm2_x) +print*,'delta:', delta + +! Calculation of the step x + +! x refers to $\textbf{x}^*$ +! We compute x in function of lambda using its formula : +! \begin{align*} +! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i +! + \lambda} \cdot \textbf{w}_i +! \end{align*} + + +! Initialisation +x = 0d0 + +! Calculation of the step x + +! Normal version +if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo + +! Version to use the absolute value of the eigenvalues +else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + +endif + +double precision :: beta, norm_x + +! Test +! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) +! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first +! eigenvectors multiply by a constant to ensure the condition +! ||x(lambda=-e_val(1))|| = delta and escape the saddle point +if (avoid_saddle .and. e_val(1) < - thresh_eig) then + if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then + + ! norm of x + norm_x = dnrm2(n,x,1) + + ! Computes the coefficient for the w_1 + beta = delta**2 - norm_x**2 + + ! Updates the step x + x = x + W(:,1) * dsqrt(beta) + + ! Recomputes the norm to check + norm_x = dnrm2(n,x,1) + + print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' + print*, '||x||', norm_x + endif +endif + +! Transformation of x + +! x is a vector of size n, so it can be write as a m by m +! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". + + +! ! Step transformation vector -> matrix +! ! Vector with n element -> mo_num by mo_num matrix +! do j = 1, m +! do i = 1, m +! if (i>j) then +! call mat_to_vec_index(i,j,k) +! m_x(i,j) = x(k) +! else +! m_x(i,j) = 0d0 +! endif +! enddo +! enddo +! +! ! Antisymmetrization of the previous matrix +! do j = 1, m +! do i = 1, m +! if (i 0$ ($\lambda = 0$ is the unconstraint solution). But the +Newton method is only defined for a positive definite hessian matrix, +so $(\textbf{H} + \textbf{I} \lambda)$ must be positive +definite. Consequently, in the case where $\textbf{H}$ is not positive +definite, to ensure the positive definiteness, $\lambda$ must be +greater than $- h_1$. +\begin{align*} + \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 +\end{align*} + +From that there are five cases: +- if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ +- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot + \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} + \lambda)$ + must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot + \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing + $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be + positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) +- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot + \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing + $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be + positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is + similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = + 0)|| \leq \Delta$ + but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ + time a constant to ensure the condition $||\textbf{x}(\lambda = + -h_1)|| = \Delta$ and escape from the saddle point + +Thus to find the solution, we can write: +\begin{align*} + ||\textbf{x}(\lambda)|| = \Delta +\end{align*} +\begin{align*} + ||\textbf{x}(\lambda)|| - \Delta = 0 +\end{align*} + +Taking the square of this equation +\begin{align*} + (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 +\end{align*} +we have a function with one minimum for the optimal $\lambda$. +Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve +\begin{align*} + (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +\end{align*} + +But in practice, it is more effective to solve: +\begin{align*} + (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +\end{align*} + +To do that, we just use the Newton method with "trust_newton" using +first and second derivative of $(||\textbf{x}(\lambda)||^2 - +\Delta^2)^2$ with respect to $\textbf{x}$. +This will give the optimal $\lambda$ to compute the +solution $\textbf{x}$ with the formula seen previously: +\begin{align*} + \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +\end{align*} + +The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our +step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Evolution of the trust region + +We initialize the trust region at the first iteration using a radius +\begin{align*} + \Delta = ||\textbf{x}(\lambda=0)|| +\end{align*} + +And for the next iteration the trust region will evolves depending of +the agreement of the energy prediction based on the Taylor series +truncated at the 2nd order and the real energy. If the Taylor series +truncated at the 2nd order represents correctly the energy landscape +the trust region will be extent else it will be reduced. In order to +mesure this agreement we use the ratio rho cf. "rho_model" and +"trust_e_model". From that we use the following values: +- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, +- if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, +- if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, +- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. + +In addition, if $\rho < 0.1$ the iteration is cancelled, so it +restarts with a smaller trust region until the energy decreases. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Summary + +To summarize, knowing the hessian (eigenvectors and eigenvalues), the +gradient and the radius of the trust region we can compute the norm of +the Newton step +\begin{align*} + ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n + \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 +\end{align*} + +- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and + $\textbf{x}(\lambda=0)$ is in the trust region and it is not + necessary to put a constraint on $\textbf{x}$, the solution is the + unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. +- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and + $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in + the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = + -h_1)$, similarly to the previous case. + But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ + time a constant to ensure the condition $||\textbf{x}(\lambda = + -h_1)|| = \Delta$ and escape from the saddle point +- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we + have to search $\lambda \in (-h_1, \infty)$ such as + $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method + \begin{align*} + (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 + \end{align*} + or + \begin{align*} + (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 + \end{align*} + which is numerically more stable. And finally compute + \begin{align*} + \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i + \end{align*} +- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we + do exactly the same thing that the previous case but we search + $\lambda \in (0, \infty)$ +- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and + $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the + sum), again we do exactly the same thing that the previous case + searching $\lambda \in (-h_1, \infty)$. + + +For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not +necessary in fact to remove the $j = 1$ in the sum since the term +where $h_i - \lambda < 10^{-6}$ are not computed. + +After that, we take this vector $\textbf{x}^*$, called "x", and we do +the transformation to an antisymmetric matrix $\textbf{X}$, called +m_x. This matrix $\textbf{X}$ will be used to compute a rotation +matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". + +NB: +An improvement can be done using a elleptical trust region. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Code + +Provided: +| mo_num | integer | number of MOs | + +Cf. qp_edit in orbital optimization section, for some constants/thresholds + +Input: +| m | integer | number of MOs | +| n | integer | m*(m-1)/2 | +| H(n, n) | double precision | hessian | +| v_grad(n) | double precision | gradient | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n, n) | double precision | eigenvectors of the hessian | +| rho | double precision | agreement between the model and the reality, | +| | | represents the quality of the energy prediction | +| nb_iter | integer | number of iteration | + +Input/Ouput: +| delta | double precision | radius of the trust region | + +Output: +| x(n) | double precision | vector containing the step | + +Internal: +| accu | double precision | temporary variable to compute the step | +| lambda | double precision | lagrange multiplier | +| trust_radius2 | double precision | square of the radius of the trust region | +| norm2_x | double precision | norm^2 of the vector x | +| norm2_g | double precision | norm^2 of the vector containing the gradient | +| tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | +| i, j, k | integer | indexes | + +Function: +| dnrm2 | double precision | Blas function computing the norm | +| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) + + include 'pi.h' + + BEGIN_DOC + ! Compuet the step in the trust region + END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: v_grad(n), rho + integer, intent(inout) :: nb_iter + double precision, intent(in) :: e_val(n), w(n,n) + + ! inout + double precision, intent(inout) :: delta + + ! out + double precision, intent(out) :: x(n) + + ! Internal + double precision :: accu, lambda, trust_radius2 + double precision :: norm2_x, norm2_g + double precision, allocatable :: tmp_wtg(:) + integer :: i,j,k + double precision :: t1,t2,t3 + integer :: n_neg_eval + + + ! Functions + double precision :: ddot, dnrm2 + double precision :: f_norm_trust_region_omp + + print*,'' + print*,'==================' + print*,'---Trust_region---' + print*,'==================' + + call wall_time(t1) + + ! Allocation + allocate(tmp_wtg(n)) +#+END_SRC + + +*** Initialization and norm + +The norm of the step size will be useful for the trust region +algorithm. We start from a first guess and the radius of the trust +region will evolve during the optimization. + +avoid_saddle is actually a test to avoid saddle points + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Initialization of the Lagrange multiplier + lambda = 0d0 + + ! List of w^T.g, to avoid the recomputation + tmp_wtg = 0d0 + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo + enddo + + ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue + ! in the case of avoid_saddle + if (avoid_saddle .and. e_val(1) < - thresh_eig) then + i = 2 + ! Number of negative eigenvalues + do while (e_val(i) < - thresh_eig) + if (tmp_wtg(i) < thresh_wtg2) then + if (version_avoid_saddle == 1) then + tmp_wtg(i) = 1d0 + elseif (version_avoid_saddle == 2) then + tmp_wtg(i) = DABS(e_val(i)) + elseif (version_avoid_saddle == 3) then + tmp_wtg(i) = dsqrt(DABS(e_val(i))) + else + tmp_wtg(i) = thresh_wtg2 + endif + endif + i = i + 1 + enddo + + ! For the fist one it's a little bit different + if (tmp_wtg(1) < thresh_wtg2) then + tmp_wtg(1) = 0d0 + endif + + endif + + ! Norm^2 of x, ||x||^2 + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta + ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm + ! Anyway if the step is too big it will be reduced + print*,'||x||^2 :', norm2_x + + ! Norm^2 of the gradient, ||v_grad||^2 + norm2_g = (dnrm2(n,v_grad,1))**2 + print*,'||grad||^2 :', norm2_g +#+END_SRC + +*** Trust radius initialization + + At the first iteration (nb_iter = 0) we initialize the trust region + with the norm of the step generate by the Newton's method ($\textbf{x}_1 = + (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, + we compute this norm using f_norm_trust_region_omp as explain just + below) + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! trust radius + if (nb_iter == 0) then + trust_radius2 = norm2_x + ! To avoid infinite loop of cancellation of this first step + ! without changing delta + nb_iter = 1 + + ! Compute delta, delta = sqrt(trust_radius) + delta = dsqrt(trust_radius2) + endif +#+END_SRC + +*** Modification of the trust radius + +In function of rho (which represents the agreement between the model +and the reality, cf. rho_model) the trust region evolves. We update +delta (the radius of the trust region). + +To avoid too big trust region we put a maximum size. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Modification of the trust radius in function of rho + if (rho >= 0.75d0) then + delta = 2d0 * delta + elseif (rho >= 0.5d0) then + delta = delta + elseif (rho >= 0.25d0) then + delta = 0.5d0 * delta + else + delta = 0.25d0 * delta + endif + + ! Maximum size of the trust region + !if (delta > 0.5d0 * n * pi) then + ! delta = 0.5d0 * n * pi + ! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' + !endif + + if (delta > 1d10) then + delta = 1d10 + endif + + print*, 'Delta :', delta +#+END_SRC + +*** Calculation of the optimal lambda + +We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ +- If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant + $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ +- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the + unconstrained one, $\lambda = 0$ + +You will find more details at the beginning + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! By giving delta, we search (||x||^2 - delta^2)^2 = 0 + ! and not (||x||^2 - delta)^2 = 0 + + ! Research of lambda to solve ||x(lambda)|| = Delta + + ! Display + print*, 'e_val(1) = ', e_val(1) + print*, 'w_1^T.g =', tmp_wtg(1) + + ! H positive definite + if (e_val(1) > - thresh_eig) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + print*, '||x(0)||=', dsqrt(norm2_x) + print*, 'Delta=', delta + + ! H positive definite, ||x(lambda = 0)|| <= Delta + if (dsqrt(norm2_x) <= delta) then + print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + print*, 'lambda = 0, no lambda optimization' + lambda = 0d0 + + ! H positive definite, ||x(lambda = 0)|| > Delta + else + ! Constraint solution + print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + print*,'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + + ! H indefinite + else + if (DABS(tmp_wtg(1)) < thresh_wtg) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) + print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + endif + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta + if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then + ! Add e_val(1) in order to have (H - e_val(1) I) positive definite + print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + print*, 'lambda = -e_val(1), no lambda optimization' + lambda = - e_val(1) + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta + ! and + ! H indefinite, w_1^T.g =/= 0 + else + ! Constraint solution/ add lambda + if (DABS(tmp_wtg(1)) < thresh_wtg) then + print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + else + print*, 'H indefinite, w_1^T.g =/= 0' + endif + print*, 'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + + endif + + ! Recomputation of the norm^2 of the step x + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + print*,'' + print*,'Summary after the trust region:' + print*,'lambda:', lambda + print*,'||x||:', dsqrt(norm2_x) + print*,'delta:', delta +#+END_SRC + +*** Calculation of the step x + +x refers to $\textbf{x}^*$ +We compute x in function of lambda using its formula : +\begin{align*} +\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i ++ \lambda} \cdot \textbf{w}_i +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Initialisation + x = 0d0 + + ! Calculation of the step x + + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif + + double precision :: beta, norm_x + + ! Test + ! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) + ! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first + ! eigenvectors multiply by a constant to ensure the condition + ! ||x(lambda=-e_val(1))|| = delta and escape the saddle point + if (avoid_saddle .and. e_val(1) < - thresh_eig) then + if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then + + ! norm of x + norm_x = dnrm2(n,x,1) + + ! Computes the coefficient for the w_1 + beta = delta**2 - norm_x**2 + + ! Updates the step x + x = x + W(:,1) * dsqrt(beta) + + ! Recomputes the norm to check + norm_x = dnrm2(n,x,1) + + print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' + print*, '||x||', norm_x + endif + endif +#+END_SRC + +*** Transformation of x + +x is a vector of size n, so it can be write as a m by m +antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". + + #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +! ! Step transformation vector -> matrix +! ! Vector with n element -> mo_num by mo_num matrix +! do j = 1, m +! do i = 1, m +! if (i>j) then +! call mat_to_vec_index(i,j,k) +! m_x(i,j) = x(k) +! else +! m_x(i,j) = 0d0 +! endif +! enddo +! enddo +! +! ! Antisymmetrization of the previous matrix +! do j = 1, m +! do i = 1, m +! if (i lower diagonal matrix (p,q), p > q + +! If a matrix is antisymmetric it can be reshaped as a vector. And the +! vector can be reshaped as an antisymmetric matrix + +! \begin{align*} +! \begin{pmatrix} +! 0 & -1 & -2 & -4 \\ +! 1 & 0 & -3 & -5 \\ +! 2 & 3 & 0 & -6 \\ +! 4 & 5 & 6 & 0 +! \end{pmatrix} +! \Leftrightarrow +! \begin{pmatrix} +! 1 & 2 & 3 & 4 & 5 & 6 +! \end{pmatrix} +! \end{align*} + +! !!! Here the algorithm only work for the lower diagonal !!! + +! Input: +! | i | integer | index in the vector | + +! Ouput: +! | p,q | integer | corresponding indexes in the lower diagonal of a matrix | +! | | | p > q, | +! | | | p -> row, | +! | | | q -> column | + + +subroutine vec_to_mat_index(i,p,q) + + include 'pi.h' + + BEGIN_DOC + ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing + ! its index i a vector + END_DOC + + implicit none + + ! Variables + + ! in + integer,intent(in) :: i + + ! out + integer, intent(out) :: p,q + + ! internal + integer :: a,b + double precision :: da + + da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) + a = INT(da) + if ((a*(a-1))/2==i) then + p = a-1 + else + p = a + endif + b = p*(p-1)/2 + + ! Matrix element indexes + p = p + 1 + q = i - b + +end subroutine diff --git a/src/utils_trust_region/vec_to_mat_index.org b/src/utils_trust_region/vec_to_mat_index.org new file mode 100644 index 00000000..0a09fa86 --- /dev/null +++ b/src/utils_trust_region/vec_to_mat_index.org @@ -0,0 +1,72 @@ +* Vector to matrix indexes + +*Compute the indexes p,q of a matrix element with the vector index i* + +Vector (i) -> lower diagonal matrix (p,q), p > q + +If a matrix is antisymmetric it can be reshaped as a vector. And the +vector can be reshaped as an antisymmetric matrix + +\begin{align*} +\begin{pmatrix} +0 & -1 & -2 & -4 \\ +1 & 0 & -3 & -5 \\ +2 & 3 & 0 & -6 \\ +4 & 5 & 6 & 0 +\end{pmatrix} +\Leftrightarrow +\begin{pmatrix} +1 & 2 & 3 & 4 & 5 & 6 +\end{pmatrix} +\end{align*} + +!!! Here the algorithm only work for the lower diagonal !!! + +Input: +| i | integer | index in the vector | + +Ouput: +| p,q | integer | corresponding indexes in the lower diagonal of a matrix | +| | | p > q, | +| | | p -> row, | +| | | q -> column | + +#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f +subroutine vec_to_mat_index(i,p,q) + + include 'pi.h' + + BEGIN_DOC + ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing + ! its index i a vector + END_DOC + + implicit none + + ! Variables + + ! in + integer,intent(in) :: i + + ! out + integer, intent(out) :: p,q + + ! internal + integer :: a,b + double precision :: da + + da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) + a = INT(da) + if ((a*(a-1))/2==i) then + p = a-1 + else + p = a + endif + b = p*(p-1)/2 + + ! Matrix element indexes + p = p + 1 + q = i - b + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f new file mode 100644 index 00000000..9140b8d3 --- /dev/null +++ b/src/utils_trust_region/vec_to_mat_v2.irp.f @@ -0,0 +1,39 @@ +! Vect to antisymmetric matrix using mat_to_vec_index + +! Vector to antisymmetric matrix transformation using mat_to_vec_index +! subroutine. + +! Can be done in OMP (for the first part and with omp critical for the second) + + +subroutine vec_to_mat_v2(n,m,v_x,m_x) + + BEGIN_DOC + ! Vector to antisymmetric matrix + END_DOC + + implicit none + + integer, intent(in) :: n,m + double precision, intent(in) :: v_x(n) + double precision, intent(out) :: m_x(m,m) + + integer :: i,j,k + + ! 1D -> 2D lower diagonal + m_x = 0d0 + do j = 1, m - 1 + do i = j + 1, m + call mat_to_vec_index(i,j,k) + m_x(i,j) = v_x(k) + enddo + enddo + + ! Antisym + do i = 1, m - 1 + do j = i + 1, m + m_x(i,j) = - m_x(j,i) + enddo + enddo + +end diff --git a/src/utils_trust_region/vec_to_mat_v2.org b/src/utils_trust_region/vec_to_mat_v2.org new file mode 100644 index 00000000..4e358a88 --- /dev/null +++ b/src/utils_trust_region/vec_to_mat_v2.org @@ -0,0 +1,40 @@ +* Vect to antisymmetric matrix using mat_to_vec_index + +Vector to antisymmetric matrix transformation using mat_to_vec_index +subroutine. + +Can be done in OMP (for the first part and with omp critical for the second) + +#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f +subroutine vec_to_mat_v2(n,m,v_x,m_x) + + BEGIN_DOC + ! Vector to antisymmetric matrix + END_DOC + + implicit none + + integer, intent(in) :: n,m + double precision, intent(in) :: v_x(n) + double precision, intent(out) :: m_x(m,m) + + integer :: i,j,k + + ! 1D -> 2D lower diagonal + m_x = 0d0 + do j = 1, m - 1 + do i = j + 1, m + call mat_to_vec_index(i,j,k) + m_x(i,j) = v_x(k) + enddo + enddo + + ! Antisym + do i = 1, m - 1 + do j = i + 1, m + m_x(i,j) = - m_x(j,i) + enddo + enddo + +end +#+END_SRC