mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-12-22 11:33:29 +01:00
Merge pull request #247 from QuantumPackage/dev-stable
Some checks failed
continuous-integration/drone/push Build is failing
Some checks failed
continuous-integration/drone/push Build is failing
Dev stable
This commit is contained in:
commit
810b623743
57
.github/workflows/compilation.yml
vendored
Normal file
57
.github/workflows/compilation.yml
vendored
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
name: QP Compilation
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
- dev-stable
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- dev-stable
|
||||||
|
- master
|
||||||
|
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
|
||||||
|
configuration:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
name: Dependencies
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: install dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config
|
||||||
|
|
||||||
|
compilation:
|
||||||
|
name: Compilation
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- name: Restore configuration
|
||||||
|
id: restore
|
||||||
|
uses: actions/cache@v3
|
||||||
|
continue-on-error: false
|
||||||
|
with:
|
||||||
|
key: qp2-config
|
||||||
|
fail-on-cache-miss: true
|
||||||
|
path: |
|
||||||
|
external/opampack/
|
||||||
|
include/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
libexec/
|
||||||
|
restore-keys: qp2-
|
||||||
|
- name: Configuration
|
||||||
|
run: |
|
||||||
|
./configure -i ninja || :
|
||||||
|
./configure -i docopt || :
|
||||||
|
./configure -i resultsFile || :
|
||||||
|
./configure -i bats || :
|
||||||
|
./configure -c ./config/gfortran_debug.cfg
|
||||||
|
- name: Compilation
|
||||||
|
run: |
|
||||||
|
bash -c "source quantum_package.rc ; exec ninja"
|
||||||
|
|
||||||
|
|
||||||
|
|
66
.github/workflows/configuration.yml
vendored
Normal file
66
.github/workflows/configuration.yml
vendored
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
name: QP Configuration
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
# - ci
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
schedule:
|
||||||
|
- cron: "23 22 * * 6"
|
||||||
|
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
|
||||||
|
configuration:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
name: Dependencies
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config
|
||||||
|
- name: zlib
|
||||||
|
run: |
|
||||||
|
./configure -i zlib || echo OK
|
||||||
|
- name: ninja
|
||||||
|
run: |
|
||||||
|
./configure -i ninja || echo OK
|
||||||
|
- name: zeromq
|
||||||
|
run: |
|
||||||
|
./configure -i zeromq || echo OK
|
||||||
|
- name: f77zmq
|
||||||
|
run: |
|
||||||
|
./configure -i f77zmq || echo OK
|
||||||
|
- name: gmp
|
||||||
|
run: |
|
||||||
|
./configure -i gmp || echo OK
|
||||||
|
- name: ocaml
|
||||||
|
run: |
|
||||||
|
./configure -i ocaml || echo OK
|
||||||
|
- name: docopt
|
||||||
|
run: |
|
||||||
|
./configure -i docopt || echo OK
|
||||||
|
- name: resultsFile
|
||||||
|
run: |
|
||||||
|
./configure -i resultsFile || echo OK
|
||||||
|
- name: bats
|
||||||
|
run: |
|
||||||
|
./configure -i bats || echo OK
|
||||||
|
- name: Final check
|
||||||
|
run: |
|
||||||
|
./configure -c config/gfortran_debug.cfg
|
||||||
|
- name: Cache
|
||||||
|
uses: actions/cache@v3
|
||||||
|
with:
|
||||||
|
key: qp2-config
|
||||||
|
path: |
|
||||||
|
external/opampack/
|
||||||
|
include/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
libexec/
|
||||||
|
|
@ -4,20 +4,20 @@
|
|||||||
|
|
||||||
** Changes
|
** Changes
|
||||||
|
|
||||||
|
- Introduced DFT-based basis set correction
|
||||||
|
- Use OpamPack for OCaml
|
||||||
|
- Configure adapted for ARM
|
||||||
|
- Added many types of integrals
|
||||||
|
- Accelerated four-index transformation
|
||||||
|
|
||||||
*** TODO: take from dev
|
*** TODO: take from dev
|
||||||
- [ ] Added GTOs with complex exponent
|
- [ ] Added GTOs with complex exponent
|
||||||
- [ ] Added many types of integrals
|
|
||||||
- Updated version of f77-zmq
|
- Updated version of f77-zmq
|
||||||
- Added transcorrelated SCF
|
- Added transcorrelated SCF
|
||||||
- Added transcorrelated CIPSI
|
- Added transcorrelated CIPSI
|
||||||
- Started to introduce shells in AOs
|
- Started to introduce shells in AOs
|
||||||
- Added ECMD UEG functional
|
- Added ECMD UEG functional
|
||||||
- Introduced DFT-based basis set correction
|
- General Davidson algorithm
|
||||||
- General davidson algorithm
|
|
||||||
- Use OpamPack for OCaml
|
|
||||||
- Configure adapted for ARM
|
|
||||||
|
|
||||||
*** Done
|
|
||||||
|
|
||||||
* Version 2.2
|
* Version 2.2
|
||||||
|
|
||||||
|
21
configure
vendored
21
configure
vendored
@ -20,18 +20,6 @@ git submodule update
|
|||||||
ARCHITECTURE=$(uname -m)
|
ARCHITECTURE=$(uname -m)
|
||||||
cd ${QP_ROOT}/external/qp2-dependencies
|
cd ${QP_ROOT}/external/qp2-dependencies
|
||||||
echo "Architecture: $ARCHITECTURE"
|
echo "Architecture: $ARCHITECTURE"
|
||||||
case $ARCHITECTURE in
|
|
||||||
aarch64)
|
|
||||||
git checkout arm64
|
|
||||||
;;
|
|
||||||
x86_64)
|
|
||||||
git checkout x86
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo "Unknown architecture. Using x86_64."
|
|
||||||
git checkout x86
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
cd ${QP_ROOT}
|
cd ${QP_ROOT}
|
||||||
|
|
||||||
|
|
||||||
@ -209,7 +197,7 @@ for PACKAGE in ${PACKAGES} ; do
|
|||||||
|
|
||||||
execute << EOF
|
execute << EOF
|
||||||
rm -f "\${QP_ROOT}"/bin/ninja
|
rm -f "\${QP_ROOT}"/bin/ninja
|
||||||
tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/ninja.tar.gz
|
tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz
|
||||||
mv ninja "\${QP_ROOT}"/bin/
|
mv ninja "\${QP_ROOT}"/bin/
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
@ -254,10 +242,13 @@ EOF
|
|||||||
|
|
||||||
execute <<EOF
|
execute <<EOF
|
||||||
source "${QP_ROOT}"/quantum_package.rc
|
source "${QP_ROOT}"/quantum_package.rc
|
||||||
|
rm -rf "${QP_ROOT}"/external/opampack
|
||||||
cd "${QP_ROOT}"/external/
|
cd "${QP_ROOT}"/external/
|
||||||
tar --gunzip --extract --file qp2-dependencies/opampack.tar.gz
|
tar --gunzip --extract --file qp2-dependencies/${ARCHITECTURE}/opampack.tar.gz
|
||||||
cd "${QP_ROOT}"/external/opampack
|
cd "${QP_ROOT}"/external/opampack
|
||||||
./install.sh
|
./install.sh
|
||||||
|
export OPAMROOT="${QP_ROOT}"/external/opampack/opamroot
|
||||||
|
eval \$("${QP_ROOT}"/external/opampack/opam env)
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
elif [[ ${PACKAGE} = bse ]] ; then
|
elif [[ ${PACKAGE} = bse ]] ; then
|
||||||
@ -357,7 +348,7 @@ if [[ ${ZLIB} = $(not_found) ]] ; then
|
|||||||
fail
|
fail
|
||||||
fi
|
fi
|
||||||
|
|
||||||
OCAML=$(find_exe ocaml)
|
OCAML=$(find_exe ocamlc)
|
||||||
if [[ ${OCAML} = $(not_found) ]] ; then
|
if [[ ${OCAML} = $(not_found) ]] ; then
|
||||||
error "OCaml (ocaml) compiler is not installed."
|
error "OCaml (ocaml) compiler is not installed."
|
||||||
fail
|
fail
|
||||||
|
2
external/ezfio
vendored
2
external/ezfio
vendored
@ -1 +1 @@
|
|||||||
Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c
|
Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93
|
2
external/irpf90
vendored
2
external/irpf90
vendored
@ -1 +1 @@
|
|||||||
Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271
|
Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102
|
2
external/qp2-dependencies
vendored
2
external/qp2-dependencies
vendored
@ -1 +1 @@
|
|||||||
Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8
|
Subproject commit b8cd5815bce14c9b880e3c5ea3d5fc2652f5955c
|
8
include/.gitignore
vendored
8
include/.gitignore
vendored
@ -1,7 +1 @@
|
|||||||
zmq.h
|
*
|
||||||
gmp.h
|
|
||||||
zconf.h
|
|
||||||
zconf.h
|
|
||||||
zlib.h
|
|
||||||
zmq_utils.h
|
|
||||||
f77_zmq_free.h
|
|
||||||
|
@ -84,7 +84,7 @@ end = struct
|
|||||||
Ezfio.get_nuclei_nucl_coord()
|
Ezfio.get_nuclei_nucl_coord()
|
||||||
|> Ezfio.flattened_ezfio
|
|> Ezfio.flattened_ezfio
|
||||||
in
|
in
|
||||||
let zero = Point3d.of_string Units.Bohr "0. 0. 0." in
|
let zero = Point3d.of_string ~units:Units.Bohr "0. 0. 0." in
|
||||||
let result = Array.make nucl_num zero in
|
let result = Array.make nucl_num zero in
|
||||||
for i=0 to (nucl_num-1)
|
for i=0 to (nucl_num-1)
|
||||||
do
|
do
|
||||||
@ -218,7 +218,7 @@ Nuclear coordinates in xyz format (Angstroms) ::
|
|||||||
and lines = Array.of_list lines
|
and lines = Array.of_list lines
|
||||||
in
|
in
|
||||||
List.init (Nucl_number.to_int nucl_num) (fun i ->
|
List.init (Nucl_number.to_int nucl_num) (fun i ->
|
||||||
Atom.of_string Units.Angstrom lines.(i))
|
Atom.of_string ~units:Units.Angstrom lines.(i))
|
||||||
end
|
end
|
||||||
| _ -> failwith "Error in xyz format"
|
| _ -> failwith "Error in xyz format"
|
||||||
in
|
in
|
||||||
|
3
scripts/.gitignore
vendored
3
scripts/.gitignore
vendored
@ -2,3 +2,6 @@
|
|||||||
*.pyo
|
*.pyo
|
||||||
docopt.py
|
docopt.py
|
||||||
resultsFile/
|
resultsFile/
|
||||||
|
verif_omp/a.out
|
||||||
|
src/*/Makefile
|
||||||
|
src/*/*/
|
||||||
|
@ -99,9 +99,20 @@ def ninja_create_env_variable(pwd_config_file):
|
|||||||
l_string = ["builddir = {0}".format(os.path.dirname(ROOT_BUILD_NINJA)),
|
l_string = ["builddir = {0}".format(os.path.dirname(ROOT_BUILD_NINJA)),
|
||||||
""]
|
""]
|
||||||
|
|
||||||
|
|
||||||
for flag in ["FC", "FCFLAGS", "IRPF90", "IRPF90_FLAGS"]:
|
for flag in ["FC", "FCFLAGS", "IRPF90", "IRPF90_FLAGS"]:
|
||||||
str_ = "{0} = {1}".format(flag, get_compilation_option(pwd_config_file,
|
str_ = "{0} = {1}".format(flag, get_compilation_option(pwd_config_file,
|
||||||
flag))
|
flag))
|
||||||
|
for directory in [real_join(QP_SRC, m) for m in sorted(os.listdir(QP_SRC))]:
|
||||||
|
includefile = real_join(directory, flag)
|
||||||
|
try:
|
||||||
|
content = ""
|
||||||
|
with open(includefile,'r') as f:
|
||||||
|
content = f.read()
|
||||||
|
str_ += " "+content
|
||||||
|
except IOError:
|
||||||
|
pass
|
||||||
|
|
||||||
l_string.append(str_)
|
l_string.append(str_)
|
||||||
|
|
||||||
lib_lapack = get_compilation_option(pwd_config_file, "LAPACK_LIB")
|
lib_lapack = get_compilation_option(pwd_config_file, "LAPACK_LIB")
|
||||||
@ -110,17 +121,20 @@ def ninja_create_env_variable(pwd_config_file):
|
|||||||
str_lib = " ".join([lib_lapack, EZFIO_LIB, ZMQ_LIB, LIB, lib_usr])
|
str_lib = " ".join([lib_lapack, EZFIO_LIB, ZMQ_LIB, LIB, lib_usr])
|
||||||
|
|
||||||
# Read all LIB files in modules
|
# Read all LIB files in modules
|
||||||
libfile = "LIB"
|
for directory in [real_join(QP_SRC, m) for m in sorted(os.listdir(QP_SRC))]:
|
||||||
try:
|
libfile = real_join(directory, "LIB")
|
||||||
content = ""
|
try:
|
||||||
with open(libfile,'r') as f:
|
content = ""
|
||||||
content = f.read()
|
with open(libfile,'r') as f:
|
||||||
str_lib += " "+content
|
content = f.read().replace('\n','')
|
||||||
except IOError:
|
str_lib += " "+content
|
||||||
pass
|
except IOError:
|
||||||
|
pass
|
||||||
|
|
||||||
l_string.append("LIB = {0} ".format(str_lib))
|
l_string.append("LIB = {0} ".format(str_lib))
|
||||||
|
|
||||||
|
|
||||||
|
l_string.append("CONFIG_FILE = {0}".format(pwd_config_file))
|
||||||
l_string.append("")
|
l_string.append("")
|
||||||
|
|
||||||
return l_string
|
return l_string
|
||||||
|
11
src/.gitignore
vendored
Normal file
11
src/.gitignore
vendored
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
*
|
||||||
|
!README.rst
|
||||||
|
!*/
|
||||||
|
*/*
|
||||||
|
!*/*.*
|
||||||
|
*/*.o
|
||||||
|
*/build.ninja
|
||||||
|
*/ezfio_interface.irp.f
|
||||||
|
*/.gitignore
|
||||||
|
*/*.swp
|
||||||
|
|
@ -80,6 +80,10 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)]
|
|||||||
IF (DO_PSEUDO) THEN
|
IF (DO_PSEUDO) THEN
|
||||||
ao_integrals_n_e += ao_pseudo_integrals
|
ao_integrals_n_e += ao_pseudo_integrals
|
||||||
ENDIF
|
ENDIF
|
||||||
|
IF(point_charges) THEN
|
||||||
|
ao_integrals_n_e += ao_integrals_pt_chrg
|
||||||
|
ENDIF
|
||||||
|
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
108
src/ao_one_e_ints/pot_pt_charges.irp.f
Normal file
108
src/ao_one_e_ints/pot_pt_charges.irp.f
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
|
||||||
|
BEGIN_PROVIDER [ double precision, ao_integrals_pt_chrg, (ao_num,ao_num)]
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Point charge-electron interaction, in the |AO| basis set.
|
||||||
|
!
|
||||||
|
! :math:`\langle \chi_i | -\sum_charge charge * \frac{1}{|r-R_charge|} | \chi_j \rangle`
|
||||||
|
!
|
||||||
|
! Notice the minus sign convention as it is supposed to be for electrons.
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
integer :: num_A, num_B, power_A(3), power_B(3)
|
||||||
|
integer :: i, j, k, l, n_pt_in, m
|
||||||
|
double precision :: alpha, beta
|
||||||
|
double precision :: A_center(3),B_center(3),C_center(3)
|
||||||
|
double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult
|
||||||
|
|
||||||
|
ao_integrals_pt_chrg = 0.d0
|
||||||
|
|
||||||
|
! if (read_ao_integrals_pt_chrg) then
|
||||||
|
!
|
||||||
|
! call ezfio_get_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg)
|
||||||
|
! print *, 'AO N-e integrals read from disk'
|
||||||
|
!
|
||||||
|
! else
|
||||||
|
|
||||||
|
! if(use_cosgtos) then
|
||||||
|
! !print *, " use_cosgtos for ao_integrals_pt_chrg ?", use_cosgtos
|
||||||
|
!
|
||||||
|
! do j = 1, ao_num
|
||||||
|
! do i = 1, ao_num
|
||||||
|
! ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg_cosgtos(i,j)
|
||||||
|
! enddo
|
||||||
|
! enddo
|
||||||
|
!
|
||||||
|
! else
|
||||||
|
|
||||||
|
!$OMP PARALLEL &
|
||||||
|
!$OMP DEFAULT (NONE) &
|
||||||
|
!$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,&
|
||||||
|
!$OMP num_A,num_B,Z,c,c1,n_pt_in) &
|
||||||
|
!$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,pts_charge_coord,ao_coef_normalized_ordered_transp,nucl_coord,&
|
||||||
|
!$OMP n_pt_max_integrals,ao_integrals_pt_chrg,n_pts_charge,pts_charge_z)
|
||||||
|
|
||||||
|
n_pt_in = n_pt_max_integrals
|
||||||
|
|
||||||
|
!$OMP DO SCHEDULE (dynamic)
|
||||||
|
|
||||||
|
do j = 1, ao_num
|
||||||
|
num_A = ao_nucl(j)
|
||||||
|
power_A(1:3)= ao_power(j,1:3)
|
||||||
|
A_center(1:3) = nucl_coord(num_A,1:3)
|
||||||
|
|
||||||
|
do i = 1, ao_num
|
||||||
|
|
||||||
|
num_B = ao_nucl(i)
|
||||||
|
power_B(1:3)= ao_power(i,1:3)
|
||||||
|
B_center(1:3) = nucl_coord(num_B,1:3)
|
||||||
|
|
||||||
|
do l=1,ao_prim_num(j)
|
||||||
|
alpha = ao_expo_ordered_transp(l,j)
|
||||||
|
|
||||||
|
do m=1,ao_prim_num(i)
|
||||||
|
beta = ao_expo_ordered_transp(m,i)
|
||||||
|
|
||||||
|
double precision :: c, c1
|
||||||
|
c = 0.d0
|
||||||
|
|
||||||
|
do k = 1, n_pts_charge
|
||||||
|
double precision :: Z
|
||||||
|
Z = pts_charge_z(k)
|
||||||
|
|
||||||
|
C_center(1:3) = pts_charge_coord(k,1:3)
|
||||||
|
|
||||||
|
c1 = NAI_pol_mult( A_center, B_center, power_A, power_B &
|
||||||
|
, alpha, beta, C_center, n_pt_in )
|
||||||
|
|
||||||
|
c = c - Z * c1
|
||||||
|
|
||||||
|
enddo
|
||||||
|
ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg(i,j) &
|
||||||
|
+ ao_coef_normalized_ordered_transp(l,j) &
|
||||||
|
* ao_coef_normalized_ordered_transp(m,i) * c
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
!$OMP END DO
|
||||||
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
|
! endif
|
||||||
|
|
||||||
|
|
||||||
|
! IF(do_pseudo) THEN
|
||||||
|
! ao_integrals_pt_chrg += ao_pseudo_integrals
|
||||||
|
! ENDIF
|
||||||
|
|
||||||
|
! endif
|
||||||
|
|
||||||
|
|
||||||
|
! if (write_ao_integrals_pt_chrg) then
|
||||||
|
! call ezfio_set_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg)
|
||||||
|
! print *, 'AO N-e integrals written to disk'
|
||||||
|
! endif
|
||||||
|
|
||||||
|
END_PROVIDER
|
@ -321,14 +321,15 @@ BEGIN_PROVIDER [ double precision, ao_integrals_cache, (0:64*64*64*64) ]
|
|||||||
!$OMP END PARALLEL DO
|
!$OMP END PARALLEL DO
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
double precision function get_ao_two_e_integral(i,j,k,l,map) result(result)
|
double precision function get_ao_two_e_integral(i, j, k, l, map) result(result)
|
||||||
use map_module
|
use map_module
|
||||||
implicit none
|
implicit none
|
||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
! Gets one AO bi-electronic integral from the AO map
|
! Gets one AO bi-electronic integral from the AO map in PHYSICIST NOTATION
|
||||||
!
|
!
|
||||||
! i,j,k,l in physicist notation <ij|kl>
|
! <1:k, 2:l |1:i, 2:j>
|
||||||
END_DOC
|
END_DOC
|
||||||
integer, intent(in) :: i,j,k,l
|
integer, intent(in) :: i,j,k,l
|
||||||
integer(key_kind) :: idx
|
integer(key_kind) :: idx
|
||||||
|
@ -101,6 +101,7 @@ double precision function ao_two_e_integral(i,j,k,l)
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
double precision function ao_two_e_integral_schwartz_accel(i,j,k,l)
|
double precision function ao_two_e_integral_schwartz_accel(i,j,k,l)
|
||||||
|
@ -70,8 +70,8 @@ subroutine run_cipsi
|
|||||||
|
|
||||||
do while ( &
|
do while ( &
|
||||||
(N_det < N_det_max) .and. &
|
(N_det < N_det_max) .and. &
|
||||||
(sum(abs(pt2_data % pt2(1:N_states)) * state_average_weight(1:N_states)) > pt2_max) .and. &
|
(maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and. &
|
||||||
(sum(abs(pt2_data % variance(1:N_states)) * state_average_weight(1:N_states)) > variance_max) .and. &
|
(maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and. &
|
||||||
(correlation_energy_ratio <= correlation_energy_ratio_max) &
|
(correlation_energy_ratio <= correlation_energy_ratio_max) &
|
||||||
)
|
)
|
||||||
write(*,'(A)') '--------------------------------------------------------------------------------'
|
write(*,'(A)') '--------------------------------------------------------------------------------'
|
||||||
|
@ -131,7 +131,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
|
|||||||
PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
|
PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
|
||||||
PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted
|
PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted
|
||||||
PROVIDE psi_det_hii selection_weight pseudo_sym
|
PROVIDE psi_det_hii selection_weight pseudo_sym
|
||||||
PROVIDE list_act list_inact list_core list_virt list_del seniority_max
|
PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max
|
||||||
PROVIDE excitation_beta_max excitation_alpha_max excitation_max
|
PROVIDE excitation_beta_max excitation_alpha_max excitation_max
|
||||||
|
|
||||||
if (h0_type == 'CFG') then
|
if (h0_type == 'CFG') then
|
||||||
@ -290,9 +290,9 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
|
|||||||
call set_multiple_levels_omp(.False.)
|
call set_multiple_levels_omp(.False.)
|
||||||
|
|
||||||
|
|
||||||
print '(A)', '========== ======================= ===================== ===================== ==========='
|
print '(A)', '========== ==================== ================ ================ ================ ============= ==========='
|
||||||
print '(A)', ' Samples Energy Variance Norm^2 Seconds'
|
print '(A)', ' Samples Energy PT2 Variance Norm^2 Convergence Seconds'
|
||||||
print '(A)', '========== ======================= ===================== ===================== ==========='
|
print '(A)', '========== ==================== ================ ================ ================ ============= ==========='
|
||||||
|
|
||||||
PROVIDE global_selection_buffer
|
PROVIDE global_selection_buffer
|
||||||
|
|
||||||
@ -316,7 +316,8 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
|
|||||||
call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
|
call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
|
||||||
call set_multiple_levels_omp(.True.)
|
call set_multiple_levels_omp(.True.)
|
||||||
|
|
||||||
print '(A)', '========== ======================= ===================== ===================== ==========='
|
print '(A)', '========== ==================== ================ ================ ================ ============= ==========='
|
||||||
|
|
||||||
|
|
||||||
do k=1,N_states
|
do k=1,N_states
|
||||||
pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate)
|
pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate)
|
||||||
@ -414,6 +415,17 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
|
|||||||
double precision :: rss
|
double precision :: rss
|
||||||
double precision, external :: memory_of_double, memory_of_int
|
double precision, external :: memory_of_double, memory_of_int
|
||||||
|
|
||||||
|
character(len=20) :: format_str1, str_error1, format_str2, str_error2
|
||||||
|
character(len=20) :: format_str3, str_error3, format_str4, str_error4
|
||||||
|
character(len=20) :: format_value1, format_value2, format_value3, format_value4
|
||||||
|
character(len=20) :: str_value1, str_value2, str_value3, str_value4
|
||||||
|
character(len=20) :: str_conv
|
||||||
|
double precision :: value1, value2, value3, value4
|
||||||
|
double precision :: error1, error2, error3, error4
|
||||||
|
integer :: size1,size2,size3,size4
|
||||||
|
|
||||||
|
double precision :: conv_crit
|
||||||
|
|
||||||
sending =.False.
|
sending =.False.
|
||||||
|
|
||||||
rss = memory_of_int(pt2_n_tasks_max*2+N_det_generators*2)
|
rss = memory_of_int(pt2_n_tasks_max*2+N_det_generators*2)
|
||||||
@ -537,14 +549,60 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
|
|||||||
|
|
||||||
if ((time - time1 > 1.d0) .or. (n==N_det_generators)) then
|
if ((time - time1 > 1.d0) .or. (n==N_det_generators)) then
|
||||||
time1 = time
|
time1 = time
|
||||||
print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1)', c, &
|
|
||||||
pt2_data % pt2(pt2_stoch_istate) +E, &
|
value1 = pt2_data % pt2(pt2_stoch_istate) + E
|
||||||
pt2_data_err % pt2(pt2_stoch_istate), &
|
error1 = pt2_data_err % pt2(pt2_stoch_istate)
|
||||||
pt2_data % variance(pt2_stoch_istate), &
|
value2 = pt2_data % pt2(pt2_stoch_istate)
|
||||||
pt2_data_err % variance(pt2_stoch_istate), &
|
error2 = pt2_data_err % pt2(pt2_stoch_istate)
|
||||||
pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate), &
|
value3 = pt2_data % variance(pt2_stoch_istate)
|
||||||
pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate), &
|
error3 = pt2_data_err % variance(pt2_stoch_istate)
|
||||||
time-time0
|
value4 = pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate)
|
||||||
|
error4 = pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate)
|
||||||
|
|
||||||
|
! Max size of the values (FX.Y) with X=size
|
||||||
|
size1 = 15
|
||||||
|
size2 = 12
|
||||||
|
size3 = 12
|
||||||
|
size4 = 12
|
||||||
|
|
||||||
|
! To generate the format: number(error)
|
||||||
|
call format_w_error(value1,error1,size1,8,format_value1,str_error1)
|
||||||
|
call format_w_error(value2,error2,size2,8,format_value2,str_error2)
|
||||||
|
call format_w_error(value3,error3,size3,8,format_value3,str_error3)
|
||||||
|
call format_w_error(value4,error4,size4,8,format_value4,str_error4)
|
||||||
|
|
||||||
|
! value > string with the right format
|
||||||
|
write(str_value1,'('//format_value1//')') value1
|
||||||
|
write(str_value2,'('//format_value2//')') value2
|
||||||
|
write(str_value3,'('//format_value3//')') value3
|
||||||
|
write(str_value4,'('//format_value4//')') value4
|
||||||
|
|
||||||
|
! Convergence criterion
|
||||||
|
conv_crit = dabs(pt2_data_err % pt2(pt2_stoch_istate)) / &
|
||||||
|
(1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) )
|
||||||
|
write(str_conv,'(G10.3)') conv_crit
|
||||||
|
|
||||||
|
write(*,'(I10,X,X,A20,X,A16,X,A16,X,A16,X,A12,X,F10.1)') c,&
|
||||||
|
adjustl(adjustr(str_value1)//'('//str_error1(1:1)//')'),&
|
||||||
|
adjustl(adjustr(str_value2)//'('//str_error2(1:1)//')'),&
|
||||||
|
adjustl(adjustr(str_value3)//'('//str_error3(1:1)//')'),&
|
||||||
|
adjustl(adjustr(str_value4)//'('//str_error4(1:1)//')'),&
|
||||||
|
adjustl(str_conv),&
|
||||||
|
time-time0
|
||||||
|
|
||||||
|
! Old print
|
||||||
|
!print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,1pE16.6,1pE16.6)', c, &
|
||||||
|
! pt2_data % pt2(pt2_stoch_istate) +E, &
|
||||||
|
! pt2_data_err % pt2(pt2_stoch_istate), &
|
||||||
|
! pt2_data % variance(pt2_stoch_istate), &
|
||||||
|
! pt2_data_err % variance(pt2_stoch_istate), &
|
||||||
|
! pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate), &
|
||||||
|
! pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate), &
|
||||||
|
! time-time0, &
|
||||||
|
! pt2_data % pt2(pt2_stoch_istate), &
|
||||||
|
! dabs(pt2_data_err % pt2(pt2_stoch_istate)) / &
|
||||||
|
! (1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) )
|
||||||
|
|
||||||
if (stop_now .or. ( &
|
if (stop_now .or. ( &
|
||||||
(do_exit .and. (dabs(pt2_data_err % pt2(pt2_stoch_istate)) / &
|
(do_exit .and. (dabs(pt2_data_err % pt2(pt2_stoch_istate)) / &
|
||||||
(1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) ) <= relative_error))) ) then
|
(1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) ) <= relative_error))) ) then
|
||||||
@ -844,6 +902,7 @@ END_PROVIDER
|
|||||||
if (tooth_width == 0.d0) then
|
if (tooth_width == 0.d0) then
|
||||||
tooth_width = max(1.d-15,sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1))))
|
tooth_width = max(1.d-15,sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1))))
|
||||||
endif
|
endif
|
||||||
|
ASSERT(tooth_width > 0.d0)
|
||||||
do i=pt2_n_0(t)+1, pt2_n_0(t+1)
|
do i=pt2_n_0(t)+1, pt2_n_0(t+1)
|
||||||
pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width
|
pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width
|
||||||
end do
|
end do
|
||||||
|
@ -31,11 +31,12 @@ subroutine run_pt2_slave(thread,iproc,energy)
|
|||||||
|
|
||||||
double precision, intent(in) :: energy(N_states_diag)
|
double precision, intent(in) :: energy(N_states_diag)
|
||||||
integer, intent(in) :: thread, iproc
|
integer, intent(in) :: thread, iproc
|
||||||
if (N_det > 100000 ) then
|
call run_pt2_slave_large(thread,iproc,energy)
|
||||||
call run_pt2_slave_large(thread,iproc,energy)
|
! if (N_det > 100000 ) then
|
||||||
else
|
! call run_pt2_slave_large(thread,iproc,energy)
|
||||||
call run_pt2_slave_small(thread,iproc,energy)
|
! else
|
||||||
endif
|
! call run_pt2_slave_small(thread,iproc,energy)
|
||||||
|
! endif
|
||||||
end
|
end
|
||||||
|
|
||||||
subroutine run_pt2_slave_small(thread,iproc,energy)
|
subroutine run_pt2_slave_small(thread,iproc,energy)
|
||||||
@ -66,6 +67,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
|
|||||||
|
|
||||||
double precision, external :: memory_of_double, memory_of_int
|
double precision, external :: memory_of_double, memory_of_int
|
||||||
integer :: bsize ! Size of selection buffers
|
integer :: bsize ! Size of selection buffers
|
||||||
|
! logical :: sending
|
||||||
|
|
||||||
allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
|
allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
|
||||||
allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
|
allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
|
||||||
@ -83,6 +85,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
|
|||||||
buffer_ready = .False.
|
buffer_ready = .False.
|
||||||
n_tasks = 1
|
n_tasks = 1
|
||||||
|
|
||||||
|
! sending = .False.
|
||||||
done = .False.
|
done = .False.
|
||||||
do while (.not.done)
|
do while (.not.done)
|
||||||
|
|
||||||
@ -116,13 +119,14 @@ subroutine run_pt2_slave_small(thread,iproc,energy)
|
|||||||
do k=1,n_tasks
|
do k=1,n_tasks
|
||||||
call pt2_alloc(pt2_data(k),N_states)
|
call pt2_alloc(pt2_data(k),N_states)
|
||||||
b%cur = 0
|
b%cur = 0
|
||||||
! double precision :: time2
|
!double precision :: time2
|
||||||
! call wall_time(time2)
|
!call wall_time(time2)
|
||||||
call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k)))
|
call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k)))
|
||||||
! call wall_time(time1)
|
!call wall_time(time1)
|
||||||
! print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1))
|
!print *, i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1))
|
||||||
enddo
|
enddo
|
||||||
call wall_time(time1)
|
call wall_time(time1)
|
||||||
|
!print *, '-->', i_generator(1), time1-time0, n_tasks
|
||||||
|
|
||||||
integer, external :: tasks_done_to_taskserver
|
integer, external :: tasks_done_to_taskserver
|
||||||
if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
|
if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
|
||||||
@ -160,11 +164,6 @@ end subroutine
|
|||||||
subroutine run_pt2_slave_large(thread,iproc,energy)
|
subroutine run_pt2_slave_large(thread,iproc,energy)
|
||||||
use selection_types
|
use selection_types
|
||||||
use f77_zmq
|
use f77_zmq
|
||||||
BEGIN_DOC
|
|
||||||
! This subroutine can miss important determinants when the PT2 is completely
|
|
||||||
! computed. It should be called only for large workloads where the PT2 is
|
|
||||||
! interrupted before the end
|
|
||||||
END_DOC
|
|
||||||
implicit none
|
implicit none
|
||||||
|
|
||||||
double precision, intent(in) :: energy(N_states_diag)
|
double precision, intent(in) :: energy(N_states_diag)
|
||||||
@ -190,12 +189,8 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
|
|||||||
|
|
||||||
integer :: bsize ! Size of selection buffers
|
integer :: bsize ! Size of selection buffers
|
||||||
logical :: sending
|
logical :: sending
|
||||||
double precision :: time_shift
|
|
||||||
|
|
||||||
PROVIDE global_selection_buffer global_selection_buffer_lock
|
PROVIDE global_selection_buffer global_selection_buffer_lock
|
||||||
|
|
||||||
call random_number(time_shift)
|
|
||||||
time_shift = time_shift*15.d0
|
|
||||||
|
|
||||||
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
|
||||||
|
|
||||||
@ -213,9 +208,6 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
|
|||||||
|
|
||||||
sending = .False.
|
sending = .False.
|
||||||
done = .False.
|
done = .False.
|
||||||
double precision :: time0, time1
|
|
||||||
call wall_time(time0)
|
|
||||||
time0 = time0+time_shift
|
|
||||||
do while (.not.done)
|
do while (.not.done)
|
||||||
|
|
||||||
integer, external :: get_tasks_from_taskserver
|
integer, external :: get_tasks_from_taskserver
|
||||||
@ -242,28 +234,25 @@ subroutine run_pt2_slave_large(thread,iproc,energy)
|
|||||||
ASSERT (b%N == bsize)
|
ASSERT (b%N == bsize)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
double precision :: time0, time1
|
||||||
|
call wall_time(time0)
|
||||||
call pt2_alloc(pt2_data,N_states)
|
call pt2_alloc(pt2_data,N_states)
|
||||||
b%cur = 0
|
b%cur = 0
|
||||||
call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator))
|
call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator))
|
||||||
|
call wall_time(time1)
|
||||||
|
|
||||||
integer, external :: tasks_done_to_taskserver
|
integer, external :: tasks_done_to_taskserver
|
||||||
if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
|
if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
|
||||||
done = .true.
|
done = .true.
|
||||||
endif
|
endif
|
||||||
call sort_selection_buffer(b)
|
call sort_selection_buffer(b)
|
||||||
|
|
||||||
call wall_time(time1)
|
|
||||||
! if (time1-time0 > 15.d0) then
|
|
||||||
call omp_set_lock(global_selection_buffer_lock)
|
|
||||||
global_selection_buffer%mini = b%mini
|
|
||||||
call merge_selection_buffers(b,global_selection_buffer)
|
|
||||||
b%cur=0
|
|
||||||
call omp_unset_lock(global_selection_buffer_lock)
|
|
||||||
call wall_time(time0)
|
|
||||||
! endif
|
|
||||||
|
|
||||||
call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)
|
call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)
|
||||||
if ( iproc == 1 .or. i_generator < 100 .or. done) then
|
call omp_set_lock(global_selection_buffer_lock)
|
||||||
|
global_selection_buffer%mini = b%mini
|
||||||
|
call merge_selection_buffers(b,global_selection_buffer)
|
||||||
|
b%cur=0
|
||||||
|
call omp_unset_lock(global_selection_buffer_lock)
|
||||||
|
if ( iproc == 1 ) then
|
||||||
call omp_set_lock(global_selection_buffer_lock)
|
call omp_set_lock(global_selection_buffer_lock)
|
||||||
call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending)
|
call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending)
|
||||||
global_selection_buffer%cur = 0
|
global_selection_buffer%cur = 0
|
||||||
|
@ -571,7 +571,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
|
|||||||
double precision, external :: diag_H_mat_elem_fock
|
double precision, external :: diag_H_mat_elem_fock
|
||||||
double precision :: E_shift
|
double precision :: E_shift
|
||||||
double precision :: s_weight(N_states,N_states)
|
double precision :: s_weight(N_states,N_states)
|
||||||
logical, external :: is_in_wavefunction
|
|
||||||
PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs
|
PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs
|
||||||
do jstate=1,N_states
|
do jstate=1,N_states
|
||||||
do istate=1,N_states
|
do istate=1,N_states
|
||||||
@ -751,7 +750,10 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
|
|||||||
if (delta_E < 0.d0) then
|
if (delta_E < 0.d0) then
|
||||||
tmp = -tmp
|
tmp = -tmp
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
!e_pert(istate) = alpha_h_psi * alpha_h_psi / (E0(istate) - Hii)
|
||||||
e_pert(istate) = 0.5d0 * (tmp - delta_E)
|
e_pert(istate) = 0.5d0 * (tmp - delta_E)
|
||||||
|
|
||||||
if (dabs(alpha_h_psi) > 1.d-4) then
|
if (dabs(alpha_h_psi) > 1.d-4) then
|
||||||
coef(istate) = e_pert(istate) / alpha_h_psi
|
coef(istate) = e_pert(istate) / alpha_h_psi
|
||||||
else
|
else
|
||||||
@ -864,6 +866,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
|
|||||||
!!!BEGIN_DEBUG
|
!!!BEGIN_DEBUG
|
||||||
! ! To check if the pt2 is taking determinants already in the wf
|
! ! To check if the pt2 is taking determinants already in the wf
|
||||||
! if (is_in_wavefunction(det(N_int,1),N_int)) then
|
! if (is_in_wavefunction(det(N_int,1),N_int)) then
|
||||||
|
! logical, external :: is_in_wavefunction
|
||||||
! print*, 'A determinant contributing to the pt2 is already in'
|
! print*, 'A determinant contributing to the pt2 is already in'
|
||||||
! print*, 'the wave function:'
|
! print*, 'the wave function:'
|
||||||
! call print_det(det(N_int,1),N_int)
|
! call print_det(det(N_int,1),N_int)
|
||||||
|
@ -311,7 +311,7 @@ subroutine run_slave_main
|
|||||||
if (mpi_master) then
|
if (mpi_master) then
|
||||||
print *, 'Running PT2'
|
print *, 'Running PT2'
|
||||||
endif
|
endif
|
||||||
!$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target)
|
!$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target+1)
|
||||||
i = omp_get_thread_num()
|
i = omp_get_thread_num()
|
||||||
call run_pt2_slave(0,i,pt2_e0_denominator)
|
call run_pt2_slave(0,i,pt2_e0_denominator)
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
@ -69,8 +69,8 @@ subroutine run_stochastic_cipsi
|
|||||||
|
|
||||||
do while ( &
|
do while ( &
|
||||||
(N_det < N_det_max) .and. &
|
(N_det < N_det_max) .and. &
|
||||||
(sum(abs(pt2_data % pt2(1:N_states)) * state_average_weight(1:N_states)) > pt2_max) .and. &
|
(maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and. &
|
||||||
(sum(abs(pt2_data % variance(1:N_states)) * state_average_weight(1:N_states)) > variance_max) .and. &
|
(maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and. &
|
||||||
(correlation_energy_ratio <= correlation_energy_ratio_max) &
|
(correlation_energy_ratio <= correlation_energy_ratio_max) &
|
||||||
)
|
)
|
||||||
write(*,'(A)') '--------------------------------------------------------------------------------'
|
write(*,'(A)') '--------------------------------------------------------------------------------'
|
||||||
|
@ -66,10 +66,27 @@ subroutine v_rho_oc_to_v_rho_ab(v_rho_o,v_rho_c,v_rho_a,v_rho_b)
|
|||||||
END_DOC
|
END_DOC
|
||||||
double precision, intent(in) :: v_rho_o,v_rho_c
|
double precision, intent(in) :: v_rho_o,v_rho_c
|
||||||
double precision, intent(out) :: v_rho_a,v_rho_b
|
double precision, intent(out) :: v_rho_a,v_rho_b
|
||||||
|
! print*,'in v_rho_oc_to_v_rho_ab'
|
||||||
|
! print*, v_rho_c , v_rho_o
|
||||||
v_rho_a = v_rho_c + v_rho_o
|
v_rho_a = v_rho_c + v_rho_o
|
||||||
v_rho_b = v_rho_c - v_rho_o
|
v_rho_b = v_rho_c - v_rho_o
|
||||||
end
|
end
|
||||||
|
|
||||||
|
subroutine v_grad_rho_ab_to_v_grad_rho_oc(v_grad_rho_a_2,v_grad_rho_b_2,v_grad_rho_a_b,v_grad_rho_o_2,v_grad_rho_c_2,v_grad_rho_o_c)
|
||||||
|
implicit none
|
||||||
|
double precision, intent(in) :: v_grad_rho_a_2,v_grad_rho_b_2,v_grad_rho_a_b
|
||||||
|
double precision, intent(out) :: v_grad_rho_o_2,v_grad_rho_c_2,v_grad_rho_o_c
|
||||||
|
BEGIN_DOC
|
||||||
|
! convert (v_grad_rho_a_2, v_grad_rho_b_2, v_grad_rho_a.grad_rho_b)
|
||||||
|
!
|
||||||
|
! to (v_grad_rho_c_2, v_grad_rho_o_2, v_grad_rho_o.grad_rho_c)
|
||||||
|
!
|
||||||
|
! rho_c = total density, rho_o spin density
|
||||||
|
END_DOC
|
||||||
|
v_grad_rho_c_2 = 0.25d0 * (v_grad_rho_a_2 + v_grad_rho_b_2 + v_grad_rho_a_b)
|
||||||
|
v_grad_rho_o_2 = 0.25d0 * (v_grad_rho_a_2 + v_grad_rho_b_2 - v_grad_rho_a_b)
|
||||||
|
v_grad_rho_o_c = 0.25d0 * (2d0 * v_grad_rho_a_2 - 2d0 * v_grad_rho_b_2 )
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
subroutine v_grad_rho_oc_to_v_grad_rho_ab(v_grad_rho_o_2,v_grad_rho_c_2,v_grad_rho_o_c,v_grad_rho_a_2,v_grad_rho_b_2,v_grad_rho_a_b)
|
subroutine v_grad_rho_oc_to_v_grad_rho_ab(v_grad_rho_o_2,v_grad_rho_c_2,v_grad_rho_o_c,v_grad_rho_a_2,v_grad_rho_b_2,v_grad_rho_a_b)
|
||||||
@ -88,21 +105,3 @@ subroutine v_grad_rho_oc_to_v_grad_rho_ab(v_grad_rho_o_2,v_grad_rho_c_2,v_grad_r
|
|||||||
v_grad_rho_a_b = -2d0 * v_grad_rho_o_2 + 2d0 * v_grad_rho_c_2
|
v_grad_rho_a_b = -2d0 * v_grad_rho_o_2 + 2d0 * v_grad_rho_c_2
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -45,6 +45,8 @@
|
|||||||
call density_and_grad_alpha_beta_and_all_aos_and_grad_aos_at_r(r,dm_a,dm_b, dm_a_grad, dm_b_grad, aos_array, grad_aos_array)
|
call density_and_grad_alpha_beta_and_all_aos_and_grad_aos_at_r(r,dm_a,dm_b, dm_a_grad, dm_b_grad, aos_array, grad_aos_array)
|
||||||
|
|
||||||
! alpha/beta density
|
! alpha/beta density
|
||||||
|
dm_a(istate) = max(dm_a(istate),1.d-12)
|
||||||
|
dm_b(istate) = max(dm_b(istate),1.d-12)
|
||||||
one_e_dm_and_grad_alpha_in_r(4,i,istate) = dm_a(istate)
|
one_e_dm_and_grad_alpha_in_r(4,i,istate) = dm_a(istate)
|
||||||
one_e_dm_and_grad_beta_in_r(4,i,istate) = dm_b(istate)
|
one_e_dm_and_grad_beta_in_r(4,i,istate) = dm_b(istate)
|
||||||
|
|
||||||
@ -80,6 +82,7 @@
|
|||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
!$OMP END PARALLEL DO
|
!$OMP END PARALLEL DO
|
||||||
|
print*,'density and gradients provided'
|
||||||
|
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
|
@ -18,6 +18,39 @@ function run() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
function run_pt_charges() {
|
||||||
|
thresh=1.e-5
|
||||||
|
cp ${QP_ROOT}/src/nuclei/write_pt_charges.py .
|
||||||
|
cat > hcn.xyz << EOF
|
||||||
|
3
|
||||||
|
HCN molecule
|
||||||
|
C 0.0 0.0 0.0
|
||||||
|
H 0.0 0.0 1.064
|
||||||
|
N 0.0 0.0 -1.156
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat > hcn_charges.xyz << EOF
|
||||||
|
0.5 2.0 0.0 0.0
|
||||||
|
0.5 -2.0 0.0 0.0
|
||||||
|
EOF
|
||||||
|
|
||||||
|
rm -rf hcn.ezfio
|
||||||
|
qp create_ezfio -b def2-svp hcn.xyz
|
||||||
|
qp run scf
|
||||||
|
mv hcn_charges.xyz hcn.ezfio_point_charges.xyz
|
||||||
|
python write_pt_charges.py hcn.ezfio
|
||||||
|
qp set nuclei point_charges True
|
||||||
|
qp run scf | tee hcn.ezfio.pt_charges.out
|
||||||
|
energy="$(ezfio get hartree_fock energy)"
|
||||||
|
rm -rf hcn.ezfio
|
||||||
|
good=-92.76613324421798
|
||||||
|
eq $energy $good $thresh
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "point charges" {
|
||||||
|
run_pt_charges
|
||||||
|
}
|
||||||
|
|
||||||
@test "B-B" { # 3s
|
@test "B-B" { # 3s
|
||||||
run b2_stretched.ezfio -48.9950585434279
|
run b2_stretched.ezfio -48.9950585434279
|
||||||
}
|
}
|
||||||
|
@ -49,7 +49,6 @@ subroutine create_guess
|
|||||||
if (.not.exists) then
|
if (.not.exists) then
|
||||||
mo_label = 'Guess'
|
mo_label = 'Guess'
|
||||||
if (mo_guess_type == "HCore") then
|
if (mo_guess_type == "HCore") then
|
||||||
mo_coef = ao_ortho_lowdin_coef
|
|
||||||
call restore_symmetry(ao_num,mo_num,mo_coef,size(mo_coef,1),1.d-10)
|
call restore_symmetry(ao_num,mo_num,mo_coef,size(mo_coef,1),1.d-10)
|
||||||
TOUCH mo_coef
|
TOUCH mo_coef
|
||||||
call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals, &
|
call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals, &
|
||||||
|
@ -235,11 +235,11 @@ subroutine get_mo_two_e_integrals_erf_ij(k,l,sze,out_array,map)
|
|||||||
|
|
||||||
logical :: integral_is_in_map
|
logical :: integral_is_in_map
|
||||||
if (key_kind == 8) then
|
if (key_kind == 8) then
|
||||||
call i8radix_sort(hash,iorder,kk,-1)
|
call i8sort(hash,iorder,kk)
|
||||||
else if (key_kind == 4) then
|
else if (key_kind == 4) then
|
||||||
call iradix_sort(hash,iorder,kk,-1)
|
call isort(hash,iorder,kk)
|
||||||
else if (key_kind == 2) then
|
else if (key_kind == 2) then
|
||||||
call i2radix_sort(hash,iorder,kk,-1)
|
call i2sort(hash,iorder,kk)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
call map_get_many(mo_integrals_erf_map, hash, tmp_val, kk)
|
call map_get_many(mo_integrals_erf_map, hash, tmp_val, kk)
|
||||||
@ -290,11 +290,11 @@ subroutine get_mo_two_e_integrals_erf_i1j1(k,l,sze,out_array,map)
|
|||||||
|
|
||||||
logical :: integral_is_in_map
|
logical :: integral_is_in_map
|
||||||
if (key_kind == 8) then
|
if (key_kind == 8) then
|
||||||
call i8radix_sort(hash,iorder,kk,-1)
|
call i8sort(hash,iorder,kk)
|
||||||
else if (key_kind == 4) then
|
else if (key_kind == 4) then
|
||||||
call iradix_sort(hash,iorder,kk,-1)
|
call isort(hash,iorder,kk)
|
||||||
else if (key_kind == 2) then
|
else if (key_kind == 2) then
|
||||||
call i2radix_sort(hash,iorder,kk,-1)
|
call i2sort(hash,iorder,kk)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
call map_get_many(mo_integrals_erf_map, hash, tmp_val, kk)
|
call map_get_many(mo_integrals_erf_map, hash, tmp_val, kk)
|
||||||
|
@ -53,7 +53,7 @@ BEGIN_PROVIDER [ double precision, h_core_ri, (mo_num, mo_num) ]
|
|||||||
enddo
|
enddo
|
||||||
do k=1,mo_num
|
do k=1,mo_num
|
||||||
do i=1,mo_num
|
do i=1,mo_num
|
||||||
h_core_ri(i,j) = h_core_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j)
|
h_core_ri(i,j) = h_core_ri(i,j) - 0.5d0 * big_array_exchange_integrals(k,i,j)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
|
@ -53,7 +53,11 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
|
|||||||
! call four_idx_novvvv
|
! call four_idx_novvvv
|
||||||
call four_idx_novvvv_old
|
call four_idx_novvvv_old
|
||||||
else
|
else
|
||||||
call add_integrals_to_map(full_ijkl_bitmask_4)
|
if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then
|
||||||
|
call four_idx_dgemm
|
||||||
|
else
|
||||||
|
call add_integrals_to_map(full_ijkl_bitmask_4)
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
call wall_time(wall_2)
|
call wall_time(wall_2)
|
||||||
@ -77,6 +81,94 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
|
|||||||
|
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
|
subroutine four_idx_dgemm
|
||||||
|
implicit none
|
||||||
|
integer :: p,q,r,s,i,j,k,l
|
||||||
|
double precision, allocatable :: a1(:,:,:,:)
|
||||||
|
double precision, allocatable :: a2(:,:,:,:)
|
||||||
|
|
||||||
|
allocate (a1(ao_num,ao_num,ao_num,ao_num))
|
||||||
|
|
||||||
|
print *, 'Getting AOs'
|
||||||
|
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,r,s)
|
||||||
|
do s=1,ao_num
|
||||||
|
do r=1,ao_num
|
||||||
|
do q=1,ao_num
|
||||||
|
call get_ao_two_e_integrals(q,r,s,ao_num,a1(1,q,r,s))
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
!$OMP END PARALLEL DO
|
||||||
|
|
||||||
|
print *, '1st transformation'
|
||||||
|
! 1st transformation
|
||||||
|
allocate (a2(ao_num,ao_num,ao_num,mo_num))
|
||||||
|
call dgemm('T','N', (ao_num*ao_num*ao_num), mo_num, ao_num, 1.d0, a1, ao_num, mo_coef, ao_num, 0.d0, a2, (ao_num*ao_num*ao_num))
|
||||||
|
|
||||||
|
! 2nd transformation
|
||||||
|
print *, '2nd transformation'
|
||||||
|
deallocate (a1)
|
||||||
|
allocate (a1(ao_num,ao_num,mo_num,mo_num))
|
||||||
|
call dgemm('T','N', (ao_num*ao_num*mo_num), mo_num, ao_num, 1.d0, a2, ao_num, mo_coef, ao_num, 0.d0, a1, (ao_num*ao_num*mo_num))
|
||||||
|
|
||||||
|
! 3rd transformation
|
||||||
|
print *, '3rd transformation'
|
||||||
|
deallocate (a2)
|
||||||
|
allocate (a2(ao_num,mo_num,mo_num,mo_num))
|
||||||
|
call dgemm('T','N', (ao_num*mo_num*mo_num), mo_num, ao_num, 1.d0, a1, ao_num, mo_coef, ao_num, 0.d0, a2, (ao_num*mo_num*mo_num))
|
||||||
|
|
||||||
|
! 4th transformation
|
||||||
|
print *, '4th transformation'
|
||||||
|
deallocate (a1)
|
||||||
|
allocate (a1(mo_num,mo_num,mo_num,mo_num))
|
||||||
|
call dgemm('T','N', (mo_num*mo_num*mo_num), mo_num, ao_num, 1.d0, a2, ao_num, mo_coef, ao_num, 0.d0, a1, (mo_num*mo_num*mo_num))
|
||||||
|
|
||||||
|
deallocate (a2)
|
||||||
|
|
||||||
|
integer :: n_integrals, size_buffer
|
||||||
|
integer(key_kind) , allocatable :: buffer_i(:)
|
||||||
|
real(integral_kind), allocatable :: buffer_value(:)
|
||||||
|
size_buffer = min(ao_num*ao_num*ao_num,16000000)
|
||||||
|
|
||||||
|
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,buffer_value,buffer_i,n_integrals)
|
||||||
|
allocate ( buffer_i(size_buffer), buffer_value(size_buffer) )
|
||||||
|
|
||||||
|
n_integrals = 0
|
||||||
|
!$OMP DO
|
||||||
|
do l=1,mo_num
|
||||||
|
do k=1,mo_num
|
||||||
|
do j=1,l
|
||||||
|
do i=1,k
|
||||||
|
if (abs(a1(i,j,k,l)) < mo_integrals_threshold) then
|
||||||
|
cycle
|
||||||
|
endif
|
||||||
|
n_integrals += 1
|
||||||
|
buffer_value(n_integrals) = a1(i,j,k,l)
|
||||||
|
!DIR$ FORCEINLINE
|
||||||
|
call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals))
|
||||||
|
if (n_integrals == size_buffer) then
|
||||||
|
call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
|
||||||
|
n_integrals = 0
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
!$OMP END DO
|
||||||
|
|
||||||
|
call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals)
|
||||||
|
|
||||||
|
deallocate(buffer_i, buffer_value)
|
||||||
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
|
deallocate (a1)
|
||||||
|
|
||||||
|
call map_unique(mo_integrals_map)
|
||||||
|
|
||||||
|
integer*8 :: get_mo_map_size, mo_map_size
|
||||||
|
mo_map_size = get_mo_map_size()
|
||||||
|
|
||||||
|
end subroutine
|
||||||
|
|
||||||
subroutine add_integrals_to_map(mask_ijkl)
|
subroutine add_integrals_to_map(mask_ijkl)
|
||||||
use bitmasks
|
use bitmasks
|
||||||
@ -153,24 +245,26 @@ subroutine add_integrals_to_map(mask_ijkl)
|
|||||||
return
|
return
|
||||||
endif
|
endif
|
||||||
|
|
||||||
size_buffer = min(ao_num*ao_num*ao_num,16000000)
|
call wall_time(wall_1)
|
||||||
|
|
||||||
|
size_buffer = min(ao_num*ao_num*ao_num,8000000)
|
||||||
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
|
print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
|
||||||
ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
|
ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
|
||||||
|
|
||||||
double precision :: accu_bis
|
|
||||||
accu_bis = 0.d0
|
|
||||||
call wall_time(wall_1)
|
|
||||||
|
|
||||||
!$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, &
|
!$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, &
|
||||||
!$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
|
!$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
|
||||||
!$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, &
|
!$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, &
|
||||||
!$OMP wall_0,thread_num,accu_bis) &
|
!$OMP wall_0,thread_num) &
|
||||||
!$OMP DEFAULT(NONE) &
|
!$OMP DEFAULT(NONE) &
|
||||||
!$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l, &
|
!$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l, &
|
||||||
!$OMP mo_coef_transp, &
|
!$OMP mo_coef_transp, &
|
||||||
!$OMP mo_coef_transp_is_built, list_ijkl, &
|
!$OMP mo_coef_transp_is_built, list_ijkl, &
|
||||||
!$OMP mo_coef_is_built, wall_1, &
|
!$OMP mo_coef_is_built, wall_1, &
|
||||||
!$OMP mo_coef,mo_integrals_threshold,mo_integrals_map)
|
!$OMP mo_coef,mo_integrals_threshold,mo_integrals_map)
|
||||||
|
|
||||||
|
thread_num = 0
|
||||||
|
!$ thread_num = omp_get_thread_num()
|
||||||
|
|
||||||
n_integrals = 0
|
n_integrals = 0
|
||||||
wall_0 = wall_1
|
wall_0 = wall_1
|
||||||
allocate(two_e_tmp_3(mo_num, n_j, n_k), &
|
allocate(two_e_tmp_3(mo_num, n_j, n_k), &
|
||||||
@ -181,8 +275,6 @@ subroutine add_integrals_to_map(mask_ijkl)
|
|||||||
buffer_i(size_buffer), &
|
buffer_i(size_buffer), &
|
||||||
buffer_value(size_buffer) )
|
buffer_value(size_buffer) )
|
||||||
|
|
||||||
thread_num = 0
|
|
||||||
!$ thread_num = omp_get_thread_num()
|
|
||||||
!$OMP DO SCHEDULE(guided)
|
!$OMP DO SCHEDULE(guided)
|
||||||
do l1 = 1,ao_num
|
do l1 = 1,ao_num
|
||||||
two_e_tmp_3 = 0.d0
|
two_e_tmp_3 = 0.d0
|
||||||
@ -340,10 +432,10 @@ subroutine add_integrals_to_map(mask_ijkl)
|
|||||||
!$OMP END DO NOWAIT
|
!$OMP END DO NOWAIT
|
||||||
deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
|
deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
|
||||||
|
|
||||||
integer :: index_needed
|
if (n_integrals > 0) then
|
||||||
|
call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
|
||||||
call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
|
real(mo_integrals_threshold,integral_kind))
|
||||||
real(mo_integrals_threshold,integral_kind))
|
endif
|
||||||
deallocate(buffer_i, buffer_value)
|
deallocate(buffer_i, buffer_value)
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
call map_merge(mo_integrals_map)
|
call map_merge(mo_integrals_map)
|
||||||
@ -433,12 +525,10 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
|
|||||||
|
|
||||||
call wall_time(wall_1)
|
call wall_time(wall_1)
|
||||||
call cpu_time(cpu_1)
|
call cpu_time(cpu_1)
|
||||||
double precision :: accu_bis
|
|
||||||
accu_bis = 0.d0
|
|
||||||
!$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, &
|
!$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, &
|
||||||
!$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
|
!$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
|
||||||
!$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, &
|
!$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, &
|
||||||
!$OMP wall_0,thread_num,accu_bis) &
|
!$OMP wall_0,thread_num) &
|
||||||
!$OMP DEFAULT(NONE) &
|
!$OMP DEFAULT(NONE) &
|
||||||
!$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k, &
|
!$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k, &
|
||||||
!$OMP mo_coef_transp, &
|
!$OMP mo_coef_transp, &
|
||||||
@ -636,8 +726,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
|
|||||||
!$OMP END DO NOWAIT
|
!$OMP END DO NOWAIT
|
||||||
deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
|
deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
|
||||||
|
|
||||||
integer :: index_needed
|
|
||||||
|
|
||||||
call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
|
call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
|
||||||
real(mo_integrals_threshold,integral_kind))
|
real(mo_integrals_threshold,integral_kind))
|
||||||
deallocate(buffer_i, buffer_value)
|
deallocate(buffer_i, buffer_value)
|
||||||
|
@ -37,3 +37,27 @@ type: logical
|
|||||||
doc: If true, the calculation uses periodic boundary conditions
|
doc: If true, the calculation uses periodic boundary conditions
|
||||||
interface: ezfio, provider, ocaml
|
interface: ezfio, provider, ocaml
|
||||||
default: false
|
default: false
|
||||||
|
[n_pts_charge]
|
||||||
|
type: integer
|
||||||
|
doc: Number of point charges to be added to the potential
|
||||||
|
interface: ezfio
|
||||||
|
default: 0
|
||||||
|
|
||||||
|
[pts_charge_z]
|
||||||
|
type: double precision
|
||||||
|
doc: Charge associated to each point charge
|
||||||
|
interface: ezfio
|
||||||
|
size: (nuclei.n_pts_charge)
|
||||||
|
|
||||||
|
[pts_charge_coord]
|
||||||
|
type: double precision
|
||||||
|
doc: Coordinate of each point charge.
|
||||||
|
interface: ezfio
|
||||||
|
size: (nuclei.n_pts_charge,3)
|
||||||
|
|
||||||
|
[point_charges]
|
||||||
|
type: logical
|
||||||
|
doc: If |true|, point charges (see nuclei/write_pt_charges.py) are added to the one-electron potential
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: False
|
||||||
|
|
||||||
|
@ -205,6 +205,9 @@ BEGIN_PROVIDER [ double precision, nuclear_repulsion ]
|
|||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
nuclear_repulsion *= 0.5d0
|
nuclear_repulsion *= 0.5d0
|
||||||
|
if(point_charges)then
|
||||||
|
nuclear_repulsion += pt_chrg_nuclei_interaction + pt_chrg_interaction
|
||||||
|
endif
|
||||||
end if
|
end if
|
||||||
|
|
||||||
call write_time(6)
|
call write_time(6)
|
||||||
|
209
src/nuclei/point_charges.irp.f
Normal file
209
src/nuclei/point_charges.irp.f
Normal file
@ -0,0 +1,209 @@
|
|||||||
|
! ---
|
||||||
|
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ integer, n_pts_charge ]
|
||||||
|
implicit none
|
||||||
|
BEGIN_DOC
|
||||||
|
! Number of point charges to be added to the potential
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
logical :: has
|
||||||
|
PROVIDE ezfio_filename
|
||||||
|
if (mpi_master) then
|
||||||
|
|
||||||
|
call ezfio_has_nuclei_n_pts_charge(has)
|
||||||
|
if (has) then
|
||||||
|
write(6,'(A)') '.. >>>>> [ IO READ: n_pts_charge ] <<<<< ..'
|
||||||
|
call ezfio_get_nuclei_n_pts_charge(n_pts_charge)
|
||||||
|
else
|
||||||
|
print *, 'nuclei/n_pts_charge not found in EZFIO file'
|
||||||
|
stop 1
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
IRP_IF MPI_DEBUG
|
||||||
|
print *, irp_here, mpi_rank
|
||||||
|
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
|
||||||
|
IRP_ENDIF
|
||||||
|
IRP_IF MPI
|
||||||
|
include 'mpif.h'
|
||||||
|
integer :: ierr
|
||||||
|
call MPI_BCAST( n_pts_charge, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
|
||||||
|
if (ierr /= MPI_SUCCESS) then
|
||||||
|
stop 'Unable to read n_pts_charge with MPI'
|
||||||
|
endif
|
||||||
|
IRP_ENDIF
|
||||||
|
|
||||||
|
call write_time(6)
|
||||||
|
|
||||||
|
END_PROVIDER
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ double precision, pts_charge_z, (n_pts_charge) ]
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Charge associated to each point charge.
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
logical :: exists
|
||||||
|
|
||||||
|
PROVIDE ezfio_filename
|
||||||
|
|
||||||
|
if (mpi_master) then
|
||||||
|
call ezfio_has_nuclei_pts_charge_z(exists)
|
||||||
|
endif
|
||||||
|
|
||||||
|
IRP_IF MPI_DEBUG
|
||||||
|
print *, irp_here, mpi_rank
|
||||||
|
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
|
||||||
|
IRP_ENDIF
|
||||||
|
|
||||||
|
IRP_IF MPI
|
||||||
|
include 'mpif.h'
|
||||||
|
integer :: ierr
|
||||||
|
call MPI_BCAST(pts_charge_z, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
|
||||||
|
if (ierr /= MPI_SUCCESS) then
|
||||||
|
stop 'Unable to read pts_charge_z with MPI'
|
||||||
|
endif
|
||||||
|
IRP_ENDIF
|
||||||
|
|
||||||
|
if (exists) then
|
||||||
|
|
||||||
|
if (mpi_master) then
|
||||||
|
write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_z ] <<<<< ..'
|
||||||
|
call ezfio_get_nuclei_pts_charge_z(pts_charge_z)
|
||||||
|
IRP_IF MPI
|
||||||
|
call MPI_BCAST(pts_charge_z, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
|
||||||
|
if (ierr /= MPI_SUCCESS) then
|
||||||
|
stop 'Unable to read pts_charge_z with MPI'
|
||||||
|
endif
|
||||||
|
IRP_ENDIF
|
||||||
|
endif
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
integer :: i
|
||||||
|
do i = 1, n_pts_charge
|
||||||
|
pts_charge_z(i) = 0.d0
|
||||||
|
enddo
|
||||||
|
|
||||||
|
endif
|
||||||
|
print*,'Point charges '
|
||||||
|
do i = 1, n_pts_charge
|
||||||
|
print*,'i,pts_charge_z(i)',i,pts_charge_z(i)
|
||||||
|
enddo
|
||||||
|
|
||||||
|
END_PROVIDER
|
||||||
|
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ double precision, pts_charge_coord, (n_pts_charge,3) ]
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Coordinates of each point charge.
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
logical :: exists
|
||||||
|
|
||||||
|
PROVIDE ezfio_filename
|
||||||
|
|
||||||
|
if (mpi_master) then
|
||||||
|
call ezfio_has_nuclei_pts_charge_coord(exists)
|
||||||
|
endif
|
||||||
|
|
||||||
|
IRP_IF MPI_DEBUG
|
||||||
|
print *, irp_here, mpi_rank
|
||||||
|
call MPI_BARRIER(MPI_COMM_WORLD, ierr)
|
||||||
|
IRP_ENDIF
|
||||||
|
|
||||||
|
IRP_IF MPI
|
||||||
|
include 'mpif.h'
|
||||||
|
integer :: ierr
|
||||||
|
call MPI_BCAST(pts_charge_coord, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
|
||||||
|
if (ierr /= MPI_SUCCESS) then
|
||||||
|
stop 'Unable to read pts_charge_coord with MPI'
|
||||||
|
endif
|
||||||
|
IRP_ENDIF
|
||||||
|
|
||||||
|
if (exists) then
|
||||||
|
|
||||||
|
if (mpi_master) then
|
||||||
|
double precision, allocatable :: buffer(:,:)
|
||||||
|
allocate (buffer(n_pts_charge,3))
|
||||||
|
write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_coord ] <<<<< ..'
|
||||||
|
call ezfio_get_nuclei_pts_charge_coord(buffer)
|
||||||
|
integer :: i,j
|
||||||
|
do i=1,3
|
||||||
|
do j=1,n_pts_charge
|
||||||
|
pts_charge_coord(j,i) = buffer(j,i)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
deallocate(buffer)
|
||||||
|
IRP_IF MPI
|
||||||
|
call MPI_BCAST(pts_charge_coord, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
|
||||||
|
if (ierr /= MPI_SUCCESS) then
|
||||||
|
stop 'Unable to read pts_charge_coord with MPI'
|
||||||
|
endif
|
||||||
|
IRP_ENDIF
|
||||||
|
endif
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
do i = 1, n_pts_charge
|
||||||
|
pts_charge_coord(i,:) = 0.d0
|
||||||
|
enddo
|
||||||
|
|
||||||
|
endif
|
||||||
|
print*,'Coordinates for the point charges '
|
||||||
|
do i = 1, n_pts_charge
|
||||||
|
write(*,'(I3,X,3(F16.8,X))') i,pts_charge_coord(i,1:3)
|
||||||
|
enddo
|
||||||
|
|
||||||
|
END_PROVIDER
|
||||||
|
|
||||||
|
! ---
|
||||||
|
BEGIN_PROVIDER [ double precision, pt_chrg_interaction]
|
||||||
|
implicit none
|
||||||
|
BEGIN_DOC
|
||||||
|
! Interaction between the point charges
|
||||||
|
END_DOC
|
||||||
|
integer :: i,j
|
||||||
|
double precision :: Z_A, z_B,A_center(3), B_center(3), dist
|
||||||
|
pt_chrg_interaction = 0.d0
|
||||||
|
do i = 1, n_pts_charge
|
||||||
|
Z_A = pts_charge_z(i)
|
||||||
|
A_center(1:3) = pts_charge_coord(i,1:3)
|
||||||
|
do j = i+1, n_pts_charge
|
||||||
|
Z_B = pts_charge_z(j)
|
||||||
|
B_center(1:3) = pts_charge_coord(j,1:3)
|
||||||
|
dist = (A_center(1)-B_center(1))**2 + (A_center(2)-B_center(2))**2 + (A_center(3)-B_center(3))**2
|
||||||
|
dist = dsqrt(dist)
|
||||||
|
pt_chrg_interaction += Z_A*Z_B/dist
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
print*,'Interaction between the point charges '
|
||||||
|
print*,'pt_chrg_interaction = ',pt_chrg_interaction
|
||||||
|
END_PROVIDER
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ double precision, pt_chrg_nuclei_interaction]
|
||||||
|
implicit none
|
||||||
|
BEGIN_DOC
|
||||||
|
! repulsion between the point charges and the nuclei
|
||||||
|
END_DOC
|
||||||
|
integer :: i,j
|
||||||
|
double precision :: Z_A, z_B,A_center(3), B_center(3), dist
|
||||||
|
pt_chrg_nuclei_interaction = 0.d0
|
||||||
|
do i = 1, n_pts_charge
|
||||||
|
Z_A = pts_charge_z(i)
|
||||||
|
A_center(1:3) = pts_charge_coord(i,1:3)
|
||||||
|
do j = 1, nucl_num
|
||||||
|
Z_B = nucl_charge(j)
|
||||||
|
B_center(1:3) = nucl_coord(j,1:3)
|
||||||
|
dist = (A_center(1)-B_center(1))**2 + (A_center(2)-B_center(2))**2 + (A_center(3)-B_center(3))**2
|
||||||
|
dist = dsqrt(dist)
|
||||||
|
pt_chrg_nuclei_interaction += Z_A*Z_B/dist
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
print*,'Interaction between point charges and nuclei'
|
||||||
|
print*,'pt_chrg_nuclei_interaction = ',pt_chrg_nuclei_interaction
|
||||||
|
END_PROVIDER
|
||||||
|
|
@ -16,7 +16,7 @@
|
|||||||
else
|
else
|
||||||
ref_tc_energy_3e = 0.d0
|
ref_tc_energy_3e = 0.d0
|
||||||
endif
|
endif
|
||||||
ref_tc_energy_tot = ref_tc_energy_1e + ref_tc_energy_2e + ref_tc_energy_3e
|
ref_tc_energy_tot = ref_tc_energy_1e + ref_tc_energy_2e + ref_tc_energy_3e + nuclear_repulsion
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, htot)
|
subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, htot)
|
||||||
@ -88,7 +88,7 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree,
|
|||||||
call a_tc_operator ( occ_hole (i,ispin), ispin, det_tmp, hmono,htwoe,hthree, Nint,na,nb)
|
call a_tc_operator ( occ_hole (i,ispin), ispin, det_tmp, hmono,htwoe,hthree, Nint,na,nb)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
htot = hmono+htwoe+hthree
|
htot = hmono+htwoe+hthree+nuclear_repulsion
|
||||||
end
|
end
|
||||||
|
|
||||||
subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
|
subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
|
||||||
|
@ -17,6 +17,7 @@ subroutine routine_active_only
|
|||||||
double precision :: wee_ab_st_av, rdm_ab_st_av
|
double precision :: wee_ab_st_av, rdm_ab_st_av
|
||||||
double precision :: wee_tot_st_av, rdm_tot_st_av,spin_trace
|
double precision :: wee_tot_st_av, rdm_tot_st_av,spin_trace
|
||||||
double precision :: wee_aa_st_av_2,wee_ab_st_av_2,wee_bb_st_av_2,wee_tot_st_av_2,wee_tot_st_av_3
|
double precision :: wee_aa_st_av_2,wee_ab_st_av_2,wee_bb_st_av_2,wee_tot_st_av_2,wee_tot_st_av_3
|
||||||
|
double precision :: accu_aa, accu_bb, accu_ab, accu_tot
|
||||||
|
|
||||||
wee_ab = 0.d0
|
wee_ab = 0.d0
|
||||||
wee_bb = 0.d0
|
wee_bb = 0.d0
|
||||||
@ -64,14 +65,23 @@ subroutine routine_active_only
|
|||||||
do istate = 1, N_states
|
do istate = 1, N_states
|
||||||
!! PURE ACTIVE PART
|
!! PURE ACTIVE PART
|
||||||
!!
|
!!
|
||||||
|
accu_aa = 0.d0
|
||||||
|
accu_bb = 0.d0
|
||||||
|
accu_ab = 0.d0
|
||||||
|
accu_tot = 0.d0
|
||||||
do i = 1, n_act_orb
|
do i = 1, n_act_orb
|
||||||
iorb = list_act(i)
|
iorb = list_act(i)
|
||||||
do j = 1, n_act_orb
|
do j = 1, n_act_orb
|
||||||
jorb = list_act(j)
|
jorb = list_act(j)
|
||||||
|
accu_bb += act_2_rdm_bb_mo(j,i,j,i,1)
|
||||||
|
accu_aa += act_2_rdm_aa_mo(j,i,j,i,1)
|
||||||
|
accu_ab += act_2_rdm_ab_mo(j,i,j,i,1)
|
||||||
|
accu_tot += act_2_rdm_spin_trace_mo(j,i,j,i,1)
|
||||||
do k = 1, n_act_orb
|
do k = 1, n_act_orb
|
||||||
korb = list_act(k)
|
korb = list_act(k)
|
||||||
do l = 1, n_act_orb
|
do l = 1, n_act_orb
|
||||||
lorb = list_act(l)
|
lorb = list_act(l)
|
||||||
|
! 1 2 1 2 2 1 2 1
|
||||||
if(dabs(act_2_rdm_spin_trace_mo(i,j,k,l,istate) - act_2_rdm_spin_trace_mo(j,i,l,k,istate)).gt.1.d-10)then
|
if(dabs(act_2_rdm_spin_trace_mo(i,j,k,l,istate) - act_2_rdm_spin_trace_mo(j,i,l,k,istate)).gt.1.d-10)then
|
||||||
print*,'Error in act_2_rdm_spin_trace_mo'
|
print*,'Error in act_2_rdm_spin_trace_mo'
|
||||||
print*,"dabs(act_2_rdm_spin_trace_mo(i,j,k,l) - act_2_rdm_spin_trace_mo(j,i,l,k)).gt.1.d-10"
|
print*,"dabs(act_2_rdm_spin_trace_mo(i,j,k,l) - act_2_rdm_spin_trace_mo(j,i,l,k)).gt.1.d-10"
|
||||||
@ -79,6 +89,7 @@ subroutine routine_active_only
|
|||||||
print*,act_2_rdm_spin_trace_mo(i,j,k,l,istate),act_2_rdm_spin_trace_mo(j,i,l,k,istate),dabs(act_2_rdm_spin_trace_mo(i,j,k,l,istate) - act_2_rdm_spin_trace_mo(j,i,l,k,istate))
|
print*,act_2_rdm_spin_trace_mo(i,j,k,l,istate),act_2_rdm_spin_trace_mo(j,i,l,k,istate),dabs(act_2_rdm_spin_trace_mo(i,j,k,l,istate) - act_2_rdm_spin_trace_mo(j,i,l,k,istate))
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
! 1 2 1 2 1 2 1 2
|
||||||
if(dabs(act_2_rdm_spin_trace_mo(i,j,k,l,istate) - act_2_rdm_spin_trace_mo(k,l,i,j,istate)).gt.1.d-10)then
|
if(dabs(act_2_rdm_spin_trace_mo(i,j,k,l,istate) - act_2_rdm_spin_trace_mo(k,l,i,j,istate)).gt.1.d-10)then
|
||||||
print*,'Error in act_2_rdm_spin_trace_mo'
|
print*,'Error in act_2_rdm_spin_trace_mo'
|
||||||
print*,"dabs(act_2_rdm_spin_trace_mo(i,j,k,l,istate) - act_2_rdm_spin_trace_mo(k,l,i,j,istate),istate).gt.1.d-10"
|
print*,"dabs(act_2_rdm_spin_trace_mo(i,j,k,l,istate) - act_2_rdm_spin_trace_mo(k,l,i,j,istate),istate).gt.1.d-10"
|
||||||
@ -131,6 +142,15 @@ subroutine routine_active_only
|
|||||||
print*,'wee_tot = ',wee_tot(istate)
|
print*,'wee_tot = ',wee_tot(istate)
|
||||||
print*,'Full energy '
|
print*,'Full energy '
|
||||||
print*,'psi_energy_two_e(istate)= ',psi_energy_two_e(istate)
|
print*,'psi_energy_two_e(istate)= ',psi_energy_two_e(istate)
|
||||||
|
print*,'--------------------------'
|
||||||
|
print*,'accu_aa = ',accu_aa
|
||||||
|
print*,'N_a (N_a-1)/2 = ', elec_alpha_num*(elec_alpha_num-1)*0.5
|
||||||
|
print*,'accu_bb = ',accu_bb
|
||||||
|
print*,'N_b (N_b-1)/2 = ', elec_beta_num*(elec_beta_num-1)*0.5
|
||||||
|
print*,'accu_ab = ',accu_ab
|
||||||
|
print*,'N_a N_b = ', elec_beta_num*elec_alpha_num
|
||||||
|
print*,'accu_tot = ',accu_tot
|
||||||
|
print*,'Ne(Ne-1)/2 = ',(elec_num-1)*elec_num * 0.5
|
||||||
enddo
|
enddo
|
||||||
wee_aa_st_av = 0.d0
|
wee_aa_st_av = 0.d0
|
||||||
wee_bb_st_av = 0.d0
|
wee_bb_st_av = 0.d0
|
||||||
|
@ -14,6 +14,7 @@ double precision, parameter :: thresh = 1.d-15
|
|||||||
double precision, parameter :: cx_lda = -0.73855876638202234d0
|
double precision, parameter :: cx_lda = -0.73855876638202234d0
|
||||||
double precision, parameter :: c_2_4_3 = 2.5198420997897464d0
|
double precision, parameter :: c_2_4_3 = 2.5198420997897464d0
|
||||||
double precision, parameter :: cst_lda = -0.93052573634909996d0
|
double precision, parameter :: cst_lda = -0.93052573634909996d0
|
||||||
double precision, parameter :: c_4_3 = 1.3333333333333333d0
|
double precision, parameter :: c_4_3 = 4.d0/3.d0
|
||||||
double precision, parameter :: c_1_3 = 0.3333333333333333d0
|
double precision, parameter :: c_1_3 = 1.d0/3.d0
|
||||||
|
double precision, parameter :: sq_op5 = dsqrt(0.5d0)
|
||||||
|
double precision, parameter :: dlog_2pi = dlog(2.d0*dacos(-1.d0))
|
||||||
|
71
src/utils/format_w_error.irp.f
Normal file
71
src/utils/format_w_error.irp.f
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_error)
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Format for double precision, value(error)
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
! in
|
||||||
|
! | value | double precision | value... |
|
||||||
|
! | error | double precision | error... |
|
||||||
|
! | size_nb | integer | X in FX.Y |
|
||||||
|
! | max_nb_digits | integer | Max Y in FX.Y |
|
||||||
|
|
||||||
|
! out
|
||||||
|
! | format_value | character | string FX.Y for the format |
|
||||||
|
! | str_error | character | string of the error |
|
||||||
|
|
||||||
|
! internal
|
||||||
|
! | str_size | character | size in string format |
|
||||||
|
! | nb_digits | integer | number of digits Y in FX.Y depending of the error |
|
||||||
|
! | str_nb_digits | character | nb_digits in string format |
|
||||||
|
! | str_exp | character | string of the value in exponential format |
|
||||||
|
|
||||||
|
! in
|
||||||
|
double precision, intent(in) :: error, value
|
||||||
|
integer, intent(in) :: size_nb, max_nb_digits
|
||||||
|
|
||||||
|
! out
|
||||||
|
character(len=20), intent(out) :: str_error, format_value
|
||||||
|
|
||||||
|
! internal
|
||||||
|
character(len=20) :: str_size, str_nb_digits, str_exp
|
||||||
|
integer :: nb_digits
|
||||||
|
|
||||||
|
! max_nb_digit: Y max
|
||||||
|
! size_nb = Size of the double: X (FX.Y)
|
||||||
|
write(str_size,'(I3)') size_nb
|
||||||
|
|
||||||
|
! Error
|
||||||
|
write(str_exp,'(1pE20.0)') error
|
||||||
|
str_error = trim(adjustl(str_exp))
|
||||||
|
|
||||||
|
! Number of digit: Y (FX.Y) from the exponent
|
||||||
|
str_nb_digits = str_exp(19:20)
|
||||||
|
read(str_nb_digits,*) nb_digits
|
||||||
|
|
||||||
|
! If the error is 0d0
|
||||||
|
if (error <= 1d-16) then
|
||||||
|
write(str_nb_digits,*) max_nb_digits
|
||||||
|
endif
|
||||||
|
|
||||||
|
! If the error is too small
|
||||||
|
if (nb_digits > max_nb_digits) then
|
||||||
|
write(str_nb_digits,*) max_nb_digits
|
||||||
|
str_error(1:1) = '0'
|
||||||
|
endif
|
||||||
|
|
||||||
|
! If the error is too big (>= 0.5)
|
||||||
|
if (error >= 0.5d0) then
|
||||||
|
str_nb_digits = '1'
|
||||||
|
str_error(1:1) = '*'
|
||||||
|
endif
|
||||||
|
|
||||||
|
! FX.Y,A1,A1,A1 for value(str_error)
|
||||||
|
!string = 'F'//trim(adjustl(str_size))//'.'//trim(adjustl(str_nb_digits))//',A1,A1,A1'
|
||||||
|
|
||||||
|
! FX.Y just for the value
|
||||||
|
format_value = 'F'//trim(adjustl(str_size))//'.'//trim(adjustl(str_nb_digits))
|
||||||
|
|
||||||
|
end
|
@ -238,11 +238,11 @@ subroutine cache_map_sort(map)
|
|||||||
iorder(i) = i
|
iorder(i) = i
|
||||||
enddo
|
enddo
|
||||||
if (cache_key_kind == 2) then
|
if (cache_key_kind == 2) then
|
||||||
call i2radix_sort(map%key,iorder,map%n_elements,-1)
|
call i2sort(map%key,iorder,map%n_elements,-1)
|
||||||
else if (cache_key_kind == 4) then
|
else if (cache_key_kind == 4) then
|
||||||
call iradix_sort(map%key,iorder,map%n_elements,-1)
|
call isort(map%key,iorder,map%n_elements,-1)
|
||||||
else if (cache_key_kind == 8) then
|
else if (cache_key_kind == 8) then
|
||||||
call i8radix_sort(map%key,iorder,map%n_elements,-1)
|
call i8sort(map%key,iorder,map%n_elements,-1)
|
||||||
endif
|
endif
|
||||||
if (integral_kind == 4) then
|
if (integral_kind == 4) then
|
||||||
call set_order(map%value,iorder,map%n_elements)
|
call set_order(map%value,iorder,map%n_elements)
|
||||||
|
373
src/utils/qsort.c
Normal file
373
src/utils/qsort.c
Normal file
@ -0,0 +1,373 @@
|
|||||||
|
/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
struct int16_t_comp {
|
||||||
|
int16_t x;
|
||||||
|
int32_t i;
|
||||||
|
};
|
||||||
|
|
||||||
|
int compare_int16_t( const void * l, const void * r )
|
||||||
|
{
|
||||||
|
const int16_t * restrict _l= l;
|
||||||
|
const int16_t * restrict _r= r;
|
||||||
|
if( *_l > *_r ) return 1;
|
||||||
|
if( *_l < *_r ) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
|
||||||
|
struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp));
|
||||||
|
if (A == NULL) return;
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A[i].x = A_in[i];
|
||||||
|
A[i].i = iorder[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(struct int16_t_comp), compare_int16_t);
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A_in[i] = A[i].x;
|
||||||
|
iorder[i] = A[i].i;
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_int16_t_noidx(int16_t* A, int32_t isize) {
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(int16_t), compare_int16_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct int16_t_comp_big {
|
||||||
|
int16_t x;
|
||||||
|
int64_t i;
|
||||||
|
};
|
||||||
|
|
||||||
|
int compare_int16_t_big( const void * l, const void * r )
|
||||||
|
{
|
||||||
|
const int16_t * restrict _l= l;
|
||||||
|
const int16_t * restrict _r= r;
|
||||||
|
if( *_l > *_r ) return 1;
|
||||||
|
if( *_l < *_r ) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
|
||||||
|
struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big));
|
||||||
|
if (A == NULL) return;
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A[i].x = A_in[i];
|
||||||
|
A[i].i = iorder[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(struct int16_t_comp_big), compare_int16_t_big);
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A_in[i] = A[i].x;
|
||||||
|
iorder[i] = A[i].i;
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_int16_t_noidx_big(int16_t* A, int64_t isize) {
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(int16_t), compare_int16_t_big);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct int32_t_comp {
|
||||||
|
int32_t x;
|
||||||
|
int32_t i;
|
||||||
|
};
|
||||||
|
|
||||||
|
int compare_int32_t( const void * l, const void * r )
|
||||||
|
{
|
||||||
|
const int32_t * restrict _l= l;
|
||||||
|
const int32_t * restrict _r= r;
|
||||||
|
if( *_l > *_r ) return 1;
|
||||||
|
if( *_l < *_r ) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
|
||||||
|
struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp));
|
||||||
|
if (A == NULL) return;
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A[i].x = A_in[i];
|
||||||
|
A[i].i = iorder[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(struct int32_t_comp), compare_int32_t);
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A_in[i] = A[i].x;
|
||||||
|
iorder[i] = A[i].i;
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_int32_t_noidx(int32_t* A, int32_t isize) {
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(int32_t), compare_int32_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct int32_t_comp_big {
|
||||||
|
int32_t x;
|
||||||
|
int64_t i;
|
||||||
|
};
|
||||||
|
|
||||||
|
int compare_int32_t_big( const void * l, const void * r )
|
||||||
|
{
|
||||||
|
const int32_t * restrict _l= l;
|
||||||
|
const int32_t * restrict _r= r;
|
||||||
|
if( *_l > *_r ) return 1;
|
||||||
|
if( *_l < *_r ) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
|
||||||
|
struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big));
|
||||||
|
if (A == NULL) return;
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A[i].x = A_in[i];
|
||||||
|
A[i].i = iorder[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(struct int32_t_comp_big), compare_int32_t_big);
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A_in[i] = A[i].x;
|
||||||
|
iorder[i] = A[i].i;
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_int32_t_noidx_big(int32_t* A, int64_t isize) {
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(int32_t), compare_int32_t_big);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct int64_t_comp {
|
||||||
|
int64_t x;
|
||||||
|
int32_t i;
|
||||||
|
};
|
||||||
|
|
||||||
|
int compare_int64_t( const void * l, const void * r )
|
||||||
|
{
|
||||||
|
const int64_t * restrict _l= l;
|
||||||
|
const int64_t * restrict _r= r;
|
||||||
|
if( *_l > *_r ) return 1;
|
||||||
|
if( *_l < *_r ) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) {
|
||||||
|
struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp));
|
||||||
|
if (A == NULL) return;
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A[i].x = A_in[i];
|
||||||
|
A[i].i = iorder[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(struct int64_t_comp), compare_int64_t);
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A_in[i] = A[i].x;
|
||||||
|
iorder[i] = A[i].i;
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_int64_t_noidx(int64_t* A, int32_t isize) {
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(int64_t), compare_int64_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct int64_t_comp_big {
|
||||||
|
int64_t x;
|
||||||
|
int64_t i;
|
||||||
|
};
|
||||||
|
|
||||||
|
int compare_int64_t_big( const void * l, const void * r )
|
||||||
|
{
|
||||||
|
const int64_t * restrict _l= l;
|
||||||
|
const int64_t * restrict _r= r;
|
||||||
|
if( *_l > *_r ) return 1;
|
||||||
|
if( *_l < *_r ) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) {
|
||||||
|
struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big));
|
||||||
|
if (A == NULL) return;
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A[i].x = A_in[i];
|
||||||
|
A[i].i = iorder[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(struct int64_t_comp_big), compare_int64_t_big);
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A_in[i] = A[i].x;
|
||||||
|
iorder[i] = A[i].i;
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_int64_t_noidx_big(int64_t* A, int64_t isize) {
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(int64_t), compare_int64_t_big);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct double_comp {
|
||||||
|
double x;
|
||||||
|
int32_t i;
|
||||||
|
};
|
||||||
|
|
||||||
|
int compare_double( const void * l, const void * r )
|
||||||
|
{
|
||||||
|
const double * restrict _l= l;
|
||||||
|
const double * restrict _r= r;
|
||||||
|
if( *_l > *_r ) return 1;
|
||||||
|
if( *_l < *_r ) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) {
|
||||||
|
struct double_comp* A = malloc(isize * sizeof(struct double_comp));
|
||||||
|
if (A == NULL) return;
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A[i].x = A_in[i];
|
||||||
|
A[i].i = iorder[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(struct double_comp), compare_double);
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A_in[i] = A[i].x;
|
||||||
|
iorder[i] = A[i].i;
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_double_noidx(double* A, int32_t isize) {
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(double), compare_double);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct double_comp_big {
|
||||||
|
double x;
|
||||||
|
int64_t i;
|
||||||
|
};
|
||||||
|
|
||||||
|
int compare_double_big( const void * l, const void * r )
|
||||||
|
{
|
||||||
|
const double * restrict _l= l;
|
||||||
|
const double * restrict _r= r;
|
||||||
|
if( *_l > *_r ) return 1;
|
||||||
|
if( *_l < *_r ) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) {
|
||||||
|
struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big));
|
||||||
|
if (A == NULL) return;
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A[i].x = A_in[i];
|
||||||
|
A[i].i = iorder[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(struct double_comp_big), compare_double_big);
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A_in[i] = A[i].x;
|
||||||
|
iorder[i] = A[i].i;
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_double_noidx_big(double* A, int64_t isize) {
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(double), compare_double_big);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct float_comp {
|
||||||
|
float x;
|
||||||
|
int32_t i;
|
||||||
|
};
|
||||||
|
|
||||||
|
int compare_float( const void * l, const void * r )
|
||||||
|
{
|
||||||
|
const float * restrict _l= l;
|
||||||
|
const float * restrict _r= r;
|
||||||
|
if( *_l > *_r ) return 1;
|
||||||
|
if( *_l < *_r ) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) {
|
||||||
|
struct float_comp* A = malloc(isize * sizeof(struct float_comp));
|
||||||
|
if (A == NULL) return;
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A[i].x = A_in[i];
|
||||||
|
A[i].i = iorder[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(struct float_comp), compare_float);
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A_in[i] = A[i].x;
|
||||||
|
iorder[i] = A[i].i;
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_float_noidx(float* A, int32_t isize) {
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(float), compare_float);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct float_comp_big {
|
||||||
|
float x;
|
||||||
|
int64_t i;
|
||||||
|
};
|
||||||
|
|
||||||
|
int compare_float_big( const void * l, const void * r )
|
||||||
|
{
|
||||||
|
const float * restrict _l= l;
|
||||||
|
const float * restrict _r= r;
|
||||||
|
if( *_l > *_r ) return 1;
|
||||||
|
if( *_l < *_r ) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) {
|
||||||
|
struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big));
|
||||||
|
if (A == NULL) return;
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A[i].x = A_in[i];
|
||||||
|
A[i].i = iorder[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(struct float_comp_big), compare_float_big);
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A_in[i] = A[i].x;
|
||||||
|
iorder[i] = A[i].i;
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_float_noidx_big(float* A, int64_t isize) {
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(float), compare_float_big);
|
||||||
|
}
|
||||||
|
/* Generated C file:1 ends here */
|
169
src/utils/qsort.org
Normal file
169
src/utils/qsort.org
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
#+TITLE: Quick sort binding for Fortran
|
||||||
|
|
||||||
|
* C template
|
||||||
|
|
||||||
|
#+NAME: c_template
|
||||||
|
#+BEGIN_SRC c
|
||||||
|
struct TYPE_comp_big {
|
||||||
|
TYPE x;
|
||||||
|
int32_t i;
|
||||||
|
};
|
||||||
|
|
||||||
|
int compare_TYPE_big( const void * l, const void * r )
|
||||||
|
{
|
||||||
|
const TYPE * restrict _l= l;
|
||||||
|
const TYPE * restrict _r= r;
|
||||||
|
if( *_l > *_r ) return 1;
|
||||||
|
if( *_l < *_r ) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) {
|
||||||
|
struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big));
|
||||||
|
if (A == NULL) return;
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A[i].x = A_in[i];
|
||||||
|
A[i].i = iorder[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(struct TYPE_comp_big), compare_TYPE_big);
|
||||||
|
|
||||||
|
for (int i=0 ; i<isize ; ++i) {
|
||||||
|
A_in[i] = A[i].x;
|
||||||
|
iorder[i] = A[i].i;
|
||||||
|
}
|
||||||
|
free(A);
|
||||||
|
}
|
||||||
|
|
||||||
|
void qsort_TYPE_noidx_big(TYPE* A, int32_t isize) {
|
||||||
|
qsort( (void*) A, (size_t) isize, sizeof(TYPE), compare_TYPE_big);
|
||||||
|
}
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
* Fortran template
|
||||||
|
|
||||||
|
#+NAME:f_template
|
||||||
|
#+BEGIN_SRC f90
|
||||||
|
subroutine Lsort_big_c(A, iorder, isize) bind(C, name="qsort_TYPE_big")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t), value :: isize
|
||||||
|
integer(c_int32_t) :: iorder(isize)
|
||||||
|
real (c_TYPE) :: A(isize)
|
||||||
|
end subroutine Lsort_big_c
|
||||||
|
|
||||||
|
subroutine Lsort_noidx_big_c(A, isize) bind(C, name="qsort_TYPE_noidx_big")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t), value :: isize
|
||||||
|
real (c_TYPE) :: A(isize)
|
||||||
|
end subroutine Lsort_noidx_big_c
|
||||||
|
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
#+NAME:f_template2
|
||||||
|
#+BEGIN_SRC f90
|
||||||
|
subroutine Lsort_big(A, iorder, isize)
|
||||||
|
use qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t) :: isize
|
||||||
|
integer(c_int32_t) :: iorder(isize)
|
||||||
|
real (c_TYPE) :: A(isize)
|
||||||
|
call Lsort_big_c(A, iorder, isize)
|
||||||
|
end subroutine Lsort_big
|
||||||
|
|
||||||
|
subroutine Lsort_noidx_big(A, isize)
|
||||||
|
use iso_c_binding
|
||||||
|
use qsort_module
|
||||||
|
integer(c_int32_t) :: isize
|
||||||
|
real (c_TYPE) :: A(isize)
|
||||||
|
call Lsort_noidx_big_c(A, isize)
|
||||||
|
end subroutine Lsort_noidx_big
|
||||||
|
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
* Python scripts for type replacements
|
||||||
|
|
||||||
|
#+NAME: replaced
|
||||||
|
#+begin_src python :results output :noweb yes
|
||||||
|
data = """
|
||||||
|
<<c_template>>
|
||||||
|
"""
|
||||||
|
for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
|
||||||
|
print( data.replace("TYPE", typ).replace("_big", "") )
|
||||||
|
print( data.replace("int32_t", "int64_t").replace("TYPE", typ) )
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+NAME: replaced_f
|
||||||
|
#+begin_src python :results output :noweb yes
|
||||||
|
data = """
|
||||||
|
<<f_template>>
|
||||||
|
"""
|
||||||
|
c1 = {
|
||||||
|
"int16_t": "i2",
|
||||||
|
"int32_t": "i",
|
||||||
|
"int64_t": "i8",
|
||||||
|
"double": "d",
|
||||||
|
"float": ""
|
||||||
|
}
|
||||||
|
c2 = {
|
||||||
|
"int16_t": "integer",
|
||||||
|
"int32_t": "integer",
|
||||||
|
"int64_t": "integer",
|
||||||
|
"double": "real",
|
||||||
|
"float": "real"
|
||||||
|
}
|
||||||
|
|
||||||
|
for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
|
||||||
|
print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") )
|
||||||
|
print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) )
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
#+NAME: replaced_f2
|
||||||
|
#+begin_src python :results output :noweb yes
|
||||||
|
data = """
|
||||||
|
<<f_template2>>
|
||||||
|
"""
|
||||||
|
c1 = {
|
||||||
|
"int16_t": "i2",
|
||||||
|
"int32_t": "i",
|
||||||
|
"int64_t": "i8",
|
||||||
|
"double": "d",
|
||||||
|
"float": ""
|
||||||
|
}
|
||||||
|
c2 = {
|
||||||
|
"int16_t": "integer",
|
||||||
|
"int32_t": "integer",
|
||||||
|
"int64_t": "integer",
|
||||||
|
"double": "real",
|
||||||
|
"float": "real"
|
||||||
|
}
|
||||||
|
|
||||||
|
for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]:
|
||||||
|
print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") )
|
||||||
|
print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) )
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
* Generated C file
|
||||||
|
|
||||||
|
#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
<<replaced()>>
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
* Generated Fortran file
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes
|
||||||
|
module qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
|
||||||
|
interface
|
||||||
|
<<replaced_f()>>
|
||||||
|
end interface
|
||||||
|
|
||||||
|
end module qsort_module
|
||||||
|
|
||||||
|
<<replaced_f2()>>
|
||||||
|
|
||||||
|
#+END_SRC
|
||||||
|
|
347
src/utils/qsort_module.f90
Normal file
347
src/utils/qsort_module.f90
Normal file
@ -0,0 +1,347 @@
|
|||||||
|
module qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
|
||||||
|
interface
|
||||||
|
|
||||||
|
subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t), value :: isize
|
||||||
|
integer(c_int32_t) :: iorder(isize)
|
||||||
|
integer (c_int16_t) :: A(isize)
|
||||||
|
end subroutine i2sort_c
|
||||||
|
|
||||||
|
subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t), value :: isize
|
||||||
|
integer (c_int16_t) :: A(isize)
|
||||||
|
end subroutine i2sort_noidx_c
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t), value :: isize
|
||||||
|
integer(c_int64_t) :: iorder(isize)
|
||||||
|
integer (c_int16_t) :: A(isize)
|
||||||
|
end subroutine i2sort_big_c
|
||||||
|
|
||||||
|
subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t), value :: isize
|
||||||
|
integer (c_int16_t) :: A(isize)
|
||||||
|
end subroutine i2sort_noidx_big_c
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t), value :: isize
|
||||||
|
integer(c_int32_t) :: iorder(isize)
|
||||||
|
integer (c_int32_t) :: A(isize)
|
||||||
|
end subroutine isort_c
|
||||||
|
|
||||||
|
subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t), value :: isize
|
||||||
|
integer (c_int32_t) :: A(isize)
|
||||||
|
end subroutine isort_noidx_c
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t), value :: isize
|
||||||
|
integer(c_int64_t) :: iorder(isize)
|
||||||
|
integer (c_int32_t) :: A(isize)
|
||||||
|
end subroutine isort_big_c
|
||||||
|
|
||||||
|
subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t), value :: isize
|
||||||
|
integer (c_int32_t) :: A(isize)
|
||||||
|
end subroutine isort_noidx_big_c
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t), value :: isize
|
||||||
|
integer(c_int32_t) :: iorder(isize)
|
||||||
|
integer (c_int64_t) :: A(isize)
|
||||||
|
end subroutine i8sort_c
|
||||||
|
|
||||||
|
subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t), value :: isize
|
||||||
|
integer (c_int64_t) :: A(isize)
|
||||||
|
end subroutine i8sort_noidx_c
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t), value :: isize
|
||||||
|
integer(c_int64_t) :: iorder(isize)
|
||||||
|
integer (c_int64_t) :: A(isize)
|
||||||
|
end subroutine i8sort_big_c
|
||||||
|
|
||||||
|
subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t), value :: isize
|
||||||
|
integer (c_int64_t) :: A(isize)
|
||||||
|
end subroutine i8sort_noidx_big_c
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t), value :: isize
|
||||||
|
integer(c_int32_t) :: iorder(isize)
|
||||||
|
real (c_double) :: A(isize)
|
||||||
|
end subroutine dsort_c
|
||||||
|
|
||||||
|
subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t), value :: isize
|
||||||
|
real (c_double) :: A(isize)
|
||||||
|
end subroutine dsort_noidx_c
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t), value :: isize
|
||||||
|
integer(c_int64_t) :: iorder(isize)
|
||||||
|
real (c_double) :: A(isize)
|
||||||
|
end subroutine dsort_big_c
|
||||||
|
|
||||||
|
subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t), value :: isize
|
||||||
|
real (c_double) :: A(isize)
|
||||||
|
end subroutine dsort_noidx_big_c
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t), value :: isize
|
||||||
|
integer(c_int32_t) :: iorder(isize)
|
||||||
|
real (c_float) :: A(isize)
|
||||||
|
end subroutine sort_c
|
||||||
|
|
||||||
|
subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t), value :: isize
|
||||||
|
real (c_float) :: A(isize)
|
||||||
|
end subroutine sort_noidx_c
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t), value :: isize
|
||||||
|
integer(c_int64_t) :: iorder(isize)
|
||||||
|
real (c_float) :: A(isize)
|
||||||
|
end subroutine sort_big_c
|
||||||
|
|
||||||
|
subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big")
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t), value :: isize
|
||||||
|
real (c_float) :: A(isize)
|
||||||
|
end subroutine sort_noidx_big_c
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
end interface
|
||||||
|
|
||||||
|
end module qsort_module
|
||||||
|
|
||||||
|
|
||||||
|
subroutine i2sort(A, iorder, isize)
|
||||||
|
use qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t) :: isize
|
||||||
|
integer(c_int32_t) :: iorder(isize)
|
||||||
|
integer (c_int16_t) :: A(isize)
|
||||||
|
call i2sort_c(A, iorder, isize)
|
||||||
|
end subroutine i2sort
|
||||||
|
|
||||||
|
subroutine i2sort_noidx(A, isize)
|
||||||
|
use iso_c_binding
|
||||||
|
use qsort_module
|
||||||
|
integer(c_int32_t) :: isize
|
||||||
|
integer (c_int16_t) :: A(isize)
|
||||||
|
call i2sort_noidx_c(A, isize)
|
||||||
|
end subroutine i2sort_noidx
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine i2sort_big(A, iorder, isize)
|
||||||
|
use qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t) :: isize
|
||||||
|
integer(c_int64_t) :: iorder(isize)
|
||||||
|
integer (c_int16_t) :: A(isize)
|
||||||
|
call i2sort_big_c(A, iorder, isize)
|
||||||
|
end subroutine i2sort_big
|
||||||
|
|
||||||
|
subroutine i2sort_noidx_big(A, isize)
|
||||||
|
use iso_c_binding
|
||||||
|
use qsort_module
|
||||||
|
integer(c_int64_t) :: isize
|
||||||
|
integer (c_int16_t) :: A(isize)
|
||||||
|
call i2sort_noidx_big_c(A, isize)
|
||||||
|
end subroutine i2sort_noidx_big
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine isort(A, iorder, isize)
|
||||||
|
use qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t) :: isize
|
||||||
|
integer(c_int32_t) :: iorder(isize)
|
||||||
|
integer (c_int32_t) :: A(isize)
|
||||||
|
call isort_c(A, iorder, isize)
|
||||||
|
end subroutine isort
|
||||||
|
|
||||||
|
subroutine isort_noidx(A, isize)
|
||||||
|
use iso_c_binding
|
||||||
|
use qsort_module
|
||||||
|
integer(c_int32_t) :: isize
|
||||||
|
integer (c_int32_t) :: A(isize)
|
||||||
|
call isort_noidx_c(A, isize)
|
||||||
|
end subroutine isort_noidx
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine isort_big(A, iorder, isize)
|
||||||
|
use qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t) :: isize
|
||||||
|
integer(c_int64_t) :: iorder(isize)
|
||||||
|
integer (c_int32_t) :: A(isize)
|
||||||
|
call isort_big_c(A, iorder, isize)
|
||||||
|
end subroutine isort_big
|
||||||
|
|
||||||
|
subroutine isort_noidx_big(A, isize)
|
||||||
|
use iso_c_binding
|
||||||
|
use qsort_module
|
||||||
|
integer(c_int64_t) :: isize
|
||||||
|
integer (c_int32_t) :: A(isize)
|
||||||
|
call isort_noidx_big_c(A, isize)
|
||||||
|
end subroutine isort_noidx_big
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine i8sort(A, iorder, isize)
|
||||||
|
use qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t) :: isize
|
||||||
|
integer(c_int32_t) :: iorder(isize)
|
||||||
|
integer (c_int64_t) :: A(isize)
|
||||||
|
call i8sort_c(A, iorder, isize)
|
||||||
|
end subroutine i8sort
|
||||||
|
|
||||||
|
subroutine i8sort_noidx(A, isize)
|
||||||
|
use iso_c_binding
|
||||||
|
use qsort_module
|
||||||
|
integer(c_int32_t) :: isize
|
||||||
|
integer (c_int64_t) :: A(isize)
|
||||||
|
call i8sort_noidx_c(A, isize)
|
||||||
|
end subroutine i8sort_noidx
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine i8sort_big(A, iorder, isize)
|
||||||
|
use qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t) :: isize
|
||||||
|
integer(c_int64_t) :: iorder(isize)
|
||||||
|
integer (c_int64_t) :: A(isize)
|
||||||
|
call i8sort_big_c(A, iorder, isize)
|
||||||
|
end subroutine i8sort_big
|
||||||
|
|
||||||
|
subroutine i8sort_noidx_big(A, isize)
|
||||||
|
use iso_c_binding
|
||||||
|
use qsort_module
|
||||||
|
integer(c_int64_t) :: isize
|
||||||
|
integer (c_int64_t) :: A(isize)
|
||||||
|
call i8sort_noidx_big_c(A, isize)
|
||||||
|
end subroutine i8sort_noidx_big
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine dsort(A, iorder, isize)
|
||||||
|
use qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t) :: isize
|
||||||
|
integer(c_int32_t) :: iorder(isize)
|
||||||
|
real (c_double) :: A(isize)
|
||||||
|
call dsort_c(A, iorder, isize)
|
||||||
|
end subroutine dsort
|
||||||
|
|
||||||
|
subroutine dsort_noidx(A, isize)
|
||||||
|
use iso_c_binding
|
||||||
|
use qsort_module
|
||||||
|
integer(c_int32_t) :: isize
|
||||||
|
real (c_double) :: A(isize)
|
||||||
|
call dsort_noidx_c(A, isize)
|
||||||
|
end subroutine dsort_noidx
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine dsort_big(A, iorder, isize)
|
||||||
|
use qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t) :: isize
|
||||||
|
integer(c_int64_t) :: iorder(isize)
|
||||||
|
real (c_double) :: A(isize)
|
||||||
|
call dsort_big_c(A, iorder, isize)
|
||||||
|
end subroutine dsort_big
|
||||||
|
|
||||||
|
subroutine dsort_noidx_big(A, isize)
|
||||||
|
use iso_c_binding
|
||||||
|
use qsort_module
|
||||||
|
integer(c_int64_t) :: isize
|
||||||
|
real (c_double) :: A(isize)
|
||||||
|
call dsort_noidx_big_c(A, isize)
|
||||||
|
end subroutine dsort_noidx_big
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine sort(A, iorder, isize)
|
||||||
|
use qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int32_t) :: isize
|
||||||
|
integer(c_int32_t) :: iorder(isize)
|
||||||
|
real (c_float) :: A(isize)
|
||||||
|
call sort_c(A, iorder, isize)
|
||||||
|
end subroutine sort
|
||||||
|
|
||||||
|
subroutine sort_noidx(A, isize)
|
||||||
|
use iso_c_binding
|
||||||
|
use qsort_module
|
||||||
|
integer(c_int32_t) :: isize
|
||||||
|
real (c_float) :: A(isize)
|
||||||
|
call sort_noidx_c(A, isize)
|
||||||
|
end subroutine sort_noidx
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine sort_big(A, iorder, isize)
|
||||||
|
use qsort_module
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_int64_t) :: isize
|
||||||
|
integer(c_int64_t) :: iorder(isize)
|
||||||
|
real (c_float) :: A(isize)
|
||||||
|
call sort_big_c(A, iorder, isize)
|
||||||
|
end subroutine sort_big
|
||||||
|
|
||||||
|
subroutine sort_noidx_big(A, isize)
|
||||||
|
use iso_c_binding
|
||||||
|
use qsort_module
|
||||||
|
integer(c_int64_t) :: isize
|
||||||
|
real (c_float) :: A(isize)
|
||||||
|
call sort_noidx_big_c(A, isize)
|
||||||
|
end subroutine sort_noidx_big
|
@ -1,222 +1,4 @@
|
|||||||
BEGIN_TEMPLATE
|
BEGIN_TEMPLATE
|
||||||
subroutine insertion_$Xsort (x,iorder,isize)
|
|
||||||
implicit none
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort array x(isize) using the insertion sort algorithm.
|
|
||||||
! iorder in input should be (1,2,3,...,isize), and in output
|
|
||||||
! contains the new order of the elements.
|
|
||||||
END_DOC
|
|
||||||
integer,intent(in) :: isize
|
|
||||||
$type,intent(inout) :: x(isize)
|
|
||||||
integer,intent(inout) :: iorder(isize)
|
|
||||||
$type :: xtmp
|
|
||||||
integer :: i, i0, j, jmax
|
|
||||||
|
|
||||||
do i=2,isize
|
|
||||||
xtmp = x(i)
|
|
||||||
i0 = iorder(i)
|
|
||||||
j=i-1
|
|
||||||
do while (j>0)
|
|
||||||
if ((x(j) <= xtmp)) exit
|
|
||||||
x(j+1) = x(j)
|
|
||||||
iorder(j+1) = iorder(j)
|
|
||||||
j=j-1
|
|
||||||
enddo
|
|
||||||
x(j+1) = xtmp
|
|
||||||
iorder(j+1) = i0
|
|
||||||
enddo
|
|
||||||
end subroutine insertion_$Xsort
|
|
||||||
|
|
||||||
subroutine quick_$Xsort(x, iorder, isize)
|
|
||||||
implicit none
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort array x(isize) using the quicksort algorithm.
|
|
||||||
! iorder in input should be (1,2,3,...,isize), and in output
|
|
||||||
! contains the new order of the elements.
|
|
||||||
END_DOC
|
|
||||||
integer,intent(in) :: isize
|
|
||||||
$type,intent(inout) :: x(isize)
|
|
||||||
integer,intent(inout) :: iorder(isize)
|
|
||||||
integer, external :: omp_get_num_threads
|
|
||||||
call rec_$X_quicksort(x,iorder,isize,1,isize,nproc)
|
|
||||||
end
|
|
||||||
|
|
||||||
recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level)
|
|
||||||
implicit none
|
|
||||||
integer, intent(in) :: isize, first, last, level
|
|
||||||
integer,intent(inout) :: iorder(isize)
|
|
||||||
$type, intent(inout) :: x(isize)
|
|
||||||
$type :: c, tmp
|
|
||||||
integer :: itmp
|
|
||||||
integer :: i, j
|
|
||||||
|
|
||||||
if(isize<2)return
|
|
||||||
|
|
||||||
c = x( shiftr(first+last,1) )
|
|
||||||
i = first
|
|
||||||
j = last
|
|
||||||
do
|
|
||||||
do while (x(i) < c)
|
|
||||||
i=i+1
|
|
||||||
end do
|
|
||||||
do while (c < x(j))
|
|
||||||
j=j-1
|
|
||||||
end do
|
|
||||||
if (i >= j) exit
|
|
||||||
tmp = x(i)
|
|
||||||
x(i) = x(j)
|
|
||||||
x(j) = tmp
|
|
||||||
itmp = iorder(i)
|
|
||||||
iorder(i) = iorder(j)
|
|
||||||
iorder(j) = itmp
|
|
||||||
i=i+1
|
|
||||||
j=j-1
|
|
||||||
enddo
|
|
||||||
if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then
|
|
||||||
if (first < i-1) then
|
|
||||||
call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2)
|
|
||||||
endif
|
|
||||||
if (j+1 < last) then
|
|
||||||
call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
if (first < i-1) then
|
|
||||||
call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2)
|
|
||||||
endif
|
|
||||||
if (j+1 < last) then
|
|
||||||
call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
end
|
|
||||||
|
|
||||||
subroutine heap_$Xsort(x,iorder,isize)
|
|
||||||
implicit none
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort array x(isize) using the heap sort algorithm.
|
|
||||||
! iorder in input should be (1,2,3,...,isize), and in output
|
|
||||||
! contains the new order of the elements.
|
|
||||||
END_DOC
|
|
||||||
integer,intent(in) :: isize
|
|
||||||
$type,intent(inout) :: x(isize)
|
|
||||||
integer,intent(inout) :: iorder(isize)
|
|
||||||
|
|
||||||
integer :: i, k, j, l, i0
|
|
||||||
$type :: xtemp
|
|
||||||
|
|
||||||
l = isize/2+1
|
|
||||||
k = isize
|
|
||||||
do while (.True.)
|
|
||||||
if (l>1) then
|
|
||||||
l=l-1
|
|
||||||
xtemp = x(l)
|
|
||||||
i0 = iorder(l)
|
|
||||||
else
|
|
||||||
xtemp = x(k)
|
|
||||||
i0 = iorder(k)
|
|
||||||
x(k) = x(1)
|
|
||||||
iorder(k) = iorder(1)
|
|
||||||
k = k-1
|
|
||||||
if (k == 1) then
|
|
||||||
x(1) = xtemp
|
|
||||||
iorder(1) = i0
|
|
||||||
exit
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
i=l
|
|
||||||
j = shiftl(l,1)
|
|
||||||
do while (j<k)
|
|
||||||
if ( x(j) < x(j+1) ) then
|
|
||||||
j=j+1
|
|
||||||
endif
|
|
||||||
if (xtemp < x(j)) then
|
|
||||||
x(i) = x(j)
|
|
||||||
iorder(i) = iorder(j)
|
|
||||||
i = j
|
|
||||||
j = shiftl(j,1)
|
|
||||||
else
|
|
||||||
j = k+1
|
|
||||||
endif
|
|
||||||
enddo
|
|
||||||
if (j==k) then
|
|
||||||
if (xtemp < x(j)) then
|
|
||||||
x(i) = x(j)
|
|
||||||
iorder(i) = iorder(j)
|
|
||||||
i = j
|
|
||||||
j = shiftl(j,1)
|
|
||||||
else
|
|
||||||
j = k+1
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
x(i) = xtemp
|
|
||||||
iorder(i) = i0
|
|
||||||
enddo
|
|
||||||
end subroutine heap_$Xsort
|
|
||||||
|
|
||||||
subroutine heap_$Xsort_big(x,iorder,isize)
|
|
||||||
implicit none
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort array x(isize) using the heap sort algorithm.
|
|
||||||
! iorder in input should be (1,2,3,...,isize), and in output
|
|
||||||
! contains the new order of the elements.
|
|
||||||
! This is a version for very large arrays where the indices need
|
|
||||||
! to be in integer*8 format
|
|
||||||
END_DOC
|
|
||||||
integer*8,intent(in) :: isize
|
|
||||||
$type,intent(inout) :: x(isize)
|
|
||||||
integer*8,intent(inout) :: iorder(isize)
|
|
||||||
|
|
||||||
integer*8 :: i, k, j, l, i0
|
|
||||||
$type :: xtemp
|
|
||||||
|
|
||||||
l = isize/2+1
|
|
||||||
k = isize
|
|
||||||
do while (.True.)
|
|
||||||
if (l>1) then
|
|
||||||
l=l-1
|
|
||||||
xtemp = x(l)
|
|
||||||
i0 = iorder(l)
|
|
||||||
else
|
|
||||||
xtemp = x(k)
|
|
||||||
i0 = iorder(k)
|
|
||||||
x(k) = x(1)
|
|
||||||
iorder(k) = iorder(1)
|
|
||||||
k = k-1
|
|
||||||
if (k == 1) then
|
|
||||||
x(1) = xtemp
|
|
||||||
iorder(1) = i0
|
|
||||||
exit
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
i=l
|
|
||||||
j = shiftl(l,1)
|
|
||||||
do while (j<k)
|
|
||||||
if ( x(j) < x(j+1) ) then
|
|
||||||
j=j+1
|
|
||||||
endif
|
|
||||||
if (xtemp < x(j)) then
|
|
||||||
x(i) = x(j)
|
|
||||||
iorder(i) = iorder(j)
|
|
||||||
i = j
|
|
||||||
j = shiftl(j,1)
|
|
||||||
else
|
|
||||||
j = k+1
|
|
||||||
endif
|
|
||||||
enddo
|
|
||||||
if (j==k) then
|
|
||||||
if (xtemp < x(j)) then
|
|
||||||
x(i) = x(j)
|
|
||||||
iorder(i) = iorder(j)
|
|
||||||
i = j
|
|
||||||
j = shiftl(j,1)
|
|
||||||
else
|
|
||||||
j = k+1
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
x(i) = xtemp
|
|
||||||
iorder(i) = i0
|
|
||||||
enddo
|
|
||||||
|
|
||||||
end subroutine heap_$Xsort_big
|
|
||||||
|
|
||||||
subroutine sorted_$Xnumber(x,isize,n)
|
subroutine sorted_$Xnumber(x,isize,n)
|
||||||
implicit none
|
implicit none
|
||||||
@ -250,222 +32,6 @@ SUBST [ X, type ]
|
|||||||
END_TEMPLATE
|
END_TEMPLATE
|
||||||
|
|
||||||
|
|
||||||
!---------------------- INTEL
|
|
||||||
IRP_IF INTEL
|
|
||||||
|
|
||||||
BEGIN_TEMPLATE
|
|
||||||
subroutine $Xsort(x,iorder,isize)
|
|
||||||
use intel
|
|
||||||
implicit none
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort array x(isize).
|
|
||||||
! iorder in input should be (1,2,3,...,isize), and in output
|
|
||||||
! contains the new order of the elements.
|
|
||||||
END_DOC
|
|
||||||
integer,intent(in) :: isize
|
|
||||||
$type,intent(inout) :: x(isize)
|
|
||||||
integer,intent(inout) :: iorder(isize)
|
|
||||||
integer :: n
|
|
||||||
character, allocatable :: tmp(:)
|
|
||||||
if (isize < 2) return
|
|
||||||
call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n)
|
|
||||||
allocate(tmp(n))
|
|
||||||
call ippsSortRadixIndexAscend_$ityp(x, $n, iorder, isize, tmp)
|
|
||||||
deallocate(tmp)
|
|
||||||
iorder(1:isize) = iorder(1:isize)+1
|
|
||||||
call $Xset_order(x,iorder,isize)
|
|
||||||
end
|
|
||||||
|
|
||||||
subroutine $Xsort_noidx(x,isize)
|
|
||||||
use intel
|
|
||||||
implicit none
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort array x(isize).
|
|
||||||
! iorder in input should be (1,2,3,...,isize), and in output
|
|
||||||
! contains the new order of the elements.
|
|
||||||
END_DOC
|
|
||||||
integer,intent(in) :: isize
|
|
||||||
$type,intent(inout) :: x(isize)
|
|
||||||
integer :: n
|
|
||||||
character, allocatable :: tmp(:)
|
|
||||||
if (isize < 2) return
|
|
||||||
call ippsSortRadixIndexGetBufferSize(isize, $ippsz, n)
|
|
||||||
allocate(tmp(n))
|
|
||||||
call ippsSortRadixAscend_$ityp_I(x, isize, tmp)
|
|
||||||
deallocate(tmp)
|
|
||||||
end
|
|
||||||
|
|
||||||
SUBST [ X, type, ityp, n, ippsz ]
|
|
||||||
; real ; 32f ; 4 ; 13 ;;
|
|
||||||
i ; integer ; 32s ; 4 ; 11 ;;
|
|
||||||
i2 ; integer*2 ; 16s ; 2 ; 7 ;;
|
|
||||||
END_TEMPLATE
|
|
||||||
|
|
||||||
BEGIN_TEMPLATE
|
|
||||||
|
|
||||||
subroutine $Xsort(x,iorder,isize)
|
|
||||||
implicit none
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort array x(isize).
|
|
||||||
! iorder in input should be (1,2,3,...,isize), and in output
|
|
||||||
! contains the new order of the elements.
|
|
||||||
END_DOC
|
|
||||||
integer,intent(in) :: isize
|
|
||||||
$type,intent(inout) :: x(isize)
|
|
||||||
integer,intent(inout) :: iorder(isize)
|
|
||||||
integer :: n
|
|
||||||
if (isize < 2) then
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
! call sorted_$Xnumber(x,isize,n)
|
|
||||||
! if (isize == n) then
|
|
||||||
! return
|
|
||||||
! endif
|
|
||||||
if ( isize < 32) then
|
|
||||||
call insertion_$Xsort(x,iorder,isize)
|
|
||||||
else
|
|
||||||
! call heap_$Xsort(x,iorder,isize)
|
|
||||||
call quick_$Xsort(x,iorder,isize)
|
|
||||||
endif
|
|
||||||
end subroutine $Xsort
|
|
||||||
|
|
||||||
SUBST [ X, type ]
|
|
||||||
d ; double precision ;;
|
|
||||||
END_TEMPLATE
|
|
||||||
|
|
||||||
BEGIN_TEMPLATE
|
|
||||||
|
|
||||||
subroutine $Xsort(x,iorder,isize)
|
|
||||||
implicit none
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort array x(isize).
|
|
||||||
! iorder in input should be (1,2,3,...,isize), and in output
|
|
||||||
! contains the new order of the elements.
|
|
||||||
END_DOC
|
|
||||||
integer,intent(in) :: isize
|
|
||||||
$type,intent(inout) :: x(isize)
|
|
||||||
integer,intent(inout) :: iorder(isize)
|
|
||||||
integer :: n
|
|
||||||
if (isize < 2) then
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
call sorted_$Xnumber(x,isize,n)
|
|
||||||
if (isize == n) then
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
if ( isize < 32) then
|
|
||||||
call insertion_$Xsort(x,iorder,isize)
|
|
||||||
else
|
|
||||||
! call $Xradix_sort(x,iorder,isize,-1)
|
|
||||||
call quick_$Xsort(x,iorder,isize)
|
|
||||||
endif
|
|
||||||
end subroutine $Xsort
|
|
||||||
|
|
||||||
SUBST [ X, type ]
|
|
||||||
i8 ; integer*8 ;;
|
|
||||||
END_TEMPLATE
|
|
||||||
|
|
||||||
!---------------------- END INTEL
|
|
||||||
IRP_ELSE
|
|
||||||
!---------------------- NON-INTEL
|
|
||||||
BEGIN_TEMPLATE
|
|
||||||
|
|
||||||
subroutine $Xsort_noidx(x,isize)
|
|
||||||
implicit none
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort array x(isize).
|
|
||||||
END_DOC
|
|
||||||
integer,intent(in) :: isize
|
|
||||||
$type,intent(inout) :: x(isize)
|
|
||||||
integer, allocatable :: iorder(:)
|
|
||||||
integer :: i
|
|
||||||
allocate(iorder(isize))
|
|
||||||
do i=1,isize
|
|
||||||
iorder(i)=i
|
|
||||||
enddo
|
|
||||||
call $Xsort(x,iorder,isize)
|
|
||||||
deallocate(iorder)
|
|
||||||
end subroutine $Xsort_noidx
|
|
||||||
|
|
||||||
SUBST [ X, type ]
|
|
||||||
; real ;;
|
|
||||||
d ; double precision ;;
|
|
||||||
i ; integer ;;
|
|
||||||
i8 ; integer*8 ;;
|
|
||||||
i2 ; integer*2 ;;
|
|
||||||
END_TEMPLATE
|
|
||||||
|
|
||||||
BEGIN_TEMPLATE
|
|
||||||
|
|
||||||
subroutine $Xsort(x,iorder,isize)
|
|
||||||
implicit none
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort array x(isize).
|
|
||||||
! iorder in input should be (1,2,3,...,isize), and in output
|
|
||||||
! contains the new order of the elements.
|
|
||||||
END_DOC
|
|
||||||
integer,intent(in) :: isize
|
|
||||||
$type,intent(inout) :: x(isize)
|
|
||||||
integer,intent(inout) :: iorder(isize)
|
|
||||||
integer :: n
|
|
||||||
if (isize < 2) then
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
! call sorted_$Xnumber(x,isize,n)
|
|
||||||
! if (isize == n) then
|
|
||||||
! return
|
|
||||||
! endif
|
|
||||||
if ( isize < 32) then
|
|
||||||
call insertion_$Xsort(x,iorder,isize)
|
|
||||||
else
|
|
||||||
! call heap_$Xsort(x,iorder,isize)
|
|
||||||
call quick_$Xsort(x,iorder,isize)
|
|
||||||
endif
|
|
||||||
end subroutine $Xsort
|
|
||||||
|
|
||||||
SUBST [ X, type ]
|
|
||||||
; real ;;
|
|
||||||
d ; double precision ;;
|
|
||||||
END_TEMPLATE
|
|
||||||
|
|
||||||
BEGIN_TEMPLATE
|
|
||||||
|
|
||||||
subroutine $Xsort(x,iorder,isize)
|
|
||||||
implicit none
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort array x(isize).
|
|
||||||
! iorder in input should be (1,2,3,...,isize), and in output
|
|
||||||
! contains the new order of the elements.
|
|
||||||
END_DOC
|
|
||||||
integer,intent(in) :: isize
|
|
||||||
$type,intent(inout) :: x(isize)
|
|
||||||
integer,intent(inout) :: iorder(isize)
|
|
||||||
integer :: n
|
|
||||||
if (isize < 2) then
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
call sorted_$Xnumber(x,isize,n)
|
|
||||||
if (isize == n) then
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
if ( isize < 32) then
|
|
||||||
call insertion_$Xsort(x,iorder,isize)
|
|
||||||
else
|
|
||||||
! call $Xradix_sort(x,iorder,isize,-1)
|
|
||||||
call quick_$Xsort(x,iorder,isize)
|
|
||||||
endif
|
|
||||||
end subroutine $Xsort
|
|
||||||
|
|
||||||
SUBST [ X, type ]
|
|
||||||
i ; integer ;;
|
|
||||||
i8 ; integer*8 ;;
|
|
||||||
i2 ; integer*2 ;;
|
|
||||||
END_TEMPLATE
|
|
||||||
|
|
||||||
IRP_ENDIF
|
|
||||||
!---------------------- END NON-INTEL
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
BEGIN_TEMPLATE
|
BEGIN_TEMPLATE
|
||||||
subroutine $Xset_order(x,iorder,isize)
|
subroutine $Xset_order(x,iorder,isize)
|
||||||
@ -491,47 +57,6 @@ BEGIN_TEMPLATE
|
|||||||
deallocate(xtmp)
|
deallocate(xtmp)
|
||||||
end
|
end
|
||||||
|
|
||||||
SUBST [ X, type ]
|
|
||||||
; real ;;
|
|
||||||
d ; double precision ;;
|
|
||||||
i ; integer ;;
|
|
||||||
i8; integer*8 ;;
|
|
||||||
i2; integer*2 ;;
|
|
||||||
END_TEMPLATE
|
|
||||||
|
|
||||||
|
|
||||||
BEGIN_TEMPLATE
|
|
||||||
subroutine insertion_$Xsort_big (x,iorder,isize)
|
|
||||||
implicit none
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort array x(isize) using the insertion sort algorithm.
|
|
||||||
! iorder in input should be (1,2,3,...,isize), and in output
|
|
||||||
! contains the new order of the elements.
|
|
||||||
! This is a version for very large arrays where the indices need
|
|
||||||
! to be in integer*8 format
|
|
||||||
END_DOC
|
|
||||||
integer*8,intent(in) :: isize
|
|
||||||
$type,intent(inout) :: x(isize)
|
|
||||||
integer*8,intent(inout) :: iorder(isize)
|
|
||||||
$type :: xtmp
|
|
||||||
integer*8 :: i, i0, j, jmax
|
|
||||||
|
|
||||||
do i=2_8,isize
|
|
||||||
xtmp = x(i)
|
|
||||||
i0 = iorder(i)
|
|
||||||
j = i-1_8
|
|
||||||
do while (j>0_8)
|
|
||||||
if (x(j)<=xtmp) exit
|
|
||||||
x(j+1_8) = x(j)
|
|
||||||
iorder(j+1_8) = iorder(j)
|
|
||||||
j = j-1_8
|
|
||||||
enddo
|
|
||||||
x(j+1_8) = xtmp
|
|
||||||
iorder(j+1_8) = i0
|
|
||||||
enddo
|
|
||||||
|
|
||||||
end subroutine insertion_$Xsort_big
|
|
||||||
|
|
||||||
subroutine $Xset_order_big(x,iorder,isize)
|
subroutine $Xset_order_big(x,iorder,isize)
|
||||||
implicit none
|
implicit none
|
||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
@ -565,223 +90,3 @@ SUBST [ X, type ]
|
|||||||
END_TEMPLATE
|
END_TEMPLATE
|
||||||
|
|
||||||
|
|
||||||
BEGIN_TEMPLATE
|
|
||||||
|
|
||||||
recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix)
|
|
||||||
implicit none
|
|
||||||
|
|
||||||
BEGIN_DOC
|
|
||||||
! Sort integer array x(isize) using the radix sort algorithm.
|
|
||||||
! iorder in input should be (1,2,3,...,isize), and in output
|
|
||||||
! contains the new order of the elements.
|
|
||||||
! iradix should be -1 in input.
|
|
||||||
END_DOC
|
|
||||||
integer*$int_type, intent(in) :: isize
|
|
||||||
integer*$int_type, intent(inout) :: iorder(isize)
|
|
||||||
integer*$type, intent(inout) :: x(isize)
|
|
||||||
integer, intent(in) :: iradix
|
|
||||||
integer :: iradix_new
|
|
||||||
integer*$type, allocatable :: x2(:), x1(:)
|
|
||||||
integer*$type :: i4 ! data type
|
|
||||||
integer*$int_type, allocatable :: iorder1(:),iorder2(:)
|
|
||||||
integer*$int_type :: i0, i1, i2, i3, i ! index type
|
|
||||||
integer*$type :: mask
|
|
||||||
integer :: err
|
|
||||||
!DIR$ ATTRIBUTES ALIGN : 128 :: iorder1,iorder2, x2, x1
|
|
||||||
|
|
||||||
if (isize < 2) then
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
|
|
||||||
if (iradix == -1) then ! Sort Positive and negative
|
|
||||||
|
|
||||||
allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
|
|
||||||
if (err /= 0) then
|
|
||||||
print *, irp_here, ': Unable to allocate arrays'
|
|
||||||
stop
|
|
||||||
endif
|
|
||||||
|
|
||||||
i1=1_$int_type
|
|
||||||
i2=1_$int_type
|
|
||||||
do i=1_$int_type,isize
|
|
||||||
if (x(i) < 0_$type) then
|
|
||||||
iorder1(i1) = iorder(i)
|
|
||||||
x1(i1) = -x(i)
|
|
||||||
i1 = i1+1_$int_type
|
|
||||||
else
|
|
||||||
iorder2(i2) = iorder(i)
|
|
||||||
x2(i2) = x(i)
|
|
||||||
i2 = i2+1_$int_type
|
|
||||||
endif
|
|
||||||
enddo
|
|
||||||
i1=i1-1_$int_type
|
|
||||||
i2=i2-1_$int_type
|
|
||||||
|
|
||||||
do i=1_$int_type,i2
|
|
||||||
iorder(i1+i) = iorder2(i)
|
|
||||||
x(i1+i) = x2(i)
|
|
||||||
enddo
|
|
||||||
deallocate(x2,iorder2,stat=err)
|
|
||||||
if (err /= 0) then
|
|
||||||
print *, irp_here, ': Unable to deallocate arrays x2, iorder2'
|
|
||||||
stop
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
if (i1 > 1_$int_type) then
|
|
||||||
call $Xradix_sort$big(x1,iorder1,i1,-2)
|
|
||||||
do i=1_$int_type,i1
|
|
||||||
x(i) = -x1(1_$int_type+i1-i)
|
|
||||||
iorder(i) = iorder1(1_$int_type+i1-i)
|
|
||||||
enddo
|
|
||||||
endif
|
|
||||||
|
|
||||||
if (i2>1_$int_type) then
|
|
||||||
call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2)
|
|
||||||
endif
|
|
||||||
|
|
||||||
deallocate(x1,iorder1,stat=err)
|
|
||||||
if (err /= 0) then
|
|
||||||
print *, irp_here, ': Unable to deallocate arrays x1, iorder1'
|
|
||||||
stop
|
|
||||||
endif
|
|
||||||
return
|
|
||||||
|
|
||||||
else if (iradix == -2) then ! Positive
|
|
||||||
|
|
||||||
! Find most significant bit
|
|
||||||
|
|
||||||
i0 = 0_$int_type
|
|
||||||
i4 = maxval(x)
|
|
||||||
|
|
||||||
iradix_new = max($integer_size-1-leadz(i4),1)
|
|
||||||
mask = ibset(0_$type,iradix_new)
|
|
||||||
|
|
||||||
allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err)
|
|
||||||
if (err /= 0) then
|
|
||||||
print *, irp_here, ': Unable to allocate arrays'
|
|
||||||
stop
|
|
||||||
endif
|
|
||||||
|
|
||||||
i1=1_$int_type
|
|
||||||
i2=1_$int_type
|
|
||||||
|
|
||||||
do i=1_$int_type,isize
|
|
||||||
if (iand(mask,x(i)) == 0_$type) then
|
|
||||||
iorder1(i1) = iorder(i)
|
|
||||||
x1(i1) = x(i)
|
|
||||||
i1 = i1+1_$int_type
|
|
||||||
else
|
|
||||||
iorder2(i2) = iorder(i)
|
|
||||||
x2(i2) = x(i)
|
|
||||||
i2 = i2+1_$int_type
|
|
||||||
endif
|
|
||||||
enddo
|
|
||||||
i1=i1-1_$int_type
|
|
||||||
i2=i2-1_$int_type
|
|
||||||
|
|
||||||
do i=1_$int_type,i1
|
|
||||||
iorder(i0+i) = iorder1(i)
|
|
||||||
x(i0+i) = x1(i)
|
|
||||||
enddo
|
|
||||||
i0 = i0+i1
|
|
||||||
i3 = i0
|
|
||||||
deallocate(x1,iorder1,stat=err)
|
|
||||||
if (err /= 0) then
|
|
||||||
print *, irp_here, ': Unable to deallocate arrays x1, iorder1'
|
|
||||||
stop
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
do i=1_$int_type,i2
|
|
||||||
iorder(i0+i) = iorder2(i)
|
|
||||||
x(i0+i) = x2(i)
|
|
||||||
enddo
|
|
||||||
i0 = i0+i2
|
|
||||||
deallocate(x2,iorder2,stat=err)
|
|
||||||
if (err /= 0) then
|
|
||||||
print *, irp_here, ': Unable to deallocate arrays x2, iorder2'
|
|
||||||
stop
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
if (i3>1_$int_type) then
|
|
||||||
call $Xradix_sort$big(x,iorder,i3,iradix_new-1)
|
|
||||||
endif
|
|
||||||
|
|
||||||
if (isize-i3>1_$int_type) then
|
|
||||||
call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1)
|
|
||||||
endif
|
|
||||||
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
|
|
||||||
ASSERT (iradix >= 0)
|
|
||||||
|
|
||||||
if (isize < 48) then
|
|
||||||
call insertion_$Xsort$big(x,iorder,isize)
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
allocate(x2(isize),iorder2(isize),stat=err)
|
|
||||||
if (err /= 0) then
|
|
||||||
print *, irp_here, ': Unable to allocate arrays x1, iorder1'
|
|
||||||
stop
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
mask = ibset(0_$type,iradix)
|
|
||||||
i0=1_$int_type
|
|
||||||
i1=1_$int_type
|
|
||||||
|
|
||||||
do i=1_$int_type,isize
|
|
||||||
if (iand(mask,x(i)) == 0_$type) then
|
|
||||||
iorder(i0) = iorder(i)
|
|
||||||
x(i0) = x(i)
|
|
||||||
i0 = i0+1_$int_type
|
|
||||||
else
|
|
||||||
iorder2(i1) = iorder(i)
|
|
||||||
x2(i1) = x(i)
|
|
||||||
i1 = i1+1_$int_type
|
|
||||||
endif
|
|
||||||
enddo
|
|
||||||
i0=i0-1_$int_type
|
|
||||||
i1=i1-1_$int_type
|
|
||||||
|
|
||||||
do i=1_$int_type,i1
|
|
||||||
iorder(i0+i) = iorder2(i)
|
|
||||||
x(i0+i) = x2(i)
|
|
||||||
enddo
|
|
||||||
|
|
||||||
deallocate(x2,iorder2,stat=err)
|
|
||||||
if (err /= 0) then
|
|
||||||
print *, irp_here, ': Unable to allocate arrays x2, iorder2'
|
|
||||||
stop
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
if (iradix == 0) then
|
|
||||||
return
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
if (i1>1_$int_type) then
|
|
||||||
call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1)
|
|
||||||
endif
|
|
||||||
if (i0>1) then
|
|
||||||
call $Xradix_sort$big(x,iorder,i0,iradix-1)
|
|
||||||
endif
|
|
||||||
|
|
||||||
end
|
|
||||||
|
|
||||||
SUBST [ X, type, integer_size, is_big, big, int_type ]
|
|
||||||
i ; 4 ; 32 ; .False. ; ; 4 ;;
|
|
||||||
i8 ; 8 ; 64 ; .False. ; ; 4 ;;
|
|
||||||
i2 ; 2 ; 16 ; .False. ; ; 4 ;;
|
|
||||||
i ; 4 ; 32 ; .True. ; _big ; 8 ;;
|
|
||||||
i8 ; 8 ; 64 ; .True. ; _big ; 8 ;;
|
|
||||||
END_TEMPLATE
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
22
src/utils/units.irp.f
Normal file
22
src/utils/units.irp.f
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
BEGIN_PROVIDER [double precision, ha_to_ev]
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
BEGIN_DOC
|
||||||
|
! Converstion from Hartree to eV
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
ha_to_ev = 27.211396641308d0
|
||||||
|
|
||||||
|
END_PROVIDER
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [double precision, au_to_D]
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
BEGIN_DOC
|
||||||
|
! Converstion from au to Debye
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
au_to_D = 2.5415802529d0
|
||||||
|
|
||||||
|
END_PROVIDER
|
||||||
|
|
@ -37,6 +37,10 @@ double precision function binom_func(i,j)
|
|||||||
else
|
else
|
||||||
binom_func = dexp( logfact(i)-logfact(j)-logfact(i-j) )
|
binom_func = dexp( logfact(i)-logfact(j)-logfact(i-j) )
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
! To avoid .999999 numbers
|
||||||
|
binom_func = floor(binom_func + 0.5d0)
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
@ -132,7 +136,7 @@ double precision function logfact(n)
|
|||||||
enddo
|
enddo
|
||||||
end function
|
end function
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
BEGIN_PROVIDER [ double precision, fact_inv, (128) ]
|
BEGIN_PROVIDER [ double precision, fact_inv, (128) ]
|
||||||
implicit none
|
implicit none
|
||||||
@ -146,6 +150,29 @@ BEGIN_PROVIDER [ double precision, fact_inv, (128) ]
|
|||||||
enddo
|
enddo
|
||||||
END_PROVIDER
|
END_PROVIDER
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
|
BEGIN_PROVIDER [ double precision, shiftfact_op5_inv, (128) ]
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
!
|
||||||
|
! 1 / Gamma(n + 0.5)
|
||||||
|
!
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
integer :: i
|
||||||
|
double precision :: tmp
|
||||||
|
|
||||||
|
do i = 1, size(shiftfact_op5_inv)
|
||||||
|
!tmp = dgamma(dble(i) + 0.5d0)
|
||||||
|
tmp = gamma(dble(i) + 0.5d0)
|
||||||
|
shiftfact_op5_inv(i) = 1.d0 / tmp
|
||||||
|
enddo
|
||||||
|
|
||||||
|
END_PROVIDER
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
double precision function dble_fact(n)
|
double precision function dble_fact(n)
|
||||||
implicit none
|
implicit none
|
||||||
@ -300,12 +327,12 @@ subroutine wall_time(t)
|
|||||||
end
|
end
|
||||||
|
|
||||||
BEGIN_PROVIDER [ integer, nproc ]
|
BEGIN_PROVIDER [ integer, nproc ]
|
||||||
use omp_lib
|
|
||||||
implicit none
|
implicit none
|
||||||
BEGIN_DOC
|
BEGIN_DOC
|
||||||
! Number of current OpenMP threads
|
! Number of current OpenMP threads
|
||||||
END_DOC
|
END_DOC
|
||||||
|
|
||||||
|
integer, external :: omp_get_num_threads
|
||||||
nproc = 1
|
nproc = 1
|
||||||
!$OMP PARALLEL
|
!$OMP PARALLEL
|
||||||
!$OMP MASTER
|
!$OMP MASTER
|
||||||
@ -407,3 +434,28 @@ subroutine lowercase(txt,n)
|
|||||||
enddo
|
enddo
|
||||||
end
|
end
|
||||||
|
|
||||||
|
subroutine v2_over_x(v,x,res)
|
||||||
|
|
||||||
|
!BEGIN_DOC
|
||||||
|
! Two by two diagonalization to avoid the divergence in v^2/x when x goes to 0
|
||||||
|
!END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
double precision, intent(in) :: v, x
|
||||||
|
double precision, intent(out) :: res
|
||||||
|
|
||||||
|
double precision :: delta_E, tmp, val
|
||||||
|
|
||||||
|
res = 0d0
|
||||||
|
delta_E = x
|
||||||
|
if (v == 0.d0) return
|
||||||
|
|
||||||
|
val = 2d0 * v
|
||||||
|
tmp = dsqrt(delta_E * delta_E + val * val)
|
||||||
|
if (delta_E < 0.d0) then
|
||||||
|
tmp = -tmp
|
||||||
|
endif
|
||||||
|
res = 0.5d0 * (tmp - delta_E)
|
||||||
|
|
||||||
|
end
|
||||||
|
89
src/utils_trust_region/EZFIO.cfg
Normal file
89
src/utils_trust_region/EZFIO.cfg
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
[thresh_delta]
|
||||||
|
type: double precision
|
||||||
|
doc: Threshold to stop the optimization if the radius of the trust region delta < thresh_delta
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 1.e-10
|
||||||
|
|
||||||
|
[thresh_rho]
|
||||||
|
type: double precision
|
||||||
|
doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. thresh_rho)
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 0.1
|
||||||
|
|
||||||
|
[thresh_eig]
|
||||||
|
type: double precision
|
||||||
|
doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 1.e-12
|
||||||
|
|
||||||
|
[thresh_model]
|
||||||
|
type: double precision
|
||||||
|
doc: If if ABS(criterion - criterion_model) < thresh_model, the program exit the trust region algorithm
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 1.e-12
|
||||||
|
|
||||||
|
[absolute_eig]
|
||||||
|
type: logical
|
||||||
|
doc: If True, the algorithm replace the eigenvalues of the hessian by their absolute value to compute the step (in the trust region)
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: false
|
||||||
|
|
||||||
|
[thresh_wtg]
|
||||||
|
type: double precision
|
||||||
|
doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several order of magnitude to avoid numerical problem. If the research of the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 1.e-6
|
||||||
|
|
||||||
|
[thresh_wtg2]
|
||||||
|
type: double precision
|
||||||
|
doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. There is no particular reason to put a different value that thresh_wtg, but it can be useful one day
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 1.e-6
|
||||||
|
|
||||||
|
[avoid_saddle]
|
||||||
|
type: logical
|
||||||
|
doc: Test to avoid saddle point, active if true
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: false
|
||||||
|
|
||||||
|
[version_avoid_saddle]
|
||||||
|
type: integer
|
||||||
|
doc: cf. trust region, not stable
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 3
|
||||||
|
|
||||||
|
[thresh_rho_2]
|
||||||
|
type: double precision
|
||||||
|
doc: Threshold for the step acceptance for the research of lambda in the trust region algorithm, if (rho_2 .geq. thresh_rho_2) the step is accepted, else the step is rejected
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 0.1
|
||||||
|
|
||||||
|
[thresh_cc]
|
||||||
|
type: double precision
|
||||||
|
doc: Threshold to stop the research of the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc)
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 1.e-6
|
||||||
|
|
||||||
|
[thresh_model_2]
|
||||||
|
type: double precision
|
||||||
|
doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the research of the optimal lambda in the trust region algorithm it prints a warning
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 1.e-12
|
||||||
|
|
||||||
|
[version_lambda_search]
|
||||||
|
type: integer
|
||||||
|
doc: Research of the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 -> 1/||x||^2 - 1/delta^2 .eq. 0
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 2
|
||||||
|
|
||||||
|
[nb_it_max_lambda]
|
||||||
|
type: integer
|
||||||
|
doc: Maximal number of iterations for the research of the optimal lambda in the trust region algorithm
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 100
|
||||||
|
|
||||||
|
[nb_it_max_pre_search]
|
||||||
|
type: integer
|
||||||
|
doc: Maximal number of iterations for the pre-research of the optimal lambda in the trust region algorithm
|
||||||
|
interface: ezfio,provider,ocaml
|
||||||
|
default: 40
|
1
src/utils_trust_region/NEED
Normal file
1
src/utils_trust_region/NEED
Normal file
@ -0,0 +1 @@
|
|||||||
|
hartree_fock
|
5
src/utils_trust_region/README.rst
Normal file
5
src/utils_trust_region/README.rst
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
============
|
||||||
|
trust_region
|
||||||
|
============
|
||||||
|
|
||||||
|
The documentation can be found in the org files.
|
7
src/utils_trust_region/TANGLE_org_mode.sh
Executable file
7
src/utils_trust_region/TANGLE_org_mode.sh
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
list='ls *.org'
|
||||||
|
for element in $list
|
||||||
|
do
|
||||||
|
emacs --batch $element -f org-babel-tangle
|
||||||
|
done
|
248
src/utils_trust_region/algo_trust.irp.f
Normal file
248
src/utils_trust_region/algo_trust.irp.f
Normal file
@ -0,0 +1,248 @@
|
|||||||
|
! Algorithm for the trust region
|
||||||
|
|
||||||
|
! step_in_trust_region:
|
||||||
|
! Computes the step in the trust region (delta)
|
||||||
|
! (automatically sets at the iteration 0 and which evolves during the
|
||||||
|
! process in function of the evolution of rho). The step is computing by
|
||||||
|
! constraining its norm with a lagrange multiplier.
|
||||||
|
! Since the calculation of the step is based on the Newton method, an
|
||||||
|
! estimation of the gain in energy is given using the Taylors series
|
||||||
|
! truncated at the second order (criterion_model).
|
||||||
|
! If (DABS(criterion-criterion_model) < 1d-12) then
|
||||||
|
! must_exit = .True.
|
||||||
|
! else
|
||||||
|
! must_exit = .False.
|
||||||
|
|
||||||
|
! This estimation of the gain in energy is used by
|
||||||
|
! is_step_cancel_trust_region to say if the step is accepted or cancelled.
|
||||||
|
|
||||||
|
! If the step must be cancelled, the calculation restart from the same
|
||||||
|
! hessian and gradient and recomputes the step but in a smaller trust
|
||||||
|
! region and so on until the step is accepted. If the step is accepted
|
||||||
|
! the hessian and the gradient are recomputed to produce a new step.
|
||||||
|
|
||||||
|
! Example:
|
||||||
|
|
||||||
|
|
||||||
|
! !### Initialization ###
|
||||||
|
! delta = 0d0
|
||||||
|
! nb_iter = 0 ! Must start at 0 !!!
|
||||||
|
! rho = 0.5d0
|
||||||
|
! not_converged = .True.
|
||||||
|
!
|
||||||
|
! ! ### TODO ###
|
||||||
|
! ! Compute the criterion before the loop
|
||||||
|
! call #your_criterion(prev_criterion)
|
||||||
|
!
|
||||||
|
! do while (not_converged)
|
||||||
|
! ! ### TODO ##
|
||||||
|
! ! Call your gradient
|
||||||
|
! ! Call you hessian
|
||||||
|
! call #your_gradient(v_grad) (1D array)
|
||||||
|
! call #your_hessian(H) (2D array)
|
||||||
|
!
|
||||||
|
! ! ### TODO ###
|
||||||
|
! ! Diagonalization of the hessian
|
||||||
|
! call diagonalization_hessian(n,H,e_val,w)
|
||||||
|
!
|
||||||
|
! cancel_step = .True. ! To enter in the loop just after
|
||||||
|
! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
|
||||||
|
! do while (cancel_step)
|
||||||
|
!
|
||||||
|
! ! Hessian,gradient,Criterion -> x
|
||||||
|
! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit)
|
||||||
|
!
|
||||||
|
! if (must_exit) then
|
||||||
|
! ! ### Message ###
|
||||||
|
! ! if step_in_trust_region sets must_exit on true for numerical reasons
|
||||||
|
! print*,'algo_trust1 sends the message : Exit'
|
||||||
|
! !### exit ###
|
||||||
|
! endif
|
||||||
|
!
|
||||||
|
! !### TODO ###
|
||||||
|
! ! Compute x -> m_x
|
||||||
|
! ! Compute m_x -> R
|
||||||
|
! ! Apply R and keep the previous MOs...
|
||||||
|
! ! Update/touch
|
||||||
|
! ! Compute the new criterion/energy -> criterion
|
||||||
|
!
|
||||||
|
! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x)
|
||||||
|
! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R)
|
||||||
|
! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos)
|
||||||
|
!
|
||||||
|
! TOUCH #your_variables
|
||||||
|
!
|
||||||
|
! call #your_criterion(criterion)
|
||||||
|
!
|
||||||
|
! ! Criterion -> step accepted or rejected
|
||||||
|
! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
|
||||||
|
!
|
||||||
|
! ! ### TODO ###
|
||||||
|
! !if (cancel_step) then
|
||||||
|
! ! Cancel the previous step (mo_coef = prev_mos if you keep them...)
|
||||||
|
! !endif
|
||||||
|
! #if (cancel_step) then
|
||||||
|
! #mo_coef = prev_mos
|
||||||
|
! #endif
|
||||||
|
!
|
||||||
|
! enddo
|
||||||
|
!
|
||||||
|
! !call save_mos() !### depend of the time for 1 iteration
|
||||||
|
!
|
||||||
|
! ! To exit the external loop if must_exit = .True.
|
||||||
|
! if (must_exit) then
|
||||||
|
! !### exit ###
|
||||||
|
! endif
|
||||||
|
!
|
||||||
|
! ! Step accepted, nb iteration + 1
|
||||||
|
! nb_iter = nb_iter + 1
|
||||||
|
!
|
||||||
|
! ! ### TODO ###
|
||||||
|
! !if (###Conditions###) then
|
||||||
|
! ! no_converged = .False.
|
||||||
|
! !endif
|
||||||
|
! #if (#your_conditions) then
|
||||||
|
! # not_converged = .False.
|
||||||
|
! #endif
|
||||||
|
!
|
||||||
|
! enddo
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
! Variables:
|
||||||
|
|
||||||
|
! Input:
|
||||||
|
! | n | integer | m*(m-1)/2 |
|
||||||
|
! | m | integer | number of mo in the mo_class |
|
||||||
|
! | H(n,n) | double precision | Hessian |
|
||||||
|
! | v_grad(n) | double precision | Gradient |
|
||||||
|
! | W(n,n) | double precision | Eigenvectors of the hessian |
|
||||||
|
! | e_val(n) | double precision | Eigenvalues of the hessian |
|
||||||
|
! | criterion | double precision | Actual criterion |
|
||||||
|
! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration |
|
||||||
|
! | rho | double precision | Given by is_step_cancel_trus_region |
|
||||||
|
! | | | Agreement between the real function and the Taylor series (2nd order) |
|
||||||
|
! | nb_iter | integer | Actual number of iterations |
|
||||||
|
|
||||||
|
! Input/output:
|
||||||
|
! | delta | double precision | Radius of the trust region |
|
||||||
|
|
||||||
|
! Output:
|
||||||
|
! | criterion_model | double precision | Predicted criterion after the rotation |
|
||||||
|
! | x(n) | double precision | Step |
|
||||||
|
! | must_exit | logical | If the program must exit the loop |
|
||||||
|
|
||||||
|
|
||||||
|
subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute the step and the expected criterion/energy after the step
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer, intent(in) :: n, nb_iter
|
||||||
|
double precision, intent(in) :: H(n,n), W(n,n), v_grad(n)
|
||||||
|
double precision, intent(in) :: rho, prev_criterion
|
||||||
|
|
||||||
|
! inout
|
||||||
|
double precision, intent(inout) :: delta, e_val(n)
|
||||||
|
|
||||||
|
! out
|
||||||
|
double precision, intent(out) :: criterion_model, x(n)
|
||||||
|
logical, intent(out) :: must_exit
|
||||||
|
|
||||||
|
! internal
|
||||||
|
integer :: info
|
||||||
|
|
||||||
|
must_exit = .False.
|
||||||
|
|
||||||
|
call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta)
|
||||||
|
|
||||||
|
call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model)
|
||||||
|
|
||||||
|
! exit if DABS(prev_criterion - criterion_model) < 1d-12
|
||||||
|
if (DABS(prev_criterion - criterion_model) < thresh_model) then
|
||||||
|
print*,''
|
||||||
|
print*,'###############################################################################'
|
||||||
|
print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region'
|
||||||
|
print*,'###############################################################################'
|
||||||
|
print*,''
|
||||||
|
must_exit = .True.
|
||||||
|
endif
|
||||||
|
|
||||||
|
if (delta < thresh_delta) then
|
||||||
|
print*,''
|
||||||
|
print*,'##############################################'
|
||||||
|
print*,'Delta <', thresh_delta, 'stop the trust region'
|
||||||
|
print*,'##############################################'
|
||||||
|
print*,''
|
||||||
|
must_exit = .True.
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Add after the call to this subroutine, a statement:
|
||||||
|
! "if (must_exit) then
|
||||||
|
! exit
|
||||||
|
! endif"
|
||||||
|
! in order to exit the optimization loop
|
||||||
|
|
||||||
|
end subroutine
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
! Variables:
|
||||||
|
|
||||||
|
! Input:
|
||||||
|
! | nb_iter | integer | actual number of iterations |
|
||||||
|
! | prev_criterion | double precision | criterion before the application of the step x |
|
||||||
|
! | criterion | double precision | criterion after the application of the step x |
|
||||||
|
! | criterion_model | double precision | predicted criterion after the application of x |
|
||||||
|
|
||||||
|
! Output:
|
||||||
|
! | rho | double precision | Agreement between the predicted criterion and the real new criterion |
|
||||||
|
! | cancel_step | logical | If the step must be cancelled |
|
||||||
|
|
||||||
|
|
||||||
|
subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute if the step should be cancelled
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! in
|
||||||
|
double precision, intent(in) :: prev_criterion, criterion, criterion_model
|
||||||
|
|
||||||
|
! inout
|
||||||
|
integer, intent(inout) :: nb_iter
|
||||||
|
|
||||||
|
! out
|
||||||
|
logical, intent(out) :: cancel_step
|
||||||
|
double precision, intent(out) :: rho
|
||||||
|
|
||||||
|
! Computes rho
|
||||||
|
call trust_region_rho(prev_criterion,criterion,criterion_model,rho)
|
||||||
|
|
||||||
|
if (nb_iter == 0) then
|
||||||
|
nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled
|
||||||
|
endif
|
||||||
|
|
||||||
|
! If rho < thresh_rho -> give something in output to cancel the step
|
||||||
|
if (rho >= thresh_rho) then !0.1d0) then
|
||||||
|
! The step is accepted
|
||||||
|
cancel_step = .False.
|
||||||
|
else
|
||||||
|
! The step is rejected
|
||||||
|
cancel_step = .True.
|
||||||
|
print*, '***********************'
|
||||||
|
print*, 'Step cancel : rho <', thresh_rho
|
||||||
|
print*, '***********************'
|
||||||
|
endif
|
||||||
|
|
||||||
|
end subroutine
|
593
src/utils_trust_region/algo_trust.org
Normal file
593
src/utils_trust_region/algo_trust.org
Normal file
@ -0,0 +1,593 @@
|
|||||||
|
* Algorithm for the trust region
|
||||||
|
|
||||||
|
step_in_trust_region:
|
||||||
|
Computes the step in the trust region (delta)
|
||||||
|
(automatically sets at the iteration 0 and which evolves during the
|
||||||
|
process in function of the evolution of rho). The step is computing by
|
||||||
|
constraining its norm with a lagrange multiplier.
|
||||||
|
Since the calculation of the step is based on the Newton method, an
|
||||||
|
estimation of the gain in energy is given using the Taylors series
|
||||||
|
truncated at the second order (criterion_model).
|
||||||
|
If (DABS(criterion-criterion_model) < 1d-12) then
|
||||||
|
must_exit = .True.
|
||||||
|
else
|
||||||
|
must_exit = .False.
|
||||||
|
|
||||||
|
This estimation of the gain in energy is used by
|
||||||
|
is_step_cancel_trust_region to say if the step is accepted or cancelled.
|
||||||
|
|
||||||
|
If the step must be cancelled, the calculation restart from the same
|
||||||
|
hessian and gradient and recomputes the step but in a smaller trust
|
||||||
|
region and so on until the step is accepted. If the step is accepted
|
||||||
|
the hessian and the gradient are recomputed to produce a new step.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f
|
||||||
|
! !### Initialization ###
|
||||||
|
! delta = 0d0
|
||||||
|
! nb_iter = 0 ! Must start at 0 !!!
|
||||||
|
! rho = 0.5d0
|
||||||
|
! not_converged = .True.
|
||||||
|
!
|
||||||
|
! ! ### TODO ###
|
||||||
|
! ! Compute the criterion before the loop
|
||||||
|
! call #your_criterion(prev_criterion)
|
||||||
|
!
|
||||||
|
! do while (not_converged)
|
||||||
|
! ! ### TODO ##
|
||||||
|
! ! Call your gradient
|
||||||
|
! ! Call you hessian
|
||||||
|
! call #your_gradient(v_grad) (1D array)
|
||||||
|
! call #your_hessian(H) (2D array)
|
||||||
|
!
|
||||||
|
! ! ### TODO ###
|
||||||
|
! ! Diagonalization of the hessian
|
||||||
|
! call diagonalization_hessian(n,H,e_val,w)
|
||||||
|
!
|
||||||
|
! cancel_step = .True. ! To enter in the loop just after
|
||||||
|
! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
|
||||||
|
! do while (cancel_step)
|
||||||
|
!
|
||||||
|
! ! Hessian,gradient,Criterion -> x
|
||||||
|
! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit)
|
||||||
|
!
|
||||||
|
! if (must_exit) then
|
||||||
|
! ! ### Message ###
|
||||||
|
! ! if step_in_trust_region sets must_exit on true for numerical reasons
|
||||||
|
! print*,'algo_trust1 sends the message : Exit'
|
||||||
|
! !### exit ###
|
||||||
|
! endif
|
||||||
|
!
|
||||||
|
! !### TODO ###
|
||||||
|
! ! Compute x -> m_x
|
||||||
|
! ! Compute m_x -> R
|
||||||
|
! ! Apply R and keep the previous MOs...
|
||||||
|
! ! Update/touch
|
||||||
|
! ! Compute the new criterion/energy -> criterion
|
||||||
|
!
|
||||||
|
! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x)
|
||||||
|
! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R)
|
||||||
|
! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos)
|
||||||
|
!
|
||||||
|
! TOUCH #your_variables
|
||||||
|
!
|
||||||
|
! call #your_criterion(criterion)
|
||||||
|
!
|
||||||
|
! ! Criterion -> step accepted or rejected
|
||||||
|
! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
|
||||||
|
!
|
||||||
|
! ! ### TODO ###
|
||||||
|
! !if (cancel_step) then
|
||||||
|
! ! Cancel the previous step (mo_coef = prev_mos if you keep them...)
|
||||||
|
! !endif
|
||||||
|
! #if (cancel_step) then
|
||||||
|
! #mo_coef = prev_mos
|
||||||
|
! #endif
|
||||||
|
!
|
||||||
|
! enddo
|
||||||
|
!
|
||||||
|
! !call save_mos() !### depend of the time for 1 iteration
|
||||||
|
!
|
||||||
|
! ! To exit the external loop if must_exit = .True.
|
||||||
|
! if (must_exit) then
|
||||||
|
! !### exit ###
|
||||||
|
! endif
|
||||||
|
!
|
||||||
|
! ! Step accepted, nb iteration + 1
|
||||||
|
! nb_iter = nb_iter + 1
|
||||||
|
!
|
||||||
|
! ! ### TODO ###
|
||||||
|
! !if (###Conditions###) then
|
||||||
|
! ! no_converged = .False.
|
||||||
|
! !endif
|
||||||
|
! #if (#your_conditions) then
|
||||||
|
! # not_converged = .False.
|
||||||
|
! #endif
|
||||||
|
!
|
||||||
|
! enddo
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
Variables:
|
||||||
|
|
||||||
|
Input:
|
||||||
|
| n | integer | m*(m-1)/2 |
|
||||||
|
| m | integer | number of mo in the mo_class |
|
||||||
|
| H(n,n) | double precision | Hessian |
|
||||||
|
| v_grad(n) | double precision | Gradient |
|
||||||
|
| W(n,n) | double precision | Eigenvectors of the hessian |
|
||||||
|
| e_val(n) | double precision | Eigenvalues of the hessian |
|
||||||
|
| criterion | double precision | Actual criterion |
|
||||||
|
| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration |
|
||||||
|
| rho | double precision | Given by is_step_cancel_trus_region |
|
||||||
|
| | | Agreement between the real function and the Taylor series (2nd order) |
|
||||||
|
| nb_iter | integer | Actual number of iterations |
|
||||||
|
|
||||||
|
Input/output:
|
||||||
|
| delta | double precision | Radius of the trust region |
|
||||||
|
|
||||||
|
Output:
|
||||||
|
| criterion_model | double precision | Predicted criterion after the rotation |
|
||||||
|
| x(n) | double precision | Step |
|
||||||
|
| must_exit | logical | If the program must exit the loop |
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f
|
||||||
|
subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute the step and the expected criterion/energy after the step
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer, intent(in) :: n, nb_iter
|
||||||
|
double precision, intent(in) :: H(n,n), W(n,n), v_grad(n)
|
||||||
|
double precision, intent(in) :: rho, prev_criterion
|
||||||
|
|
||||||
|
! inout
|
||||||
|
double precision, intent(inout) :: delta, e_val(n)
|
||||||
|
|
||||||
|
! out
|
||||||
|
double precision, intent(out) :: criterion_model, x(n)
|
||||||
|
logical, intent(out) :: must_exit
|
||||||
|
|
||||||
|
! internal
|
||||||
|
integer :: info
|
||||||
|
|
||||||
|
must_exit = .False.
|
||||||
|
|
||||||
|
call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta)
|
||||||
|
|
||||||
|
call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model)
|
||||||
|
|
||||||
|
! exit if DABS(prev_criterion - criterion_model) < 1d-12
|
||||||
|
if (DABS(prev_criterion - criterion_model) < thresh_model) then
|
||||||
|
print*,''
|
||||||
|
print*,'###############################################################################'
|
||||||
|
print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region'
|
||||||
|
print*,'###############################################################################'
|
||||||
|
print*,''
|
||||||
|
must_exit = .True.
|
||||||
|
endif
|
||||||
|
|
||||||
|
if (delta < thresh_delta) then
|
||||||
|
print*,''
|
||||||
|
print*,'##############################################'
|
||||||
|
print*,'Delta <', thresh_delta, 'stop the trust region'
|
||||||
|
print*,'##############################################'
|
||||||
|
print*,''
|
||||||
|
must_exit = .True.
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Add after the call to this subroutine, a statement:
|
||||||
|
! "if (must_exit) then
|
||||||
|
! exit
|
||||||
|
! endif"
|
||||||
|
! in order to exit the optimization loop
|
||||||
|
|
||||||
|
end subroutine
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
Variables:
|
||||||
|
|
||||||
|
Input:
|
||||||
|
| nb_iter | integer | actual number of iterations |
|
||||||
|
| prev_criterion | double precision | criterion before the application of the step x |
|
||||||
|
| criterion | double precision | criterion after the application of the step x |
|
||||||
|
| criterion_model | double precision | predicted criterion after the application of x |
|
||||||
|
|
||||||
|
Output:
|
||||||
|
| rho | double precision | Agreement between the predicted criterion and the real new criterion |
|
||||||
|
| cancel_step | logical | If the step must be cancelled |
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f
|
||||||
|
subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute if the step should be cancelled
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! in
|
||||||
|
double precision, intent(in) :: prev_criterion, criterion, criterion_model
|
||||||
|
|
||||||
|
! inout
|
||||||
|
integer, intent(inout) :: nb_iter
|
||||||
|
|
||||||
|
! out
|
||||||
|
logical, intent(out) :: cancel_step
|
||||||
|
double precision, intent(out) :: rho
|
||||||
|
|
||||||
|
! Computes rho
|
||||||
|
call trust_region_rho(prev_criterion,criterion,criterion_model,rho)
|
||||||
|
|
||||||
|
if (nb_iter == 0) then
|
||||||
|
nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled
|
||||||
|
endif
|
||||||
|
|
||||||
|
! If rho < thresh_rho -> give something in output to cancel the step
|
||||||
|
if (rho >= thresh_rho) then !0.1d0) then
|
||||||
|
! The step is accepted
|
||||||
|
cancel_step = .False.
|
||||||
|
else
|
||||||
|
! The step is rejected
|
||||||
|
cancel_step = .True.
|
||||||
|
print*, '***********************'
|
||||||
|
print*, 'Step cancel : rho <', thresh_rho
|
||||||
|
print*, '***********************'
|
||||||
|
endif
|
||||||
|
|
||||||
|
end subroutine
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
** Template for MOs
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt
|
||||||
|
subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list)
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! In
|
||||||
|
integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size)
|
||||||
|
|
||||||
|
! Out
|
||||||
|
! Rien ou un truc pour savoir si ça c'est bien passé
|
||||||
|
|
||||||
|
! Internal
|
||||||
|
double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:)
|
||||||
|
double precision, allocatable :: prev_mos(:,:)
|
||||||
|
double precision :: criterion, prev_criterion, criterion_model
|
||||||
|
double precision :: delta, rho
|
||||||
|
logical :: not_converged, cancel_step, must_exit, enforce_step_cancellation
|
||||||
|
integer :: nb_iter, info, nb_sub_iter
|
||||||
|
integer :: i,j,tmp_i,tmp_j
|
||||||
|
|
||||||
|
allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size))
|
||||||
|
allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num))
|
||||||
|
allocate(prev_mos(ao_num, mo_num))
|
||||||
|
|
||||||
|
! Provide the criterion, but unnecessary because it's done
|
||||||
|
! automatically
|
||||||
|
PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
|
||||||
|
|
||||||
|
! Initialization
|
||||||
|
delta = 0d0
|
||||||
|
nb_iter = 0 ! Must start at 0 !!!
|
||||||
|
rho = 0.5d0 ! Must start at 0.5
|
||||||
|
not_converged = .True. ! Must be true
|
||||||
|
|
||||||
|
! Compute the criterion before the loop
|
||||||
|
prev_criterion = C_PROVIDER
|
||||||
|
|
||||||
|
do while (not_converged)
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'******************'
|
||||||
|
print*,'Iteration', nb_iter
|
||||||
|
print*,'******************'
|
||||||
|
print*,''
|
||||||
|
|
||||||
|
! The new hessian and gradient are computed at the end of the previous iteration
|
||||||
|
! Diagonalization of the hessian
|
||||||
|
call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W)
|
||||||
|
|
||||||
|
cancel_step = .True. ! To enter in the loop just after
|
||||||
|
nb_sub_iter = 0
|
||||||
|
|
||||||
|
! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
|
||||||
|
do while (cancel_step)
|
||||||
|
|
||||||
|
print*,'-----------------------------'
|
||||||
|
print*,'Iteration:', nb_iter
|
||||||
|
print*,'Sub iteration:', nb_sub_iter
|
||||||
|
print*,'-----------------------------'
|
||||||
|
|
||||||
|
! Hessian,gradient,Criterion -> x
|
||||||
|
call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, &
|
||||||
|
prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit)
|
||||||
|
|
||||||
|
if (must_exit) then
|
||||||
|
! if step_in_trust_region sets must_exit on true for numerical reasons
|
||||||
|
print*,'trust_region_step_w_expected_e sent the message : Exit'
|
||||||
|
exit
|
||||||
|
endif
|
||||||
|
|
||||||
|
! 1D tmp -> 2D tmp
|
||||||
|
call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x)
|
||||||
|
|
||||||
|
! Rotation submatrix (square matrix tmp_list_size by tmp_list_size)
|
||||||
|
call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation)
|
||||||
|
|
||||||
|
if (enforce_step_cancellation) then
|
||||||
|
print*, 'Forces the step cancellation, too large error in the rotation matrix'
|
||||||
|
rho = 0d0
|
||||||
|
cycle
|
||||||
|
endif
|
||||||
|
|
||||||
|
! tmp_R to R, subspace to full space
|
||||||
|
call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R)
|
||||||
|
|
||||||
|
! Rotation of the MOs
|
||||||
|
call apply_mo_rotation(R, prev_mos)
|
||||||
|
|
||||||
|
! touch mo_coef
|
||||||
|
call clear_mo_map ! Only if you are using the bi-electronic integrals
|
||||||
|
! mo_coef becomes valid
|
||||||
|
! And avoid the recomputation of the providers which depend of mo_coef
|
||||||
|
TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
|
||||||
|
|
||||||
|
! To update the other parameters if needed
|
||||||
|
call #update_parameters()
|
||||||
|
|
||||||
|
! To enforce the program to provide new criterion after the update
|
||||||
|
! of the parameters
|
||||||
|
FREE C_PROVIDER
|
||||||
|
PROVIDE C_PROVIDER
|
||||||
|
criterion = C_PROVIDER
|
||||||
|
|
||||||
|
! Criterion -> step accepted or rejected
|
||||||
|
call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step)
|
||||||
|
|
||||||
|
! Cancellation of the step ?
|
||||||
|
if (cancel_step) then
|
||||||
|
! Replacement by the previous MOs
|
||||||
|
mo_coef = prev_mos
|
||||||
|
! call save_mos() ! depends of the time for 1 iteration
|
||||||
|
|
||||||
|
! No need to clear_mo_map since we don't recompute the gradient and the hessian
|
||||||
|
! mo_coef becomes valid
|
||||||
|
! Avoid the recomputation of the providers which depend of mo_coef
|
||||||
|
TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER
|
||||||
|
else
|
||||||
|
! The step is accepted:
|
||||||
|
! criterion -> prev criterion
|
||||||
|
|
||||||
|
! The replacement "criterion -> prev criterion" is already done
|
||||||
|
! in trust_region_rho, so if the criterion does not have a reason
|
||||||
|
! to change, it will change nothing for the criterion and will
|
||||||
|
! force the program to provide the new hessian, gradient and
|
||||||
|
! convergence criterion for the next iteration.
|
||||||
|
! But in the case of orbital optimization we diagonalize the CI
|
||||||
|
! matrix after the "FREE" statement, so the criterion will change
|
||||||
|
|
||||||
|
FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
|
||||||
|
PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
|
||||||
|
prev_criterion = C_PROVIDER
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
nb_sub_iter = nb_sub_iter + 1
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! call save_mos() ! depends of the time for 1 iteration
|
||||||
|
|
||||||
|
! To exit the external loop if must_exit = .True.
|
||||||
|
if (must_exit) then
|
||||||
|
exit
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Step accepted, nb iteration + 1
|
||||||
|
nb_iter = nb_iter + 1
|
||||||
|
|
||||||
|
! Provide the convergence criterion
|
||||||
|
! Provide the gradient and the hessian for the next iteration
|
||||||
|
PROVIDE cc_PROVIDER
|
||||||
|
|
||||||
|
! To exit
|
||||||
|
if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then
|
||||||
|
not_converged = .False.
|
||||||
|
endif
|
||||||
|
|
||||||
|
if (nb_iter > optimization_max_nb_iter) then
|
||||||
|
not_converged = .False.
|
||||||
|
endif
|
||||||
|
|
||||||
|
if (delta < thresh_delta) then
|
||||||
|
not_converged = .False.
|
||||||
|
endif
|
||||||
|
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Save the final MOs
|
||||||
|
call save_mos()
|
||||||
|
|
||||||
|
! Diagonalization of the hessian
|
||||||
|
! (To see the eigenvalues at the end of the optimization)
|
||||||
|
call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W)
|
||||||
|
|
||||||
|
deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos)
|
||||||
|
|
||||||
|
end
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
** Cartesian version
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt
|
||||||
|
subroutine algo_trust_cartesian_template(tmp_n)
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! In
|
||||||
|
integer, intent(in) :: tmp_n
|
||||||
|
|
||||||
|
! Out
|
||||||
|
! Rien ou un truc pour savoir si ça c'est bien passé
|
||||||
|
|
||||||
|
! Internal
|
||||||
|
double precision, allocatable :: e_val(:), W(:,:), tmp_x(:)
|
||||||
|
double precision :: criterion, prev_criterion, criterion_model
|
||||||
|
double precision :: delta, rho
|
||||||
|
logical :: not_converged, cancel_step, must_exit
|
||||||
|
integer :: nb_iter, nb_sub_iter
|
||||||
|
integer :: i,j
|
||||||
|
|
||||||
|
allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n))
|
||||||
|
|
||||||
|
PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER
|
||||||
|
|
||||||
|
! Initialization
|
||||||
|
delta = 0d0
|
||||||
|
nb_iter = 0 ! Must start at 0 !!!
|
||||||
|
rho = 0.5d0 ! Must start at 0.5
|
||||||
|
not_converged = .True. ! Must be true
|
||||||
|
|
||||||
|
! Compute the criterion before the loop
|
||||||
|
prev_criterion = C_PROVIDER
|
||||||
|
|
||||||
|
do while (not_converged)
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'******************'
|
||||||
|
print*,'Iteration', nb_iter
|
||||||
|
print*,'******************'
|
||||||
|
print*,''
|
||||||
|
|
||||||
|
if (nb_iter > 0) then
|
||||||
|
PROVIDE H_PROVIDER g_PROVIDER
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Diagonalization of the hessian
|
||||||
|
call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W)
|
||||||
|
|
||||||
|
cancel_step = .True. ! To enter in the loop just after
|
||||||
|
nb_sub_iter = 0
|
||||||
|
|
||||||
|
! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho
|
||||||
|
do while (cancel_step)
|
||||||
|
|
||||||
|
print*,'-----------------------------'
|
||||||
|
print*,'Iteration:', nb_iter
|
||||||
|
print*,'Sub iteration:', nb_sub_iter
|
||||||
|
print*,'-----------------------------'
|
||||||
|
|
||||||
|
! Hessian,gradient,Criterion -> x
|
||||||
|
call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, &
|
||||||
|
prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit)
|
||||||
|
|
||||||
|
if (must_exit) then
|
||||||
|
! if step_in_trust_region sets must_exit on true for numerical reasons
|
||||||
|
print*,'trust_region_step_w_expected_e sent the message : Exit'
|
||||||
|
exit
|
||||||
|
endif
|
||||||
|
|
||||||
|
! New coordinates, check the sign
|
||||||
|
X_PROVIDER = X_PROVIDER - tmp_x
|
||||||
|
|
||||||
|
! touch X_PROVIDER
|
||||||
|
TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER
|
||||||
|
|
||||||
|
! To update the other parameters if needed
|
||||||
|
call #update_parameters()
|
||||||
|
|
||||||
|
! New criterion
|
||||||
|
PROVIDE C_PROVIDER ! Unnecessary
|
||||||
|
criterion = C_PROVIDER
|
||||||
|
|
||||||
|
! Criterion -> step accepted or rejected
|
||||||
|
call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step)
|
||||||
|
|
||||||
|
! Cancel the previous step
|
||||||
|
if (cancel_step) then
|
||||||
|
! Replacement by the previous coordinates, check the sign
|
||||||
|
X_PROVIDER = X_PROVIDER + tmp_x
|
||||||
|
|
||||||
|
! Avoid the recomputation of the hessian and the gradient
|
||||||
|
TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER
|
||||||
|
endif
|
||||||
|
|
||||||
|
nb_sub_iter = nb_sub_iter + 1
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! To exit the external loop if must_exit = .True.
|
||||||
|
if (must_exit) then
|
||||||
|
exit
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Step accepted, nb iteration + 1
|
||||||
|
nb_iter = nb_iter + 1
|
||||||
|
|
||||||
|
PROVIDE cc_PROVIDER
|
||||||
|
|
||||||
|
! To exit
|
||||||
|
if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then
|
||||||
|
not_converged = .False.
|
||||||
|
endif
|
||||||
|
|
||||||
|
if (nb_iter > optimization_max_nb_iter) then
|
||||||
|
not_converged = .False.
|
||||||
|
endif
|
||||||
|
|
||||||
|
if (delta < thresh_delta) then
|
||||||
|
not_converged = .False.
|
||||||
|
endif
|
||||||
|
|
||||||
|
enddo
|
||||||
|
|
||||||
|
deallocate(e_val, W, tmp_x)
|
||||||
|
|
||||||
|
end
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
** Script template
|
||||||
|
#+BEGIN_SRC bash :tangle script_template_mos.sh
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
your_file=
|
||||||
|
|
||||||
|
your_C_PROVIDER=
|
||||||
|
your_H_PROVIDER=
|
||||||
|
your_g_PROVIDER=
|
||||||
|
your_cc_PROVIDER=
|
||||||
|
|
||||||
|
sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file
|
||||||
|
sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file
|
||||||
|
sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file
|
||||||
|
sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
#+BEGIN_SRC bash :tangle script_template_xyz.sh
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
your_file=
|
||||||
|
|
||||||
|
your_C_PROVIDER=
|
||||||
|
your_X_PROVIDER=
|
||||||
|
your_H_PROVIDER=
|
||||||
|
your_g_PROVIDER=
|
||||||
|
your_cc_PROVIDER=
|
||||||
|
|
||||||
|
sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file
|
||||||
|
sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file
|
||||||
|
sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file
|
||||||
|
sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file
|
||||||
|
sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file
|
||||||
|
#+END_SRC
|
||||||
|
|
85
src/utils_trust_region/apply_mo_rotation.irp.f
Normal file
85
src/utils_trust_region/apply_mo_rotation.irp.f
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
! Apply MO rotation
|
||||||
|
! Subroutine to apply the rotation matrix to the coefficients of the
|
||||||
|
! MOs.
|
||||||
|
|
||||||
|
! New MOs = Old MOs . Rotation matrix
|
||||||
|
|
||||||
|
! *Compute the new MOs with the previous MOs and a rotation matrix*
|
||||||
|
|
||||||
|
! Provided:
|
||||||
|
! | mo_num | integer | number of MOs |
|
||||||
|
! | ao_num | integer | number of AOs |
|
||||||
|
! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs |
|
||||||
|
|
||||||
|
! Intent in:
|
||||||
|
! | R(mo_num,mo_num) | double precision | rotation matrix |
|
||||||
|
|
||||||
|
! Intent out:
|
||||||
|
! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation |
|
||||||
|
|
||||||
|
! Internal:
|
||||||
|
! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation |
|
||||||
|
! | i,j | integer | indexes |
|
||||||
|
|
||||||
|
subroutine apply_mo_rotation(R,prev_mos)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute the new MOs knowing the rotation matrix
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! in
|
||||||
|
double precision, intent(in) :: R(mo_num,mo_num)
|
||||||
|
|
||||||
|
! out
|
||||||
|
double precision, intent(out) :: prev_mos(ao_num,mo_num)
|
||||||
|
|
||||||
|
! internal
|
||||||
|
double precision, allocatable :: new_mos(:,:)
|
||||||
|
integer :: i,j
|
||||||
|
double precision :: t1,t2,t3
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'---apply_mo_rotation---'
|
||||||
|
|
||||||
|
call wall_time(t1)
|
||||||
|
|
||||||
|
! Allocation
|
||||||
|
allocate(new_mos(ao_num,mo_num))
|
||||||
|
|
||||||
|
! Calculation
|
||||||
|
|
||||||
|
! Product of old MOs (mo_coef) by Rotation matrix (R)
|
||||||
|
call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1))
|
||||||
|
|
||||||
|
prev_mos = mo_coef
|
||||||
|
mo_coef = new_mos
|
||||||
|
|
||||||
|
!if (debug) then
|
||||||
|
! print*,'New mo_coef : '
|
||||||
|
! do i = 1, mo_num
|
||||||
|
! write(*,'(100(F10.5))') mo_coef(i,:)
|
||||||
|
! enddo
|
||||||
|
!endif
|
||||||
|
|
||||||
|
! Save the new MOs and change the label
|
||||||
|
mo_label = 'MCSCF'
|
||||||
|
!call save_mos
|
||||||
|
call ezfio_set_determinants_mo_label(mo_label)
|
||||||
|
|
||||||
|
!print*,'Done, MOs saved'
|
||||||
|
|
||||||
|
! Deallocation, end
|
||||||
|
deallocate(new_mos)
|
||||||
|
|
||||||
|
call wall_time(t2)
|
||||||
|
t3 = t2 - t1
|
||||||
|
print*,'Time in apply mo rotation:', t3
|
||||||
|
print*,'---End apply_mo_rotation---'
|
||||||
|
|
||||||
|
end subroutine
|
86
src/utils_trust_region/apply_mo_rotation.org
Normal file
86
src/utils_trust_region/apply_mo_rotation.org
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
* Apply MO rotation
|
||||||
|
Subroutine to apply the rotation matrix to the coefficients of the
|
||||||
|
MOs.
|
||||||
|
|
||||||
|
New MOs = Old MOs . Rotation matrix
|
||||||
|
|
||||||
|
*Compute the new MOs with the previous MOs and a rotation matrix*
|
||||||
|
|
||||||
|
Provided:
|
||||||
|
| mo_num | integer | number of MOs |
|
||||||
|
| ao_num | integer | number of AOs |
|
||||||
|
| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs |
|
||||||
|
|
||||||
|
Intent in:
|
||||||
|
| R(mo_num,mo_num) | double precision | rotation matrix |
|
||||||
|
|
||||||
|
Intent out:
|
||||||
|
| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation |
|
||||||
|
|
||||||
|
Internal:
|
||||||
|
| new_mos(ao_num,mo_num) | double precision | MOs after the rotation |
|
||||||
|
| i,j | integer | indexes |
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f
|
||||||
|
subroutine apply_mo_rotation(R,prev_mos)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute the new MOs knowing the rotation matrix
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! in
|
||||||
|
double precision, intent(in) :: R(mo_num,mo_num)
|
||||||
|
|
||||||
|
! out
|
||||||
|
double precision, intent(out) :: prev_mos(ao_num,mo_num)
|
||||||
|
|
||||||
|
! internal
|
||||||
|
double precision, allocatable :: new_mos(:,:)
|
||||||
|
integer :: i,j
|
||||||
|
double precision :: t1,t2,t3
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'---apply_mo_rotation---'
|
||||||
|
|
||||||
|
call wall_time(t1)
|
||||||
|
|
||||||
|
! Allocation
|
||||||
|
allocate(new_mos(ao_num,mo_num))
|
||||||
|
|
||||||
|
! Calculation
|
||||||
|
|
||||||
|
! Product of old MOs (mo_coef) by Rotation matrix (R)
|
||||||
|
call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1))
|
||||||
|
|
||||||
|
prev_mos = mo_coef
|
||||||
|
mo_coef = new_mos
|
||||||
|
|
||||||
|
!if (debug) then
|
||||||
|
! print*,'New mo_coef : '
|
||||||
|
! do i = 1, mo_num
|
||||||
|
! write(*,'(100(F10.5))') mo_coef(i,:)
|
||||||
|
! enddo
|
||||||
|
!endif
|
||||||
|
|
||||||
|
! Save the new MOs and change the label
|
||||||
|
mo_label = 'MCSCF'
|
||||||
|
!call save_mos
|
||||||
|
call ezfio_set_determinants_mo_label(mo_label)
|
||||||
|
|
||||||
|
!print*,'Done, MOs saved'
|
||||||
|
|
||||||
|
! Deallocation, end
|
||||||
|
deallocate(new_mos)
|
||||||
|
|
||||||
|
call wall_time(t2)
|
||||||
|
t3 = t2 - t1
|
||||||
|
print*,'Time in apply mo rotation:', t3
|
||||||
|
print*,'---End apply_mo_rotation---'
|
||||||
|
|
||||||
|
end subroutine
|
||||||
|
#+END_SRC
|
61
src/utils_trust_region/mat_to_vec_index.irp.f
Normal file
61
src/utils_trust_region/mat_to_vec_index.irp.f
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
! Matrix to vector index
|
||||||
|
|
||||||
|
! *Compute the index i of a vector element from the indexes p,q of a
|
||||||
|
! matrix element*
|
||||||
|
|
||||||
|
! Lower diagonal matrix (p,q), p > q -> vector (i)
|
||||||
|
|
||||||
|
! If a matrix is antisymmetric it can be reshaped as a vector. And the
|
||||||
|
! vector can be reshaped as an antisymmetric matrix
|
||||||
|
|
||||||
|
! \begin{align*}
|
||||||
|
! \begin{pmatrix}
|
||||||
|
! 0 & -1 & -2 & -4 \\
|
||||||
|
! 1 & 0 & -3 & -5 \\
|
||||||
|
! 2 & 3 & 0 & -6 \\
|
||||||
|
! 4 & 5 & 6 & 0
|
||||||
|
! \end{pmatrix}
|
||||||
|
! \Leftrightarrow
|
||||||
|
! \begin{pmatrix}
|
||||||
|
! 1 & 2 & 3 & 4 & 5 & 6
|
||||||
|
! \end{pmatrix}
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! !!! Here the algorithm only work for the lower diagonal !!!
|
||||||
|
|
||||||
|
! Input:
|
||||||
|
! | p,q | integer | indexes of a matrix element in the lower diagonal |
|
||||||
|
! | | | p > q, q -> column |
|
||||||
|
! | | | p -> row, |
|
||||||
|
! | | | q -> column |
|
||||||
|
|
||||||
|
! Input:
|
||||||
|
! | i | integer | corresponding index in the vector |
|
||||||
|
|
||||||
|
|
||||||
|
subroutine mat_to_vec_index(p,q,i)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer, intent(in) :: p,q
|
||||||
|
|
||||||
|
! out
|
||||||
|
integer, intent(out) :: i
|
||||||
|
|
||||||
|
! internal
|
||||||
|
integer :: a,b
|
||||||
|
double precision :: da
|
||||||
|
|
||||||
|
! Calculation
|
||||||
|
|
||||||
|
a = p-1
|
||||||
|
b = a*(a-1)/2
|
||||||
|
|
||||||
|
i = q+b
|
||||||
|
|
||||||
|
end subroutine
|
63
src/utils_trust_region/mat_to_vec_index.org
Normal file
63
src/utils_trust_region/mat_to_vec_index.org
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
* Matrix to vector index
|
||||||
|
|
||||||
|
*Compute the index i of a vector element from the indexes p,q of a
|
||||||
|
matrix element*
|
||||||
|
|
||||||
|
Lower diagonal matrix (p,q), p > q -> vector (i)
|
||||||
|
|
||||||
|
If a matrix is antisymmetric it can be reshaped as a vector. And the
|
||||||
|
vector can be reshaped as an antisymmetric matrix
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
\begin{pmatrix}
|
||||||
|
0 & -1 & -2 & -4 \\
|
||||||
|
1 & 0 & -3 & -5 \\
|
||||||
|
2 & 3 & 0 & -6 \\
|
||||||
|
4 & 5 & 6 & 0
|
||||||
|
\end{pmatrix}
|
||||||
|
\Leftrightarrow
|
||||||
|
\begin{pmatrix}
|
||||||
|
1 & 2 & 3 & 4 & 5 & 6
|
||||||
|
\end{pmatrix}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
!!! Here the algorithm only work for the lower diagonal !!!
|
||||||
|
|
||||||
|
Input:
|
||||||
|
| p,q | integer | indexes of a matrix element in the lower diagonal |
|
||||||
|
| | | p > q, q -> column |
|
||||||
|
| | | p -> row, |
|
||||||
|
| | | q -> column |
|
||||||
|
|
||||||
|
Input:
|
||||||
|
| i | integer | corresponding index in the vector |
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f
|
||||||
|
subroutine mat_to_vec_index(p,q,i)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer, intent(in) :: p,q
|
||||||
|
|
||||||
|
! out
|
||||||
|
integer, intent(out) :: i
|
||||||
|
|
||||||
|
! internal
|
||||||
|
integer :: a,b
|
||||||
|
double precision :: da
|
||||||
|
|
||||||
|
! Calculation
|
||||||
|
|
||||||
|
a = p-1
|
||||||
|
b = a*(a-1)/2
|
||||||
|
|
||||||
|
i = q+b
|
||||||
|
|
||||||
|
end subroutine
|
||||||
|
#+END_SRC
|
||||||
|
|
2
src/utils_trust_region/pi.h
Normal file
2
src/utils_trust_region/pi.h
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
!logical, parameter :: debug=.False.
|
||||||
|
double precision, parameter :: pi = 3.1415926535897932d0
|
443
src/utils_trust_region/rotation_matrix.irp.f
Normal file
443
src/utils_trust_region/rotation_matrix.irp.f
Normal file
@ -0,0 +1,443 @@
|
|||||||
|
! Rotation matrix
|
||||||
|
|
||||||
|
! *Build a rotation matrix from an antisymmetric matrix*
|
||||||
|
|
||||||
|
! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as :
|
||||||
|
! $$
|
||||||
|
! \textbf{R}=\exp(\textbf{A})
|
||||||
|
! $$
|
||||||
|
|
||||||
|
! So :
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{R}=& \exp(\textbf{A}) \\
|
||||||
|
! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\
|
||||||
|
! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A}
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! With :
|
||||||
|
! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$
|
||||||
|
! $\tau$ : $\sqrt{-x}$
|
||||||
|
! $x$ : eigenvalues of $\textbf{A}^2$
|
||||||
|
|
||||||
|
! Input:
|
||||||
|
! | A(n,n) | double precision | antisymmetric matrix |
|
||||||
|
! | n | integer | number of columns of the A matrix |
|
||||||
|
! | LDA | integer | specifies the leading dimension of A, must be at least max(1,n) |
|
||||||
|
! | LDR | integer | specifies the leading dimension of R, must be at least max(1,n) |
|
||||||
|
|
||||||
|
! Output:
|
||||||
|
! | R(n,n) | double precision | Rotation matrix |
|
||||||
|
! | info | integer | if info = 0, the execution is successful |
|
||||||
|
! | | | if info = k, the k-th parameter has an illegal value |
|
||||||
|
! | | | if info = -k, the algorithm failed |
|
||||||
|
|
||||||
|
! Internal:
|
||||||
|
! | B(n,n) | double precision | B = A.A |
|
||||||
|
! | work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) |
|
||||||
|
! | lwork | integer | dimension of the syev work array >= max(1, 3n-1) |
|
||||||
|
! | W(n,n) | double precision | eigenvectors of B |
|
||||||
|
! | e_val(n) | double precision | eigenvalues of B |
|
||||||
|
! | m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B |
|
||||||
|
! | cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values |
|
||||||
|
! | sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values |
|
||||||
|
! | tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values |
|
||||||
|
! | part_1(n,n) | double precision | matrix W.cos_tau.W^t |
|
||||||
|
! | part_1a(n,n) | double precision | matrix cos_tau.W^t |
|
||||||
|
! | part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A |
|
||||||
|
! | part_2a(n,n) | double precision | matrix W^t.A |
|
||||||
|
! | part_2b(n,n) | double precision | matrix sin_tau.W^t.A |
|
||||||
|
! | part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A |
|
||||||
|
! | RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 |
|
||||||
|
! | norm | integer | norm of R.R^t-1, must be equal to 0 |
|
||||||
|
! | i,j | integer | indexes |
|
||||||
|
|
||||||
|
! Functions:
|
||||||
|
! | dnrm2 | double precision | Lapack function, compute the norm of a matrix |
|
||||||
|
! | disnan | logical | Lapack function, check if an element is NaN |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation)
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Rotation matrix to rotate the molecular orbitals.
|
||||||
|
! If the rotation is too large the transformation is not unitary and must be cancelled.
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer, intent(in) :: n,LDA,LDR
|
||||||
|
double precision, intent(inout) :: A(LDA,n)
|
||||||
|
|
||||||
|
! out
|
||||||
|
double precision, intent(out) :: R(LDR,n)
|
||||||
|
integer, intent(out) :: info
|
||||||
|
logical, intent(out) :: enforce_step_cancellation
|
||||||
|
|
||||||
|
! internal
|
||||||
|
double precision, allocatable :: B(:,:)
|
||||||
|
double precision, allocatable :: work(:,:)
|
||||||
|
double precision, allocatable :: W(:,:), e_val(:)
|
||||||
|
double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:)
|
||||||
|
double precision, allocatable :: part_1(:,:),part_1a(:,:)
|
||||||
|
double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:)
|
||||||
|
double precision, allocatable :: RR_t(:,:)
|
||||||
|
integer :: i,j
|
||||||
|
integer :: info2, lwork ! for dsyev
|
||||||
|
double precision :: norm, max_elem, max_elem_A, t1,t2,t3
|
||||||
|
|
||||||
|
! function
|
||||||
|
double precision :: dnrm2
|
||||||
|
logical :: disnan
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'---rotation_matrix---'
|
||||||
|
|
||||||
|
call wall_time(t1)
|
||||||
|
|
||||||
|
! Allocation
|
||||||
|
allocate(B(n,n))
|
||||||
|
allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n))
|
||||||
|
allocate(W(n,n),e_val(n))
|
||||||
|
allocate(part_1(n,n),part_1a(n,n))
|
||||||
|
allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n))
|
||||||
|
allocate(RR_t(n,n))
|
||||||
|
|
||||||
|
! Pre-conditions
|
||||||
|
|
||||||
|
! Initialization
|
||||||
|
info=0
|
||||||
|
enforce_step_cancellation = .False.
|
||||||
|
|
||||||
|
! Size of matrix A must be at least 1 by 1
|
||||||
|
if (n<1) then
|
||||||
|
info = 3
|
||||||
|
print*, 'WARNING: invalid parameter 5'
|
||||||
|
print*, 'n<1'
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Leading dimension of A must be >= n
|
||||||
|
if (LDA < n) then
|
||||||
|
info = 25
|
||||||
|
print*, 'WARNING: invalid parameter 2 or 5'
|
||||||
|
print*, 'LDA < n'
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Leading dimension of A must be >= n
|
||||||
|
if (LDR < n) then
|
||||||
|
info = 4
|
||||||
|
print*, 'WARNING: invalid parameter 4'
|
||||||
|
print*, 'LDR < n'
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Matrix elements of A must by non-NaN
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if (disnan(A(i,j))) then
|
||||||
|
info=1
|
||||||
|
print*, 'WARNING: invalid parameter 1'
|
||||||
|
print*, 'NaN element in A matrix'
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
do i = 1, n
|
||||||
|
if (A(i,i) /= 0d0) then
|
||||||
|
print*, 'WARNING: matrix A is not antisymmetric'
|
||||||
|
print*, 'Non 0 element on the diagonal', i, A(i,i)
|
||||||
|
call ABORT
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if (A(i,j)+A(j,i)>1d-16) then
|
||||||
|
print*, 'WANRING: matrix A is not antisymmetric'
|
||||||
|
print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i)
|
||||||
|
print*, 'diff:', A(i,j)+A(j,i)
|
||||||
|
call ABORT
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Fix for too big elements ! bad idea better to cancel if the error is too big
|
||||||
|
!do j = 1, n
|
||||||
|
! do i = 1, n
|
||||||
|
! A(i,j) = mod(A(i,j),2d0*pi)
|
||||||
|
! if (dabs(A(i,j)) > pi) then
|
||||||
|
! A(i,j) = 0d0
|
||||||
|
! endif
|
||||||
|
! enddo
|
||||||
|
!enddo
|
||||||
|
|
||||||
|
max_elem_A = 0d0
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if (ABS(A(i,j)) > ABS(max_elem_A)) then
|
||||||
|
max_elem_A = A(i,j)
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
print*,'max element in A', max_elem_A
|
||||||
|
|
||||||
|
if (ABS(max_elem_A) > 2 * pi) then
|
||||||
|
print*,''
|
||||||
|
print*,'WARNING: ABS(max_elem_A) > 2 pi '
|
||||||
|
print*,''
|
||||||
|
endif
|
||||||
|
|
||||||
|
! B=A.A
|
||||||
|
! - Calculation of the matrix $\textbf{B} = \textbf{A}^2$
|
||||||
|
! - Diagonalization of $\textbf{B}$
|
||||||
|
! W, the eigenvectors
|
||||||
|
! e_val, the eigenvalues
|
||||||
|
|
||||||
|
|
||||||
|
! Compute B=A.A
|
||||||
|
|
||||||
|
call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1))
|
||||||
|
|
||||||
|
! Copy B in W, diagonalization will put the eigenvectors in W
|
||||||
|
W=B
|
||||||
|
|
||||||
|
! Diagonalization of B
|
||||||
|
! Eigenvalues -> e_val
|
||||||
|
! Eigenvectors -> W
|
||||||
|
lwork = 3*n-1
|
||||||
|
allocate(work(lwork,n))
|
||||||
|
|
||||||
|
print*,'Starting diagonalization ...'
|
||||||
|
|
||||||
|
call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2)
|
||||||
|
|
||||||
|
deallocate(work)
|
||||||
|
|
||||||
|
if (info2 == 0) then
|
||||||
|
print*, 'Diagonalization : Done'
|
||||||
|
elseif (info2 < 0) then
|
||||||
|
print*, 'WARNING: error in the diagonalization'
|
||||||
|
print*, 'Illegal value of the ', info2,'-th parameter'
|
||||||
|
else
|
||||||
|
print*, "WARNING: Diagonalization failed to converge"
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Tau^-1, cos(tau), sin(tau)
|
||||||
|
! $$\tau = \sqrt{-x}$$
|
||||||
|
! - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$
|
||||||
|
! - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$
|
||||||
|
! - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$
|
||||||
|
! These matrices are diagonals
|
||||||
|
|
||||||
|
! Diagonal matrix m_diag
|
||||||
|
do j = 1, n
|
||||||
|
if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems
|
||||||
|
e_val(j) = 0.d0
|
||||||
|
else
|
||||||
|
e_val(j) = - e_val(j)
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
|
||||||
|
m_diag = 0.d0
|
||||||
|
do i = 1, n
|
||||||
|
m_diag(i,i) = e_val(i)
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! cos_tau
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if (i==j) then
|
||||||
|
cos_tau(i,j) = dcos(dsqrt(e_val(i)))
|
||||||
|
else
|
||||||
|
cos_tau(i,j) = 0d0
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! sin_tau
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if (i==j) then
|
||||||
|
sin_tau(i,j) = dsin(dsqrt(e_val(i)))
|
||||||
|
else
|
||||||
|
sin_tau(i,j) = 0d0
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Debug, display the cos_tau and sin_tau matrix
|
||||||
|
!if (debug) then
|
||||||
|
! print*, 'cos_tau'
|
||||||
|
! do i = 1, n
|
||||||
|
! print*, cos_tau(i,:)
|
||||||
|
! enddo
|
||||||
|
! print*, 'sin_tau'
|
||||||
|
! do i = 1, n
|
||||||
|
! print*, sin_tau(i,:)
|
||||||
|
! enddo
|
||||||
|
!endif
|
||||||
|
|
||||||
|
! tau^-1
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small
|
||||||
|
tau_m1(i,j) = 1d0/(dsqrt(e_val(i)))
|
||||||
|
else
|
||||||
|
tau_m1(i,j) = 0d0
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
max_elem = 0d0
|
||||||
|
do i = 1, n
|
||||||
|
if (ABS(tau_m1(i,i)) > ABS(max_elem)) then
|
||||||
|
max_elem = tau_m1(i,i)
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
print*,'max elem tau^-1:', max_elem
|
||||||
|
|
||||||
|
! Debug
|
||||||
|
!print*,'eigenvalues:'
|
||||||
|
!do i = 1, n
|
||||||
|
! print*, e_val(i)
|
||||||
|
!enddo
|
||||||
|
|
||||||
|
!Debug, display tau^-1
|
||||||
|
!if (debug) then
|
||||||
|
! print*, 'tau^-1'
|
||||||
|
! do i = 1, n
|
||||||
|
! print*,tau_m1(i,:)
|
||||||
|
! enddo
|
||||||
|
!endif
|
||||||
|
|
||||||
|
! Rotation matrix
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
|
||||||
|
! \end{align*}
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger}
|
||||||
|
! \end{align*}
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! First:
|
||||||
|
! part_1 = dgemm(W, dgemm(cos_tau, W^t))
|
||||||
|
! part_1a = dgemm(cos_tau, W^t)
|
||||||
|
! part_1 = dgemm(W, part_1a)
|
||||||
|
! And:
|
||||||
|
! part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A))))
|
||||||
|
! part_2a = dgemm(W^t, A)
|
||||||
|
! part_2b = dgemm(sin_tau, part_2a)
|
||||||
|
! part_2c = dgemm(tau_m1, part_2b)
|
||||||
|
! part_2 = dgemm(W, part_2c)
|
||||||
|
! Finally:
|
||||||
|
! Rotation matrix, R = part_1+part_2
|
||||||
|
|
||||||
|
! If $R$ is a rotation matrix:
|
||||||
|
! $R.R^T=R^T.R=\textbf{1}$
|
||||||
|
|
||||||
|
! part_1
|
||||||
|
call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1))
|
||||||
|
call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1))
|
||||||
|
|
||||||
|
! part_2
|
||||||
|
call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1))
|
||||||
|
call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1))
|
||||||
|
call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1))
|
||||||
|
call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1))
|
||||||
|
|
||||||
|
! Rotation matrix R
|
||||||
|
R = part_1 + part_2
|
||||||
|
|
||||||
|
! Matrix check
|
||||||
|
! R.R^t and R^t.R must be equal to identity matrix
|
||||||
|
do j = 1, n
|
||||||
|
do i=1,n
|
||||||
|
if (i==j) then
|
||||||
|
RR_t(i,j) = 1d0
|
||||||
|
else
|
||||||
|
RR_t(i,j) = 0d0
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1))
|
||||||
|
|
||||||
|
norm = dnrm2(n*n,RR_t,1)
|
||||||
|
print*, 'Rotation matrix check, norm R.R^T = ', norm
|
||||||
|
|
||||||
|
! Debug
|
||||||
|
!if (debug) then
|
||||||
|
! print*, 'RR_t'
|
||||||
|
! do i = 1, n
|
||||||
|
! print*, RR_t(i,:)
|
||||||
|
! enddo
|
||||||
|
!endif
|
||||||
|
|
||||||
|
! Post conditions
|
||||||
|
|
||||||
|
! Check if R.R^T=1
|
||||||
|
max_elem = 0d0
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if (ABS(RR_t(i,j)) > ABS(max_elem)) then
|
||||||
|
max_elem = RR_t(i,j)
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
print*, 'Max error in R.R^T:', max_elem
|
||||||
|
print*, 'e_val(1):', e_val(1)
|
||||||
|
print*, 'e_val(n):', e_val(n)
|
||||||
|
print*, 'max elem in A:', max_elem_A
|
||||||
|
|
||||||
|
if (ABS(max_elem) > 1d-12) then
|
||||||
|
print*, 'WARNING: max error in R.R^T > 1d-12'
|
||||||
|
print*, 'Enforce the step cancellation'
|
||||||
|
enforce_step_cancellation = .True.
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Matrix elements of R must by non-NaN
|
||||||
|
do j = 1,n
|
||||||
|
do i = 1,LDR
|
||||||
|
if (disnan(R(i,j))) then
|
||||||
|
info = 666
|
||||||
|
print*, 'NaN in rotation matrix'
|
||||||
|
call ABORT
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Display
|
||||||
|
!if (debug) then
|
||||||
|
! print*,'Rotation matrix :'
|
||||||
|
! do i = 1, n
|
||||||
|
! write(*,'(100(F10.5))') R(i,:)
|
||||||
|
! enddo
|
||||||
|
!endif
|
||||||
|
|
||||||
|
! Deallocation, end
|
||||||
|
|
||||||
|
deallocate(B)
|
||||||
|
deallocate(m_diag,cos_tau,sin_tau,tau_m1)
|
||||||
|
deallocate(W,e_val)
|
||||||
|
deallocate(part_1,part_1a)
|
||||||
|
deallocate(part_2,part_2a,part_2b,part_2c)
|
||||||
|
deallocate(RR_t)
|
||||||
|
|
||||||
|
call wall_time(t2)
|
||||||
|
t3 = t2-t1
|
||||||
|
print*,'Time in rotation matrix:', t3
|
||||||
|
|
||||||
|
print*,'---End rotation_matrix---'
|
||||||
|
|
||||||
|
end subroutine
|
454
src/utils_trust_region/rotation_matrix.org
Normal file
454
src/utils_trust_region/rotation_matrix.org
Normal file
@ -0,0 +1,454 @@
|
|||||||
|
* Rotation matrix
|
||||||
|
|
||||||
|
*Build a rotation matrix from an antisymmetric matrix*
|
||||||
|
|
||||||
|
Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as :
|
||||||
|
$$
|
||||||
|
\textbf{R}=\exp(\textbf{A})
|
||||||
|
$$
|
||||||
|
|
||||||
|
So :
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{R}=& \exp(\textbf{A}) \\
|
||||||
|
=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\
|
||||||
|
=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
With :
|
||||||
|
$\textbf{W}$ : eigenvectors of $\textbf{A}^2$
|
||||||
|
$\tau$ : $\sqrt{-x}$
|
||||||
|
$x$ : eigenvalues of $\textbf{A}^2$
|
||||||
|
|
||||||
|
Input:
|
||||||
|
| A(n,n) | double precision | antisymmetric matrix |
|
||||||
|
| n | integer | number of columns of the A matrix |
|
||||||
|
| LDA | integer | specifies the leading dimension of A, must be at least max(1,n) |
|
||||||
|
| LDR | integer | specifies the leading dimension of R, must be at least max(1,n) |
|
||||||
|
|
||||||
|
Output:
|
||||||
|
| R(n,n) | double precision | Rotation matrix |
|
||||||
|
| info | integer | if info = 0, the execution is successful |
|
||||||
|
| | | if info = k, the k-th parameter has an illegal value |
|
||||||
|
| | | if info = -k, the algorithm failed |
|
||||||
|
|
||||||
|
Internal:
|
||||||
|
| B(n,n) | double precision | B = A.A |
|
||||||
|
| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) |
|
||||||
|
| lwork | integer | dimension of the syev work array >= max(1, 3n-1) |
|
||||||
|
| W(n,n) | double precision | eigenvectors of B |
|
||||||
|
| e_val(n) | double precision | eigenvalues of B |
|
||||||
|
| m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B |
|
||||||
|
| cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values |
|
||||||
|
| sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values |
|
||||||
|
| tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values |
|
||||||
|
| part_1(n,n) | double precision | matrix W.cos_tau.W^t |
|
||||||
|
| part_1a(n,n) | double precision | matrix cos_tau.W^t |
|
||||||
|
| part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A |
|
||||||
|
| part_2a(n,n) | double precision | matrix W^t.A |
|
||||||
|
| part_2b(n,n) | double precision | matrix sin_tau.W^t.A |
|
||||||
|
| part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A |
|
||||||
|
| RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 |
|
||||||
|
| norm | integer | norm of R.R^t-1, must be equal to 0 |
|
||||||
|
| i,j | integer | indexes |
|
||||||
|
|
||||||
|
Functions:
|
||||||
|
| dnrm2 | double precision | Lapack function, compute the norm of a matrix |
|
||||||
|
| disnan | logical | Lapack function, check if an element is NaN |
|
||||||
|
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
|
||||||
|
subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation)
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Rotation matrix to rotate the molecular orbitals.
|
||||||
|
! If the rotation is too large the transformation is not unitary and must be cancelled.
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer, intent(in) :: n,LDA,LDR
|
||||||
|
double precision, intent(inout) :: A(LDA,n)
|
||||||
|
|
||||||
|
! out
|
||||||
|
double precision, intent(out) :: R(LDR,n)
|
||||||
|
integer, intent(out) :: info
|
||||||
|
logical, intent(out) :: enforce_step_cancellation
|
||||||
|
|
||||||
|
! internal
|
||||||
|
double precision, allocatable :: B(:,:)
|
||||||
|
double precision, allocatable :: work(:,:)
|
||||||
|
double precision, allocatable :: W(:,:), e_val(:)
|
||||||
|
double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:)
|
||||||
|
double precision, allocatable :: part_1(:,:),part_1a(:,:)
|
||||||
|
double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:)
|
||||||
|
double precision, allocatable :: RR_t(:,:)
|
||||||
|
integer :: i,j
|
||||||
|
integer :: info2, lwork ! for dsyev
|
||||||
|
double precision :: norm, max_elem, max_elem_A, t1,t2,t3
|
||||||
|
|
||||||
|
! function
|
||||||
|
double precision :: dnrm2
|
||||||
|
logical :: disnan
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'---rotation_matrix---'
|
||||||
|
|
||||||
|
call wall_time(t1)
|
||||||
|
|
||||||
|
! Allocation
|
||||||
|
allocate(B(n,n))
|
||||||
|
allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n))
|
||||||
|
allocate(W(n,n),e_val(n))
|
||||||
|
allocate(part_1(n,n),part_1a(n,n))
|
||||||
|
allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n))
|
||||||
|
allocate(RR_t(n,n))
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
** Pre-conditions
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
|
||||||
|
! Initialization
|
||||||
|
info=0
|
||||||
|
enforce_step_cancellation = .False.
|
||||||
|
|
||||||
|
! Size of matrix A must be at least 1 by 1
|
||||||
|
if (n<1) then
|
||||||
|
info = 3
|
||||||
|
print*, 'WARNING: invalid parameter 5'
|
||||||
|
print*, 'n<1'
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Leading dimension of A must be >= n
|
||||||
|
if (LDA < n) then
|
||||||
|
info = 25
|
||||||
|
print*, 'WARNING: invalid parameter 2 or 5'
|
||||||
|
print*, 'LDA < n'
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Leading dimension of A must be >= n
|
||||||
|
if (LDR < n) then
|
||||||
|
info = 4
|
||||||
|
print*, 'WARNING: invalid parameter 4'
|
||||||
|
print*, 'LDR < n'
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Matrix elements of A must by non-NaN
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if (disnan(A(i,j))) then
|
||||||
|
info=1
|
||||||
|
print*, 'WARNING: invalid parameter 1'
|
||||||
|
print*, 'NaN element in A matrix'
|
||||||
|
return
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
do i = 1, n
|
||||||
|
if (A(i,i) /= 0d0) then
|
||||||
|
print*, 'WARNING: matrix A is not antisymmetric'
|
||||||
|
print*, 'Non 0 element on the diagonal', i, A(i,i)
|
||||||
|
call ABORT
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if (A(i,j)+A(j,i)>1d-16) then
|
||||||
|
print*, 'WANRING: matrix A is not antisymmetric'
|
||||||
|
print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i)
|
||||||
|
print*, 'diff:', A(i,j)+A(j,i)
|
||||||
|
call ABORT
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Fix for too big elements ! bad idea better to cancel if the error is too big
|
||||||
|
!do j = 1, n
|
||||||
|
! do i = 1, n
|
||||||
|
! A(i,j) = mod(A(i,j),2d0*pi)
|
||||||
|
! if (dabs(A(i,j)) > pi) then
|
||||||
|
! A(i,j) = 0d0
|
||||||
|
! endif
|
||||||
|
! enddo
|
||||||
|
!enddo
|
||||||
|
|
||||||
|
max_elem_A = 0d0
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if (ABS(A(i,j)) > ABS(max_elem_A)) then
|
||||||
|
max_elem_A = A(i,j)
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
print*,'max element in A', max_elem_A
|
||||||
|
|
||||||
|
if (ABS(max_elem_A) > 2 * pi) then
|
||||||
|
print*,''
|
||||||
|
print*,'WARNING: ABS(max_elem_A) > 2 pi '
|
||||||
|
print*,''
|
||||||
|
endif
|
||||||
|
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
** Calculations
|
||||||
|
|
||||||
|
*** B=A.A
|
||||||
|
- Calculation of the matrix $\textbf{B} = \textbf{A}^2$
|
||||||
|
- Diagonalization of $\textbf{B}$
|
||||||
|
W, the eigenvectors
|
||||||
|
e_val, the eigenvalues
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
|
||||||
|
! Compute B=A.A
|
||||||
|
|
||||||
|
call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1))
|
||||||
|
|
||||||
|
! Copy B in W, diagonalization will put the eigenvectors in W
|
||||||
|
W=B
|
||||||
|
|
||||||
|
! Diagonalization of B
|
||||||
|
! Eigenvalues -> e_val
|
||||||
|
! Eigenvectors -> W
|
||||||
|
lwork = 3*n-1
|
||||||
|
allocate(work(lwork,n))
|
||||||
|
|
||||||
|
print*,'Starting diagonalization ...'
|
||||||
|
|
||||||
|
call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2)
|
||||||
|
|
||||||
|
deallocate(work)
|
||||||
|
|
||||||
|
if (info2 == 0) then
|
||||||
|
print*, 'Diagonalization : Done'
|
||||||
|
elseif (info2 < 0) then
|
||||||
|
print*, 'WARNING: error in the diagonalization'
|
||||||
|
print*, 'Illegal value of the ', info2,'-th parameter'
|
||||||
|
else
|
||||||
|
print*, "WARNING: Diagonalization failed to converge"
|
||||||
|
endif
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
*** Tau^-1, cos(tau), sin(tau)
|
||||||
|
$$\tau = \sqrt{-x}$$
|
||||||
|
- Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$
|
||||||
|
- Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$
|
||||||
|
- Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$
|
||||||
|
These matrices are diagonals
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
|
||||||
|
! Diagonal matrix m_diag
|
||||||
|
do j = 1, n
|
||||||
|
if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems
|
||||||
|
e_val(j) = 0.d0
|
||||||
|
else
|
||||||
|
e_val(j) = - e_val(j)
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
|
||||||
|
m_diag = 0.d0
|
||||||
|
do i = 1, n
|
||||||
|
m_diag(i,i) = e_val(i)
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! cos_tau
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if (i==j) then
|
||||||
|
cos_tau(i,j) = dcos(dsqrt(e_val(i)))
|
||||||
|
else
|
||||||
|
cos_tau(i,j) = 0d0
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! sin_tau
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if (i==j) then
|
||||||
|
sin_tau(i,j) = dsin(dsqrt(e_val(i)))
|
||||||
|
else
|
||||||
|
sin_tau(i,j) = 0d0
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Debug, display the cos_tau and sin_tau matrix
|
||||||
|
!if (debug) then
|
||||||
|
! print*, 'cos_tau'
|
||||||
|
! do i = 1, n
|
||||||
|
! print*, cos_tau(i,:)
|
||||||
|
! enddo
|
||||||
|
! print*, 'sin_tau'
|
||||||
|
! do i = 1, n
|
||||||
|
! print*, sin_tau(i,:)
|
||||||
|
! enddo
|
||||||
|
!endif
|
||||||
|
|
||||||
|
! tau^-1
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small
|
||||||
|
tau_m1(i,j) = 1d0/(dsqrt(e_val(i)))
|
||||||
|
else
|
||||||
|
tau_m1(i,j) = 0d0
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
max_elem = 0d0
|
||||||
|
do i = 1, n
|
||||||
|
if (ABS(tau_m1(i,i)) > ABS(max_elem)) then
|
||||||
|
max_elem = tau_m1(i,i)
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
print*,'max elem tau^-1:', max_elem
|
||||||
|
|
||||||
|
! Debug
|
||||||
|
!print*,'eigenvalues:'
|
||||||
|
!do i = 1, n
|
||||||
|
! print*, e_val(i)
|
||||||
|
!enddo
|
||||||
|
|
||||||
|
!Debug, display tau^-1
|
||||||
|
!if (debug) then
|
||||||
|
! print*, 'tau^-1'
|
||||||
|
! do i = 1, n
|
||||||
|
! print*,tau_m1(i,:)
|
||||||
|
! enddo
|
||||||
|
!endif
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
*** Rotation matrix
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
|
||||||
|
\end{align*}
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger}
|
||||||
|
\end{align*}
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
First:
|
||||||
|
part_1 = dgemm(W, dgemm(cos_tau, W^t))
|
||||||
|
part_1a = dgemm(cos_tau, W^t)
|
||||||
|
part_1 = dgemm(W, part_1a)
|
||||||
|
And:
|
||||||
|
part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A))))
|
||||||
|
part_2a = dgemm(W^t, A)
|
||||||
|
part_2b = dgemm(sin_tau, part_2a)
|
||||||
|
part_2c = dgemm(tau_m1, part_2b)
|
||||||
|
part_2 = dgemm(W, part_2c)
|
||||||
|
Finally:
|
||||||
|
Rotation matrix, R = part_1+part_2
|
||||||
|
|
||||||
|
If $R$ is a rotation matrix:
|
||||||
|
$R.R^T=R^T.R=\textbf{1}$
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
|
||||||
|
! part_1
|
||||||
|
call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1))
|
||||||
|
call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1))
|
||||||
|
|
||||||
|
! part_2
|
||||||
|
call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1))
|
||||||
|
call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1))
|
||||||
|
call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1))
|
||||||
|
call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1))
|
||||||
|
|
||||||
|
! Rotation matrix R
|
||||||
|
R = part_1 + part_2
|
||||||
|
|
||||||
|
! Matrix check
|
||||||
|
! R.R^t and R^t.R must be equal to identity matrix
|
||||||
|
do j = 1, n
|
||||||
|
do i=1,n
|
||||||
|
if (i==j) then
|
||||||
|
RR_t(i,j) = 1d0
|
||||||
|
else
|
||||||
|
RR_t(i,j) = 0d0
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1))
|
||||||
|
|
||||||
|
norm = dnrm2(n*n,RR_t,1)
|
||||||
|
print*, 'Rotation matrix check, norm R.R^T = ', norm
|
||||||
|
|
||||||
|
! Debug
|
||||||
|
!if (debug) then
|
||||||
|
! print*, 'RR_t'
|
||||||
|
! do i = 1, n
|
||||||
|
! print*, RR_t(i,:)
|
||||||
|
! enddo
|
||||||
|
!endif
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
*** Post conditions
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
|
||||||
|
! Check if R.R^T=1
|
||||||
|
max_elem = 0d0
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
if (ABS(RR_t(i,j)) > ABS(max_elem)) then
|
||||||
|
max_elem = RR_t(i,j)
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
print*, 'Max error in R.R^T:', max_elem
|
||||||
|
print*, 'e_val(1):', e_val(1)
|
||||||
|
print*, 'e_val(n):', e_val(n)
|
||||||
|
print*, 'max elem in A:', max_elem_A
|
||||||
|
|
||||||
|
if (ABS(max_elem) > 1d-12) then
|
||||||
|
print*, 'WARNING: max error in R.R^T > 1d-12'
|
||||||
|
print*, 'Enforce the step cancellation'
|
||||||
|
enforce_step_cancellation = .True.
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Matrix elements of R must by non-NaN
|
||||||
|
do j = 1,n
|
||||||
|
do i = 1,LDR
|
||||||
|
if (disnan(R(i,j))) then
|
||||||
|
info = 666
|
||||||
|
print*, 'NaN in rotation matrix'
|
||||||
|
call ABORT
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Display
|
||||||
|
!if (debug) then
|
||||||
|
! print*,'Rotation matrix :'
|
||||||
|
! do i = 1, n
|
||||||
|
! write(*,'(100(F10.5))') R(i,:)
|
||||||
|
! enddo
|
||||||
|
!endif
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
** Deallocation, end
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f
|
||||||
|
deallocate(B)
|
||||||
|
deallocate(m_diag,cos_tau,sin_tau,tau_m1)
|
||||||
|
deallocate(W,e_val)
|
||||||
|
deallocate(part_1,part_1a)
|
||||||
|
deallocate(part_2,part_2a,part_2b,part_2c)
|
||||||
|
deallocate(RR_t)
|
||||||
|
|
||||||
|
call wall_time(t2)
|
||||||
|
t3 = t2-t1
|
||||||
|
print*,'Time in rotation matrix:', t3
|
||||||
|
|
||||||
|
print*,'---End rotation_matrix---'
|
||||||
|
|
||||||
|
end subroutine
|
||||||
|
#+END_SRC
|
||||||
|
|
64
src/utils_trust_region/sub_to_full_rotation_matrix.irp.f
Normal file
64
src/utils_trust_region/sub_to_full_rotation_matrix.irp.f
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
! Rotation matrix in a subspace to rotation matrix in the full space
|
||||||
|
|
||||||
|
! Usually, we are using a list of MOs, for exemple the active ones. When
|
||||||
|
! we compute a rotation matrix to rotate the MOs, we just compute a
|
||||||
|
! rotation matrix for these MOs in order to reduce the size of the
|
||||||
|
! matrix which has to be computed. Since the computation of a rotation
|
||||||
|
! matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to
|
||||||
|
! reuce the number of MOs involved.
|
||||||
|
! After that we replace the rotation matrix in the full space by
|
||||||
|
! building the elements of the rotation matrix in the full space from
|
||||||
|
! the elements of the rotation matrix in the subspace and adding some 0
|
||||||
|
! on the extradiagonal elements and some 1 on the diagonal elements,
|
||||||
|
! for the MOs that are not involved in the rotation.
|
||||||
|
|
||||||
|
! Provided:
|
||||||
|
! | mo_num | integer | Number of MOs |
|
||||||
|
|
||||||
|
! Input:
|
||||||
|
! | m | integer | Size of tmp_list, m <= mo_num |
|
||||||
|
! | tmp_list(m) | integer | List of MOs |
|
||||||
|
! | tmp_R(m,m) | double precision | Rotation matrix in the space of |
|
||||||
|
! | | | the MOs containing by tmp_list |
|
||||||
|
|
||||||
|
! Output:
|
||||||
|
! | R(mo_num,mo_num | double precision | Rotation matrix in the space |
|
||||||
|
! | | | of all the MOs |
|
||||||
|
|
||||||
|
! Internal:
|
||||||
|
! | i,j | integer | indexes in the full space |
|
||||||
|
! | tmp_i,tmp_j | integer | indexes in the subspace |
|
||||||
|
|
||||||
|
|
||||||
|
subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R)
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute the full rotation matrix from a smaller one
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer, intent(in) :: m, tmp_list(m)
|
||||||
|
double precision, intent(in) :: tmp_R(m,m)
|
||||||
|
|
||||||
|
! out
|
||||||
|
double precision, intent(out) :: R(mo_num,mo_num)
|
||||||
|
|
||||||
|
! internal
|
||||||
|
integer :: i,j,tmp_i,tmp_j
|
||||||
|
|
||||||
|
! tmp_R to R, subspace to full space
|
||||||
|
R = 0d0
|
||||||
|
do i = 1, mo_num
|
||||||
|
R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital
|
||||||
|
enddo
|
||||||
|
do tmp_j = 1, m
|
||||||
|
j = tmp_list(tmp_j)
|
||||||
|
do tmp_i = 1, m
|
||||||
|
i = tmp_list(tmp_i)
|
||||||
|
R(i,j) = tmp_R(tmp_i,tmp_j)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
end
|
65
src/utils_trust_region/sub_to_full_rotation_matrix.org
Normal file
65
src/utils_trust_region/sub_to_full_rotation_matrix.org
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
* Rotation matrix in a subspace to rotation matrix in the full space
|
||||||
|
|
||||||
|
Usually, we are using a list of MOs, for exemple the active ones. When
|
||||||
|
we compute a rotation matrix to rotate the MOs, we just compute a
|
||||||
|
rotation matrix for these MOs in order to reduce the size of the
|
||||||
|
matrix which has to be computed. Since the computation of a rotation
|
||||||
|
matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to
|
||||||
|
reuce the number of MOs involved.
|
||||||
|
After that we replace the rotation matrix in the full space by
|
||||||
|
building the elements of the rotation matrix in the full space from
|
||||||
|
the elements of the rotation matrix in the subspace and adding some 0
|
||||||
|
on the extradiagonal elements and some 1 on the diagonal elements,
|
||||||
|
for the MOs that are not involved in the rotation.
|
||||||
|
|
||||||
|
Provided:
|
||||||
|
| mo_num | integer | Number of MOs |
|
||||||
|
|
||||||
|
Input:
|
||||||
|
| m | integer | Size of tmp_list, m <= mo_num |
|
||||||
|
| tmp_list(m) | integer | List of MOs |
|
||||||
|
| tmp_R(m,m) | double precision | Rotation matrix in the space of |
|
||||||
|
| | | the MOs containing by tmp_list |
|
||||||
|
|
||||||
|
Output:
|
||||||
|
| R(mo_num,mo_num | double precision | Rotation matrix in the space |
|
||||||
|
| | | of all the MOs |
|
||||||
|
|
||||||
|
Internal:
|
||||||
|
| i,j | integer | indexes in the full space |
|
||||||
|
| tmp_i,tmp_j | integer | indexes in the subspace |
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f
|
||||||
|
subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R)
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute the full rotation matrix from a smaller one
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer, intent(in) :: m, tmp_list(m)
|
||||||
|
double precision, intent(in) :: tmp_R(m,m)
|
||||||
|
|
||||||
|
! out
|
||||||
|
double precision, intent(out) :: R(mo_num,mo_num)
|
||||||
|
|
||||||
|
! internal
|
||||||
|
integer :: i,j,tmp_i,tmp_j
|
||||||
|
|
||||||
|
! tmp_R to R, subspace to full space
|
||||||
|
R = 0d0
|
||||||
|
do i = 1, mo_num
|
||||||
|
R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital
|
||||||
|
enddo
|
||||||
|
do tmp_j = 1, m
|
||||||
|
j = tmp_list(tmp_j)
|
||||||
|
do tmp_i = 1, m
|
||||||
|
i = tmp_list(tmp_i)
|
||||||
|
R(i,j) = tmp_R(tmp_i,tmp_j)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
end
|
||||||
|
#+END_SRC
|
119
src/utils_trust_region/trust_region_expected_e.irp.f
Normal file
119
src/utils_trust_region/trust_region_expected_e.irp.f
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
! Predicted energy : e_model
|
||||||
|
|
||||||
|
! *Compute the energy predicted by the Taylor series*
|
||||||
|
|
||||||
|
! The energy is predicted using a Taylor expansion truncated at te 2nd
|
||||||
|
! order :
|
||||||
|
|
||||||
|
! \begin{align*}
|
||||||
|
! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2)
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! Input:
|
||||||
|
! | n | integer | m*(m-1)/2 |
|
||||||
|
! | v_grad(n) | double precision | gradient |
|
||||||
|
! | H(n,n) | double precision | hessian |
|
||||||
|
! | x(n) | double precision | Step in the trust region |
|
||||||
|
! | prev_energy | double precision | previous energy |
|
||||||
|
|
||||||
|
! Output:
|
||||||
|
! | e_model | double precision | predicted energy after the rotation of the MOs |
|
||||||
|
|
||||||
|
! Internal:
|
||||||
|
! | part_1 | double precision | v_grad^T.x |
|
||||||
|
! | part_2 | double precision | 1/2 . x^T.H.x |
|
||||||
|
! | part_2a | double precision | H.x |
|
||||||
|
! | i,j | integer | indexes |
|
||||||
|
|
||||||
|
! Function:
|
||||||
|
! | ddot | double precision | dot product (Lapack) |
|
||||||
|
|
||||||
|
|
||||||
|
subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute the expected criterion/energy after the application of the step x
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer, intent(in) :: n
|
||||||
|
double precision, intent(in) :: v_grad(n),H(n,n),x(n)
|
||||||
|
double precision, intent(in) :: prev_energy
|
||||||
|
|
||||||
|
! out
|
||||||
|
double precision, intent(out) :: e_model
|
||||||
|
|
||||||
|
! internal
|
||||||
|
double precision :: part_1, part_2, t1,t2,t3
|
||||||
|
double precision, allocatable :: part_2a(:)
|
||||||
|
|
||||||
|
integer :: i,j
|
||||||
|
|
||||||
|
!Function
|
||||||
|
double precision :: ddot
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'---Trust_e_model---'
|
||||||
|
|
||||||
|
call wall_time(t1)
|
||||||
|
|
||||||
|
! Allocation
|
||||||
|
allocate(part_2a(n))
|
||||||
|
|
||||||
|
! Calculations
|
||||||
|
|
||||||
|
! part_1 corresponds to the product g.x
|
||||||
|
! part_2a corresponds to the product H.x
|
||||||
|
! part_2 corresponds to the product 0.5*(x^T.H.x)
|
||||||
|
|
||||||
|
! TODO: remove the dot products
|
||||||
|
|
||||||
|
|
||||||
|
! Product v_grad.x
|
||||||
|
part_1 = ddot(n,v_grad,1,x,1)
|
||||||
|
|
||||||
|
!if (debug) then
|
||||||
|
print*,'g.x : ', part_1
|
||||||
|
!endif
|
||||||
|
|
||||||
|
! Product H.x
|
||||||
|
call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1)
|
||||||
|
|
||||||
|
! Product 1/2 . x^T.H.x
|
||||||
|
part_2 = 0.5d0 * ddot(n,x,1,part_2a,1)
|
||||||
|
|
||||||
|
!if (debug) then
|
||||||
|
print*,'1/2*x^T.H.x : ', part_2
|
||||||
|
!endif
|
||||||
|
|
||||||
|
print*,'prev_energy', prev_energy
|
||||||
|
|
||||||
|
! Sum
|
||||||
|
e_model = prev_energy + part_1 + part_2
|
||||||
|
|
||||||
|
! Writing the predicted energy
|
||||||
|
print*, 'Predicted energy after the rotation : ', e_model
|
||||||
|
print*, 'Previous energy - predicted energy:', prev_energy - e_model
|
||||||
|
|
||||||
|
! Can be deleted, already in another subroutine
|
||||||
|
if (DABS(prev_energy - e_model) < 1d-12 ) then
|
||||||
|
print*,'WARNING: ABS(prev_energy - e_model) < 1d-12'
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Deallocation
|
||||||
|
deallocate(part_2a)
|
||||||
|
|
||||||
|
call wall_time(t2)
|
||||||
|
t3 = t2 - t1
|
||||||
|
print*,'Time in trust e model:', t3
|
||||||
|
|
||||||
|
print*,'---End trust_e_model---'
|
||||||
|
print*,''
|
||||||
|
|
||||||
|
end subroutine
|
121
src/utils_trust_region/trust_region_expected_e.org
Normal file
121
src/utils_trust_region/trust_region_expected_e.org
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
* Predicted energy : e_model
|
||||||
|
|
||||||
|
*Compute the energy predicted by the Taylor series*
|
||||||
|
|
||||||
|
The energy is predicted using a Taylor expansion truncated at te 2nd
|
||||||
|
order :
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2)
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
Input:
|
||||||
|
| n | integer | m*(m-1)/2 |
|
||||||
|
| v_grad(n) | double precision | gradient |
|
||||||
|
| H(n,n) | double precision | hessian |
|
||||||
|
| x(n) | double precision | Step in the trust region |
|
||||||
|
| prev_energy | double precision | previous energy |
|
||||||
|
|
||||||
|
Output:
|
||||||
|
| e_model | double precision | predicted energy after the rotation of the MOs |
|
||||||
|
|
||||||
|
Internal:
|
||||||
|
| part_1 | double precision | v_grad^T.x |
|
||||||
|
| part_2 | double precision | 1/2 . x^T.H.x |
|
||||||
|
| part_2a | double precision | H.x |
|
||||||
|
| i,j | integer | indexes |
|
||||||
|
|
||||||
|
Function:
|
||||||
|
| ddot | double precision | dot product (Lapack) |
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f
|
||||||
|
subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute the expected criterion/energy after the application of the step x
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer, intent(in) :: n
|
||||||
|
double precision, intent(in) :: v_grad(n),H(n,n),x(n)
|
||||||
|
double precision, intent(in) :: prev_energy
|
||||||
|
|
||||||
|
! out
|
||||||
|
double precision, intent(out) :: e_model
|
||||||
|
|
||||||
|
! internal
|
||||||
|
double precision :: part_1, part_2, t1,t2,t3
|
||||||
|
double precision, allocatable :: part_2a(:)
|
||||||
|
|
||||||
|
integer :: i,j
|
||||||
|
|
||||||
|
!Function
|
||||||
|
double precision :: ddot
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'---Trust_e_model---'
|
||||||
|
|
||||||
|
call wall_time(t1)
|
||||||
|
|
||||||
|
! Allocation
|
||||||
|
allocate(part_2a(n))
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
** Calculations
|
||||||
|
|
||||||
|
part_1 corresponds to the product g.x
|
||||||
|
part_2a corresponds to the product H.x
|
||||||
|
part_2 corresponds to the product 0.5*(x^T.H.x)
|
||||||
|
|
||||||
|
TODO: remove the dot products
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f
|
||||||
|
! Product v_grad.x
|
||||||
|
part_1 = ddot(n,v_grad,1,x,1)
|
||||||
|
|
||||||
|
!if (debug) then
|
||||||
|
print*,'g.x : ', part_1
|
||||||
|
!endif
|
||||||
|
|
||||||
|
! Product H.x
|
||||||
|
call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1)
|
||||||
|
|
||||||
|
! Product 1/2 . x^T.H.x
|
||||||
|
part_2 = 0.5d0 * ddot(n,x,1,part_2a,1)
|
||||||
|
|
||||||
|
!if (debug) then
|
||||||
|
print*,'1/2*x^T.H.x : ', part_2
|
||||||
|
!endif
|
||||||
|
|
||||||
|
print*,'prev_energy', prev_energy
|
||||||
|
|
||||||
|
! Sum
|
||||||
|
e_model = prev_energy + part_1 + part_2
|
||||||
|
|
||||||
|
! Writing the predicted energy
|
||||||
|
print*, 'Predicted energy after the rotation : ', e_model
|
||||||
|
print*, 'Previous energy - predicted energy:', prev_energy - e_model
|
||||||
|
|
||||||
|
! Can be deleted, already in another subroutine
|
||||||
|
if (DABS(prev_energy - e_model) < 1d-12 ) then
|
||||||
|
print*,'WARNING: ABS(prev_energy - e_model) < 1d-12'
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Deallocation
|
||||||
|
deallocate(part_2a)
|
||||||
|
|
||||||
|
call wall_time(t2)
|
||||||
|
t3 = t2 - t1
|
||||||
|
print*,'Time in trust e model:', t3
|
||||||
|
|
||||||
|
print*,'---End trust_e_model---'
|
||||||
|
print*,''
|
||||||
|
|
||||||
|
end subroutine
|
||||||
|
#+END_SRC
|
1655
src/utils_trust_region/trust_region_optimal_lambda.irp.f
Normal file
1655
src/utils_trust_region/trust_region_optimal_lambda.irp.f
Normal file
File diff suppressed because it is too large
Load Diff
1665
src/utils_trust_region/trust_region_optimal_lambda.org
Normal file
1665
src/utils_trust_region/trust_region_optimal_lambda.org
Normal file
File diff suppressed because it is too large
Load Diff
121
src/utils_trust_region/trust_region_rho.irp.f
Normal file
121
src/utils_trust_region/trust_region_rho.irp.f
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
! Agreement with the model: Rho
|
||||||
|
|
||||||
|
! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)*
|
||||||
|
|
||||||
|
! Rho represents the agreement between the model (the predicted energy
|
||||||
|
! by the Taylor expansion truncated at the 2nd order) and the real
|
||||||
|
! energy :
|
||||||
|
|
||||||
|
! \begin{equation}
|
||||||
|
! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
|
||||||
|
! \end{equation}
|
||||||
|
! With :
|
||||||
|
! $E^{k}$ the energy at the previous iteration
|
||||||
|
! $E^{k+1}$ the energy at the actual iteration
|
||||||
|
! $m^{k+1}$ the predicted energy for the actual iteration
|
||||||
|
! (cf. trust_e_model)
|
||||||
|
|
||||||
|
! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$.
|
||||||
|
! If $\rho \leq 0$ the previous energy is lower than the actual
|
||||||
|
! energy. We have to cancel the last step and use a smaller trust
|
||||||
|
! region.
|
||||||
|
! Here we cancel the last step if $\rho < 0.1$, because even if
|
||||||
|
! the energy decreases, the agreement is bad, i.e., the Taylor expansion
|
||||||
|
! truncated at the second order doesn't represent correctly the energy
|
||||||
|
! landscape. So it's better to cancel the step and restart with a
|
||||||
|
! smaller trust region.
|
||||||
|
|
||||||
|
! Provided in qp_edit:
|
||||||
|
! | thresh_rho |
|
||||||
|
|
||||||
|
! Input:
|
||||||
|
! | prev_energy | double precision | previous energy (energy before the rotation) |
|
||||||
|
! | e_model | double precision | predicted energy after the rotation |
|
||||||
|
|
||||||
|
! Output:
|
||||||
|
! | rho | double precision | the agreement between the model (predicted) and the real energy |
|
||||||
|
! | prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy |
|
||||||
|
! | | | else the previous energy doesn't change |
|
||||||
|
|
||||||
|
! Internal:
|
||||||
|
! | energy | double precision | energy (real) after the rotation |
|
||||||
|
! | i | integer | index |
|
||||||
|
! | t* | double precision | time |
|
||||||
|
|
||||||
|
|
||||||
|
subroutine trust_region_rho(prev_energy, energy,e_model,rho)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute rho, the agreement between the predicted criterion/energy and the real one
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! In
|
||||||
|
double precision, intent(inout) :: prev_energy
|
||||||
|
double precision, intent(in) :: e_model, energy
|
||||||
|
|
||||||
|
! Out
|
||||||
|
double precision, intent(out) :: rho
|
||||||
|
|
||||||
|
! Internal
|
||||||
|
double precision :: t1, t2, t3
|
||||||
|
integer :: i
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'---Rho_model---'
|
||||||
|
|
||||||
|
call wall_time(t1)
|
||||||
|
|
||||||
|
! Rho
|
||||||
|
! \begin{equation}
|
||||||
|
! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
|
||||||
|
! \end{equation}
|
||||||
|
|
||||||
|
! In function of $\rho$ th step can be accepted or cancelled.
|
||||||
|
|
||||||
|
! If we cancel the last step (k+1), the previous energy (k) doesn't
|
||||||
|
! change!
|
||||||
|
! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1)
|
||||||
|
|
||||||
|
|
||||||
|
! Already done in an other subroutine
|
||||||
|
!if (ABS(prev_energy - e_model) < 1d-12) then
|
||||||
|
! print*,'WARNING: prev_energy - e_model < 1d-12'
|
||||||
|
! print*,'=> rho will tend toward infinity'
|
||||||
|
! print*,'Check you convergence criterion !'
|
||||||
|
!endif
|
||||||
|
|
||||||
|
rho = (prev_energy - energy) / (prev_energy - e_model)
|
||||||
|
|
||||||
|
print*, 'previous energy, prev_energy :', prev_energy
|
||||||
|
print*, 'predicted energy, e_model :', e_model
|
||||||
|
print*, 'real energy, energy :', energy
|
||||||
|
print*, 'prev_energy - energy :', prev_energy - energy
|
||||||
|
print*, 'prev_energy - e_model :', prev_energy - e_model
|
||||||
|
print*, 'Rho :', rho
|
||||||
|
print*, 'Threshold for rho:', thresh_rho
|
||||||
|
|
||||||
|
! Modification of prev_energy in function of rho
|
||||||
|
if (rho < thresh_rho) then !0.1) then
|
||||||
|
! the step is cancelled
|
||||||
|
print*, 'Rho <', thresh_rho,', the previous energy does not changed'
|
||||||
|
print*, 'prev_energy :', prev_energy
|
||||||
|
else
|
||||||
|
! the step is accepted
|
||||||
|
prev_energy = energy
|
||||||
|
print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy
|
||||||
|
endif
|
||||||
|
|
||||||
|
call wall_time(t2)
|
||||||
|
t3 = t2 - t1
|
||||||
|
print*,'Time in rho model:', t3
|
||||||
|
|
||||||
|
print*,'---End rho_model---'
|
||||||
|
print*,''
|
||||||
|
|
||||||
|
end subroutine
|
123
src/utils_trust_region/trust_region_rho.org
Normal file
123
src/utils_trust_region/trust_region_rho.org
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
* Agreement with the model: Rho
|
||||||
|
|
||||||
|
*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)*
|
||||||
|
|
||||||
|
Rho represents the agreement between the model (the predicted energy
|
||||||
|
by the Taylor expansion truncated at the 2nd order) and the real
|
||||||
|
energy :
|
||||||
|
|
||||||
|
\begin{equation}
|
||||||
|
\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
|
||||||
|
\end{equation}
|
||||||
|
With :
|
||||||
|
$E^{k}$ the energy at the previous iteration
|
||||||
|
$E^{k+1}$ the energy at the actual iteration
|
||||||
|
$m^{k+1}$ the predicted energy for the actual iteration
|
||||||
|
(cf. trust_e_model)
|
||||||
|
|
||||||
|
If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$.
|
||||||
|
If $\rho \leq 0$ the previous energy is lower than the actual
|
||||||
|
energy. We have to cancel the last step and use a smaller trust
|
||||||
|
region.
|
||||||
|
Here we cancel the last step if $\rho < 0.1$, because even if
|
||||||
|
the energy decreases, the agreement is bad, i.e., the Taylor expansion
|
||||||
|
truncated at the second order doesn't represent correctly the energy
|
||||||
|
landscape. So it's better to cancel the step and restart with a
|
||||||
|
smaller trust region.
|
||||||
|
|
||||||
|
Provided in qp_edit:
|
||||||
|
| thresh_rho |
|
||||||
|
|
||||||
|
Input:
|
||||||
|
| prev_energy | double precision | previous energy (energy before the rotation) |
|
||||||
|
| e_model | double precision | predicted energy after the rotation |
|
||||||
|
|
||||||
|
Output:
|
||||||
|
| rho | double precision | the agreement between the model (predicted) and the real energy |
|
||||||
|
| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy |
|
||||||
|
| | | else the previous energy doesn't change |
|
||||||
|
|
||||||
|
Internal:
|
||||||
|
| energy | double precision | energy (real) after the rotation |
|
||||||
|
| i | integer | index |
|
||||||
|
| t* | double precision | time |
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f
|
||||||
|
subroutine trust_region_rho(prev_energy, energy,e_model,rho)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute rho, the agreement between the predicted criterion/energy and the real one
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! In
|
||||||
|
double precision, intent(inout) :: prev_energy
|
||||||
|
double precision, intent(in) :: e_model, energy
|
||||||
|
|
||||||
|
! Out
|
||||||
|
double precision, intent(out) :: rho
|
||||||
|
|
||||||
|
! Internal
|
||||||
|
double precision :: t1, t2, t3
|
||||||
|
integer :: i
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'---Rho_model---'
|
||||||
|
|
||||||
|
call wall_time(t1)
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
** Rho
|
||||||
|
\begin{equation}
|
||||||
|
\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}}
|
||||||
|
\end{equation}
|
||||||
|
|
||||||
|
In function of $\rho$ th step can be accepted or cancelled.
|
||||||
|
|
||||||
|
If we cancel the last step (k+1), the previous energy (k) doesn't
|
||||||
|
change!
|
||||||
|
If the step (k+1) is accepted, then the "previous energy" becomes E(k+1)
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f
|
||||||
|
! Already done in an other subroutine
|
||||||
|
!if (ABS(prev_energy - e_model) < 1d-12) then
|
||||||
|
! print*,'WARNING: prev_energy - e_model < 1d-12'
|
||||||
|
! print*,'=> rho will tend toward infinity'
|
||||||
|
! print*,'Check you convergence criterion !'
|
||||||
|
!endif
|
||||||
|
|
||||||
|
rho = (prev_energy - energy) / (prev_energy - e_model)
|
||||||
|
|
||||||
|
print*, 'previous energy, prev_energy :', prev_energy
|
||||||
|
print*, 'predicted energy, e_model :', e_model
|
||||||
|
print*, 'real energy, energy :', energy
|
||||||
|
print*, 'prev_energy - energy :', prev_energy - energy
|
||||||
|
print*, 'prev_energy - e_model :', prev_energy - e_model
|
||||||
|
print*, 'Rho :', rho
|
||||||
|
print*, 'Threshold for rho:', thresh_rho
|
||||||
|
|
||||||
|
! Modification of prev_energy in function of rho
|
||||||
|
if (rho < thresh_rho) then !0.1) then
|
||||||
|
! the step is cancelled
|
||||||
|
print*, 'Rho <', thresh_rho,', the previous energy does not changed'
|
||||||
|
print*, 'prev_energy :', prev_energy
|
||||||
|
else
|
||||||
|
! the step is accepted
|
||||||
|
prev_energy = energy
|
||||||
|
print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy
|
||||||
|
endif
|
||||||
|
|
||||||
|
call wall_time(t2)
|
||||||
|
t3 = t2 - t1
|
||||||
|
print*,'Time in rho model:', t3
|
||||||
|
|
||||||
|
print*,'---End rho_model---'
|
||||||
|
print*,''
|
||||||
|
|
||||||
|
end subroutine
|
||||||
|
#+END_SRC
|
716
src/utils_trust_region/trust_region_step.irp.f
Normal file
716
src/utils_trust_region/trust_region_step.irp.f
Normal file
@ -0,0 +1,716 @@
|
|||||||
|
! Trust region
|
||||||
|
|
||||||
|
! *Compute the next step with the trust region algorithm*
|
||||||
|
|
||||||
|
! The Newton method is an iterative method to find a minimum of a given
|
||||||
|
! function. It uses a Taylor series truncated at the second order of the
|
||||||
|
! targeted function and gives its minimizer. The minimizer is taken as
|
||||||
|
! the new position and the same thing is done. And by doing so
|
||||||
|
! iteratively the method find a minimum, a local or global one depending
|
||||||
|
! of the starting point and the convexity/nonconvexity of the targeted
|
||||||
|
! function.
|
||||||
|
|
||||||
|
! The goal of the trust region is to constrain the step size of the
|
||||||
|
! Newton method in a certain area around the actual position, where the
|
||||||
|
! Taylor series is a good approximation of the targeted function. This
|
||||||
|
! area is called the "trust region".
|
||||||
|
|
||||||
|
! In addition, in function of the agreement between the Taylor
|
||||||
|
! development of the energy and the real energy, the size of the trust
|
||||||
|
! region will be updated at each iteration. By doing so, the step sizes
|
||||||
|
! are not too larges. In addition, since we add a criterion to cancel the
|
||||||
|
! step if the energy increases (more precisely if rho < 0.1), so it's
|
||||||
|
! impossible to diverge. \newline
|
||||||
|
|
||||||
|
! References: \newline
|
||||||
|
! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline
|
||||||
|
! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline
|
||||||
|
! ISBN: 978-0-387-40065-5 \newline
|
||||||
|
|
||||||
|
! By using the first and the second derivatives, the Newton method gives
|
||||||
|
! a step:
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot
|
||||||
|
! \textbf{g}_{(k)}
|
||||||
|
! \end{align*}
|
||||||
|
! which leads to the minimizer of the Taylor series.
|
||||||
|
! !!! Warning: the Newton method gives the minimizer if and only if
|
||||||
|
! $\textbf{H}$ is positive definite, else it leads to a saddle point !!!
|
||||||
|
! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm:
|
||||||
|
! \begin{align*}
|
||||||
|
! ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)}
|
||||||
|
! \end{align*}
|
||||||
|
! which is equivalent to
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! with: \newline
|
||||||
|
! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of
|
||||||
|
! size n) \newline
|
||||||
|
! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n
|
||||||
|
! matrix) \newline
|
||||||
|
! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of
|
||||||
|
! size n) \newline
|
||||||
|
! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration
|
||||||
|
! \newline
|
||||||
|
|
||||||
|
! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a
|
||||||
|
! hypersphere of radius $\Delta_{(k+1)}$.\newline
|
||||||
|
|
||||||
|
! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and
|
||||||
|
! $\textbf{H}$ is positive definite, the
|
||||||
|
! solution is the step given by the Newton method
|
||||||
|
! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$.
|
||||||
|
! Else we have to constrain the step size. For simplicity we will remove
|
||||||
|
! the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have
|
||||||
|
! to put a constraint on $\textbf{x}$ with a Lagrange multiplier.
|
||||||
|
! Starting from the Taylor series of a function E (here, the energy)
|
||||||
|
! truncated at the 2nd order, we have:
|
||||||
|
! \begin{align*}
|
||||||
|
! E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2}
|
||||||
|
! \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +
|
||||||
|
! \mathcal{O}(\textbf{x}^2)
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! With the constraint on the norm of $\textbf{x}$ we can write the
|
||||||
|
! Lagrangian
|
||||||
|
! \begin{align*}
|
||||||
|
! \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x}
|
||||||
|
! + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x}
|
||||||
|
! + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2)
|
||||||
|
! \end{align*}
|
||||||
|
! Where: \newline
|
||||||
|
! $\lambda$ is the Lagrange multiplier \newline
|
||||||
|
! $E$ is the energy at the k-th iteration $\Leftrightarrow
|
||||||
|
! E(\textbf{x} = \textbf{0})$ \newline
|
||||||
|
|
||||||
|
! To solve this equation, we search a stationary point where the first
|
||||||
|
! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e.
|
||||||
|
! \begin{align*}
|
||||||
|
! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! The derivative is:
|
||||||
|
! \begin{align*}
|
||||||
|
! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
|
||||||
|
! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x}
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! So, we search $\textbf{x}$ such as:
|
||||||
|
! \begin{align*}
|
||||||
|
! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
|
||||||
|
! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! We can rewrite that as:
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x}
|
||||||
|
! = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0
|
||||||
|
! \end{align*}
|
||||||
|
! with $\textbf{I}$ is the identity matrix.
|
||||||
|
|
||||||
|
! By doing so, the solution is:
|
||||||
|
! \begin{align*}
|
||||||
|
! (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g}
|
||||||
|
! \end{align*}
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
|
||||||
|
! \end{align*}
|
||||||
|
! with $\textbf{x}^T \textbf{x} = \Delta^2$.
|
||||||
|
|
||||||
|
! We have to solve this previous equation to find this $\textbf{x}$ in the
|
||||||
|
! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is
|
||||||
|
! just a one dimension problem because we can express $\textbf{x}$ as a
|
||||||
|
! function of $\lambda$:
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! We start from the fact that the hessian is diagonalizable. So we have:
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T
|
||||||
|
! \end{align*}
|
||||||
|
! with: \newline
|
||||||
|
! $\textbf{H}$, the hessian matrix \newline
|
||||||
|
! $\textbf{W}$, the matrix containing the eigenvectors \newline
|
||||||
|
! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline
|
||||||
|
! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline
|
||||||
|
! $h_i$, the i-th eigenvalue in ascending order \newline
|
||||||
|
|
||||||
|
! Now we use the fact that adding a constant on the diagonal just shifts
|
||||||
|
! the eigenvalues:
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h}
|
||||||
|
! +\textbf{I} \lambda) \cdot \textbf{W}^T
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! By doing so we can express $\textbf{x}$ as a function of $\lambda$
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
|
||||||
|
! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
|
||||||
|
! \end{align*}
|
||||||
|
! with $\lambda \neq - h_i$.
|
||||||
|
|
||||||
|
! An interesting thing in our case is the norm of $\textbf{x}$,
|
||||||
|
! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of
|
||||||
|
! the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have:
|
||||||
|
! \begin{align*}
|
||||||
|
! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot
|
||||||
|
! \textbf{g})^2}{(h_i + \lambda)^2}
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$.
|
||||||
|
! And if we study the properties of this function we see that:
|
||||||
|
! \begin{align*}
|
||||||
|
! \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0
|
||||||
|
! \end{align*}
|
||||||
|
! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$:
|
||||||
|
! \begin{align*}
|
||||||
|
! \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! From these limits and knowing that $h_1$ is the lowest eigenvalue, we
|
||||||
|
! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and
|
||||||
|
! strictly decreasing function on the interval $\lambda \in
|
||||||
|
! (-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which
|
||||||
|
! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one
|
||||||
|
! solution.
|
||||||
|
|
||||||
|
! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot
|
||||||
|
! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly,
|
||||||
|
! $\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the
|
||||||
|
! Newton method is only defined for a positive definite hessian matrix,
|
||||||
|
! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive
|
||||||
|
! definite. Consequently, in the case where $\textbf{H}$ is not positive
|
||||||
|
! definite, to ensure the positive definiteness, $\lambda$ must be
|
||||||
|
! greater than $- h_1$.
|
||||||
|
! \begin{align*}
|
||||||
|
! \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! From that there are five cases:
|
||||||
|
! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$
|
||||||
|
! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot
|
||||||
|
! \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I}
|
||||||
|
! \lambda)$
|
||||||
|
! must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$
|
||||||
|
! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
|
||||||
|
! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing
|
||||||
|
! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
|
||||||
|
! positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$)
|
||||||
|
! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
|
||||||
|
! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing
|
||||||
|
! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
|
||||||
|
! positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is
|
||||||
|
! similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda =
|
||||||
|
! 0)|| \leq \Delta$
|
||||||
|
! but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
|
||||||
|
! time a constant to ensure the condition $||\textbf{x}(\lambda =
|
||||||
|
! -h_1)|| = \Delta$ and escape from the saddle point
|
||||||
|
|
||||||
|
! Thus to find the solution, we can write:
|
||||||
|
! \begin{align*}
|
||||||
|
! ||\textbf{x}(\lambda)|| = \Delta
|
||||||
|
! \end{align*}
|
||||||
|
! \begin{align*}
|
||||||
|
! ||\textbf{x}(\lambda)|| - \Delta = 0
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! Taking the square of this equation
|
||||||
|
! \begin{align*}
|
||||||
|
! (||\textbf{x}(\lambda)|| - \Delta)^2 = 0
|
||||||
|
! \end{align*}
|
||||||
|
! we have a function with one minimum for the optimal $\lambda$.
|
||||||
|
! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve
|
||||||
|
! \begin{align*}
|
||||||
|
! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! But in practice, it is more effective to solve:
|
||||||
|
! \begin{align*}
|
||||||
|
! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! To do that, we just use the Newton method with "trust_newton" using
|
||||||
|
! first and second derivative of $(||\textbf{x}(\lambda)||^2 -
|
||||||
|
! \Delta^2)^2$ with respect to $\textbf{x}$.
|
||||||
|
! This will give the optimal $\lambda$ to compute the
|
||||||
|
! solution $\textbf{x}$ with the formula seen previously:
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
|
||||||
|
! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our
|
||||||
|
! step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
! Evolution of the trust region
|
||||||
|
|
||||||
|
! We initialize the trust region at the first iteration using a radius
|
||||||
|
! \begin{align*}
|
||||||
|
! \Delta = ||\textbf{x}(\lambda=0)||
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! And for the next iteration the trust region will evolves depending of
|
||||||
|
! the agreement of the energy prediction based on the Taylor series
|
||||||
|
! truncated at the 2nd order and the real energy. If the Taylor series
|
||||||
|
! truncated at the 2nd order represents correctly the energy landscape
|
||||||
|
! the trust region will be extent else it will be reduced. In order to
|
||||||
|
! mesure this agreement we use the ratio rho cf. "rho_model" and
|
||||||
|
! "trust_e_model". From that we use the following values:
|
||||||
|
! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$,
|
||||||
|
! - if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$,
|
||||||
|
! - if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$,
|
||||||
|
! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$.
|
||||||
|
|
||||||
|
! In addition, if $\rho < 0.1$ the iteration is cancelled, so it
|
||||||
|
! restarts with a smaller trust region until the energy decreases.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
! Summary
|
||||||
|
|
||||||
|
! To summarize, knowing the hessian (eigenvectors and eigenvalues), the
|
||||||
|
! gradient and the radius of the trust region we can compute the norm of
|
||||||
|
! the Newton step
|
||||||
|
! \begin{align*}
|
||||||
|
! ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n
|
||||||
|
! \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! - if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and
|
||||||
|
! $\textbf{x}(\lambda=0)$ is in the trust region and it is not
|
||||||
|
! necessary to put a constraint on $\textbf{x}$, the solution is the
|
||||||
|
! unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$.
|
||||||
|
! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
|
||||||
|
! $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in
|
||||||
|
! the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda =
|
||||||
|
! -h_1)$, similarly to the previous case.
|
||||||
|
! But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
|
||||||
|
! time a constant to ensure the condition $||\textbf{x}(\lambda =
|
||||||
|
! -h_1)|| = \Delta$ and escape from the saddle point
|
||||||
|
! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we
|
||||||
|
! have to search $\lambda \in (-h_1, \infty)$ such as
|
||||||
|
! $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method
|
||||||
|
! \begin{align*}
|
||||||
|
! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
|
||||||
|
! \end{align*}
|
||||||
|
! or
|
||||||
|
! \begin{align*}
|
||||||
|
! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
|
||||||
|
! \end{align*}
|
||||||
|
! which is numerically more stable. And finally compute
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
|
||||||
|
! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
|
||||||
|
! \end{align*}
|
||||||
|
! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we
|
||||||
|
! do exactly the same thing that the previous case but we search
|
||||||
|
! $\lambda \in (0, \infty)$
|
||||||
|
! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
|
||||||
|
! $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the
|
||||||
|
! sum), again we do exactly the same thing that the previous case
|
||||||
|
! searching $\lambda \in (-h_1, \infty)$.
|
||||||
|
|
||||||
|
|
||||||
|
! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not
|
||||||
|
! necessary in fact to remove the $j = 1$ in the sum since the term
|
||||||
|
! where $h_i - \lambda < 10^{-6}$ are not computed.
|
||||||
|
|
||||||
|
! After that, we take this vector $\textbf{x}^*$, called "x", and we do
|
||||||
|
! the transformation to an antisymmetric matrix $\textbf{X}$, called
|
||||||
|
! m_x. This matrix $\textbf{X}$ will be used to compute a rotation
|
||||||
|
! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix".
|
||||||
|
|
||||||
|
! NB:
|
||||||
|
! An improvement can be done using a elleptical trust region.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
! Code
|
||||||
|
|
||||||
|
! Provided:
|
||||||
|
! | mo_num | integer | number of MOs |
|
||||||
|
|
||||||
|
! Cf. qp_edit in orbital optimization section, for some constants/thresholds
|
||||||
|
|
||||||
|
! Input:
|
||||||
|
! | m | integer | number of MOs |
|
||||||
|
! | n | integer | m*(m-1)/2 |
|
||||||
|
! | H(n, n) | double precision | hessian |
|
||||||
|
! | v_grad(n) | double precision | gradient |
|
||||||
|
! | e_val(n) | double precision | eigenvalues of the hessian |
|
||||||
|
! | W(n, n) | double precision | eigenvectors of the hessian |
|
||||||
|
! | rho | double precision | agreement between the model and the reality, |
|
||||||
|
! | | | represents the quality of the energy prediction |
|
||||||
|
! | nb_iter | integer | number of iteration |
|
||||||
|
|
||||||
|
! Input/Ouput:
|
||||||
|
! | delta | double precision | radius of the trust region |
|
||||||
|
|
||||||
|
! Output:
|
||||||
|
! | x(n) | double precision | vector containing the step |
|
||||||
|
|
||||||
|
! Internal:
|
||||||
|
! | accu | double precision | temporary variable to compute the step |
|
||||||
|
! | lambda | double precision | lagrange multiplier |
|
||||||
|
! | trust_radius2 | double precision | square of the radius of the trust region |
|
||||||
|
! | norm2_x | double precision | norm^2 of the vector x |
|
||||||
|
! | norm2_g | double precision | norm^2 of the vector containing the gradient |
|
||||||
|
! | tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g |
|
||||||
|
! | i, j, k | integer | indexes |
|
||||||
|
|
||||||
|
! Function:
|
||||||
|
! | dnrm2 | double precision | Blas function computing the norm |
|
||||||
|
! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) |
|
||||||
|
|
||||||
|
|
||||||
|
subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compuet the step in the trust region
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer, intent(in) :: n
|
||||||
|
double precision, intent(in) :: v_grad(n), rho
|
||||||
|
integer, intent(inout) :: nb_iter
|
||||||
|
double precision, intent(in) :: e_val(n), w(n,n)
|
||||||
|
|
||||||
|
! inout
|
||||||
|
double precision, intent(inout) :: delta
|
||||||
|
|
||||||
|
! out
|
||||||
|
double precision, intent(out) :: x(n)
|
||||||
|
|
||||||
|
! Internal
|
||||||
|
double precision :: accu, lambda, trust_radius2
|
||||||
|
double precision :: norm2_x, norm2_g
|
||||||
|
double precision, allocatable :: tmp_wtg(:)
|
||||||
|
integer :: i,j,k
|
||||||
|
double precision :: t1,t2,t3
|
||||||
|
integer :: n_neg_eval
|
||||||
|
|
||||||
|
|
||||||
|
! Functions
|
||||||
|
double precision :: ddot, dnrm2
|
||||||
|
double precision :: f_norm_trust_region_omp
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'=================='
|
||||||
|
print*,'---Trust_region---'
|
||||||
|
print*,'=================='
|
||||||
|
|
||||||
|
call wall_time(t1)
|
||||||
|
|
||||||
|
! Allocation
|
||||||
|
allocate(tmp_wtg(n))
|
||||||
|
|
||||||
|
! Initialization and norm
|
||||||
|
|
||||||
|
! The norm of the step size will be useful for the trust region
|
||||||
|
! algorithm. We start from a first guess and the radius of the trust
|
||||||
|
! region will evolve during the optimization.
|
||||||
|
|
||||||
|
! avoid_saddle is actually a test to avoid saddle points
|
||||||
|
|
||||||
|
|
||||||
|
! Initialization of the Lagrange multiplier
|
||||||
|
lambda = 0d0
|
||||||
|
|
||||||
|
! List of w^T.g, to avoid the recomputation
|
||||||
|
tmp_wtg = 0d0
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Replacement of the small tmp_wtg corresponding to a negative eigenvalue
|
||||||
|
! in the case of avoid_saddle
|
||||||
|
if (avoid_saddle .and. e_val(1) < - thresh_eig) then
|
||||||
|
i = 2
|
||||||
|
! Number of negative eigenvalues
|
||||||
|
do while (e_val(i) < - thresh_eig)
|
||||||
|
if (tmp_wtg(i) < thresh_wtg2) then
|
||||||
|
if (version_avoid_saddle == 1) then
|
||||||
|
tmp_wtg(i) = 1d0
|
||||||
|
elseif (version_avoid_saddle == 2) then
|
||||||
|
tmp_wtg(i) = DABS(e_val(i))
|
||||||
|
elseif (version_avoid_saddle == 3) then
|
||||||
|
tmp_wtg(i) = dsqrt(DABS(e_val(i)))
|
||||||
|
else
|
||||||
|
tmp_wtg(i) = thresh_wtg2
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
i = i + 1
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! For the fist one it's a little bit different
|
||||||
|
if (tmp_wtg(1) < thresh_wtg2) then
|
||||||
|
tmp_wtg(1) = 0d0
|
||||||
|
endif
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Norm^2 of x, ||x||^2
|
||||||
|
norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
|
||||||
|
! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta
|
||||||
|
! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm
|
||||||
|
! Anyway if the step is too big it will be reduced
|
||||||
|
print*,'||x||^2 :', norm2_x
|
||||||
|
|
||||||
|
! Norm^2 of the gradient, ||v_grad||^2
|
||||||
|
norm2_g = (dnrm2(n,v_grad,1))**2
|
||||||
|
print*,'||grad||^2 :', norm2_g
|
||||||
|
|
||||||
|
! Trust radius initialization
|
||||||
|
|
||||||
|
! At the first iteration (nb_iter = 0) we initialize the trust region
|
||||||
|
! with the norm of the step generate by the Newton's method ($\textbf{x}_1 =
|
||||||
|
! (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$,
|
||||||
|
! we compute this norm using f_norm_trust_region_omp as explain just
|
||||||
|
! below)
|
||||||
|
|
||||||
|
|
||||||
|
! trust radius
|
||||||
|
if (nb_iter == 0) then
|
||||||
|
trust_radius2 = norm2_x
|
||||||
|
! To avoid infinite loop of cancellation of this first step
|
||||||
|
! without changing delta
|
||||||
|
nb_iter = 1
|
||||||
|
|
||||||
|
! Compute delta, delta = sqrt(trust_radius)
|
||||||
|
delta = dsqrt(trust_radius2)
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Modification of the trust radius
|
||||||
|
|
||||||
|
! In function of rho (which represents the agreement between the model
|
||||||
|
! and the reality, cf. rho_model) the trust region evolves. We update
|
||||||
|
! delta (the radius of the trust region).
|
||||||
|
|
||||||
|
! To avoid too big trust region we put a maximum size.
|
||||||
|
|
||||||
|
|
||||||
|
! Modification of the trust radius in function of rho
|
||||||
|
if (rho >= 0.75d0) then
|
||||||
|
delta = 2d0 * delta
|
||||||
|
elseif (rho >= 0.5d0) then
|
||||||
|
delta = delta
|
||||||
|
elseif (rho >= 0.25d0) then
|
||||||
|
delta = 0.5d0 * delta
|
||||||
|
else
|
||||||
|
delta = 0.25d0 * delta
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Maximum size of the trust region
|
||||||
|
!if (delta > 0.5d0 * n * pi) then
|
||||||
|
! delta = 0.5d0 * n * pi
|
||||||
|
! print*,'Delta > delta_max, delta = 0.5d0 * n * pi'
|
||||||
|
!endif
|
||||||
|
|
||||||
|
if (delta > 1d10) then
|
||||||
|
delta = 1d10
|
||||||
|
endif
|
||||||
|
|
||||||
|
print*, 'Delta :', delta
|
||||||
|
|
||||||
|
! Calculation of the optimal lambda
|
||||||
|
|
||||||
|
! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$
|
||||||
|
! - If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant
|
||||||
|
! $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$
|
||||||
|
! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the
|
||||||
|
! unconstrained one, $\lambda = 0$
|
||||||
|
|
||||||
|
! You will find more details at the beginning
|
||||||
|
|
||||||
|
|
||||||
|
! By giving delta, we search (||x||^2 - delta^2)^2 = 0
|
||||||
|
! and not (||x||^2 - delta)^2 = 0
|
||||||
|
|
||||||
|
! Research of lambda to solve ||x(lambda)|| = Delta
|
||||||
|
|
||||||
|
! Display
|
||||||
|
print*, 'e_val(1) = ', e_val(1)
|
||||||
|
print*, 'w_1^T.g =', tmp_wtg(1)
|
||||||
|
|
||||||
|
! H positive definite
|
||||||
|
if (e_val(1) > - thresh_eig) then
|
||||||
|
norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
|
||||||
|
print*, '||x(0)||=', dsqrt(norm2_x)
|
||||||
|
print*, 'Delta=', delta
|
||||||
|
|
||||||
|
! H positive definite, ||x(lambda = 0)|| <= Delta
|
||||||
|
if (dsqrt(norm2_x) <= delta) then
|
||||||
|
print*, 'H positive definite, ||x(lambda = 0)|| <= Delta'
|
||||||
|
print*, 'lambda = 0, no lambda optimization'
|
||||||
|
lambda = 0d0
|
||||||
|
|
||||||
|
! H positive definite, ||x(lambda = 0)|| > Delta
|
||||||
|
else
|
||||||
|
! Constraint solution
|
||||||
|
print*, 'H positive definite, ||x(lambda = 0)|| > Delta'
|
||||||
|
print*,'Computation of the optimal lambda...'
|
||||||
|
call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
|
||||||
|
endif
|
||||||
|
|
||||||
|
! H indefinite
|
||||||
|
else
|
||||||
|
if (DABS(tmp_wtg(1)) < thresh_wtg) then
|
||||||
|
norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1))
|
||||||
|
print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x)
|
||||||
|
endif
|
||||||
|
|
||||||
|
! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta
|
||||||
|
if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then
|
||||||
|
! Add e_val(1) in order to have (H - e_val(1) I) positive definite
|
||||||
|
print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta'
|
||||||
|
print*, 'lambda = -e_val(1), no lambda optimization'
|
||||||
|
lambda = - e_val(1)
|
||||||
|
|
||||||
|
! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta
|
||||||
|
! and
|
||||||
|
! H indefinite, w_1^T.g =/= 0
|
||||||
|
else
|
||||||
|
! Constraint solution/ add lambda
|
||||||
|
if (DABS(tmp_wtg(1)) < thresh_wtg) then
|
||||||
|
print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta'
|
||||||
|
else
|
||||||
|
print*, 'H indefinite, w_1^T.g =/= 0'
|
||||||
|
endif
|
||||||
|
print*, 'Computation of the optimal lambda...'
|
||||||
|
call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
|
||||||
|
endif
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Recomputation of the norm^2 of the step x
|
||||||
|
norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
|
||||||
|
print*,''
|
||||||
|
print*,'Summary after the trust region:'
|
||||||
|
print*,'lambda:', lambda
|
||||||
|
print*,'||x||:', dsqrt(norm2_x)
|
||||||
|
print*,'delta:', delta
|
||||||
|
|
||||||
|
! Calculation of the step x
|
||||||
|
|
||||||
|
! x refers to $\textbf{x}^*$
|
||||||
|
! We compute x in function of lambda using its formula :
|
||||||
|
! \begin{align*}
|
||||||
|
! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i
|
||||||
|
! + \lambda} \cdot \textbf{w}_i
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
|
||||||
|
! Initialisation
|
||||||
|
x = 0d0
|
||||||
|
|
||||||
|
! Calculation of the step x
|
||||||
|
|
||||||
|
! Normal version
|
||||||
|
if (.not. absolute_eig) then
|
||||||
|
|
||||||
|
do i = 1, n
|
||||||
|
if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
|
||||||
|
do j = 1, n
|
||||||
|
x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda)
|
||||||
|
enddo
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Version to use the absolute value of the eigenvalues
|
||||||
|
else
|
||||||
|
|
||||||
|
do i = 1, n
|
||||||
|
if (DABS(e_val(i)) > thresh_eig) then
|
||||||
|
do j = 1, n
|
||||||
|
x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda)
|
||||||
|
enddo
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
double precision :: beta, norm_x
|
||||||
|
|
||||||
|
! Test
|
||||||
|
! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1)
|
||||||
|
! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first
|
||||||
|
! eigenvectors multiply by a constant to ensure the condition
|
||||||
|
! ||x(lambda=-e_val(1))|| = delta and escape the saddle point
|
||||||
|
if (avoid_saddle .and. e_val(1) < - thresh_eig) then
|
||||||
|
if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then
|
||||||
|
|
||||||
|
! norm of x
|
||||||
|
norm_x = dnrm2(n,x,1)
|
||||||
|
|
||||||
|
! Computes the coefficient for the w_1
|
||||||
|
beta = delta**2 - norm_x**2
|
||||||
|
|
||||||
|
! Updates the step x
|
||||||
|
x = x + W(:,1) * dsqrt(beta)
|
||||||
|
|
||||||
|
! Recomputes the norm to check
|
||||||
|
norm_x = dnrm2(n,x,1)
|
||||||
|
|
||||||
|
print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):'
|
||||||
|
print*, '||x||', norm_x
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Transformation of x
|
||||||
|
|
||||||
|
! x is a vector of size n, so it can be write as a m by m
|
||||||
|
! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index".
|
||||||
|
|
||||||
|
|
||||||
|
! ! Step transformation vector -> matrix
|
||||||
|
! ! Vector with n element -> mo_num by mo_num matrix
|
||||||
|
! do j = 1, m
|
||||||
|
! do i = 1, m
|
||||||
|
! if (i>j) then
|
||||||
|
! call mat_to_vec_index(i,j,k)
|
||||||
|
! m_x(i,j) = x(k)
|
||||||
|
! else
|
||||||
|
! m_x(i,j) = 0d0
|
||||||
|
! endif
|
||||||
|
! enddo
|
||||||
|
! enddo
|
||||||
|
!
|
||||||
|
! ! Antisymmetrization of the previous matrix
|
||||||
|
! do j = 1, m
|
||||||
|
! do i = 1, m
|
||||||
|
! if (i<j) then
|
||||||
|
! m_x(i,j) = - m_x(j,i)
|
||||||
|
! endif
|
||||||
|
! enddo
|
||||||
|
! enddo
|
||||||
|
|
||||||
|
! Deallocation, end
|
||||||
|
|
||||||
|
|
||||||
|
deallocate(tmp_wtg)
|
||||||
|
|
||||||
|
call wall_time(t2)
|
||||||
|
t3 = t2 - t1
|
||||||
|
print*,'Time in trust_region:', t3
|
||||||
|
print*,'======================'
|
||||||
|
print*,'---End trust_region---'
|
||||||
|
print*,'======================'
|
||||||
|
print*,''
|
||||||
|
|
||||||
|
end
|
726
src/utils_trust_region/trust_region_step.org
Normal file
726
src/utils_trust_region/trust_region_step.org
Normal file
@ -0,0 +1,726 @@
|
|||||||
|
* Trust region
|
||||||
|
|
||||||
|
*Compute the next step with the trust region algorithm*
|
||||||
|
|
||||||
|
The Newton method is an iterative method to find a minimum of a given
|
||||||
|
function. It uses a Taylor series truncated at the second order of the
|
||||||
|
targeted function and gives its minimizer. The minimizer is taken as
|
||||||
|
the new position and the same thing is done. And by doing so
|
||||||
|
iteratively the method find a minimum, a local or global one depending
|
||||||
|
of the starting point and the convexity/nonconvexity of the targeted
|
||||||
|
function.
|
||||||
|
|
||||||
|
The goal of the trust region is to constrain the step size of the
|
||||||
|
Newton method in a certain area around the actual position, where the
|
||||||
|
Taylor series is a good approximation of the targeted function. This
|
||||||
|
area is called the "trust region".
|
||||||
|
|
||||||
|
In addition, in function of the agreement between the Taylor
|
||||||
|
development of the energy and the real energy, the size of the trust
|
||||||
|
region will be updated at each iteration. By doing so, the step sizes
|
||||||
|
are not too larges. In addition, since we add a criterion to cancel the
|
||||||
|
step if the energy increases (more precisely if rho < 0.1), so it's
|
||||||
|
impossible to diverge. \newline
|
||||||
|
|
||||||
|
References: \newline
|
||||||
|
Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline
|
||||||
|
https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline
|
||||||
|
ISBN: 978-0-387-40065-5 \newline
|
||||||
|
|
||||||
|
By using the first and the second derivatives, the Newton method gives
|
||||||
|
a step:
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot
|
||||||
|
\textbf{g}_{(k)}
|
||||||
|
\end{align*}
|
||||||
|
which leads to the minimizer of the Taylor series.
|
||||||
|
!!! Warning: the Newton method gives the minimizer if and only if
|
||||||
|
$\textbf{H}$ is positive definite, else it leads to a saddle point !!!
|
||||||
|
But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm:
|
||||||
|
\begin{align*}
|
||||||
|
||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)}
|
||||||
|
\end{align*}
|
||||||
|
which is equivalent to
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
with: \newline
|
||||||
|
$\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of
|
||||||
|
size n) \newline
|
||||||
|
$\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n
|
||||||
|
matrix) \newline
|
||||||
|
$\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of
|
||||||
|
size n) \newline
|
||||||
|
$\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration
|
||||||
|
\newline
|
||||||
|
|
||||||
|
Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a
|
||||||
|
hypersphere of radius $\Delta_{(k+1)}$.\newline
|
||||||
|
|
||||||
|
So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and
|
||||||
|
$\textbf{H}$ is positive definite, the
|
||||||
|
solution is the step given by the Newton method
|
||||||
|
$\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$.
|
||||||
|
Else we have to constrain the step size. For simplicity we will remove
|
||||||
|
the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have
|
||||||
|
to put a constraint on $\textbf{x}$ with a Lagrange multiplier.
|
||||||
|
Starting from the Taylor series of a function E (here, the energy)
|
||||||
|
truncated at the 2nd order, we have:
|
||||||
|
\begin{align*}
|
||||||
|
E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2}
|
||||||
|
\cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +
|
||||||
|
\mathcal{O}(\textbf{x}^2)
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
With the constraint on the norm of $\textbf{x}$ we can write the
|
||||||
|
Lagrangian
|
||||||
|
\begin{align*}
|
||||||
|
\mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x}
|
||||||
|
+ \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x}
|
||||||
|
+ \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2)
|
||||||
|
\end{align*}
|
||||||
|
Where: \newline
|
||||||
|
$\lambda$ is the Lagrange multiplier \newline
|
||||||
|
$E$ is the energy at the k-th iteration $\Leftrightarrow
|
||||||
|
E(\textbf{x} = \textbf{0})$ \newline
|
||||||
|
|
||||||
|
To solve this equation, we search a stationary point where the first
|
||||||
|
derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e.
|
||||||
|
\begin{align*}
|
||||||
|
\frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
The derivative is:
|
||||||
|
\begin{align*}
|
||||||
|
\frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
|
||||||
|
= \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
So, we search $\textbf{x}$ such as:
|
||||||
|
\begin{align*}
|
||||||
|
\frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}
|
||||||
|
= \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
We can rewrite that as:
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x}
|
||||||
|
= \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0
|
||||||
|
\end{align*}
|
||||||
|
with $\textbf{I}$ is the identity matrix.
|
||||||
|
|
||||||
|
By doing so, the solution is:
|
||||||
|
\begin{align*}
|
||||||
|
(\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g}
|
||||||
|
\end{align*}
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
|
||||||
|
\end{align*}
|
||||||
|
with $\textbf{x}^T \textbf{x} = \Delta^2$.
|
||||||
|
|
||||||
|
We have to solve this previous equation to find this $\textbf{x}$ in the
|
||||||
|
trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is
|
||||||
|
just a one dimension problem because we can express $\textbf{x}$ as a
|
||||||
|
function of $\lambda$:
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
We start from the fact that the hessian is diagonalizable. So we have:
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T
|
||||||
|
\end{align*}
|
||||||
|
with: \newline
|
||||||
|
$\textbf{H}$, the hessian matrix \newline
|
||||||
|
$\textbf{W}$, the matrix containing the eigenvectors \newline
|
||||||
|
$\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline
|
||||||
|
$\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline
|
||||||
|
$h_i$, the i-th eigenvalue in ascending order \newline
|
||||||
|
|
||||||
|
Now we use the fact that adding a constant on the diagonal just shifts
|
||||||
|
the eigenvalues:
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h}
|
||||||
|
+\textbf{I} \lambda) \cdot \textbf{W}^T
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
By doing so we can express $\textbf{x}$ as a function of $\lambda$
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
|
||||||
|
\textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
|
||||||
|
\end{align*}
|
||||||
|
with $\lambda \neq - h_i$.
|
||||||
|
|
||||||
|
An interesting thing in our case is the norm of $\textbf{x}$,
|
||||||
|
because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of
|
||||||
|
the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have:
|
||||||
|
\begin{align*}
|
||||||
|
||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot
|
||||||
|
\textbf{g})^2}{(h_i + \lambda)^2}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$.
|
||||||
|
And if we study the properties of this function we see that:
|
||||||
|
\begin{align*}
|
||||||
|
\lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0
|
||||||
|
\end{align*}
|
||||||
|
and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$:
|
||||||
|
\begin{align*}
|
||||||
|
\lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
From these limits and knowing that $h_1$ is the lowest eigenvalue, we
|
||||||
|
can conclude that $||\textbf{x}(\lambda)||$ is a continuous and
|
||||||
|
strictly decreasing function on the interval $\lambda \in
|
||||||
|
(-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which
|
||||||
|
gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one
|
||||||
|
solution.
|
||||||
|
|
||||||
|
Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot
|
||||||
|
\textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly,
|
||||||
|
$\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the
|
||||||
|
Newton method is only defined for a positive definite hessian matrix,
|
||||||
|
so $(\textbf{H} + \textbf{I} \lambda)$ must be positive
|
||||||
|
definite. Consequently, in the case where $\textbf{H}$ is not positive
|
||||||
|
definite, to ensure the positive definiteness, $\lambda$ must be
|
||||||
|
greater than $- h_1$.
|
||||||
|
\begin{align*}
|
||||||
|
\lambda > 0 \quad \text{and} \quad \lambda \geq - h_1
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
From that there are five cases:
|
||||||
|
- if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$
|
||||||
|
- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot
|
||||||
|
\textbf{g} \neq 0$, $(\textbf{H} + \textbf{I}
|
||||||
|
\lambda)$
|
||||||
|
must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$
|
||||||
|
- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
|
||||||
|
\textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing
|
||||||
|
$j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
|
||||||
|
positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$)
|
||||||
|
- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot
|
||||||
|
\textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing
|
||||||
|
$j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be
|
||||||
|
positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is
|
||||||
|
similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda =
|
||||||
|
0)|| \leq \Delta$
|
||||||
|
but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
|
||||||
|
time a constant to ensure the condition $||\textbf{x}(\lambda =
|
||||||
|
-h_1)|| = \Delta$ and escape from the saddle point
|
||||||
|
|
||||||
|
Thus to find the solution, we can write:
|
||||||
|
\begin{align*}
|
||||||
|
||\textbf{x}(\lambda)|| = \Delta
|
||||||
|
\end{align*}
|
||||||
|
\begin{align*}
|
||||||
|
||\textbf{x}(\lambda)|| - \Delta = 0
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
Taking the square of this equation
|
||||||
|
\begin{align*}
|
||||||
|
(||\textbf{x}(\lambda)|| - \Delta)^2 = 0
|
||||||
|
\end{align*}
|
||||||
|
we have a function with one minimum for the optimal $\lambda$.
|
||||||
|
Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve
|
||||||
|
\begin{align*}
|
||||||
|
(||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
But in practice, it is more effective to solve:
|
||||||
|
\begin{align*}
|
||||||
|
(\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
To do that, we just use the Newton method with "trust_newton" using
|
||||||
|
first and second derivative of $(||\textbf{x}(\lambda)||^2 -
|
||||||
|
\Delta^2)^2$ with respect to $\textbf{x}$.
|
||||||
|
This will give the optimal $\lambda$ to compute the
|
||||||
|
solution $\textbf{x}$ with the formula seen previously:
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
|
||||||
|
\textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our
|
||||||
|
step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$.
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
** Evolution of the trust region
|
||||||
|
|
||||||
|
We initialize the trust region at the first iteration using a radius
|
||||||
|
\begin{align*}
|
||||||
|
\Delta = ||\textbf{x}(\lambda=0)||
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
And for the next iteration the trust region will evolves depending of
|
||||||
|
the agreement of the energy prediction based on the Taylor series
|
||||||
|
truncated at the 2nd order and the real energy. If the Taylor series
|
||||||
|
truncated at the 2nd order represents correctly the energy landscape
|
||||||
|
the trust region will be extent else it will be reduced. In order to
|
||||||
|
mesure this agreement we use the ratio rho cf. "rho_model" and
|
||||||
|
"trust_e_model". From that we use the following values:
|
||||||
|
- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$,
|
||||||
|
- if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$,
|
||||||
|
- if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$,
|
||||||
|
- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$.
|
||||||
|
|
||||||
|
In addition, if $\rho < 0.1$ the iteration is cancelled, so it
|
||||||
|
restarts with a smaller trust region until the energy decreases.
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
** Summary
|
||||||
|
|
||||||
|
To summarize, knowing the hessian (eigenvectors and eigenvalues), the
|
||||||
|
gradient and the radius of the trust region we can compute the norm of
|
||||||
|
the Newton step
|
||||||
|
\begin{align*}
|
||||||
|
||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n
|
||||||
|
\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and
|
||||||
|
$\textbf{x}(\lambda=0)$ is in the trust region and it is not
|
||||||
|
necessary to put a constraint on $\textbf{x}$, the solution is the
|
||||||
|
unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$.
|
||||||
|
- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
|
||||||
|
$||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in
|
||||||
|
the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda =
|
||||||
|
-h_1)$, similarly to the previous case.
|
||||||
|
But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$
|
||||||
|
time a constant to ensure the condition $||\textbf{x}(\lambda =
|
||||||
|
-h_1)|| = \Delta$ and escape from the saddle point
|
||||||
|
- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we
|
||||||
|
have to search $\lambda \in (-h_1, \infty)$ such as
|
||||||
|
$\textbf{x}(\lambda) = \Delta$ by solving with the Newton method
|
||||||
|
\begin{align*}
|
||||||
|
(||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0
|
||||||
|
\end{align*}
|
||||||
|
or
|
||||||
|
\begin{align*}
|
||||||
|
(\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0
|
||||||
|
\end{align*}
|
||||||
|
which is numerically more stable. And finally compute
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot
|
||||||
|
\textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i
|
||||||
|
\end{align*}
|
||||||
|
- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we
|
||||||
|
do exactly the same thing that the previous case but we search
|
||||||
|
$\lambda \in (0, \infty)$
|
||||||
|
- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and
|
||||||
|
$||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the
|
||||||
|
sum), again we do exactly the same thing that the previous case
|
||||||
|
searching $\lambda \in (-h_1, \infty)$.
|
||||||
|
|
||||||
|
|
||||||
|
For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not
|
||||||
|
necessary in fact to remove the $j = 1$ in the sum since the term
|
||||||
|
where $h_i - \lambda < 10^{-6}$ are not computed.
|
||||||
|
|
||||||
|
After that, we take this vector $\textbf{x}^*$, called "x", and we do
|
||||||
|
the transformation to an antisymmetric matrix $\textbf{X}$, called
|
||||||
|
m_x. This matrix $\textbf{X}$ will be used to compute a rotation
|
||||||
|
matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix".
|
||||||
|
|
||||||
|
NB:
|
||||||
|
An improvement can be done using a elleptical trust region.
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
** Code
|
||||||
|
|
||||||
|
Provided:
|
||||||
|
| mo_num | integer | number of MOs |
|
||||||
|
|
||||||
|
Cf. qp_edit in orbital optimization section, for some constants/thresholds
|
||||||
|
|
||||||
|
Input:
|
||||||
|
| m | integer | number of MOs |
|
||||||
|
| n | integer | m*(m-1)/2 |
|
||||||
|
| H(n, n) | double precision | hessian |
|
||||||
|
| v_grad(n) | double precision | gradient |
|
||||||
|
| e_val(n) | double precision | eigenvalues of the hessian |
|
||||||
|
| W(n, n) | double precision | eigenvectors of the hessian |
|
||||||
|
| rho | double precision | agreement between the model and the reality, |
|
||||||
|
| | | represents the quality of the energy prediction |
|
||||||
|
| nb_iter | integer | number of iteration |
|
||||||
|
|
||||||
|
Input/Ouput:
|
||||||
|
| delta | double precision | radius of the trust region |
|
||||||
|
|
||||||
|
Output:
|
||||||
|
| x(n) | double precision | vector containing the step |
|
||||||
|
|
||||||
|
Internal:
|
||||||
|
| accu | double precision | temporary variable to compute the step |
|
||||||
|
| lambda | double precision | lagrange multiplier |
|
||||||
|
| trust_radius2 | double precision | square of the radius of the trust region |
|
||||||
|
| norm2_x | double precision | norm^2 of the vector x |
|
||||||
|
| norm2_g | double precision | norm^2 of the vector containing the gradient |
|
||||||
|
| tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g |
|
||||||
|
| i, j, k | integer | indexes |
|
||||||
|
|
||||||
|
Function:
|
||||||
|
| dnrm2 | double precision | Blas function computing the norm |
|
||||||
|
| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) |
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
|
||||||
|
subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compuet the step in the trust region
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer, intent(in) :: n
|
||||||
|
double precision, intent(in) :: v_grad(n), rho
|
||||||
|
integer, intent(inout) :: nb_iter
|
||||||
|
double precision, intent(in) :: e_val(n), w(n,n)
|
||||||
|
|
||||||
|
! inout
|
||||||
|
double precision, intent(inout) :: delta
|
||||||
|
|
||||||
|
! out
|
||||||
|
double precision, intent(out) :: x(n)
|
||||||
|
|
||||||
|
! Internal
|
||||||
|
double precision :: accu, lambda, trust_radius2
|
||||||
|
double precision :: norm2_x, norm2_g
|
||||||
|
double precision, allocatable :: tmp_wtg(:)
|
||||||
|
integer :: i,j,k
|
||||||
|
double precision :: t1,t2,t3
|
||||||
|
integer :: n_neg_eval
|
||||||
|
|
||||||
|
|
||||||
|
! Functions
|
||||||
|
double precision :: ddot, dnrm2
|
||||||
|
double precision :: f_norm_trust_region_omp
|
||||||
|
|
||||||
|
print*,''
|
||||||
|
print*,'=================='
|
||||||
|
print*,'---Trust_region---'
|
||||||
|
print*,'=================='
|
||||||
|
|
||||||
|
call wall_time(t1)
|
||||||
|
|
||||||
|
! Allocation
|
||||||
|
allocate(tmp_wtg(n))
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
|
||||||
|
*** Initialization and norm
|
||||||
|
|
||||||
|
The norm of the step size will be useful for the trust region
|
||||||
|
algorithm. We start from a first guess and the radius of the trust
|
||||||
|
region will evolve during the optimization.
|
||||||
|
|
||||||
|
avoid_saddle is actually a test to avoid saddle points
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
|
||||||
|
! Initialization of the Lagrange multiplier
|
||||||
|
lambda = 0d0
|
||||||
|
|
||||||
|
! List of w^T.g, to avoid the recomputation
|
||||||
|
tmp_wtg = 0d0
|
||||||
|
do j = 1, n
|
||||||
|
do i = 1, n
|
||||||
|
tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Replacement of the small tmp_wtg corresponding to a negative eigenvalue
|
||||||
|
! in the case of avoid_saddle
|
||||||
|
if (avoid_saddle .and. e_val(1) < - thresh_eig) then
|
||||||
|
i = 2
|
||||||
|
! Number of negative eigenvalues
|
||||||
|
do while (e_val(i) < - thresh_eig)
|
||||||
|
if (tmp_wtg(i) < thresh_wtg2) then
|
||||||
|
if (version_avoid_saddle == 1) then
|
||||||
|
tmp_wtg(i) = 1d0
|
||||||
|
elseif (version_avoid_saddle == 2) then
|
||||||
|
tmp_wtg(i) = DABS(e_val(i))
|
||||||
|
elseif (version_avoid_saddle == 3) then
|
||||||
|
tmp_wtg(i) = dsqrt(DABS(e_val(i)))
|
||||||
|
else
|
||||||
|
tmp_wtg(i) = thresh_wtg2
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
i = i + 1
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! For the fist one it's a little bit different
|
||||||
|
if (tmp_wtg(1) < thresh_wtg2) then
|
||||||
|
tmp_wtg(1) = 0d0
|
||||||
|
endif
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Norm^2 of x, ||x||^2
|
||||||
|
norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
|
||||||
|
! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta
|
||||||
|
! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm
|
||||||
|
! Anyway if the step is too big it will be reduced
|
||||||
|
print*,'||x||^2 :', norm2_x
|
||||||
|
|
||||||
|
! Norm^2 of the gradient, ||v_grad||^2
|
||||||
|
norm2_g = (dnrm2(n,v_grad,1))**2
|
||||||
|
print*,'||grad||^2 :', norm2_g
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
*** Trust radius initialization
|
||||||
|
|
||||||
|
At the first iteration (nb_iter = 0) we initialize the trust region
|
||||||
|
with the norm of the step generate by the Newton's method ($\textbf{x}_1 =
|
||||||
|
(\textbf{H}_0)^{-1} \cdot \textbf{g}_0$,
|
||||||
|
we compute this norm using f_norm_trust_region_omp as explain just
|
||||||
|
below)
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
|
||||||
|
! trust radius
|
||||||
|
if (nb_iter == 0) then
|
||||||
|
trust_radius2 = norm2_x
|
||||||
|
! To avoid infinite loop of cancellation of this first step
|
||||||
|
! without changing delta
|
||||||
|
nb_iter = 1
|
||||||
|
|
||||||
|
! Compute delta, delta = sqrt(trust_radius)
|
||||||
|
delta = dsqrt(trust_radius2)
|
||||||
|
endif
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
*** Modification of the trust radius
|
||||||
|
|
||||||
|
In function of rho (which represents the agreement between the model
|
||||||
|
and the reality, cf. rho_model) the trust region evolves. We update
|
||||||
|
delta (the radius of the trust region).
|
||||||
|
|
||||||
|
To avoid too big trust region we put a maximum size.
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
|
||||||
|
! Modification of the trust radius in function of rho
|
||||||
|
if (rho >= 0.75d0) then
|
||||||
|
delta = 2d0 * delta
|
||||||
|
elseif (rho >= 0.5d0) then
|
||||||
|
delta = delta
|
||||||
|
elseif (rho >= 0.25d0) then
|
||||||
|
delta = 0.5d0 * delta
|
||||||
|
else
|
||||||
|
delta = 0.25d0 * delta
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Maximum size of the trust region
|
||||||
|
!if (delta > 0.5d0 * n * pi) then
|
||||||
|
! delta = 0.5d0 * n * pi
|
||||||
|
! print*,'Delta > delta_max, delta = 0.5d0 * n * pi'
|
||||||
|
!endif
|
||||||
|
|
||||||
|
if (delta > 1d10) then
|
||||||
|
delta = 1d10
|
||||||
|
endif
|
||||||
|
|
||||||
|
print*, 'Delta :', delta
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
*** Calculation of the optimal lambda
|
||||||
|
|
||||||
|
We search the solution of $(||x||^2 - \Delta^2)^2 = 0$
|
||||||
|
- If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant
|
||||||
|
$\lambda > 0 \quad \text{and} \quad \lambda > -h_1$
|
||||||
|
- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the
|
||||||
|
unconstrained one, $\lambda = 0$
|
||||||
|
|
||||||
|
You will find more details at the beginning
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
|
||||||
|
! By giving delta, we search (||x||^2 - delta^2)^2 = 0
|
||||||
|
! and not (||x||^2 - delta)^2 = 0
|
||||||
|
|
||||||
|
! Research of lambda to solve ||x(lambda)|| = Delta
|
||||||
|
|
||||||
|
! Display
|
||||||
|
print*, 'e_val(1) = ', e_val(1)
|
||||||
|
print*, 'w_1^T.g =', tmp_wtg(1)
|
||||||
|
|
||||||
|
! H positive definite
|
||||||
|
if (e_val(1) > - thresh_eig) then
|
||||||
|
norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0)
|
||||||
|
print*, '||x(0)||=', dsqrt(norm2_x)
|
||||||
|
print*, 'Delta=', delta
|
||||||
|
|
||||||
|
! H positive definite, ||x(lambda = 0)|| <= Delta
|
||||||
|
if (dsqrt(norm2_x) <= delta) then
|
||||||
|
print*, 'H positive definite, ||x(lambda = 0)|| <= Delta'
|
||||||
|
print*, 'lambda = 0, no lambda optimization'
|
||||||
|
lambda = 0d0
|
||||||
|
|
||||||
|
! H positive definite, ||x(lambda = 0)|| > Delta
|
||||||
|
else
|
||||||
|
! Constraint solution
|
||||||
|
print*, 'H positive definite, ||x(lambda = 0)|| > Delta'
|
||||||
|
print*,'Computation of the optimal lambda...'
|
||||||
|
call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
|
||||||
|
endif
|
||||||
|
|
||||||
|
! H indefinite
|
||||||
|
else
|
||||||
|
if (DABS(tmp_wtg(1)) < thresh_wtg) then
|
||||||
|
norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1))
|
||||||
|
print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x)
|
||||||
|
endif
|
||||||
|
|
||||||
|
! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta
|
||||||
|
if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then
|
||||||
|
! Add e_val(1) in order to have (H - e_val(1) I) positive definite
|
||||||
|
print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta'
|
||||||
|
print*, 'lambda = -e_val(1), no lambda optimization'
|
||||||
|
lambda = - e_val(1)
|
||||||
|
|
||||||
|
! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta
|
||||||
|
! and
|
||||||
|
! H indefinite, w_1^T.g =/= 0
|
||||||
|
else
|
||||||
|
! Constraint solution/ add lambda
|
||||||
|
if (DABS(tmp_wtg(1)) < thresh_wtg) then
|
||||||
|
print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta'
|
||||||
|
else
|
||||||
|
print*, 'H indefinite, w_1^T.g =/= 0'
|
||||||
|
endif
|
||||||
|
print*, 'Computation of the optimal lambda...'
|
||||||
|
call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda)
|
||||||
|
endif
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
! Recomputation of the norm^2 of the step x
|
||||||
|
norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda)
|
||||||
|
print*,''
|
||||||
|
print*,'Summary after the trust region:'
|
||||||
|
print*,'lambda:', lambda
|
||||||
|
print*,'||x||:', dsqrt(norm2_x)
|
||||||
|
print*,'delta:', delta
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
*** Calculation of the step x
|
||||||
|
|
||||||
|
x refers to $\textbf{x}^*$
|
||||||
|
We compute x in function of lambda using its formula :
|
||||||
|
\begin{align*}
|
||||||
|
\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i
|
||||||
|
+ \lambda} \cdot \textbf{w}_i
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
|
||||||
|
! Initialisation
|
||||||
|
x = 0d0
|
||||||
|
|
||||||
|
! Calculation of the step x
|
||||||
|
|
||||||
|
! Normal version
|
||||||
|
if (.not. absolute_eig) then
|
||||||
|
|
||||||
|
do i = 1, n
|
||||||
|
if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then
|
||||||
|
do j = 1, n
|
||||||
|
x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda)
|
||||||
|
enddo
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Version to use the absolute value of the eigenvalues
|
||||||
|
else
|
||||||
|
|
||||||
|
do i = 1, n
|
||||||
|
if (DABS(e_val(i)) > thresh_eig) then
|
||||||
|
do j = 1, n
|
||||||
|
x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda)
|
||||||
|
enddo
|
||||||
|
endif
|
||||||
|
enddo
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
double precision :: beta, norm_x
|
||||||
|
|
||||||
|
! Test
|
||||||
|
! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1)
|
||||||
|
! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first
|
||||||
|
! eigenvectors multiply by a constant to ensure the condition
|
||||||
|
! ||x(lambda=-e_val(1))|| = delta and escape the saddle point
|
||||||
|
if (avoid_saddle .and. e_val(1) < - thresh_eig) then
|
||||||
|
if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then
|
||||||
|
|
||||||
|
! norm of x
|
||||||
|
norm_x = dnrm2(n,x,1)
|
||||||
|
|
||||||
|
! Computes the coefficient for the w_1
|
||||||
|
beta = delta**2 - norm_x**2
|
||||||
|
|
||||||
|
! Updates the step x
|
||||||
|
x = x + W(:,1) * dsqrt(beta)
|
||||||
|
|
||||||
|
! Recomputes the norm to check
|
||||||
|
norm_x = dnrm2(n,x,1)
|
||||||
|
|
||||||
|
print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):'
|
||||||
|
print*, '||x||', norm_x
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
*** Transformation of x
|
||||||
|
|
||||||
|
x is a vector of size n, so it can be write as a m by m
|
||||||
|
antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index".
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
|
||||||
|
! ! Step transformation vector -> matrix
|
||||||
|
! ! Vector with n element -> mo_num by mo_num matrix
|
||||||
|
! do j = 1, m
|
||||||
|
! do i = 1, m
|
||||||
|
! if (i>j) then
|
||||||
|
! call mat_to_vec_index(i,j,k)
|
||||||
|
! m_x(i,j) = x(k)
|
||||||
|
! else
|
||||||
|
! m_x(i,j) = 0d0
|
||||||
|
! endif
|
||||||
|
! enddo
|
||||||
|
! enddo
|
||||||
|
!
|
||||||
|
! ! Antisymmetrization of the previous matrix
|
||||||
|
! do j = 1, m
|
||||||
|
! do i = 1, m
|
||||||
|
! if (i<j) then
|
||||||
|
! m_x(i,j) = - m_x(j,i)
|
||||||
|
! endif
|
||||||
|
! enddo
|
||||||
|
! enddo
|
||||||
|
#+END_SRC
|
||||||
|
|
||||||
|
*** Deallocation, end
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f
|
||||||
|
deallocate(tmp_wtg)
|
||||||
|
|
||||||
|
call wall_time(t2)
|
||||||
|
t3 = t2 - t1
|
||||||
|
print*,'Time in trust_region:', t3
|
||||||
|
print*,'======================'
|
||||||
|
print*,'---End trust_region---'
|
||||||
|
print*,'======================'
|
||||||
|
print*,''
|
||||||
|
|
||||||
|
end
|
||||||
|
#+END_SRC
|
||||||
|
|
71
src/utils_trust_region/vec_to_mat_index.irp.f
Normal file
71
src/utils_trust_region/vec_to_mat_index.irp.f
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
! Vector to matrix indexes
|
||||||
|
|
||||||
|
! *Compute the indexes p,q of a matrix element with the vector index i*
|
||||||
|
|
||||||
|
! Vector (i) -> lower diagonal matrix (p,q), p > q
|
||||||
|
|
||||||
|
! If a matrix is antisymmetric it can be reshaped as a vector. And the
|
||||||
|
! vector can be reshaped as an antisymmetric matrix
|
||||||
|
|
||||||
|
! \begin{align*}
|
||||||
|
! \begin{pmatrix}
|
||||||
|
! 0 & -1 & -2 & -4 \\
|
||||||
|
! 1 & 0 & -3 & -5 \\
|
||||||
|
! 2 & 3 & 0 & -6 \\
|
||||||
|
! 4 & 5 & 6 & 0
|
||||||
|
! \end{pmatrix}
|
||||||
|
! \Leftrightarrow
|
||||||
|
! \begin{pmatrix}
|
||||||
|
! 1 & 2 & 3 & 4 & 5 & 6
|
||||||
|
! \end{pmatrix}
|
||||||
|
! \end{align*}
|
||||||
|
|
||||||
|
! !!! Here the algorithm only work for the lower diagonal !!!
|
||||||
|
|
||||||
|
! Input:
|
||||||
|
! | i | integer | index in the vector |
|
||||||
|
|
||||||
|
! Ouput:
|
||||||
|
! | p,q | integer | corresponding indexes in the lower diagonal of a matrix |
|
||||||
|
! | | | p > q, |
|
||||||
|
! | | | p -> row, |
|
||||||
|
! | | | q -> column |
|
||||||
|
|
||||||
|
|
||||||
|
subroutine vec_to_mat_index(i,p,q)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing
|
||||||
|
! its index i a vector
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer,intent(in) :: i
|
||||||
|
|
||||||
|
! out
|
||||||
|
integer, intent(out) :: p,q
|
||||||
|
|
||||||
|
! internal
|
||||||
|
integer :: a,b
|
||||||
|
double precision :: da
|
||||||
|
|
||||||
|
da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i)))
|
||||||
|
a = INT(da)
|
||||||
|
if ((a*(a-1))/2==i) then
|
||||||
|
p = a-1
|
||||||
|
else
|
||||||
|
p = a
|
||||||
|
endif
|
||||||
|
b = p*(p-1)/2
|
||||||
|
|
||||||
|
! Matrix element indexes
|
||||||
|
p = p + 1
|
||||||
|
q = i - b
|
||||||
|
|
||||||
|
end subroutine
|
72
src/utils_trust_region/vec_to_mat_index.org
Normal file
72
src/utils_trust_region/vec_to_mat_index.org
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
* Vector to matrix indexes
|
||||||
|
|
||||||
|
*Compute the indexes p,q of a matrix element with the vector index i*
|
||||||
|
|
||||||
|
Vector (i) -> lower diagonal matrix (p,q), p > q
|
||||||
|
|
||||||
|
If a matrix is antisymmetric it can be reshaped as a vector. And the
|
||||||
|
vector can be reshaped as an antisymmetric matrix
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
\begin{pmatrix}
|
||||||
|
0 & -1 & -2 & -4 \\
|
||||||
|
1 & 0 & -3 & -5 \\
|
||||||
|
2 & 3 & 0 & -6 \\
|
||||||
|
4 & 5 & 6 & 0
|
||||||
|
\end{pmatrix}
|
||||||
|
\Leftrightarrow
|
||||||
|
\begin{pmatrix}
|
||||||
|
1 & 2 & 3 & 4 & 5 & 6
|
||||||
|
\end{pmatrix}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
!!! Here the algorithm only work for the lower diagonal !!!
|
||||||
|
|
||||||
|
Input:
|
||||||
|
| i | integer | index in the vector |
|
||||||
|
|
||||||
|
Ouput:
|
||||||
|
| p,q | integer | corresponding indexes in the lower diagonal of a matrix |
|
||||||
|
| | | p > q, |
|
||||||
|
| | | p -> row, |
|
||||||
|
| | | q -> column |
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f
|
||||||
|
subroutine vec_to_mat_index(i,p,q)
|
||||||
|
|
||||||
|
include 'pi.h'
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing
|
||||||
|
! its index i a vector
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
! Variables
|
||||||
|
|
||||||
|
! in
|
||||||
|
integer,intent(in) :: i
|
||||||
|
|
||||||
|
! out
|
||||||
|
integer, intent(out) :: p,q
|
||||||
|
|
||||||
|
! internal
|
||||||
|
integer :: a,b
|
||||||
|
double precision :: da
|
||||||
|
|
||||||
|
da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i)))
|
||||||
|
a = INT(da)
|
||||||
|
if ((a*(a-1))/2==i) then
|
||||||
|
p = a-1
|
||||||
|
else
|
||||||
|
p = a
|
||||||
|
endif
|
||||||
|
b = p*(p-1)/2
|
||||||
|
|
||||||
|
! Matrix element indexes
|
||||||
|
p = p + 1
|
||||||
|
q = i - b
|
||||||
|
|
||||||
|
end subroutine
|
||||||
|
#+END_SRC
|
39
src/utils_trust_region/vec_to_mat_v2.irp.f
Normal file
39
src/utils_trust_region/vec_to_mat_v2.irp.f
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
! Vect to antisymmetric matrix using mat_to_vec_index
|
||||||
|
|
||||||
|
! Vector to antisymmetric matrix transformation using mat_to_vec_index
|
||||||
|
! subroutine.
|
||||||
|
|
||||||
|
! Can be done in OMP (for the first part and with omp critical for the second)
|
||||||
|
|
||||||
|
|
||||||
|
subroutine vec_to_mat_v2(n,m,v_x,m_x)
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Vector to antisymmetric matrix
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
integer, intent(in) :: n,m
|
||||||
|
double precision, intent(in) :: v_x(n)
|
||||||
|
double precision, intent(out) :: m_x(m,m)
|
||||||
|
|
||||||
|
integer :: i,j,k
|
||||||
|
|
||||||
|
! 1D -> 2D lower diagonal
|
||||||
|
m_x = 0d0
|
||||||
|
do j = 1, m - 1
|
||||||
|
do i = j + 1, m
|
||||||
|
call mat_to_vec_index(i,j,k)
|
||||||
|
m_x(i,j) = v_x(k)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Antisym
|
||||||
|
do i = 1, m - 1
|
||||||
|
do j = i + 1, m
|
||||||
|
m_x(i,j) = - m_x(j,i)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
end
|
40
src/utils_trust_region/vec_to_mat_v2.org
Normal file
40
src/utils_trust_region/vec_to_mat_v2.org
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
* Vect to antisymmetric matrix using mat_to_vec_index
|
||||||
|
|
||||||
|
Vector to antisymmetric matrix transformation using mat_to_vec_index
|
||||||
|
subroutine.
|
||||||
|
|
||||||
|
Can be done in OMP (for the first part and with omp critical for the second)
|
||||||
|
|
||||||
|
#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f
|
||||||
|
subroutine vec_to_mat_v2(n,m,v_x,m_x)
|
||||||
|
|
||||||
|
BEGIN_DOC
|
||||||
|
! Vector to antisymmetric matrix
|
||||||
|
END_DOC
|
||||||
|
|
||||||
|
implicit none
|
||||||
|
|
||||||
|
integer, intent(in) :: n,m
|
||||||
|
double precision, intent(in) :: v_x(n)
|
||||||
|
double precision, intent(out) :: m_x(m,m)
|
||||||
|
|
||||||
|
integer :: i,j,k
|
||||||
|
|
||||||
|
! 1D -> 2D lower diagonal
|
||||||
|
m_x = 0d0
|
||||||
|
do j = 1, m - 1
|
||||||
|
do i = j + 1, m
|
||||||
|
call mat_to_vec_index(i,j,k)
|
||||||
|
m_x(i,j) = v_x(k)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
! Antisym
|
||||||
|
do i = 1, m - 1
|
||||||
|
do j = i + 1, m
|
||||||
|
m_x(i,j) = - m_x(j,i)
|
||||||
|
enddo
|
||||||
|
enddo
|
||||||
|
|
||||||
|
end
|
||||||
|
#+END_SRC
|
@ -1 +0,0 @@
|
|||||||
../../include/f77_zmq_free.h
|
|
@ -1,4 +1,4 @@
|
|||||||
module f77_zmq
|
module f77_zmq
|
||||||
include 'f77_zmq_free.h'
|
#include "f77_zmq_free.h"
|
||||||
end module
|
end module
|
||||||
|
|
Loading…
Reference in New Issue
Block a user